diff --git a/fern/apis/atoms/generators.yml b/fern/apis/atoms/generators.yml index 1e858ed..6127b9f 100644 --- a/fern/apis/atoms/generators.yml +++ b/fern/apis/atoms/generators.yml @@ -1,44 +1,8 @@ # yaml-language-server: $schema=https://schema.buildwithfern.dev/generators-yml.json +# NOTE: SDK generation for atoms is now handled by the unified config (fern/apis/unified/generators.yml). +# This file only retains the API definition for reference by the unified config and docs. + api: specs: - openapi: openapi/openapi.yaml - -groups: - ts-sdk: - generators: - - name: fernapi/fern-typescript-node-sdk - version: 2.6.3 - output: - location: npm - package-name: smallest-ai-atoms-sdk - github: - repository: fern-demo/smallest-ai-atoms-ts-sdk - config: - namespaceExport: SmallestAtoms - python-sdk: - generators: - - name: fernapi/fern-python-sdk - version: 4.25.6 - output: - location: pypi - package-name: smallest-ai-atoms-sdk - github: - repository: fern-demo/smallest-ai-atoms-python-sdk - config: - client: - class_name: SmallestAtoms - filename: client.py - pydantic_config: - skip_validation: true - exclude_types_from_init_exports: true - go-sdk: - generators: - - name: fernapi/fern-go-sdk - version: 1.1.0 - # output: - # location: local-file-system - # path: ../../../sdks/atoms/go - github: - repository: fern-demo/smallest-ai-atoms-go-sdk - config: - packageName: smallestatoms + overrides: openapi/openapi-overrides.yaml diff --git a/fern/apis/atoms/openapi/ai_examples_override.yml b/fern/apis/atoms/openapi/ai_examples_override.yml new file mode 100644 index 0000000..1c71e5d --- /dev/null +++ b/fern/apis/atoms/openapi/ai_examples_override.yml @@ -0,0 +1,602 @@ +paths: + /campaign/id: + delete: + x-fern-examples: + - path-parameters: + id: c9f1a7d2-3b4e-4f6a-9d2e-8a7b5c1d2e3f + response: + body: + status: true + get: + x-fern-examples: + - path-parameters: + id: id + response: + body: + status: true + data: + campaign: + _id: 63f1a2b4e4b0c72a9d8f1234 + name: Spring 
Product Launch + description: Campaign to promote the new spring product line + organization: org_987654321 + agentId: agent_123456789 + createdBy: user_456789123 + audienceId: aud_321654987 + participantsCount: 1200 + status: running + maxRetries: 3 + retryDelay: 15 + scheduledAt: '2024-04-01T08:00:00Z' + createdAt: '2024-03-15T10:00:00Z' + updatedAt: '2024-03-20T15:45:00Z' + events: + - _id: evt_789456123 + campaignId: 63f1a2b4e4b0c72a9d8f1234 + triggerSource: system_scheduler + eventAction: campaign_started + createdAt: '2024-04-01T08:00:00Z' + metrics: + total_participants: 1200 + contacts_called: 850 + contacts_connected: 600 + /knowledgebase: + get: + x-fern-examples: + - response: + body: + status: true + data: + - _id: kb_63f9a1b2e4b0c123456789ab + name: Customer Support FAQs + description: A comprehensive knowledge base containing frequently asked questions and answers for customer support. + organization: org_9876543210abcdef12345678 + createdBy: user_1234567890abcdef12345678 + createdAt: '2024-01-15T09:30:00Z' + updatedAt: '2024-01-15T09:30:00Z' + post: + x-fern-examples: + - request: + name: Customer Support Knowledge Base + response: + body: + status: true + data: 60d0fe4f5311236168a109ca + /campaign: + get: + x-fern-examples: + - response: + body: + status: true + data: + campaigns: + - _id: 5f8d0d55b54764421b7156c1 + name: Spring Product Launch + description: Campaign to promote the new spring product line across social media and email. 
+ organization: org_12345 + agentId: agent_67890 + createdBy: user_54321 + audienceId: aud_98765 + participantsCount: 1500 + createdAt: '2024-01-15T09:30:00Z' + updatedAt: '2024-02-10T14:45:00Z' + isCampaignInProgress: true + isCampaignCompleted: false + post: + x-fern-examples: + - request: + name: Spring Product Launch + audienceId: 60d0fe4f5311236168a109ca + agentId: 60d0fe4f5311236168a109cb + response: + body: + status: true + data: + _id: 642f1b2e9c1a4b3f8d7e4c12 + name: Spring Product Launch + description: Campaign targeting new customers for the spring product line + organization: org_987654321 + agentId: 60d0fe4f5311236168a109cb + createdBy: user_123456789 + audienceId: 60d0fe4f5311236168a109ca + participantsCount: 1500 + createdAt: '2024-04-10T08:45:00Z' + updatedAt: '2024-04-10T08:45:00Z' + /campaign/id/pause: + post: + x-fern-examples: + - path-parameters: + id: id + response: + body: + status: true + /agent/agentId/webhook-subscriptions: + get: + x-fern-examples: + - path-parameters: + agentId: agentId + response: + body: + status: true + data: + - _id: 63f9a1b2e4b0c123456789ab + webhookId: wh_9876543210abcdef12345678 + agentId: agent_1234567890abcdef123456 + eventType: pre-conversation + createdAt: '2024-01-15T09:30:00Z' + updatedAt: '2024-01-15T09:30:00Z' + delete: + x-fern-examples: + - path-parameters: + agentId: agentId + response: + body: + status: true + data: Subscriptions deleted successfully + /knowledgebase/id: + delete: + x-fern-examples: + - path-parameters: + id: kb-9f8e7d6c5b4a3 + response: + body: + status: true + get: + x-fern-examples: + - path-parameters: + id: id + response: + body: + status: true + data: + _id: kb_63f8a1b2e4b0c123456789ab + name: Customer Support FAQs + description: A comprehensive knowledge base containing frequently asked questions and troubleshooting guides for customer support. 
+ organization: org_9f8e7d6c5b4a3210fedcba98 + createdBy: user_1234567890abcdef12345678 + createdAt: '2024-01-15T09:30:00Z' + updatedAt: '2024-01-15T09:30:00Z' + /agent/id/workflow: + get: + x-fern-examples: + - path-parameters: + id: id + response: + body: + status: true + data: + type: workflow_graph + data: + nodes: + - id: node-1 + type: default_node + position: + x: 100.5 + 'y': 200.75 + data: + title: Welcome Message + message: Hello! How can I assist you today? + - id: node-2 + type: pre_call_api + position: + x: 300 + 'y': 200 + data: + apiEndpoint: https://api.example.com/userinfo + method: GET + description: Fetch user information before proceeding + - id: node-3 + type: end_call + position: + x: 500 + 'y': 200 + data: + message: Thank you for contacting us. Goodbye! + edges: + - id: edge-1 + source: node-1 + target: node-2 + type: default + - id: edge-2 + source: node-2 + target: node-3 + type: default + /workflow/60d0fe4f5311236168a109ca: + patch: + x-fern-examples: + - path-parameters: + id: 60d0fe4f5311236168a109ca + response: + body: + status: true + data: Workflow updated successfully + /organization: + get: + x-fern-examples: + - response: + body: + status: true + data: + _id: 60d21b4667d0d8992e610c85 + name: Acme Corporation + members: + - _id: 60d21b4967d0d8992e610c86 + userEmail: jane.doe@acmecorp.com + - _id: 60d21b4f67d0d8992e610c87 + userEmail: john.smith@acmecorp.com + subscription: + planId: premium-annual-2024 + /agent/template: + get: + x-fern-examples: + - response: + body: + status: true + data: + - id: a1b2c3d4-e5f6-7890-ab12-cd34ef567890 + name: Customer Support Agent + description: Template for handling customer support inquiries with polite and helpful responses. 
+ avatarUrl: https://example.com/avatars/support-agent.png + referenceUrl: https://docs.smallest.ai/agent-templates/customer-support + category: Support + /audience/60d0fe4f5311236168a109ca/members: + delete: + x-fern-examples: + - path-parameters: + id: 60d0fe4f5311236168a109ca + request: + memberIds: + - 60d0fe4f5311236168a109cd + - 60d0fe4f5311236168a109ce + - 60d0fe4f5311236168a109cf + response: + body: + status: true + data: + deletedCount: 3 + /knowledgebase/id/items/upload-media: + post: + x-fern-examples: + - path-parameters: + id: id + request: + media: QmFzZTY0RW5jb2RlZFBkZkZpbGVEYXRh + response: + body: + status: true + /webhook/id: + delete: + x-fern-examples: + - path-parameters: + id: a3f47b9e-8c2d-4f1a-9b7e-2d5f6c8a1e4b + response: + body: + status: true + /webhook: + get: + x-fern-examples: + - response: + body: + status: true + data: + _id: 642f1c9e8a4b2f0012345678 + url: https://hooks.example.com/webhook/receive + description: Order processing webhook for e-commerce platform + status: enabled + organizationId: org_9a8b7c6d5e4f3210 + createdBy: user_1234567890abcdef + subscriptions: + - _id: sub_abcdef1234567890 + webhookId: 642f1c9e8a4b2f0012345678 + agentId: + _id: agent_0987654321fedcba + name: Order Fulfillment Agent + description: Handles order fulfillment events + eventType: pre-conversation + createdAt: '2024-01-15T09:30:00Z' + updatedAt: '2024-01-15T09:30:00Z' + decryptedSecretKey: s3cr3tK3yForSigning123! 
+ createdAt: '2024-01-15T09:30:00Z' + updatedAt: '2024-01-15T09:30:00Z' + /user: + get: + x-fern-examples: + - response: + body: + status: true + data: + _id: 642f1c3e9b1e8a001f4d2a7b + firstName: Emily + lastName: Johnson + userEmail: emily.johnson@example.com + authProvider: google + isEmailVerified: true + organizationId: org_9f8b7c6d5e4a3b2c1d0e + /agent/id: + delete: + x-fern-examples: + - path-parameters: + id: id + response: + body: + status: true + patch: + x-fern-examples: + - path-parameters: + id: id + response: + body: + status: true + data: 60d0fe4f5311236168a109ca + get: + x-fern-examples: + - path-parameters: + id: id + response: + body: + status: true + data: + _id: a1b2c3d4e5f6789012345678 + name: Customer Support Agent + description: Handles customer inquiries and support requests + backgroundSound: call_center + organization: org_987654321 + workflowId: wf_123456789 + workflowType: workflow_graph + createdBy: user_456789123 + globalKnowledgeBaseId: kb_1122334455 + language: + enabled: en + switching: + isEnabled: true + minWordsForDetection: 3 + strongSignalThreshold: 0.85 + weakSignalThreshold: 0.5 + minConsecutiveForWeakThresholdSwitch: 2 + supported: + - en + - hi + - ta + synthesizer: + voiceConfig: + model: waves_lightning_large + voiceId: nyah + gender: female + speed: 1.2 + consistency: 0.5 + similarity: 0 + enhancement: 1 + slmModel: electron + defaultVariables: + timezone: UTC + greetingMessage: Hello! How can I assist you today? 
+ createdAt: '2024-01-15T09:30:00Z' + updatedAt: '2024-01-15T09:30:00Z' + /agent/from-template: + post: + x-fern-examples: + - request: + agentName: Customer Support Bot + templateId: template-12345abcde + response: + body: + status: true + data: 60d0fe4f5311236168a109ca + /campaign/id/start: + post: + x-fern-examples: + - path-parameters: + id: id + response: + body: + status: true + /knowledgebase/knowledgeBaseId/items/knowledgeBaseItemId: + delete: + x-fern-examples: + - path-parameters: + knowledgeBaseId: knowledgeBaseId + knowledgeBaseItemId: knowledgeBaseItemId + response: + body: + status: true + /knowledgebase/id/items: + get: + x-fern-examples: + - path-parameters: + id: id + response: + body: + status: true + data: + - _id: 64b8f9a2e4b0c123456789ab + itemType: file + metadata: + author: Jane Doe + description: User guide for the new software release + knowledgeBaseId: kb_9876543210abcdef + processingStatus: completed + fileName: User_Guide_v2.pdf + contentType: application/pdf + size: 245760 + key: files/kb_9876543210abcdef/User_Guide_v2.pdf + title: Software Release User Guide + content: '' + createdAt: '2024-01-15T09:30:00Z' + updatedAt: '2024-01-20T14:45:00Z' + /audience/60d0fe4f5311236168a109ca: + delete: + x-fern-examples: + - path-parameters: + id: 60d0fe4f5311236168a109ca + response: + body: + status: true + data: [] + /agent: + post: + x-fern-examples: + - request: + name: Customer Support Agent + response: + body: + status: true + data: 60d0fe4f5311236168a109ca + get: + x-fern-examples: + - response: + body: + status: true + data: + agents: + - _id: 64b8f9a2e4b0c123456789ab + name: Customer Support Bot + description: Handles customer queries and support tickets + backgroundSound: call_center + organization: org_987654321 + workflowId: wf_123abc456def + workflowType: workflow_graph + createdBy: user_123456789 + globalKnowledgeBaseId: kb_456def789abc + language: + enabled: en + switching: + isEnabled: true + minWordsForDetection: 3 + 
strongSignalThreshold: 0.85 + weakSignalThreshold: 0.5 + minConsecutiveForWeakThresholdSwitch: 2 + supported: + - en + - hi + synthesizer: + voiceConfig: + model: waves_lightning_large + voiceId: nyah + gender: female + speed: 1.2 + consistency: 0.5 + similarity: 0 + enhancement: 1 + slmModel: electron + defaultVariables: + greeting: Hello! How can I assist you today? + createdAt: '2024-01-15T09:30:00Z' + updatedAt: '2024-04-10T14:45:00Z' + total: 1 + /conversation: + get: + x-fern-examples: + - query-parameters: + page: 2 + limit: 10 + agentIds: 60d0fe4f5311236168a109ca,60d0fe4f5311236168a109cb + campaignIds: 70d1fe4f5311236168a209ca,70d1fe4f5311236168a209cb + callTypes: telephony_outbound,telephony_inbound + search: '+15551234567' + statusFilter: completed,failed + disconnectReasonFilter: user_hangup,agent_hangup + callAttemptFilter: initial + durationFilter: 0-30,30-60 + response: + body: + status: true + data: + logs: + - _id: 60d0fe4f5311236168a109ca + callId: CALL-1737000000123-xyz789 + status: completed + duration: 185 + from: '+15551234567' + to: '+15559876543' + type: telephony_outbound + agentId: 60d0fe4f5311236168a109ca + agentName: John Doe + recordingUrl: https://storage.atoms.ai/recordings/call-xyz789.mp3 + recordingDualUrl: https://storage.atoms.ai/recordings/call-xyz789-dual.mp3 + disconnectionReason: user_hangup + retryCount: 1 + createdAt: '2025-01-15T10:30:00.000Z' + dispositionMetrics: + interested: 'yes' + follow_up_needed: 'no' + agentDispositionConfig: + - identifier: interested + type: boolean + - identifier: follow_up_needed + type: boolean + pagination: + total: 150 + page: 2 + limit: 10 + hasMore: true + totalPages: 15 + dispositionMetricsConfig: + - identifier: interested + type: boolean + - identifier: follow_up_needed + type: boolean + /conversation/CALL-1737000000000-abc123: + get: + x-fern-examples: + - path-parameters: + id: CALL-1737000000000-abc123 + response: + body: + status: true + data: + _id: CALL-1737000000000-abc123 + 
callId: CALL-1737000000000-abc123 + agent: + _id: agent789 + name: Support Agent Alpha + description: Handles inbound customer support calls + backgroundSound: call_center + organization: org456 + workflowId: wf123 + workflowType: workflow_graph + createdBy: user321 + globalKnowledgeBaseId: kb987 + language: + enabled: en + switching: + isEnabled: true + minWordsForDetection: 3 + strongSignalThreshold: 0.85 + weakSignalThreshold: 0.5 + minConsecutiveForWeakThresholdSwitch: 2 + supported: + - en + - hi + synthesizer: + voiceConfig: + model: waves_lightning_large + voiceId: nyah + gender: female + speed: 1.2 + consistency: 0.5 + similarity: 0 + enhancement: 1 + slmModel: electron + defaultVariables: {} + createdAt: '2024-01-15T09:30:00Z' + updatedAt: '2024-01-15T09:30:00Z' + status: completed + duration: 320 + recordingUrl: https://recordings.example.com/calls/CALL-1737000000000-abc123.mp3 + from: '+14155552671' + to: '+14155559876' + transcript: + - Hello, thank you for calling Support. How can I assist you today? + - Hi, I'm having trouble accessing my account. + - I can help you with that. Can you please provide your account number? + - Sure, it's 123456789. + - Thank you. Let me check your account details. 
+ average_transcriber_latency: 0.75 + average_agent_latency: 1.2 + average_synthesizer_latency: 0.9 + type: telephony_inbound + voiceConfigUsed: + model: waves_lightning_large + voiceId: nyah + gender: female + slmModelUsed: electron diff --git a/fern/apis/atoms/openapi/openapi-overrides.yaml b/fern/apis/atoms/openapi/openapi-overrides.yaml new file mode 100644 index 0000000..ca3207a --- /dev/null +++ b/fern/apis/atoms/openapi/openapi-overrides.yaml @@ -0,0 +1,15 @@ +# Override file for atoms openapi.yaml +# Ignores the "*" parameter which causes TypeScript code generation issues +# The "*" parameter was meant to document dynamic query params but generates invalid TS code + +paths: + /audience/{id}/members/search: + get: + parameters: + - name: id + in: path + - name: query + in: query + - name: "*" + in: query + x-fern-ignore: true diff --git a/fern/apis/atoms/openapi/openapi.yaml b/fern/apis/atoms/openapi/openapi.yaml index ab5a6df..035538c 100644 --- a/fern/apis/atoms/openapi/openapi.yaml +++ b/fern/apis/atoms/openapi/openapi.yaml @@ -207,8 +207,8 @@ paths: name: offset schema: type: integer - default: 5 - description: Number of items to return + default: 10 + description: Number of items to return per page - in: query name: search schema: @@ -340,8 +340,8 @@ paths: properties: enabled: type: string - enum: [en, hi, ta, kn] - description: The language of the agent. You can choose from the list of supported languages. + enum: [en, hi, ta] + description: "The language of the agent. Supported: 'en' (English), 'hi' (Hindi), 'ta' (Tamil)." default: en switching: type: object @@ -377,7 +377,7 @@ paths: default: 2 synthesizer: type: object - description: Synthesizer configuration for the agent. You can configure the synthesizer to use different voices and models. Currently we support 3 types of models for the synthesizer. Waves, Waves Lightning Large and Waves Lightning Large Voice Clone. 
You can clone your voice using waves platform https://waves.smallest.ai/voice-clone and use the voiceId for this field and select the model as waves_lightning_large_voice_clone to use your cloned voice. When updating the synthesizer configuration to voice clone model, you have to provide model and voiceId and gender all are required fields but when selecting the model as waves or waves and waves_lightning_large, you have to provide only model field and voiceId. + description: Synthesizer configuration for the agent. You can configure the synthesizer to use different voices and models. Currently we support 3 types of models for the synthesizer. Waves, Waves Lightning Large and Waves Lightning Large Voice Clone. You can clone your voice using waves platform https://app.smallest.ai/waves/voice-clone and use the voiceId for this field and select the model as waves_lightning_large_voice_clone to use your cloned voice. When updating the synthesizer configuration to voice clone model, you have to provide model and voiceId and gender all are required fields but when selecting the model as waves or waves and waves_lightning_large, you have to provide only model field and voiceId. properties: voiceConfig: oneOf: @@ -635,9 +635,293 @@ paths: "500": $ref: "#/components/responses/InternalServerErrorResponse" + /conversation: + get: + summary: Get all conversation logs + description: Retrieve paginated conversation logs with support for various filters. Returns call logs for agents belonging to the authenticated user's organization. 
+ tags: + - Logs + security: + - BearerAuth: [] + parameters: + - in: query + name: page + schema: + type: integer + default: 1 + minimum: 1 + description: Page number for pagination + example: 1 + - in: query + name: limit + schema: + type: integer + default: 5 + minimum: 1 + description: Number of items per page + example: 10 + - in: query + name: agentIds + schema: + type: string + description: Comma-separated list of agent IDs to filter by + example: "60d0fe4f5311236168a109ca,60d0fe4f5311236168a109cb" + - in: query + name: campaignIds + schema: + type: string + description: Comma-separated list of campaign IDs to filter by + example: "60d0fe4f5311236168a109ca,60d0fe4f5311236168a109cb" + - in: query + name: callTypes + schema: + type: string + enum: + - telephony_inbound + - telephony_outbound + - chat + description: Comma-separated list of call types to filter by + example: "telephony_outbound,telephony_inbound" + - in: query + name: search + schema: + type: string + description: Search query to filter by callId, fromNumber, or toNumber + example: "+1234567890" + - in: query + name: statusFilter + schema: + type: string + description: | + Comma-separated list of call statuses to filter by. + Available statuses: pending, in_progress, completed, failed, no_answer, cancelled, busy + example: "completed,failed" + - in: query + name: disconnectReasonFilter + schema: + type: string + description: | + Comma-separated list of disconnect reasons to filter by. + Available reasons: user_hangup, agent_hangup, connection_error, timeout, system_error, transfer_complete + example: "user_hangup,agent_hangup" + - in: query + name: callAttemptFilter + schema: + type: string + description: | + Comma-separated list of call attempt types to filter by. 
+ Available filters: initial (first attempt calls), retry (retry attempt calls), all (all calls) + example: "initial" + - in: query + name: durationFilter + schema: + type: string + description: | + Comma-separated list of duration ranges to filter by. + Available ranges: 0-30 (0-30 seconds), 30-60 (30-60 seconds), 1-5 (1-5 minutes), 5+ (more than 5 minutes) + example: "0-30,30-60" + responses: + "200": + description: Successful response + content: + application/json: + schema: + type: object + properties: + status: + type: boolean + example: true + data: + type: object + properties: + logs: + type: array + items: + type: object + properties: + _id: + type: string + description: The database ID of the call log + example: "60d0fe4f5311236168a109ca" + callId: + type: string + description: The unique call identifier + example: "CALL-1737000000000-abc123" + status: + type: string + description: The status of the call + enum: [pending, in_progress, completed, failed, no_answer, cancelled, busy] + example: "completed" + duration: + type: number + description: The duration of the call in seconds + example: 120 + from: + type: string + description: The phone number the call was made from + example: "+15551234567" + to: + type: string + description: The phone number the call was made to + example: "+15559876543" + type: + type: string + description: The type of call + enum: [telephony_inbound, telephony_outbound, chat] + example: "telephony_outbound" + agentId: + type: string + description: The ID of the agent that handled the call + example: "60d0fe4f5311236168a109ca" + agentName: + type: string + description: The name of the agent + example: "Sales Agent" + recordingUrl: + type: string + description: URL to the call recording (if available) + example: "https://storage.example.com/recordings/call-123.mp3" + recordingDualUrl: + type: string + description: URL to the dual-channel call recording (if available) + example: 
"https://storage.example.com/recordings/call-123-dual.mp3" + disconnectionReason: + type: string + description: The reason the call was disconnected + example: "user_hangup" + retryCount: + type: integer + description: Number of retry attempts for this call + example: 0 + createdAt: + type: string + format: date-time + description: When the call was created + example: "2025-01-15T10:30:00.000Z" + dispositionMetrics: + type: object + description: Custom disposition metrics for the call + additionalProperties: + type: string + example: + interested: "yes" + follow_up_needed: "no" + agentDispositionConfig: + type: array + description: Configuration for disposition metrics + items: + type: object + properties: + identifier: + type: string + type: + type: string + pagination: + type: object + properties: + total: + type: integer + description: Total number of matching call logs + example: 150 + page: + type: integer + description: Current page number + example: 1 + limit: + type: integer + description: Number of items per page (page size) + example: 10 + hasMore: + type: boolean + description: Whether there are more pages available + example: true + totalPages: + type: integer + description: Total number of pages + example: 15 + dispositionMetricsConfig: + type: array + description: Global disposition metrics configuration + items: + type: object + properties: + identifier: + type: string + type: + type: string + "400": + $ref: "#/components/responses/BadRequestError" + "401": + $ref: "#/components/responses/UnauthorizedErrorResponse" + "500": + $ref: "#/components/responses/InternalServerErrorResponse" + + /conversation/search: + post: + summary: Search conversation logs by call IDs + description: | + Fetch specific conversation logs by their callIds. This endpoint allows you to retrieve up to 100 specific calls at once. + Only returns calls that belong to agents in your organization (security check enforced). 
+ Unlike the GET /conversation endpoint, this endpoint can also return retry calls (non-root calls). + tags: + - Logs + security: + - BearerAuth: [] + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - callIds + properties: + callIds: + type: array + items: + type: string + minItems: 1 + maxItems: 100 + description: Array of callIds to fetch + example: ["CALL-1737000000000-abc123", "CALL-1737000000001-def456"] + responses: + "200": + description: Successful response + content: + application/json: + schema: + type: object + properties: + status: + type: boolean + example: true + data: + type: object + properties: + logs: + type: array + items: + type: object + description: Call log details (same structure as GET /conversation) + total: + type: integer + description: Number of logs returned + example: 2 + requestedCount: + type: integer + description: Number of callIds requested + example: 3 + "400": + $ref: "#/components/responses/BadRequestError" + "401": + $ref: "#/components/responses/UnauthorizedErrorResponse" + "500": + $ref: "#/components/responses/InternalServerErrorResponse" + /conversation/{id}: get: - summary: Get conversation logs + summary: Get conversation log by ID + description: Retrieve detailed information about a specific conversation including transcript, events, and latency metrics. tags: - Logs security: @@ -646,9 +930,10 @@ paths: - in: path name: id required: true - description: The callId of the conversation. You can get the callId from the conversation logs. + description: The callId of the conversation. You can get the callId from the conversation logs endpoint. 
schema: type: string + example: "CALL-1737000000000-abc123" responses: "200": description: Successful response @@ -791,6 +1076,128 @@ paths: "500": $ref: "#/components/responses/InternalServerErrorResponse" + /events: + get: + summary: Subscribe to live call events (SSE) + description: | + Real-time streaming of user speech (STT) and agent speech (TTS) events for an active call via Server-Sent Events. + + The connection is real-time — events stream directly from the call runtime as they are produced. The SSE connection auto-closes when the call ends (`sse_close` event). Only active calls can be subscribed to; completed calls return a 400 error. + + **Transcript event types:** + + - `user_interim_transcription` — Partial, in-progress transcription as the user speaks. Use for live preview only; will be superseded by `user_transcription`. + - `user_transcription` — Final transcription for a completed user speech turn. + - `tts_completed` — Fired when the agent finishes speaking a TTS segment. Includes the spoken text and optionally TTS latency. + + **Lifecycle events:** + + - `sse_init` — Sent immediately when the SSE connection is established. + - `sse_close` — Sent when the call ends, right before the server closes the connection. + + Other event types (e.g. `call_start`, `call_end`, `turn_latency`, metrics) are also sent on this stream. + tags: + - Calls + security: + - BearerAuth: [] + parameters: + - name: callId + in: query + required: true + description: The call ID to subscribe events for + schema: + type: string + example: "CALL-1758124225863-80752e" + responses: + "200": + description: SSE event stream established successfully + content: + text/event-stream: + schema: + type: object + description: | + Events are sent as `data: \n\n`. Each event has an `event_type` field. 
+ properties: + event_type: + type: string + description: The type of event + enum: + - sse_init + - user_interim_transcription + - user_transcription + - tts_completed + - sse_close + event_id: + type: string + description: Unique identifier for the event + timestamp: + type: string + format: date-time + description: ISO 8601 timestamp of the event + call_id: + type: string + description: The call ID this event belongs to + interim_transcription_text: + type: string + description: Partial transcription text (only for `user_interim_transcription`) + user_transcription_text: + type: string + description: Final transcription text (only for `user_transcription`) + tts_text: + type: string + description: Text spoken by the agent (only for `tts_completed`) + tts_latency: + type: integer + description: TTS latency in milliseconds (only for `tts_completed`) + examples: + sse_init: + summary: SSE connection initialized + value: + event_type: sse_init + event_time: "2026-03-02T10:00:00.000Z" + user_interim_transcription: + summary: Partial user speech + value: + event_type: user_interim_transcription + event_id: evt_abc123 + timestamp: "2026-03-02T10:00:01.123Z" + call_id: "CALL-1758124225863-80752e" + interim_transcription_text: "I wanted to ask about my" + user_transcription: + summary: Final user speech + value: + event_type: user_transcription + event_id: evt_abc456 + timestamp: "2026-03-02T10:00:02.456Z" + call_id: "CALL-1758124225863-80752e" + user_transcription_text: "I wanted to ask about my recent order" + tts_completed: + summary: Agent finished speaking + value: + event_type: tts_completed + event_id: evt_abc789 + timestamp: "2026-03-02T10:00:03.789Z" + call_id: "CALL-1758124225863-80752e" + tts_latency: 245 + tts_text: "Sure, I can help you with your recent order. Could you provide your order number?" 
+ sse_close: + summary: Call ended + value: + event_type: sse_close + event_time: "2026-03-02T10:05:00.000Z" + "400": + description: Missing or invalid callId, or call is already completed + content: + application/json: + schema: + $ref: "#/components/schemas/BadRequestErrorResponse" + "404": + description: Not authorized (org mismatch) or call/agent not found + content: + application/json: + schema: + $ref: "#/components/schemas/ApiResponse" + /campaign: get: summary: Retrieve all campaigns @@ -1495,6 +1902,92 @@ paths: "500": $ref: "#/components/responses/InternalServerErrorResponse" + /product/import-phone-number: + post: + summary: Import a SIP phone number + description: | + Bring your own SIP trunk by importing an existing phone number with its SIP termination URL. + Atoms creates both inbound and outbound SIP trunks so your number works for making and receiving calls through the platform. + tags: + - Phone Numbers + security: + - BearerAuth: [] + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - phoneNumber + - sipTerminationUrl + properties: + phoneNumber: + type: string + description: Your existing phone number in E.164 format + example: "+14155551234" + sipTerminationUrl: + type: string + description: The SIP URI where calls should be routed to your infrastructure + example: "sip:trunk.your-provider.com" + name: + type: string + description: A friendly display name for this number + example: "Main Support Line" + sipUsername: + type: string + description: Username for SIP authentication (if your trunk requires it) + example: "my-sip-user" + sipPassword: + type: string + description: Password for SIP authentication (if your trunk requires it) + example: "my-sip-password" + example: + phoneNumber: "+14155551234" + sipTerminationUrl: "sip:trunk.your-provider.com" + name: "Main Support Line" + sipUsername: "" + sipPassword: "" + responses: + "200": + description: Phone number imported successfully + content: 
+ application/json: + schema: + type: object + properties: + status: + type: boolean + example: true + data: + type: object + properties: + phoneNumber: + type: string + description: The imported phone number + example: "+14155551234" + isActive: + type: boolean + description: Whether the number is active and ready to use + example: true + "400": + description: Bad request — missing required fields or phone number already exists + content: + application/json: + schema: + type: object + properties: + status: + type: boolean + example: false + error: + type: string + example: "Phone number is required" + "401": + $ref: "#/components/responses/UnauthorizedErrorResponse" + "500": + $ref: "#/components/responses/InternalServerErrorResponse" + /webhook: get: summary: Get webhooks @@ -2203,7 +2696,7 @@ paths: minimum: 1 default: 1 example: 1 - - name: offset + - name: limit in: query required: false description: Number of items per page (default is 5) @@ -2709,9 +3202,10 @@ components: description: type: string backgroundSound: - type: boolean - default: false - description: Whether to add ambient background sound during calls. Currently provides office ambience by default. Additional sound options available upon request. + type: string + enum: ["", "office", "cafe", "call_center", "static"] + default: "" + description: "Ambient background sound during calls. Options: '' (none), 'office', 'cafe', 'call_center', 'static'." # visibleToEveryone: # type: boolean # default: false @@ -2721,8 +3215,8 @@ components: properties: enabled: type: string - enum: [en, hi, ta, kn] - description: The language of the agent. You can choose from the list of supported languages. + enum: [en, hi, ta] + description: "The language of the agent. Supported: 'en' (English), 'hi' (Hindi), 'ta' (Tamil)." default: en switching: type: object @@ -2758,7 +3252,7 @@ components: default: 2 synthesizer: type: object - description: Synthesizer configuration for the agent. 
You can configure the synthesizer to use different voices and models. Currently we support 3 types of models for the synthesizer. Waves, Waves Lightning Large and Waves Lightning Large Voice Clone. You can clone your voice using waves platform https://waves.smallest.ai/voice-clone and use the voiceId for this field and select the model as waves_lightning_large_voice_clone to use your cloned voice. When updating the synthesizer configuration to voice clone model, you have to provide model and voiceId and gender all are required fields but when selecting the model as waves or waves and waves_lightning_large, you have to provide only model field and voiceId. + description: Synthesizer configuration for the agent. You can configure the synthesizer to use different voices and models. Currently we support 3 types of models for the synthesizer. Waves, Waves Lightning Large and Waves Lightning Large Voice Clone. You can clone your voice using waves platform https://app.smallest.ai/waves/voice-clone and use the voiceId for this field and select the model as waves_lightning_large_voice_clone to use your cloned voice. When updating the synthesizer configuration to voice clone model, you have to provide model and voiceId and gender all are required fields but when selecting the model as waves or waves and waves_lightning_large, you have to provide only model field and voiceId. properties: voiceConfig: oneOf: @@ -2846,8 +3340,9 @@ components: type: string description: The description of the agent backgroundSound: - type: boolean - description: Whether ambient background sound is enabled during calls + type: string + enum: ["", "office", "cafe", "call_center", "static"] + description: "Ambient background sound during calls. Options: '' (none), 'office', 'cafe', 'call_center', 'static'." 
organization: type: string description: The organization ID of the agent @@ -2871,7 +3366,7 @@ components: enabled: type: string description: The language of the agent - enum: [en, hi, ta, kn] + enum: [en, hi, ta] switching: type: object description: Language switching configuration for the agent @@ -3450,15 +3945,16 @@ tags: description: Operations related to agents - name: Workflows description: Operations related to workflow configuration and management - - name: Call Logs - description: Operations related to agent call logs + - name: Logs + description: Operations related to conversation logs, call history, and recordings. Supports filtering by agents, campaigns, call IDs, status, duration, and more. + - name: Calls + description: Operations related to initiating and managing calls, call logs, and live transcripts (SSE). - name: Phone Numbers description: Operations related to phone numbers - name: Webhooks description: Operations related to webhooks and webhook subscriptions - name: Audience description: Operations related to audience management and CSV uploads - servers: - - url: https://atoms-api.smallest.ai/api/v1 + - url: https://api.smallest.ai/atoms/v1 description: Production server diff --git a/fern/apis/unified/generators.yml b/fern/apis/unified/generators.yml index 59a8bb5..04e248a 100644 --- a/fern/apis/unified/generators.yml +++ b/fern/apis/unified/generators.yml @@ -1,26 +1,55 @@ # yaml-language-server: $schema=https://schema.buildwithfern.dev/generators-yml.json # Unified SDK configuration that combines both atoms and waves APIs as sub-resources +replay: + enabled: true + +auth-schemes: + bearer-auth: + scheme: bearer + token: + env: SMALLEST_AI_TOKEN api: + auth: bearer-auth + default-environment: Production + default-url: atoms + environments: + Production: + urls: + atoms: https://api.smallest.ai/atoms/v1 + waves: https://api.smallest.ai + waves-ws: wss://api.smallest.ai specs: # Atoms API - Agent Management - namespace: atoms openapi: 
../atoms/openapi/openapi.yaml + overrides: ../atoms/openapi/openapi-overrides.yaml # Waves API - Speech/TTS endpoints - namespace: waves openapi: ../waves/openapi/waves-api.yaml + overrides: ../waves/openapi/waves-api-overrides.yaml - namespace: waves openapi: ../waves/openapi/get-voices-openapi.yaml + overrides: ../waves/openapi/get-voices-openapi-overrides.yaml - namespace: waves openapi: ../waves/openapi/add-voice-openapi.yaml + overrides: ../waves/openapi/add-voice-openapi-overrides.yaml - namespace: waves openapi: ../waves/openapi/get-cloned-voices-openapi.yaml + overrides: ../waves/openapi/get-cloned-voices-openapi-overrides.yaml - namespace: waves openapi: ../waves/openapi/delete-cloned-voice-openapi.yaml + overrides: ../waves/openapi/delete-cloned-voice-openapi-overrides.yaml - namespace: waves openapi: ../waves/openapi/asr-openapi.yaml overrides: ../waves/openapi/asr-openapi-overrides.yaml + - namespace: waves + openapi: ../waves/openapi/lightning-v3.1-openapi.yaml + overrides: ../waves/openapi/lightning-v3.1-openapi-overrides.yaml + - namespace: waves + openapi: ../waves/openapi/pulse-stt-openapi.yaml + overrides: ../waves/openapi/pulse-stt-openapi-overrides.yaml # Waves AsyncAPI specs - WebSocket streaming - namespace: waves @@ -35,41 +64,74 @@ api: - namespace: waves asyncapi: ../waves/asyncapi/lightning-v2-ws.yaml overrides: ../waves/asyncapi/lightning-v2-ws-overrides.yml + - namespace: waves + asyncapi: ../waves/asyncapi/lightning-v3.1-ws.yaml + overrides: ../waves/asyncapi/lightning-v3.1-ws-overrides.yml + - namespace: waves + asyncapi: ../waves/asyncapi/pulse-stt-ws.yaml + overrides: ../waves/asyncapi/pulse-stt-ws-overrides.yml groups: python-sdk: generators: - name: fernapi/fern-python-sdk - version: 4.25.6 + version: 4.61.3 output: location: pypi - package-name: smallest-ai + package-name: smallestai github: repository: fern-demo/smallest-ai-python-sdk + mode: pull-request config: + package_name: smallestai + enable_wire_tests: true client: 
class_name: SmallestAI filename: client.py pydantic_config: skip_validation: true exclude_types_from_init_exports: true + should_generate_websocket_clients: true + extra_dependencies: + fastapi: ">=0.115.0" + uvicorn: ">=0.32.0" + loguru: ">=0.7.0" + openai: ">=2.0.0" + python-dotenv: ">=1.0.0" + mypy_exclude: + - src/smallestai/atoms/agent + - src/smallestai/atoms/helpers + - src/smallestai/waves/text_to_speech/client.py + - src/smallestai/waves/text_to_speech/raw_client.py + additional_init_exports: + - from: atoms.client + imports: + - AtomsClient + - AsyncAtomsClient + - from: waves.client + imports: + - WavesClient + - AsyncWavesClient ts-sdk: generators: - name: fernapi/fern-typescript-node-sdk - version: 2.6.3 + version: 3.53.3 output: location: npm - package-name: smallest-ai + package-name: smallestai github: repository: fern-demo/smallest-ai-ts-sdk config: namespaceExport: SmallestAI + packageManager: yarn + testFramework: jest + generateSubpackageExports: true go-sdk: generators: - name: fernapi/fern-go-sdk - version: 1.1.0 + version: 1.26.1 github: repository: fern-demo/smallest-ai-go-sdk config: diff --git a/fern/apis/waves-v4/generators.yml b/fern/apis/waves-v4/generators.yml new file mode 100644 index 0000000..c0605a5 --- /dev/null +++ b/fern/apis/waves-v4/generators.yml @@ -0,0 +1,38 @@ +# yaml-language-server: $schema=https://schema.buildwithfern.dev/generators-yml.json +# V4-only API definition for docs. No SDK generator groups - does not affect SDK generation. 
+api: + default-environment: Production + default-url: waves + auth: BearerAuth + auth-schemes: + BearerAuth: + header: Authorization + prefix: Bearer + environments: + Production: + urls: + waves: https://api.smallest.ai + specs: + # OpenAPI specs - REST endpoints + - openapi: ../waves/openapi/waves-api.yaml + overrides: overrides/waves-api-overrides.yaml + - openapi: ../waves/openapi/lightning-v3.1-openapi.yaml + overrides: overrides/lightning-v3.1-openapi-overrides.yaml + - openapi: ../waves/openapi/get-voices-openapi.yaml + overrides: overrides/get-voices-openapi-overrides.yaml + - openapi: ../waves/openapi/add-voice-openapi.yaml + overrides: overrides/add-voice-openapi-overrides.yaml + - openapi: ../waves/openapi/get-cloned-voices-openapi.yaml + overrides: overrides/get-cloned-voices-openapi-overrides.yaml + - openapi: ../waves/openapi/delete-cloned-voice-openapi.yaml + overrides: overrides/delete-cloned-voice-openapi-overrides.yaml + - openapi: ../waves/openapi/pulse-stt-openapi.yaml + overrides: overrides/pulse-stt-openapi-overrides.yaml + + # AsyncAPI 3.0.0 specs - WebSocket streaming (docs-only, uses waves/asyncapi with v4 overrides) + - asyncapi: ../waves/asyncapi/lightning-v2-ws.yaml + overrides: overrides/lightning-v2-ws-overrides.yml + - asyncapi: ../waves/asyncapi/lightning-v3.1-ws.yaml + overrides: overrides/lightning-v3.1-ws-overrides.yml + - asyncapi: ../waves/asyncapi/pulse-stt-ws.yaml + overrides: overrides/pulse-stt-ws-overrides.yml diff --git a/fern/apis/waves-v4/overrides/add-voice-openapi-overrides.yaml b/fern/apis/waves-v4/overrides/add-voice-openapi-overrides.yaml new file mode 100644 index 0000000..8b02791 --- /dev/null +++ b/fern/apis/waves-v4/overrides/add-voice-openapi-overrides.yaml @@ -0,0 +1,8 @@ +paths: + /waves/v1/lightning-large/add_voice: + post: + x-fern-server-name: waves + x-fern-audiences: + - v4docs + summary: Add your Voice + description: Add your voice using the Waves API. 
\ No newline at end of file diff --git a/fern/apis/waves-v4/overrides/delete-cloned-voice-openapi-overrides.yaml b/fern/apis/waves-v4/overrides/delete-cloned-voice-openapi-overrides.yaml new file mode 100644 index 0000000..934124b --- /dev/null +++ b/fern/apis/waves-v4/overrides/delete-cloned-voice-openapi-overrides.yaml @@ -0,0 +1,8 @@ +paths: + /waves/v1/lightning-large: + delete: + x-fern-server-name: waves + x-fern-audiences: + - v4docs + summary: Delete Cloned Voice + description: Delete a cloned voice using the new Waves API. diff --git a/fern/apis/waves-v4/overrides/get-cloned-voices-openapi-overrides.yaml b/fern/apis/waves-v4/overrides/get-cloned-voices-openapi-overrides.yaml new file mode 100644 index 0000000..83b2937 --- /dev/null +++ b/fern/apis/waves-v4/overrides/get-cloned-voices-openapi-overrides.yaml @@ -0,0 +1,8 @@ +paths: + /waves/v1/lightning-large/get_cloned_voices: + get: + x-fern-server-name: waves + x-fern-audiences: + - v4docs + summary: Get your cloned Voices + description: Retrieve your cloned voices. diff --git a/fern/apis/waves-v4/overrides/get-voices-openapi-overrides.yaml b/fern/apis/waves-v4/overrides/get-voices-openapi-overrides.yaml new file mode 100644 index 0000000..d6d9b4a --- /dev/null +++ b/fern/apis/waves-v4/overrides/get-voices-openapi-overrides.yaml @@ -0,0 +1,17 @@ +paths: + /waves/v1/{model}/get_voices: + get: + x-fern-server-name: waves + x-fern-audiences: + - v4docs + description: Get voices supported for a given model using the new Waves API. 
+ parameters: + - in: path + name: model + schema: + enum: + - lightning + - lightning-large + - lightning-v2 + - lightning-v3.1 + default: lightning-v3.1 diff --git a/fern/apis/waves-v4/overrides/lightning-v2-ws-overrides.yml b/fern/apis/waves-v4/overrides/lightning-v2-ws-overrides.yml new file mode 100644 index 0000000..c44ff74 --- /dev/null +++ b/fern/apis/waves-v4/overrides/lightning-v2-ws-overrides.yml @@ -0,0 +1,28 @@ +# Override file for lightning-v2-ws.yaml (v4 docs) +# Adds server mapping, audience tags, and unique message names + +servers: + production: + x-fern-server-name: waves + +channels: + lightningV2Stream: + x-fern-server-name: waves + x-fern-audiences: + - v4docs + x-fern-sdk-group-name: Text to Speech V2 + title: Lightning v2 WebSocket + description: The Lightning v2 WebSocket API provides real-time text-to-speech streaming capabilities with high-quality voice synthesis. This API uses WebSocket to deliver audio chunks as they're generated, enabling low-latency audio playback without waiting for the entire audio file to process. Perfect for interactive applications, voice assistants, and real-time communication systems that require immediate audio feedback. 
For an end-to-end example of how to use the Lightning v2 WebSocket API, check out [Text to Speech (WS) Example](https://github.com/smallest-inc/waves-examples/tree/main/lightning_v2/ws_streaming) + servers: + - $ref: "#/servers/production" + messages: + lightningV2TtsRequest.message: + name: V4LightningV2TtsRequest + lightningV2TtsResponse.message: + name: V4LightningV2TtsResponse + +operations: + LightningV2TtsRequest: + action: send + LightningV2TtsResponse: + action: receive diff --git a/fern/apis/waves-v4/overrides/lightning-v3.1-openapi-overrides.yaml b/fern/apis/waves-v4/overrides/lightning-v3.1-openapi-overrides.yaml new file mode 100644 index 0000000..cbbd31c --- /dev/null +++ b/fern/apis/waves-v4/overrides/lightning-v3.1-openapi-overrides.yaml @@ -0,0 +1,71 @@ +paths: + /waves/v1/lightning-v3.1/get_speech: + post: + x-fern-server-name: waves + x-fern-audiences: + - v4docs + tags: + - Text to Speech + summary: Lightning v3.1 + description: | + Get speech for given text using the Lightning v3.1 model + + ## Overview + + Lightning v3.1 is a 44 kHz text-to-speech model that delivers natural, expressive, and realistic speech synthesis. + + ## Key Features + + * **Voice Cloning Support**: Compatible with cloned voices + * **Ultra-Low Latency**: Optimized for real-time applications + * **Multi-Language**: Supports English (en) and Hindi (hi) + * **Multiple Output Formats**: PCM, MP3, WAV, and mulaw + * **Flexible Sample Rates**: 8000 Hz to 44100 Hz + * **Speed Control**: Adjustable from 0.5x to 2x speed + /waves/v1/lightning-v3.1/stream: + post: + x-fern-server-name: waves + x-fern-audiences: + - v4docs + tags: + - Text to Speech + summary: Lightning v3.1 SSE + description: | + Stream speech for given text using the Lightning v3.1 SSE API + + ## Overview + + The Lightning v3.1 SSE API provides real-time text-to-speech streaming capabilities with natural, expressive voice synthesis. 
This API uses Server-Sent Events (SSE) to deliver audio chunks as they're generated, enabling low-latency audio playback without waiting for the entire audio file to process. + + Lightning v3.1 is a 44 kHz model that produces natural, expressive, and realistic speech, with support for voice cloning. + + ## When to Use + + * **Interactive Applications**: Perfect for chatbots, virtual assistants, and other applications requiring immediate voice responses + * **Long-Form Content**: Efficiently stream audio for articles, stories, or other long-form content without buffering delays + * **Voice User Interfaces**: Create natural-sounding voice interfaces with minimal perceived latency + * **Accessibility Solutions**: Provide real-time audio versions of written content for users with visual impairments + + ## How It Works + + 1. **Make a POST Request**: Send your text and voice settings to the API endpoint + 2. **Receive Audio Chunks**: The API processes your text and streams audio back as base64-encoded chunks with 1024 byte size + 3. **Process the Stream**: Handle the SSE events to decode and play audio chunks sequentially + 4. 
**End of Stream**: The API sends a completion event when all audio has been delivered + +components: + schemas: + LightningV31Request: + properties: + output_format: + enum: + - mp3 + - pcm + - wav + - ulaw + - alaw + sample_rate: + enum: + - 8000 + - 16000 + - 24000 diff --git a/fern/apis/waves-v4/overrides/lightning-v3.1-ws-overrides.yml b/fern/apis/waves-v4/overrides/lightning-v3.1-ws-overrides.yml new file mode 100644 index 0000000..abd520d --- /dev/null +++ b/fern/apis/waves-v4/overrides/lightning-v3.1-ws-overrides.yml @@ -0,0 +1,79 @@ +# Override file for lightning-v3.1-ws.yaml (v4 docs) +# Adds server mapping, audience tags, and unique message names + +servers: + production: + x-fern-server-name: waves + +channels: + lightningV31Stream: + x-fern-server-name: waves + x-fern-audiences: + - v4docs + x-fern-sdk-group-name: Text to Speech V3.1 + title: Lightning v3.1 WebSocket + description: | + # Lightning v3.1 WebSocket + + The Lightning v3.1 WebSocket API provides real-time text-to-speech streaming capabilities with natural, expressive voice synthesis. This API uses WebSocket to deliver audio chunks as they're generated, enabling low-latency audio playback without waiting for the entire audio file to process. + + ## Overview + + The Lightning v3.1 WebSocket API delivers state-of-the-art text-to-speech synthesis through a persistent WebSocket connection, providing ultra-low latency audio streaming for applications that demand real-time voice generation. Unlike traditional HTTP-based TTS APIs, this WebSocket implementation streams audio chunks as they're processed, significantly reducing perceived latency and enabling seamless user experiences. + + Lightning v3.1 is a 44 kHz model that produces natural, expressive, and realistic speech, with support for voice cloning. 
+ + ## Key Benefits + + * **Ultra-Low Latency**: Audio chunks delivered as soon as they're generated + * **Real-Time Streaming**: Continuous audio delivery without waiting for complete text processing + * **Natural Speech**: Expressive synthesis that sounds realistic + * **Voice Cloning Support**: Compatible with cloned voices + * **Persistent Connection**: Maintains connection for multiple requests, reducing connection overhead + * **Interactive Applications**: Perfect for chatbots, voice assistants, and live communication systems + + ## Use Cases + + * **Voice Assistants**: Real-time response generation for conversational AI + * **Interactive Chatbots**: Immediate audio feedback for user interactions + * **Live Streaming**: Real-time narration and commentary + * **Accessibility Tools**: Screen readers and text-to-speech applications + * **Gaming**: Dynamic voice generation for characters and narration + * **Customer Service**: Automated voice responses with natural speech patterns + + ## Concurrency and Rate Limits + + This WebSocket API is subject to concurrency limits to ensure optimal performance for all users. Here's how it works: + + * **1 Concurrency Unit** = 1 active TTS request that can be processed at any given time + * **5 WebSocket Connections** can be established per concurrency unit + * **Total Connections** = Your concurrency limit × 5 + + **Examples:** + + * **1 concurrency** = Up to 5 WebSocket connections, but only 1 active request + * **3 concurrency** = Up to 15 WebSocket connections, but only 3 active requests simultaneously + * **5 concurrency** = Up to 25 WebSocket connections, but only 5 active requests simultaneously + + While you can maintain multiple WebSocket connections, only your concurrency limit number of requests can be actively processed at once. Additional requests sent through any connection while at the concurrency limit will be rejected with an error. 
+ + For detailed information about concurrency limits, rate limiting, and best practices for handling these constraints, see our [Concurrency and Limits](/waves/api-reference/api-references/concurrency-and-limits) documentation. + + + When multiple requests are sent simultaneously beyond your concurrency limit, + additional requests will be rejected with an error. Implement proper error + handling and request queuing to manage concurrency effectively. + + servers: + - $ref: "#/servers/production" + messages: + lightningV31TtsRequest.message: + name: V4LightningV31TtsRequest + lightningV31TtsResponse.message: + name: V4LightningV31TtsResponse + +operations: + LightningV31TtsRequest: + action: send + LightningV31TtsResponse: + action: receive diff --git a/fern/apis/waves-v4/overrides/pulse-stt-openapi-overrides.yaml b/fern/apis/waves-v4/overrides/pulse-stt-openapi-overrides.yaml new file mode 100644 index 0000000..db7ba7a --- /dev/null +++ b/fern/apis/waves-v4/overrides/pulse-stt-openapi-overrides.yaml @@ -0,0 +1,38 @@ +paths: + /waves/v1/pulse/get_text: + post: + x-fern-server-name: waves + x-fern-audiences: + - v4docs + x-fern-sdk-group-name: Speech to Text + x-fern-sdk-method-name: pulse + operationId: pulseSpeechToText + tags: + - Speech to Text + summary: Pulse (Pre-Recorded) + description: | + Convert speech to text using file upload with the Pulse STT POST API + + The STT POST API allows you to convert speech to text using two different input methods: + + 1. **Raw Audio Bytes** (`application/octet-stream`) - Send raw audio data with all parameters as query parameters + 2. **Audio URL** (`application/json`) - Provide only a URL to an audio file in the JSON body, with all other parameters as query parameters + + Both methods use our Pulse STT model with automatic language detection across 30+ languages. 
+ responses: + "413": + content: + application/json: + example: + error: + code: "413" + message: "File size exceeds maximum limit of 25MB" + status: error + "429": + content: + application/json: + example: + error: + code: "429" + message: "Rate limit exceeded. Please try again later." + status: error diff --git a/fern/apis/waves-v4/overrides/pulse-stt-ws-overrides.yml b/fern/apis/waves-v4/overrides/pulse-stt-ws-overrides.yml new file mode 100644 index 0000000..9a690ba --- /dev/null +++ b/fern/apis/waves-v4/overrides/pulse-stt-ws-overrides.yml @@ -0,0 +1,153 @@ +# Override file for pulse-stt-ws.yaml (v4 docs) +# Adds server mapping, audience tags, unique message names, and query params as channel parameters +# Query params are in server bindings in the base spec, but Fern only extracts channel parameters + +servers: + production: + x-fern-server-name: waves + +channels: + pulseStream: + x-fern-server-name: waves + x-fern-audiences: + - v4docs + x-fern-sdk-group-name: Speech to Text + title: Pulse (Realtime) + description: | + ## Overview + The Pulse STT WebSocket API provides real-time speech-to-text transcription capabilities with streaming audio input. This API uses WebSocket to deliver transcription results as audio is processed, enabling low-latency transcription without waiting for the entire audio file to upload. Perfect for live transcription, voice assistants, and real-time communication systems that require immediate speech recognition. Supports multiple languages, word-level timestamps, sentence-level timestamps (utterances), PII and PCI redaction, cumulative transcripts, and more advanced features. + servers: + - $ref: "#/servers/production" + parameters: + language: + description: "Language code for transcription. 
Use 'multi' for automatic language detection" + enum: + - it + - es + - en + - pt + - hi + - de + - fr + - uk + - ru + - kn + - ml + - pl + - mr + - gu + - cs + - sk + - te + - or + - nl + - bn + - lv + - et + - ro + - pa + - fi + - sv + - bg + - ta + - hu + - da + - lt + - mt + - multi + default: en + required: false + location: $message.header#/language + encoding: + description: "Audio encoding format" + enum: + - linear16 + - linear32 + - alaw + - mulaw + - opus + - ogg_opus + default: linear16 + required: false + location: $message.header#/encoding + sample_rate: + description: "Audio sample rate in Hz. Supported values: 8000, 16000, 22050, 24000, 44100, 48000" + default: "16000" + required: false + location: $message.header#/sample_rate + word_timestamps: + description: "Include word-level timestamps in transcription" + enum: + - "true" + - "false" + default: "true" + required: false + location: $message.header#/word_timestamps + full_transcript: + description: "Include cumulative transcript received till now in responses where is_final is true" + enum: + - "true" + - "false" + default: "false" + required: false + location: $message.header#/full_transcript + sentence_timestamps: + description: "Include sentence-level timestamps (utterances) in transcription" + enum: + - "true" + - "false" + default: "false" + required: false + location: $message.header#/sentence_timestamps + redact_pii: + description: "Redact personally identifiable information (name, surname, address, etc)" + enum: + - "true" + - "false" + default: "false" + required: false + location: $message.header#/redact_pii + redact_pci: + description: "Redact payment card information (credit card, CVV, zip, account number, etc)" + enum: + - "true" + - "false" + default: "false" + required: false + location: $message.header#/redact_pci + numerals: + description: "Convert spoken numerals into digit form (e.g., 'twenty five' to '25'). 'auto' enables automatic detection based on context." 
+ enum: + - "true" + - "false" + - auto + default: auto + required: false + location: $message.header#/numerals + diarize: + description: "Enable speaker diarization to identify different speakers in the audio" + enum: + - "true" + - "false" + default: "false" + required: false + location: $message.header#/diarize + keywords: + description: "Comma-separated list of words/phrases to boost, each optionally followed by :INTENSIFIER (e.g. NVIDIA:5,Jensen). Intensifier defaults to 1.0 if omitted. Max 100 keywords per session." + required: false + location: $message.header#/keywords + messages: + audioData.message: + name: V4PulseAudioData + endSignal.message: + name: V4PulseEndSignal + transcriptionResponse.message: + name: V4PulseTranscriptionResponse + +operations: + sendAudioData: + action: send + sendEndSignal: + action: send + receiveTranscription: + action: receive diff --git a/fern/apis/waves-v4/overrides/waves-api-overrides.yaml b/fern/apis/waves-v4/overrides/waves-api-overrides.yaml new file mode 100644 index 0000000..18163c2 --- /dev/null +++ b/fern/apis/waves-v4/overrides/waves-api-overrides.yaml @@ -0,0 +1,107 @@ +paths: + /waves/v1/pronunciation-dicts: + get: + x-fern-server-name: waves + x-fern-audiences: + - v4docs + tags: + - Pronunciation Dictionaries + summary: List + post: + x-fern-server-name: waves + x-fern-audiences: + - v4docs + tags: + - Pronunciation Dictionaries + summary: Create + put: + x-fern-server-name: waves + x-fern-audiences: + - v4docs + tags: + - Pronunciation Dictionaries + summary: Update + delete: + x-fern-server-name: waves + x-fern-audiences: + - v4docs + tags: + - Pronunciation Dictionaries + summary: Delete + /waves/v1/lightning/get_speech: + post: + x-fern-server-name: waves + x-fern-audiences: + - exclude + tags: + - Lightning + /waves/v1/lightning-large/get_speech: + post: + x-fern-server-name: waves + x-fern-audiences: + - exclude + tags: + - Lightning Large + /waves/v1/lightning-large/stream: + post: + x-fern-server-name: 
waves + x-fern-audiences: + - exclude + tags: + - Lightning Large + /waves/v1/lightning-v2/get_speech: + post: + x-fern-server-name: waves + x-fern-audiences: + - v4docs + tags: + - Text to Speech + summary: Lightning v2 + description: Get speech for given text using the Waves API + requestBody: + content: + application/json: + schema: + properties: + output_format: + enum: + - pcm + - wav + - ulaw + - alaw + /waves/v1/lightning-v2/stream: + post: + x-fern-server-name: waves + x-fern-audiences: + - v4docs + tags: + - Text to Speech + summary: Lightning v2 SSE + description: | + The Lightning v2 SSE API provides real-time text-to-speech streaming capabilities with high-quality voice synthesis. This API uses Server-Sent Events (SSE) to deliver audio chunks as they're generated, enabling low-latency audio playback without waiting for the entire audio file to process. + For an end-to-end example of how to use the Lightning v2 SSE API, check out [Text to Speech (SSE) Example](https://github.com/smallest-inc/waves-examples/blob/main/lightning_v2/http_streaming/http_streaming_api.py) + + ## When to Use + + - **Interactive Applications**: Perfect for chatbots, virtual assistants, and other applications requiring immediate voice responses + - **Long-Form Content**: Efficiently stream audio for articles, stories, or other long-form content without buffering delays + - **Voice User Interfaces**: Create natural-sounding voice interfaces with minimal perceived latency + - **Accessibility Solutions**: Provide real-time audio versions of written content for users with visual impairments + + ## How It Works + + 1. **Make a POST Request**: Send your text and voice settings to the API endpoint + 2. **Receive Audio Chunks**: The API processes your text and streams audio back as base64-encoded chunks with 1024 byte size + 3. **Process the Stream**: Handle the SSE events to decode and play audio chunks sequentially + 4. 
**End of Stream**: The API sends a completion event when all audio has been delivered + requestBody: + content: + application/json: + schema: + properties: + output_format: + enum: + - pcm + - wav + - ulaw + - alaw diff --git a/fern/apis/waves/asyncapi/asr-ws-overrides.yml b/fern/apis/waves/asyncapi/asr-ws-overrides.yml index 2c3a4af..d964dbe 100644 --- a/fern/apis/waves/asyncapi/asr-ws-overrides.yml +++ b/fern/apis/waves/asyncapi/asr-ws-overrides.yml @@ -1,36 +1,30 @@ -# Override file for asr-ws.json -# Restructures the spec to avoid $ref to #/channels/ locations which Fern doesn't support +# Override file for asr-ws.yaml +# Adds unique type names to prevent collisions with other AsyncAPI files +# Maps server reference to match generators.yml environment URLs + +servers: + waves-ws: + host: api.smallest.ai + pathname: /waves/v1/asr + protocol: wss + +channels: + /waves/v1/asr: + x-fern-sdk-group-name: ASR Streaming + servers: + - $ref: "#/servers/waves-ws" + messages: + audioRequest.message: + x-fern-type-name: AsrAudioRequest + name: AsrAudioRequest + transcriptionResponse.message: + x-fern-type-name: AsrTranscriptionResponse + name: AsrTranscriptionResponse -# Override operations to remove channel refs and use component message refs operations: audioRequest: - channel: null - messages: - - $ref: "#/components/messages/AudioRequest" + action: send + x-fern-sdk-method-name: transcribe_streaming_asr transcriptionResponse: - channel: null - messages: - - $ref: "#/components/messages/TranscriptionResponse" - -# Add messages to components -components: - messages: - AudioRequest: - name: AudioRequest - contentType: application/octet-stream - payload: - type: string - format: binary - description: Raw audio data in the specified encoding format - TranscriptionResponse: - name: TranscriptionResponse - contentType: application/json - payload: - type: object - properties: - text: - type: string - description: The transcribed text content - isEndOfTurn: - type: boolean - 
description: Indicates if this marks the end of a speech turn + action: receive + x-fern-sdk-method-name: receive_transcribe_streaming_asr diff --git a/fern/apis/waves/asyncapi/asr-ws.yaml b/fern/apis/waves/asyncapi/asr-ws.yaml index 9f33908..c8fa7c0 100644 --- a/fern/apis/waves/asyncapi/asr-ws.yaml +++ b/fern/apis/waves/asyncapi/asr-ws.yaml @@ -2,37 +2,37 @@ asyncapi: 3.0.0 info: title: Waves ASR (Speech-to-Text) API version: 1.0.0 - description: WebSocket-based streaming automatic speech recognition API for real-time + description: + WebSocket-based streaming automatic speech recognition API for real-time speech transcription contact: name: Waves API Support - url: https://waves-api.smallest.ai/support email: support@smallest.ai license: name: Proprietary servers: production: - host: waves-api.smallest.ai - pathname: /api/v1/asr + host: api.smallest.ai + pathname: /waves/v1/asr protocol: wss description: Production WebSocket ASR endpoint security: - - $ref: '#/components/securitySchemes/bearerAuth' + - $ref: "#/components/securitySchemes/bearerAuth" channels: - /api/v1/asr: - address: /api/v1/asr + /waves/v1/asr: + address: /waves/v1/asr messages: - audioRequest.message: - name: AudioRequest + asrAudioRequest.message: + name: AsrAudioRequest contentType: application/octet-stream payload: type: string format: binary description: Raw audio data in the specified encoding format examples: - - Binary audio data chunk - transcriptionResponse.message: - name: TranscriptionResponse + - Binary audio data chunk + asrTranscriptionResponse.message: + name: AsrTranscriptionResponse contentType: application/json payload: type: object @@ -44,25 +44,25 @@ channels: type: boolean description: Indicates if this marks the end of a speech turn examples: - - text: Hello, how are you doing today? - isEndOfTurn: false - - text: Thank you for using our service. - isEndOfTurn: true + - text: Hello, how are you doing today? + isEndOfTurn: false + - text: Thank you for using our service. 
+ isEndOfTurn: true operations: audioRequest: action: receive channel: - $ref: '#/channels/~1api~1v1~1asr' + $ref: "#/channels/~1waves~1v1~1asr" summary: Send audio data for transcription messages: - - $ref: '#/channels/~1api~1v1~1asr/messages/audioRequest.message' + - $ref: "#/channels/~1waves~1v1~1asr/messages/asrAudioRequest.message" transcriptionResponse: action: send channel: - $ref: '#/channels/~1api~1v1~1asr' + $ref: "#/channels/~1waves~1v1~1asr" summary: Receive transcription results messages: - - $ref: '#/channels/~1api~1v1~1asr/messages/transcriptionResponse.message' + - $ref: "#/channels/~1waves~1v1~1asr/messages/asrTranscriptionResponse.message" components: securitySchemes: bearerAuth: @@ -86,7 +86,7 @@ components: status: type: string enum: - - error + - error error: type: object properties: diff --git a/fern/apis/waves/asyncapi/lightning-asr-ws-overrides.yml b/fern/apis/waves/asyncapi/lightning-asr-ws-overrides.yml index c7a135d..8adc6c7 100644 --- a/fern/apis/waves/asyncapi/lightning-asr-ws-overrides.yml +++ b/fern/apis/waves/asyncapi/lightning-asr-ws-overrides.yml @@ -1,120 +1,36 @@ -# Override file for lightning-asr-ws.json -# Restructures the spec to avoid $ref to #/channels/ locations which Fern doesn't support +# Override file for lightning-asr-ws.yaml +# Adds unique type names to prevent collisions with other AsyncAPI files +# Maps server reference to match generators.yml environment URLs + +servers: + waves-ws: + host: api.smallest.ai + pathname: /waves/v1/lightning/get_text + protocol: wss + +channels: + /waves/v1/lightning/get_text: + x-fern-sdk-group-name: Lightning ASR Streaming + servers: + - $ref: "#/servers/waves-ws" + messages: + audioData.message: + x-fern-type-name: LightningAudioData + name: LightningAudioData + endSignal.message: + x-fern-type-name: LightningEndSignal + name: LightningEndSignal + transcriptionResponse.message: + x-fern-type-name: LightningTranscriptionResponse + name: LightningTranscriptionResponse -# Override 
operations to remove channel refs and use component message refs operations: sendAudioData: - channel: null - messages: - - $ref: "#/components/messages/AudioData" + action: send + x-fern-sdk-method-name: transcribe_streaming_lightning_send_audio sendEndSignal: - channel: null - messages: - - $ref: "#/components/messages/EndSignal" + action: send + x-fern-sdk-method-name: transcribe_streaming_lightning_send_end_signal receiveTranscription: - channel: null - messages: - - $ref: "#/components/messages/TranscriptionResponse" - -# Add messages to components -components: - messages: - AudioData: - name: AudioData - contentType: application/octet-stream - payload: - type: string - format: binary - description: Raw audio data chunk, transmitted in binary format using the selected encoding - EndSignal: - name: EndSignal - contentType: application/json - payload: - type: object - required: - - type - properties: - type: - type: string - enum: [end] - description: Signal to indicate end of audio stream - TranscriptionResponse: - name: TranscriptionResponse - contentType: application/json - payload: - type: object - required: - - session_id - properties: - session_id: - type: string - description: Unique identifier for the transcription session - transcript: - type: string - description: Partial or complete transcription text for the current segment - full_transcript: - type: string - description: Complete transcription text accumulated so far - is_final: - type: boolean - default: false - description: Indicates if this is the final transcription for the current segment - is_last: - type: boolean - default: false - description: Indicates if this is the last transcription in the session - words: - type: array - description: Word-level timestamps (when word_timestamps=true) - items: - type: object - properties: - word: - type: string - description: The transcribed word - start: - type: number - description: Start time in seconds - end: - type: number - description: End time in 
seconds - confidence: - type: number - description: Confidence score (0.0 to 1.0) - speaker: - type: integer - description: Speaker label (when diarization is enabled) - speaker_confidence: - type: number - description: Speaker confidence score (0.0 to 1.0) - utterances: - type: array - description: Sentence-level timestamps (when sentence_timestamps=true) - items: - type: object - properties: - text: - type: string - description: The transcribed sentence - start: - type: number - description: Start time in seconds - end: - type: number - description: End time in seconds - speaker: - type: integer - description: Speaker label (when diarization is enabled) - language: - type: string - description: Detected primary language code - languages: - type: array - description: List of languages detected in the audio - items: - type: string - redacted_entities: - type: array - description: List of redacted entity placeholders - items: - type: string + action: receive + x-fern-sdk-method-name: receive_transcribe_streaming_lightning diff --git a/fern/apis/waves/asyncapi/lightning-asr-ws.yaml b/fern/apis/waves/asyncapi/lightning-asr-ws.yaml index b6a56f9..459fece 100644 --- a/fern/apis/waves/asyncapi/lightning-asr-ws.yaml +++ b/fern/apis/waves/asyncapi/lightning-asr-ws.yaml @@ -2,22 +2,22 @@ asyncapi: 3.0.0 info: title: Waves Lightning STT (Speech-to-Text) API version: 1.0.0 - description: WebSocket-based streaming automatic speech recognition API for real-time + description: + WebSocket-based streaming automatic speech recognition API for real-time speech transcription using the Lightning STT model contact: name: Waves API Support - url: https://waves-api.smallest.ai/support email: support@smallest.ai license: name: Proprietary servers: production: - host: waves-api.smallest.ai - pathname: /api/v1/lightning/get_text + host: api.smallest.ai + pathname: /waves/v1/lightning/get_text protocol: wss description: Production WebSocket Lightning STT endpoint security: - - $ref: 
'#/components/securitySchemes/bearerAuth' + - $ref: "#/components/securitySchemes/bearerAuth" bindings: ws: query: @@ -26,186 +26,181 @@ servers: language: type: string enum: - - it - - es - - en - - pt - - hi - - de - - fr - - uk - - ru - - kn - - ml - - pl - - mr - - gu - - cs - - sk - - te - - or - - nl - - bn - - lv - - et - - ro - - pa - - fi - - sv - - bg - - ta - - hu - - da - - lt - - mt - - multi + - it + - es + - en + - pt + - hi + - de + - fr + - uk + - ru + - kn + - ml + - pl + - mr + - gu + - cs + - sk + - te + - or + - nl + - bn + - lv + - et + - ro + - pa + - fi + - sv + - bg + - ta + - hu + - da + - lt + - mt + - multi default: en - description: Language code for transcription. Use 'multi' for automatic + description: + Language code for transcription. Use 'multi' for automatic language detection encoding: type: string enum: - - linear16 - - linear32 - - alaw - - mulaw - - opus - - ogg_opus + - linear16 + - linear32 + - alaw + - mulaw + - opus + - ogg_opus default: linear16 description: Audio encoding format sample_rate: type: string enum: - - '8000' - - '16000' - - '22050' - - '24000' - - '44100' - - '48000' - default: '16000' + - "8000" + - "16000" + - "22050" + - "24000" + - "44100" + - "48000" + default: "16000" description: Audio sample rate in Hz word_timestamps: type: string enum: - - 'true' - - 'false' - default: 'true' + - "true" + - "false" + default: "true" description: Include word-level timestamps in transcription full_transcript: type: string enum: - - 'true' - - 'false' - default: 'false' - description: Include cumulative transcript received till now in responses + - "true" + - "false" + default: "false" + description: + Include cumulative transcript received till now in responses where is_final is true sentence_timestamps: type: string enum: - - 'true' - - 'false' - default: 'false' + - "true" + - "false" + default: "false" description: Include sentence-level timestamps (utterances) in transcription redact_pii: type: string enum: - - 
'true' - - 'false' - default: 'false' - description: Redact personally identifiable information (name, surname, + - "true" + - "false" + default: "false" + description: + Redact personally identifiable information (name, surname, address, etc) redact_pci: type: string enum: - - 'true' - - 'false' - default: 'false' - description: Redact payment card information (credit card, CVV, zip, + - "true" + - "false" + default: "false" + description: + Redact payment card information (credit card, CVV, zip, account number, etc) - keywords: - type: array - items: - type: string - pattern: ^[^:]+:[0-9]+(\.[0-9]+)?$ - description: List of keywords to boost during transcription. Each keyword - is formatted as 'word:weight' where weight is a positive number indicating - boost intensity (e.g., ['word:5.0', 'name:4.0']). Higher weights increase - the likelihood of the keyword being recognized. Recommended weight - range is 1 to 10; extremely high values may degrade transcription - accuracy. - examples: - - - product:5.0 - - company:4.0 - - technical:3.5 numerals: type: string enum: - - 'true' - - 'false' - - auto + - "true" + - "false" + - auto default: auto - description: Convert spoken numerals into digit form (e.g., 'twenty + description: + Convert spoken numerals into digit form (e.g., 'twenty five' to '25') and `auto` enables automatic detection based on context. diarize: type: string enum: - - 'true' - - 'false' - default: 'false' - description: Enable speaker diarization to identify different speakers + - "true" + - "false" + default: "false" + description: + Enable speaker diarization to identify different speakers in the audio. 
channels: - /api/v1/lightning/get_text: - address: /api/v1/lightning/get_text + /waves/v1/lightning/get_text: + address: /waves/v1/lightning/get_text messages: - audioData.message: - name: AudioData + lightningAudioData.message: + name: LightningAudioData contentType: application/octet-stream payload: type: string format: binary - description: Raw audio data chunk, transmitted in binary format using the + description: + Raw audio data chunk, transmitted in binary format using the selected encoding examples: - - Binary audio data chunk (4096 bytes recommended) - endSignal.message: - name: EndSignal + - Binary audio data chunk (4096 bytes recommended) + lightningEndSignal.message: + name: LightningEndSignal contentType: application/json payload: type: object required: - - type + - type properties: type: type: string enum: - - end + - finalize description: Signal to indicate end of audio stream examples: - - type: end - transcriptionResponse.message: - name: TranscriptionResponse + - type: finalize + lightningTranscriptionResponse.message: + name: LightningTranscriptionResponse contentType: application/json payload: type: object required: - - session_id + - session_id properties: session_id: type: string description: Unique identifier for the transcription session transcript: type: string - description: Partial or complete transcription text for the current + description: + Partial or complete transcription text for the current segment full_transcript: type: string - description: Complete transcription text accumulated so far. Only included + description: + Complete transcription text accumulated so far. 
Only included when `full_transcript` query parameter is set to true AND is_final=true is_final: type: boolean default: false - description: Indicates if this is the final transcription for the current + description: + Indicates if this is the final transcription for the current segment is_last: type: boolean @@ -234,7 +229,8 @@ channels: description: Speaker label (when diarization is enabled) speaker_confidence: type: number - description: Confidence score for the speaker assignment (0.0 + description: + Confidence score for the speaker assignment (0.0 to 1.0) utterances: type: array @@ -259,125 +255,128 @@ channels: description: Detected primary language code, only returned when `is_final=True` languages: type: array - description: List of codes of languages detected in the audio, only + description: + List of codes of languages detected in the audio, only returned when `is_final=True` items: type: string redacted_entities: type: array - description: List of redacted entity placeholders (when redact_pii or + description: + List of redacted entity placeholders (when redact_pii or redact_pci is enabled) items: type: string examples: - - session_id: sess_12345abcde - transcript: Hello, how are you? - is_final: true - is_last: false - language: en - - session_id: session_id_12346abcde - transcript: I paid using my card [CREDITCARDCVV_1] with expiry [TIME_2]. - is_final: true - is_last: true - full_transcript: Hello, my name is [FIRSTNAME_1] [FIRSTNAME_2]. I paid - using my card [CREDITCARDCVV_1] with expiry [TIME_2]. 
- language: en - languages: - - en - words: - - word: I - start: 0.0 - end: 0.2 - confidence: 0.98 - speaker: 0 - speaker_confidence: 0.95 - - word: paid - start: 0.2 - end: 0.5 - confidence: 1.0 - speaker: 0 - speaker_confidence: 0.59 - - word: using - start: 0.5 - end: 0.8 - confidence: 0.73 - speaker: 0 - speaker_confidence: 0.74 - - word: my - start: 0.8 - end: 1.0 - confidence: 0.88 - speaker: 0 - speaker_confidence: 0.92 - - word: card - start: 1.0 - end: 1.3 - confidence: 0.95 - speaker: 0 - speaker_confidence: 0.91 - - word: '[creditcardcvv_1]' - start: 1.3 - end: 1.8 - confidence: 0.99 - speaker: 0 - speaker_confidence: 0.85 - - word: with - start: 1.8 - end: 2.0 - confidence: 0.85 - speaker: 0 - speaker_confidence: 0.87 - - word: expiry - start: 2.0 - end: 2.4 - confidence: 0.9 - speaker: 0 - speaker_confidence: 0.67 - - word: '[time_2]' - start: 2.4 - end: 2.7 - confidence: 0.97 - speaker: 0 - speaker_confidence: 0.76 - utterances: - - text: Hello, my name is [FIRSTNAME_1] [FIRSTNAME_2]. - start: 0.0 - end: 2.2 - speaker: 0 - - text: I paid using my card [CREDITCARDCVV_1] with expiry [TIME_2]. - start: 2.2 - end: 5.0 - speaker: 0 - redacted_entities: - - '[FIRSTNAME_1]' - - '[FIRSTNAME_2]' - - '[CREDITCARDCVV_1]' - - '[TIME_2]' + - session_id: sess_12345abcde + transcript: Hello, how are you? + is_final: true + is_last: false + language: en + - session_id: session_id_12346abcde + transcript: I paid using my card [CREDITCARDCVV_1] with expiry [TIME_2]. + is_final: true + is_last: true + full_transcript: + Hello, my name is [FIRSTNAME_1] [FIRSTNAME_2]. I paid + using my card [CREDITCARDCVV_1] with expiry [TIME_2]. 
+ language: en + languages: + - en + words: + - word: I + start: 0.0 + end: 0.2 + confidence: 0.98 + speaker: 0 + speaker_confidence: 0.95 + - word: paid + start: 0.2 + end: 0.5 + confidence: 1.0 + speaker: 0 + speaker_confidence: 0.59 + - word: using + start: 0.5 + end: 0.8 + confidence: 0.73 + speaker: 0 + speaker_confidence: 0.74 + - word: my + start: 0.8 + end: 1.0 + confidence: 0.88 + speaker: 0 + speaker_confidence: 0.92 + - word: card + start: 1.0 + end: 1.3 + confidence: 0.95 + speaker: 0 + speaker_confidence: 0.91 + - word: "[creditcardcvv_1]" + start: 1.3 + end: 1.8 + confidence: 0.99 + speaker: 0 + speaker_confidence: 0.85 + - word: with + start: 1.8 + end: 2.0 + confidence: 0.85 + speaker: 0 + speaker_confidence: 0.87 + - word: expiry + start: 2.0 + end: 2.4 + confidence: 0.9 + speaker: 0 + speaker_confidence: 0.67 + - word: "[time_2]" + start: 2.4 + end: 2.7 + confidence: 0.97 + speaker: 0 + speaker_confidence: 0.76 + utterances: + - text: Hello, my name is [FIRSTNAME_1] [FIRSTNAME_2]. + start: 0.0 + end: 2.2 + speaker: 0 + - text: I paid using my card [CREDITCARDCVV_1] with expiry [TIME_2]. 
+ start: 2.2 + end: 5.0 + speaker: 0 + redacted_entities: + - "[FIRSTNAME_1]" + - "[FIRSTNAME_2]" + - "[CREDITCARDCVV_1]" + - "[TIME_2]" operations: sendAudioData: action: receive channel: - $ref: '#/channels/~1api~1v1~1lightning~1get_text' + $ref: "#/channels/~1waves~1v1~1lightning~1get_text" summary: Send audio data for transcription description: Stream audio data in chunks for real-time transcription messages: - - $ref: '#/channels/~1api~1v1~1lightning~1get_text/messages/audioData.message' + - $ref: "#/channels/~1waves~1v1~1lightning~1get_text/messages/lightningAudioData.message" sendEndSignal: action: receive channel: - $ref: '#/channels/~1api~1v1~1lightning~1get_text' + $ref: "#/channels/~1waves~1v1~1lightning~1get_text" summary: Send end of stream signal description: Signal that audio streaming is complete messages: - - $ref: '#/channels/~1api~1v1~1lightning~1get_text/messages/endSignal.message' + - $ref: "#/channels/~1waves~1v1~1lightning~1get_text/messages/lightningEndSignal.message" receiveTranscription: action: send channel: - $ref: '#/channels/~1api~1v1~1lightning~1get_text' + $ref: "#/channels/~1waves~1v1~1lightning~1get_text" summary: Receive transcription results description: Get real-time transcription results as audio is processed messages: - - $ref: '#/channels/~1api~1v1~1lightning~1get_text/messages/transcriptionResponse.message' + - $ref: "#/channels/~1waves~1v1~1lightning~1get_text/messages/lightningTranscriptionResponse.message" components: securitySchemes: bearerAuth: @@ -410,7 +409,7 @@ components: status: type: string enum: - - error + - error error: type: object properties: diff --git a/fern/apis/waves/asyncapi/lightning-v2-ws-overrides.yml b/fern/apis/waves/asyncapi/lightning-v2-ws-overrides.yml index 379bae0..b89c135 100644 --- a/fern/apis/waves/asyncapi/lightning-v2-ws-overrides.yml +++ b/fern/apis/waves/asyncapi/lightning-v2-ws-overrides.yml @@ -1,103 +1,32 @@ -# Override file for lightning-v2-ws.json -# Restructures the spec to avoid 
$ref to #/channels/ locations which Fern doesn't support +# Override file for lightning-v2-ws.yaml +# Adds unique type names to prevent collisions with other AsyncAPI files +# Maps server reference to match generators.yml environment URLs -# Override operations to remove channel refs and use component message refs -operations: - ttsRequest: - channel: null - messages: - - $ref: "#/components/messages/TTSRequest" - ttsResponse: - channel: null +servers: + waves-ws: + host: api.smallest.ai + pathname: /waves/v1/lightning-v2/get_speech/stream + protocol: wss + +channels: + lightningV2Stream: + x-fern-audiences: + - v2 + - v3 + - v4 + x-fern-sdk-group-name: Lightning V2 TTS + servers: + - $ref: "#/servers/waves-ws" messages: - - $ref: "#/components/messages/TTSResponse" + lightningV2TtsRequest: + name: LightningV2TtsRequest + lightningV2TtsResponse: + name: LightningV2TtsResponse -# Add messages to components -components: - messages: - TTSRequest: - name: TTSRequest - contentType: application/json - payload: - type: object - required: - - voice_id - - text - properties: - voice_id: - type: string - description: The ID of the voice to use - text: - type: string - description: The text to convert to speech - max_buffer_flush_ms: - type: integer - description: Maximum time (in ms) to wait for more input before generating output - default: 0 - maximum: 1000 - minimum: 0 - continue: - type: boolean - description: Whether to buffer and wait for more input - default: false - flush: - type: boolean - description: Whether to flush the current buffer - default: false - complete_backoff_ms: - type: number - description: Time in ms to wait after the last chunk before sending complete response - default: 4000 - minimum: 0 - maximum: 10000 - language: - type: string - description: "Language code: en, hi, mr, kn, ta, bn, gu, de, fr, es, it, pl, nl, ru, ar, he" - default: en - sample_rate: - type: integer - description: Audio sample rate in Hz - default: 24000 - speed: - type: number 
- description: Speaking speed multiplier - minimum: 0.1 - maximum: 5 - default: 1 - consistency: - type: number - description: Voice consistency parameter - minimum: 0 - maximum: 1 - default: 0.5 - enhancement: - type: integer - description: Audio enhancement level - minimum: 0 - maximum: 2 - default: 1 - similarity: - type: number - description: Voice similarity parameter - minimum: 0 - maximum: 1 - default: 0 - TTSResponse: - name: TTSResponse - contentType: application/json - payload: - type: object - properties: - request_id: - type: string - description: Unique identifier for the TTS request - status: - type: string - enum: [chunk, complete] - description: "Status: chunk for audio data, complete for end of stream" - data: - type: object - properties: - audio: - type: string - description: Base64-encoded audio chunk +operations: + LightningV2TtsRequest: + action: send + x-fern-sdk-method-name: synthesize_streaming_lightning_v2 + LightningV2TtsResponse: + action: receive + x-fern-sdk-method-name: receive_synthesize_streaming_lightning_v2 diff --git a/fern/apis/waves/asyncapi/lightning-v2-ws.yaml b/fern/apis/waves/asyncapi/lightning-v2-ws.yaml index c97bbf0..7dfb1ef 100644 --- a/fern/apis/waves/asyncapi/lightning-v2-ws.yaml +++ b/fern/apis/waves/asyncapi/lightning-v2-ws.yaml @@ -2,34 +2,40 @@ asyncapi: 3.0.0 info: title: Waves Text-to-Speech API version: 1.0.0 - description: WebSocket-based streaming text-to-speech API for generating realistic + description: + WebSocket-based streaming text-to-speech API for generating realistic voices contact: name: Waves API Support - url: https://waves-api.smallest.ai/support email: support@smallest.ai license: name: Proprietary servers: production: - host: waves-api.smallest.ai - pathname: /api/v1/lightning-v2/get_speech/stream + host: api.smallest.ai + pathname: /waves/v1/lightning-v2/get_speech/stream protocol: wss description: Production WebSocket API endpoint security: - - $ref: '#/components/securitySchemes/bearerAuth' + 
- $ref: "#/components/securitySchemes/bearerAuth" channels: - /api/v1/lightning-v2/get_speech/stream: - address: /api/v1/lightning-v2/get_speech/stream + lightningV2Stream: + address: /waves/v1/lightning-v2/get_speech/stream + parameters: + Authorization: + description: "Bearer token for authentication. Format: Bearer YOUR_API_KEY" + location: $message.header#/Authorization + servers: + - $ref: "#/servers/production" messages: - ttsRequest.message: - name: TTSRequest + lightningV2TtsRequest.message: + name: LightningV2TtsRequest contentType: application/json payload: type: object required: - - voice_id - - text + - voice_id + - text properties: voice_id: type: string @@ -39,7 +45,8 @@ channels: description: The text to convert to speech max_buffer_flush_ms: type: integer - description: The maximum time (in ms) to wait for more input before + description: + The maximum time (in ms) to wait for more input before generating output. It flushes when either this time is reached or enough input is received for optimal output—whichever comes first. This is useful for input streams. Deafults to 0 @@ -48,31 +55,35 @@ channels: minimum: 0 continue: type: boolean - description: This setting controls whether the system should buffer + description: + This setting controls whether the system should buffer and wait for more input after receiving the current one. If not set, it assumes no more input is coming. default: false flush: type: boolean - description: This setting controls whether the system should flush the + description: + This setting controls whether the system should flush the current buffer. default: false complete_backoff_ms: type: number - description: The time in ms to wait after the last chunk is sent before + description: + The time in ms to wait after the last chunk is sent before sending the complete response. Default is 4000ms. Maximum is 10000ms. 
default: 4000 minimum: 0 maximum: 10000 language: type: string - description: 'The language code, available options: `en`, `hi`, `mr`, + description: + "The language code, available options: `en`, `hi`, `mr`, `kn`, `ta`, `bn`, `gu`, `de`, `fr`, `es`, `it`, `pl`, `nl`, `ru`, - `ar`, `he`' + `ar`, `he`" default: en sample_rate: type: integer - description: Audio sample rate in Hz + description: "Audio sample rate in Hz. Supported values: 8000, 16000, 24000, 44100" default: 24000 speed: type: number @@ -98,20 +109,19 @@ channels: minimum: 0 maximum: 1 default: 0 - examples: - - voice_id: ryan - text: Hello, this is a sample text to convert to speech. - max_buffer_flush_ms: 0 - continue: false - flush: false - language: en - sample_rate: 24000 - speed: 1 - consistency: 0.5 - enhancement: 1 - similarity: 0 - ttsResponse.message: - name: TTSResponse + examples: + - name: basicTts + payload: + voice_id: ryan + text: Hello, this is a sample text to convert to speech. + sample_rate: 24000 + language: en + speed: 1 + consistency: 0.5 + enhancement: 1 + similarity: 0 + lightningV2TtsResponse.message: + name: LightningV2TtsResponse contentType: application/json payload: type: object @@ -122,9 +132,10 @@ channels: status: type: string enum: - - chunk - - complete - description: Status of the TTS request, `chunk` indicates incoming audio + - chunk + - complete + description: + Status of the TTS request, `chunk` indicates incoming audio chunk, `complete` indicates completion. 
data: type: object @@ -132,21 +143,34 @@ channels: audio: type: string description: Base64-encoded audio chunk + examples: + - name: audioChunk + payload: + request_id: req_abc123 + status: chunk + data: + audio: SGVsbG8gV29ybGQ= + - name: completionStatus + payload: + request_id: req_abc123 + status: complete operations: - ttsRequest: - action: receive + LightningV2TtsRequest: + action: send channel: - $ref: '#/channels/~1api~1v1~1lightning-v2~1get_speech~1stream' + $ref: "#/channels/lightningV2Stream" summary: Send text-to-speech request + description: Send a JSON message with voice_id, text, and optional parameters to generate speech audio. messages: - - $ref: '#/channels/~1api~1v1~1lightning-v2~1get_speech~1stream/messages/ttsRequest.message' - ttsResponse: - action: send + - $ref: "#/channels/lightningV2Stream/messages/lightningV2TtsRequest.message" + LightningV2TtsResponse: + action: receive channel: - $ref: '#/channels/~1api~1v1~1lightning-v2~1get_speech~1stream' + $ref: "#/channels/lightningV2Stream" summary: Receive audio stream chunks + description: Receive audio data chunks and completion status from the server. 
messages: - - $ref: '#/channels/~1api~1v1~1lightning-v2~1get_speech~1stream/messages/ttsResponse.message' + - $ref: "#/channels/lightningV2Stream/messages/lightningV2TtsResponse.message" components: securitySchemes: bearerAuth: @@ -173,7 +197,7 @@ components: status: type: string enum: - - complete + - complete description: Indicates that the streaming is complete ErrorResponse: type: object @@ -181,7 +205,7 @@ components: status: type: string enum: - - error + - error error: type: object properties: diff --git a/fern/apis/waves/asyncapi/lightning-v3.1-ws-overrides.yml b/fern/apis/waves/asyncapi/lightning-v3.1-ws-overrides.yml new file mode 100644 index 0000000..73da2f2 --- /dev/null +++ b/fern/apis/waves/asyncapi/lightning-v3.1-ws-overrides.yml @@ -0,0 +1,30 @@ +# Override file for lightning-v3.1-ws.yaml +# Adds unique type names to prevent collisions with other AsyncAPI files +# Maps server reference to match generators.yml environment URLs + +servers: + waves-ws: + host: api.smallest.ai + pathname: /waves/v1/lightning-v3.1/get_speech/stream + protocol: wss + production: + x-fern-server-name: waves + +channels: + lightningV31Stream: + x-fern-sdk-group-name: Lightning V3.1 TTS + servers: + - $ref: "#/servers/waves-ws" + messages: + lightningV31TtsRequest.message: + name: LightningV31TtsRequest + lightningV31TtsResponse.message: + name: LightningV31TtsResponse + +operations: + LightningV31TtsRequest: + action: send + x-fern-sdk-method-name: synthesize_streaming_lightning_v3_1 + LightningV31TtsResponse: + action: receive + x-fern-sdk-method-name: receive_synthesize_streaming_lightning_v3_1 diff --git a/fern/apis/waves/asyncapi/lightning-v3.1-ws.yaml b/fern/apis/waves/asyncapi/lightning-v3.1-ws.yaml new file mode 100644 index 0000000..bed91b2 --- /dev/null +++ b/fern/apis/waves/asyncapi/lightning-v3.1-ws.yaml @@ -0,0 +1,179 @@ +asyncapi: 3.0.0 +info: + title: Lightning V3.1 Text-to-Speech WebSocket API + version: 3.1.0 + description: WebSocket-based streaming 
text-to-speech API for Lightning V3.1 model + contact: + name: Waves API Support + email: support@smallest.ai + license: + name: Proprietary +servers: + production: + host: api.smallest.ai + pathname: /waves/v1/lightning-v3.1/get_speech/stream + protocol: wss + description: Production WebSocket API endpoint + security: + - $ref: "#/components/securitySchemes/bearerAuth" +channels: + lightningV31Stream: + address: /waves/v1/lightning-v3.1/get_speech/stream + parameters: + Authorization: + description: "Bearer token for authentication. Format: Bearer YOUR_API_KEY" + location: $message.header#/Authorization + servers: + - $ref: "#/servers/production" + messages: + lightningV31TtsRequest.message: + name: LightningV31TtsRequest + contentType: application/json + payload: + type: object + required: + - voice_id + - text + properties: + voice_id: + type: string + description: The ID of the voice to use + text: + type: string + description: The text to convert to speech + max_buffer_flush_ms: + type: integer + description: The maximum time (in ms) to wait for more input before generating output. It flushes when either this time is reached or enough input is received for optimal output—whichever comes first. This is useful for input streams. Defaults to 0 + default: 0 + maximum: 1000 + minimum: 0 + continue: + type: boolean + description: This setting controls whether the system should buffer and wait for more input after receiving the current one. If not set, it assumes no more input is coming. + default: false + flush: + type: boolean + description: This setting controls whether the system should flush the current buffer. + default: false + complete_backoff_ms: + type: number + description: The time in ms to wait after the last chunk is sent before sending the complete response. Default is 4000ms. Maximum is 10000ms. 
+ default: 4000 + minimum: 0 + maximum: 10000 + language: + type: string + description: "The language code, available options: `en`, `hi`" + default: en + enum: + - en + - hi + sample_rate: + type: integer + description: "Audio sample rate in Hz. Supported values: 8000, 16000, 24000, 44100" + default: 44100 + speed: + type: number + description: Speaking speed multiplier + minimum: 0.5 + maximum: 2.0 + default: 1 + examples: + - name: basicTts + payload: + voice_id: ryan + text: Hello, this is a sample text to convert to speech. + sample_rate: 24000 + language: en + speed: 1 + lightningV31TtsResponse.message: + name: LightningV31TtsResponse + contentType: application/json + payload: + type: object + properties: + request_id: + type: string + description: Unique identifier for the TTS request + status: + type: string + enum: + - chunk + - complete + description: Status of the TTS request, `chunk` indicates incoming audio chunk, `complete` indicates completion. + data: + type: object + properties: + audio: + type: string + description: Base64-encoded audio chunk + examples: + - name: audioChunk + payload: + request_id: req_abc123 + status: chunk + data: + audio: SGVsbG8gV29ybGQ= + - name: completionStatus + payload: + request_id: req_abc123 + status: complete +operations: + LightningV31TtsRequest: + action: send + channel: + $ref: "#/channels/lightningV31Stream" + summary: Send text-to-speech request + description: Send a JSON message with voice_id, text, and optional parameters to generate speech audio. + messages: + - $ref: "#/channels/lightningV31Stream/messages/lightningV31TtsRequest.message" + LightningV31TtsResponse: + action: receive + channel: + $ref: "#/channels/lightningV31Stream" + summary: Receive audio stream chunks + description: Receive audio data chunks and completion status from the server. 
+ messages: + - $ref: "#/channels/lightningV31Stream/messages/lightningV31TtsResponse.message" +components: + securitySchemes: + bearerAuth: + type: http + scheme: bearer + bearerFormat: JWT + description: Bearer token authentication + schemas: + AudioChunk: + type: object + properties: + data: + type: object + properties: + audio: + type: string + description: Base64-encoded audio chunk + CompletionStatus: + type: object + properties: + request_id: + type: string + description: Unique identifier for the TTS request + status: + type: string + enum: + - complete + description: Indicates that the streaming is complete + ErrorResponse: + type: object + properties: + status: + type: string + enum: + - error + error: + type: object + properties: + message: + type: string + code: + type: string diff --git a/fern/apis/waves/asyncapi/pulse-stt-ws-overrides.yml b/fern/apis/waves/asyncapi/pulse-stt-ws-overrides.yml new file mode 100644 index 0000000..09556e7 --- /dev/null +++ b/fern/apis/waves/asyncapi/pulse-stt-ws-overrides.yml @@ -0,0 +1,147 @@ +# Override file for pulse-stt-ws.yaml +# Adds unique type names to prevent collisions with other AsyncAPI files +# Maps server reference to match generators.yml environment URLs + +servers: + waves-ws: + host: api.smallest.ai + pathname: /waves/v1/pulse/get_text + protocol: wss + bindings: + ws: + query: + type: object + properties: + language: + type: string + enum: + - it + - es + - en + - pt + - hi + - de + - fr + - uk + - ru + - kn + - ml + - pl + - mr + - gu + - cs + - sk + - te + - or + - nl + - bn + - lv + - et + - ro + - pa + - fi + - sv + - bg + - ta + - hu + - da + - lt + - mt + - multi + default: en + description: Language code for transcription. Use 'multi' for automatic language detection + encoding: + type: string + enum: + - linear16 + - linear32 + - alaw + - mulaw + - opus + - ogg_opus + default: linear16 + description: Audio encoding format + sample_rate: + type: integer + description: "Audio sample rate in Hz. 
Supported values: 8000, 16000, 22050, 24000, 44100, 48000" + default: 16000 + word_timestamps: + type: string + enum: + - "true" + - "false" + default: "true" + description: Include word-level timestamps in transcription + full_transcript: + type: string + enum: + - "true" + - "false" + default: "false" + description: Include cumulative transcript received till now in responses where is_final is true + sentence_timestamps: + type: string + enum: + - "true" + - "false" + default: "false" + description: Include sentence-level timestamps (utterances) in transcription + redact_pii: + type: string + enum: + - "true" + - "false" + default: "false" + description: Redact personally identifiable information (name, surname, address, etc) + redact_pci: + type: string + enum: + - "true" + - "false" + default: "false" + description: Redact payment card information (credit card, CVV, zip, account number, etc) + numerals: + type: string + enum: + - "true" + - "false" + - auto + default: auto + description: Convert spoken numerals into digit form (e.g., 'twenty five' to '25') and `auto` enables automatic detection based on context. + diarize: + type: string + enum: + - "true" + - "false" + default: "false" + description: Enable speaker diarization to identify different speakers in the audio. + keywords: + type: string + description: "Comma-separated list of words/phrases to boost, each optionally followed by :INTENSIFIER (e.g. NVIDIA:5,Jensen). Intensifier defaults to 1.0 if omitted. Max 100 keywords per session." 
+ +channels: + pulseStream: + x-fern-sdk-group-name: Pulse STT Streaming + servers: + - $ref: "#/servers/waves-ws" + messages: + audioData.message: + x-fern-type-name: PulseAudioData + name: PulseAudioData + endSignal.message: + x-fern-type-name: PulseEndSignal + name: PulseEndSignal + transcriptionResponse: + x-fern-type-name: PulseTranscriptionResponse + name: PulseTranscriptionResponse + +operations: + sendAudioData: + action: send + x-fern-sdk-method-name: transcribe_streaming_pulse_send_audio + sendEndSignal: + action: send + x-fern-sdk-method-name: transcribe_streaming_pulse_send_end_signal + receiveTranscription: + action: receive + x-fern-sdk-method-name: receive_transcribe_streaming_pulse diff --git a/fern/apis/waves/asyncapi/pulse-stt-ws.yaml b/fern/apis/waves/asyncapi/pulse-stt-ws.yaml new file mode 100644 index 0000000..533d9b4 --- /dev/null +++ b/fern/apis/waves/asyncapi/pulse-stt-ws.yaml @@ -0,0 +1,389 @@ +asyncapi: 3.0.0 +info: + title: Waves Pulse STT (Speech-to-Text) API + version: 1.0.0 + description: WebSocket-based streaming automatic speech recognition API for real-time speech transcription using the Pulse STT model + contact: + name: Waves API Support + email: support@smallest.ai + license: + name: Proprietary +servers: + production: + host: api.smallest.ai + pathname: /waves/v1/pulse/get_text + protocol: wss + description: Production WebSocket Pulse STT endpoint + security: + - $ref: "#/components/securitySchemes/bearerAuth" + bindings: + ws: + query: + type: object + properties: + language: + type: string + enum: + - it + - es + - en + - pt + - hi + - de + - fr + - uk + - ru + - kn + - ml + - pl + - mr + - gu + - cs + - sk + - te + - or + - nl + - bn + - lv + - et + - ro + - pa + - fi + - sv + - bg + - ta + - hu + - da + - lt + - mt + - multi + default: en + description: Language code for transcription. 
Use 'multi' for automatic language detection + encoding: + type: string + enum: + - linear16 + - linear32 + - alaw + - mulaw + - opus + - ogg_opus + default: linear16 + description: Audio encoding format + sample_rate: + type: integer + description: "Audio sample rate in Hz. Supported values: 8000, 16000, 22050, 24000, 44100, 48000" + default: 16000 + word_timestamps: + type: string + enum: + - "true" + - "false" + default: "true" + description: Include word-level timestamps in transcription + full_transcript: + type: string + enum: + - "true" + - "false" + default: "false" + description: Include cumulative transcript received till now in responses where is_final is true + sentence_timestamps: + type: string + enum: + - "true" + - "false" + default: "false" + description: Include sentence-level timestamps (utterances) in transcription + redact_pii: + type: string + enum: + - "true" + - "false" + default: "false" + description: Redact personally identifiable information (name, surname, address, etc) + redact_pci: + type: string + enum: + - "true" + - "false" + default: "false" + description: Redact payment card information (credit card, CVV, zip, account number, etc) + numerals: + type: string + enum: + - "true" + - "false" + - auto + default: auto + description: Convert spoken numerals into digit form (e.g., 'twenty five' to '25') and `auto` enables automatic detection based on context. + diarize: + type: string + enum: + - "true" + - "false" + default: "false" + description: Enable speaker diarization to identify different speakers in the audio. + keywords: + type: string + description: "Comma-separated list of words/phrases to boost, each optionally followed by :INTENSIFIER (e.g. NVIDIA:5,Jensen). Intensifier defaults to 1.0 if omitted. Max 100 keywords per session." +channels: + pulseStream: + address: /waves/v1/pulse/get_text + parameters: + Authorization: + description: "Bearer token for authentication. 
Format: Bearer YOUR_API_KEY" + location: $message.header#/Authorization + messages: + pulseAudioData.message: + name: PulseAudioData + contentType: application/octet-stream + payload: + type: string + format: binary + description: Raw audio data chunk, transmitted in binary format using the selected encoding + examples: + - Binary audio data chunk (4096 bytes recommended) + pulseEndSignal.message: + name: PulseEndSignal + contentType: application/json + payload: + type: object + required: + - type + properties: + type: + type: string + enum: + - finalize + description: Signal to indicate end of audio stream + examples: + - type: finalize + pulseTranscriptionResponse.message: + name: PulseTranscriptionResponse + contentType: application/json + payload: + type: object + required: + - session_id + properties: + session_id: + type: string + description: Unique identifier for the transcription session + transcript: + type: string + description: Partial or complete transcription text for the current segment + full_transcript: + type: string + description: Complete transcription text accumulated so far. 
Only included when `full_transcript` query parameter is set to true AND is_final=true + is_final: + type: boolean + default: false + description: Indicates if this is the final transcription for the current segment + is_last: + type: boolean + default: false + description: Indicates if this is the last transcription in the session + words: + type: array + description: Word-level timestamps (when word_timestamps=true) + items: + type: object + properties: + word: + type: string + description: The transcribed word + start: + type: number + description: Start time in seconds + end: + type: number + description: End time in seconds + confidence: + type: number + description: Confidence score for the word (0.0 to 1.0) + speaker: + type: integer + description: Speaker label (when diarization is enabled) + speaker_confidence: + type: number + description: Confidence score for the speaker assignment (0.0 to 1.0) + utterances: + type: array + description: Sentence-level timestamps (when sentence_timestamps=true) + items: + type: object + properties: + text: + type: string + description: The transcribed sentence + start: + type: number + description: Start time in seconds + end: + type: number + description: End time in seconds + speaker: + type: integer + description: Speaker label (when diarization is enabled) + language: + type: string + description: Detected primary language code, only returned when `is_final=True` + languages: + type: array + description: List of codes of languages detected in the audio, only returned when `is_final=True` + items: + type: string + redacted_entities: + type: array + description: List of redacted entity placeholders (when redact_pii or redact_pci is enabled) + items: + type: string + examples: + - session_id: sess_12345abcde + transcript: Hello, how are you? + is_final: true + is_last: false + language: en + - session_id: session_id_12346abcde + transcript: I paid using my card [CREDITCARDCVV_1] with expiry [TIME_2]. 
+ is_final: true + is_last: true + full_transcript: Hello, my name is [FIRSTNAME_1] [FIRSTNAME_2]. I paid using my card [CREDITCARDCVV_1] with expiry [TIME_2]. + language: en + languages: + - en + words: + - word: I + start: 0.0 + end: 0.2 + confidence: 0.98 + speaker: 0 + speaker_confidence: 0.95 + - word: paid + start: 0.2 + end: 0.5 + confidence: 1.0 + speaker: 0 + speaker_confidence: 0.59 + - word: using + start: 0.5 + end: 0.8 + confidence: 0.73 + speaker: 0 + speaker_confidence: 0.74 + - word: my + start: 0.8 + end: 1.0 + confidence: 0.88 + speaker: 0 + speaker_confidence: 0.92 + - word: card + start: 1.0 + end: 1.3 + confidence: 0.95 + speaker: 0 + speaker_confidence: 0.91 + - word: "[creditcardcvv_1]" + start: 1.3 + end: 1.8 + confidence: 0.99 + speaker: 0 + speaker_confidence: 0.85 + - word: with + start: 1.8 + end: 2.0 + confidence: 0.85 + speaker: 0 + speaker_confidence: 0.87 + - word: expiry + start: 2.0 + end: 2.4 + confidence: 0.9 + speaker: 0 + speaker_confidence: 0.67 + - word: "[time_2]" + start: 2.4 + end: 2.7 + confidence: 0.97 + speaker: 0 + speaker_confidence: 0.76 + utterances: + - text: Hello, my name is [FIRSTNAME_1] [FIRSTNAME_2]. + start: 0.0 + end: 2.2 + speaker: 0 + - text: I paid using my card [CREDITCARDCVV_1] with expiry [TIME_2]. 
+ start: 2.2 + end: 5.0 + speaker: 0 + redacted_entities: + - "[FIRSTNAME_1]" + - "[FIRSTNAME_2]" + - "[CREDITCARDCVV_1]" + - "[TIME_2]" +operations: + sendAudioData: + action: send + channel: + $ref: "#/channels/pulseStream" + summary: Send audio data for transcription + description: Stream audio data in chunks for real-time transcription + messages: + - $ref: "#/channels/pulseStream/messages/pulseAudioData.message" + sendEndSignal: + action: send + channel: + $ref: "#/channels/pulseStream" + summary: Send end of stream signal + description: Signal that audio streaming is complete + messages: + - $ref: "#/channels/pulseStream/messages/pulseEndSignal.message" + receiveTranscription: + action: receive + channel: + $ref: "#/channels/pulseStream" + summary: Receive transcription results + description: Get real-time transcription results as audio is processed + messages: + - $ref: "#/channels/pulseStream/messages/pulseTranscriptionResponse.message" +components: + securitySchemes: + bearerAuth: + type: http + scheme: bearer + bearerFormat: JWT + description: Bearer token authentication using Smallest AI API key + schemas: + TranscriptionResult: + type: object + properties: + session_id: + type: string + description: Unique identifier for the transcription session + transcript: + type: string + description: Transcribed text + full_transcript: + type: string + description: Complete transcription accumulated + is_final: + type: boolean + description: Final transcription flag + is_last: + type: boolean + description: Last transcription flag + ErrorResponse: + type: object + properties: + status: + type: string + enum: + - error + error: + type: object + properties: + message: + type: string + code: + type: string diff --git a/fern/apis/waves/asyncapi/stream-tts-ws-overrides.yml b/fern/apis/waves/asyncapi/stream-tts-ws-overrides.yml index 18965a6..588471a 100644 --- a/fern/apis/waves/asyncapi/stream-tts-ws-overrides.yml +++ 
b/fern/apis/waves/asyncapi/stream-tts-ws-overrides.yml @@ -1,116 +1,148 @@ -# Override file for stream-tts-ws.json -# Restructures the spec to avoid $ref to #/channels/ locations which Fern doesn't support +# Override file for stream-tts-ws.yaml +# Adds unique type names to prevent collisions with other AsyncAPI files +# Maps server reference to match generators.yml environment URLs + +servers: + waves-ws: + host: api.smallest.ai + pathname: /waves/v1/streaming-tts/stream + protocol: wss + +channels: + /waves/v1/streaming-tts/stream: + x-fern-sdk-group-name: Streaming TTS + servers: + - $ref: "#/servers/waves-ws" + messages: + streamingTTSRequest.message: + name: StreamTtsRequest + streamingTTSResponse.message: + name: StreamTtsResponse + description: | + The Lightning SSE API allows you to stream text-to-speech audio in real-time. This is particularly useful for applications requiring low-latency audio generation. + + ### Connection + + Connect to the SSE endpoint: + + ```javascript + const eventSource = new EventSource( + 'https://api.smallest.ai/waves/v1/lightning/stream', + { + headers: { + Authorization: 'Bearer YOUR_API_KEY' + } + } + ); + ``` + + ### Request Format + + Send a POST request with the following JSON structure: + + ```json + { + "text": "Your text to be converted to speech", + "voice_id": "voice_id_here", + "speed": 1, + "sample_rate": 24000 + } + ``` + + #### Parameters + + | Parameter | Type | Required | Description | + | ------------- | ------ | -------- | --------------------------------------------------- | + | `text` | string | Yes | The text to convert to speech (max 1000 characters) | + | `voice_id` | string | Yes | ID of the voice to use | + | `speed` | number | No | Speech speed multiplier (default: 1) | + | `sample_rate` | number | No | Audio sample rate in Hz (default: 24000) | + + ### Example Usage + + ```javascript + const eventSource = new EventSource( + 'https://api.smallest.ai/waves/v1/lightning/stream', + { + headers: { + 
Authorization: 'Bearer YOUR_API_KEY' + } + } + ); + + // Handle incoming audio chunks + eventSource.onmessage = (event) => { + const response = JSON.parse(event.data); + + if (response.status === 'chunk') { + // Decode and play audio + const audioData = atob(response.data.audio); + // Process audio data... + } else if (response.status === 'complete' && response.done) { + console.log('All audio chunks received'); + eventSource.close(); + } else if (response.status === 'error') { + console.error('Error:', response.message); + eventSource.close(); + } + }; + + eventSource.onerror = (error) => { + console.error('SSE error:', error); + eventSource.close(); + }; + ``` + + ### Response Events + + The server will send events with the following formats: + + #### Chunk Event + + ```json + { + "request_id": "047c9091-b770-41d8-b96b-907d1c8406c0", + "status": "chunk", + "data": { + "audio": "base64_encoded_audio_data" + } + } + ``` + + #### Complete Event + + ```json + { + "request_id": "047c9091-b770-41d8-b96b-907d1c8406c0", + "status": "complete", + "message": "All chunks sent", + "done": true + } + ``` + + #### Error Event + + ```json + { + "status": "error", + "message": "Error message", + "errors": [ + /* detailed error information */ + ] + } + ``` + + ### Notes + + - The API automatically chunks long text and streams each chunk separately + - Credits are deducted based on the length of the input text + - The SSE connection will remain open until all chunks are sent or an error occurs + - For optimal performance, keep individual requests under 1000 characters -# Override operations to remove channel refs and use component message refs operations: streamingTTSRequest: - channel: null - messages: - - $ref: "#/components/messages/StreamingTTSRequest" + action: send + x-fern-sdk-method-name: synthesize_streaming streamingTTSResponse: - channel: null - messages: - - $ref: "#/components/messages/StreamingTTSResponse" - -# Add messages to components -components: - messages: - 
StreamingTTSRequest: - name: StreamingTTSRequest - contentType: application/json - payload: - type: object - required: - - voice_id - - text - properties: - voice_id: - type: string - description: "Voice identifier (e.g., 'aditi', 'male-1', 'female-2')" - text: - type: string - description: The text to convert to speech. Can be sent incrementally for streaming input. - is_streaming: - type: boolean - description: Indicates if this is part of a streaming text input sequence - default: false - is_final: - type: boolean - description: Indicates if this is the final chunk in a streaming sequence - default: true - language: - type: string - description: Language code for synthesis - default: en - enum: [en, hi, mr, kn, ta, bn, gu, de, fr, es, it, pl, nl, ru, ar, he] - sample_rate: - type: integer - description: Audio sample rate in Hz - default: 24000 - enum: [16000, 22050, 24000, 44100, 48000] - speed: - type: number - description: Speech speed multiplier - minimum: 0.1 - maximum: 5.0 - default: 1.0 - consistency: - type: number - description: Voice consistency parameter - minimum: 0.0 - maximum: 1.0 - default: 0.5 - enhancement: - type: integer - description: Audio enhancement level - minimum: 0 - maximum: 2 - default: 1 - similarity: - type: number - description: Voice similarity parameter - minimum: 0.0 - maximum: 1.0 - default: 0.0 - max_buffer_flush_ms: - type: integer - description: Maximum buffer time in milliseconds before forcing audio output - minimum: 0 - maximum: 5000 - default: 0 - StreamingTTSResponse: - name: StreamingTTSResponse - contentType: application/json - payload: - type: object - properties: - request_id: - type: string - description: Unique identifier for the streaming TTS request - status: - type: string - enum: [chunk, complete, error] - description: "Status: 'chunk' for audio data, 'complete' for end of stream, 'error' for failures" - data: - type: object - properties: - audio: - type: string - description: Base64-encoded PCM audio chunk - 
format: - type: string - description: Audio format specification - default: pcm_16bit_mono - sample_rate: - type: integer - description: Sample rate of the audio chunk - default: 24000 - error: - type: object - properties: - message: - type: string - description: Error message description - code: - type: string - description: Error code identifier + action: receive + x-fern-sdk-method-name: receive_synthesize_streaming diff --git a/fern/apis/waves/asyncapi/stream-tts-ws.yaml b/fern/apis/waves/asyncapi/stream-tts-ws.yaml index 95f14ff..fd6f7e4 100644 --- a/fern/apis/waves/asyncapi/stream-tts-ws.yaml +++ b/fern/apis/waves/asyncapi/stream-tts-ws.yaml @@ -2,46 +2,47 @@ asyncapi: 3.0.0 info: title: Waves Streaming Text-to-Speech API version: 1.0.0 - description: WebSocket-based streaming text-to-speech API with real-time synthesis + description: + WebSocket-based streaming text-to-speech API with real-time synthesis for low-latency applications contact: name: Waves API Support - url: https://waves-api.smallest.ai/support email: support@smallest.ai license: name: Proprietary servers: production: - host: waves-api.smallest.ai - pathname: /api/v1/streaming-tts/stream + host: api.smallest.ai + pathname: /waves/v1/streaming-tts/stream protocol: wss description: Production WebSocket API endpoint for streaming TTS security: - - $ref: '#/components/securitySchemes/bearerAuth' + - $ref: "#/components/securitySchemes/bearerAuth" channels: - /api/v1/streaming-tts/stream: - address: /api/v1/streaming-tts/stream + /waves/v1/streaming-tts/stream: + address: /waves/v1/streaming-tts/stream messages: - streamingTTSRequest.message: - name: StreamingTTSRequest + streamTtsRequest.message: + name: StreamTtsRequest contentType: application/json payload: type: object required: - - voice_id - - text + - voice_id + - text properties: voice_id: type: string description: Voice identifier (e.g., 'aditi', 'male-1', 'female-2') examples: - - aditi - - ryan - - male-1 - - female-2 + - aditi + - ryan 
+ - male-1 + - female-2 text: type: string - description: The text to convert to speech. Can be sent incrementally + description: + The text to convert to speech. Can be sent incrementally for streaming input. is_streaming: type: boolean @@ -56,35 +57,36 @@ channels: description: Language code for synthesis default: en enum: - - en - - hi - - mr - - kn - - ta - - bn - - gu - - de - - fr - - es - - it - - pl - - nl - - ru - - ar - - he + - en + - hi + - mr + - kn + - ta + - bn + - gu + - de + - fr + - es + - it + - pl + - nl + - ru + - ar + - he sample_rate: type: integer description: Audio sample rate in Hz default: 24000 enum: - - 16000 - - 22050 - - 24000 - - 44100 - - 48000 + - 16000 + - 22050 + - 24000 + - 44100 + - 48000 speed: type: number - description: Speech speed multiplier (0.5 = half speed, 1.0 = normal + description: + Speech speed multiplier (0.5 = half speed, 1.0 = normal speed, 2.0 = double speed) minimum: 0.1 maximum: 5.0 @@ -109,33 +111,34 @@ channels: default: 0.0 max_buffer_flush_ms: type: integer - description: Maximum buffer time in milliseconds before forcing audio + description: + Maximum buffer time in milliseconds before forcing audio output for low-latency applications minimum: 0 maximum: 5000 default: 0 examples: - - voice_id: aditi - text: Hello world, this is a test of the Smallest AI streaming TTS. - is_streaming: false - is_final: true - language: en - sample_rate: 24000 - speed: 1.0 - consistency: 0.5 - enhancement: 1 - similarity: 0.0 - max_buffer_flush_ms: 100 - - voice_id: aditi - text: 'Streaming ' - is_streaming: true - is_final: false - language: en - sample_rate: 24000 - speed: 1.0 - max_buffer_flush_ms: 100 - streamingTTSResponse.message: - name: StreamingTTSResponse + - voice_id: aditi + text: Hello world, this is a test of the Smallest AI streaming TTS. 
+ is_streaming: false + is_final: true + language: en + sample_rate: 24000 + speed: 1.0 + consistency: 0.5 + enhancement: 1 + similarity: 0.0 + max_buffer_flush_ms: 100 + - voice_id: aditi + text: "Streaming " + is_streaming: true + is_final: false + language: en + sample_rate: 24000 + speed: 1.0 + max_buffer_flush_ms: 100 + streamTtsResponse.message: + name: StreamTtsResponse contentType: application/json payload: type: object @@ -146,11 +149,12 @@ channels: status: type: string enum: - - chunk - - complete - - error - description: 'Status of the streaming TTS request: ''chunk'' for audio - data, ''complete'' for end of stream, ''error'' for failures' + - chunk + - complete + - error + description: + "Status of the streaming TTS request: 'chunk' for audio + data, 'complete' for end of stream, 'error' for failures" data: type: object properties: @@ -178,21 +182,23 @@ operations: streamingTTSRequest: action: receive channel: - $ref: '#/channels/~1api~1v1~1streaming-tts~1stream' + $ref: "#/channels/~1waves~1v1~1streaming-tts~1stream" summary: Send streaming text-to-speech request - description: Send text for real-time synthesis with configurable streaming parameters. + description: + Send text for real-time synthesis with configurable streaming parameters. Supports both single requests and incremental streaming input. messages: - - $ref: '#/channels/~1api~1v1~1streaming-tts~1stream/messages/streamingTTSRequest.message' + - $ref: "#/channels/~1waves~1v1~1streaming-tts~1stream/messages/streamTtsRequest.message" streamingTTSResponse: action: send channel: - $ref: '#/channels/~1api~1v1~1streaming-tts~1stream' + $ref: "#/channels/~1waves~1v1~1streaming-tts~1stream" summary: Receive streaming audio chunks - description: Receive real-time PCM audio chunks as they are synthesized. Audio + description: + Receive real-time PCM audio chunks as they are synthesized. Audio can be played immediately or saved to files. 
messages: - - $ref: '#/channels/~1api~1v1~1streaming-tts~1stream/messages/streamingTTSResponse.message' + - $ref: "#/channels/~1waves~1v1~1streaming-tts~1stream/messages/streamTtsResponse.message" components: securitySchemes: bearerAuth: @@ -210,7 +216,7 @@ components: status: type: string enum: - - chunk + - chunk data: type: object properties: @@ -224,11 +230,11 @@ components: type: integer description: Audio sample rate in Hz required: - - audio + - audio required: - - request_id - - status - - data + - request_id + - status + - data CompletionStatus: type: object properties: @@ -238,11 +244,11 @@ components: status: type: string enum: - - complete + - complete description: Indicates that the streaming synthesis is complete required: - - request_id - - status + - request_id + - status ErrorResponse: type: object properties: @@ -252,7 +258,7 @@ components: status: type: string enum: - - error + - error error: type: object properties: @@ -263,12 +269,12 @@ components: type: string description: Machine-readable error code required: - - message - - code + - message + - code required: - - request_id - - status - - error + - request_id + - status + - error StreamingTTSConfig: type: object description: Configuration object for streaming TTS synthesis @@ -298,4 +304,4 @@ components: type: integer description: Maximum buffer time before forcing output required: - - voice_id + - voice_id diff --git a/fern/apis/waves/generators.yml b/fern/apis/waves/generators.yml index 44a3e0b..c0f12f2 100644 --- a/fern/apis/waves/generators.yml +++ b/fern/apis/waves/generators.yml @@ -1,14 +1,33 @@ # yaml-language-server: $schema=https://schema.buildwithfern.dev/generators-yml.json +# NOTE: SDK generation for waves is now handled by the unified config (fern/apis/unified/generators.yml). +# This file only retains the API definition for reference by the unified config and docs. 
+ api: + default-environment: Production + default-url: waves + environments: + Production: + urls: + waves: https://api.smallest.ai + waves-ws: wss://api.smallest.ai specs: # OpenAPI specs - REST endpoints - openapi: openapi/waves-api.yaml + overrides: openapi/waves-api-overrides.yaml - openapi: openapi/get-voices-openapi.yaml + overrides: openapi/get-voices-openapi-overrides.yaml - openapi: openapi/add-voice-openapi.yaml + overrides: openapi/add-voice-openapi-overrides.yaml - openapi: openapi/get-cloned-voices-openapi.yaml + overrides: openapi/get-cloned-voices-openapi-overrides.yaml - openapi: openapi/delete-cloned-voice-openapi.yaml + overrides: openapi/delete-cloned-voice-openapi-overrides.yaml - openapi: openapi/asr-openapi.yaml overrides: openapi/asr-openapi-overrides.yaml + - openapi: openapi/lightning-v3.1-openapi.yaml + overrides: openapi/lightning-v3.1-openapi-overrides.yaml + - openapi: openapi/pulse-stt-openapi.yaml + overrides: openapi/pulse-stt-openapi-overrides.yaml # AsyncAPI specs - WebSocket streaming - asyncapi: asyncapi/stream-tts-ws.yaml @@ -19,43 +38,7 @@ api: overrides: asyncapi/lightning-asr-ws-overrides.yml - asyncapi: asyncapi/lightning-v2-ws.yaml overrides: asyncapi/lightning-v2-ws-overrides.yml - -groups: - ts-sdk: - generators: - - name: fernapi/fern-typescript-node-sdk - version: 2.6.3 - output: - location: npm - package-name: smallest-ai-waves-sdk - github: - repository: fern-demo/smallest-ai-ts-sdk - config: - namespaceExport: SmallestWaves - python-sdk: - generators: - - name: fernapi/fern-python-sdk - version: 4.25.6 - output: - location: pypi - package-name: smallest-ai-waves-sdk - github: - repository: fern-demo/smallest-ai-python-sdk - config: - client: - class_name: SmallestWaves - filename: client.py - pydantic_config: - skip_validation: true - exclude_types_from_init_exports: true - go-sdk: - generators: - - name: fernapi/fern-go-sdk - version: 1.1.0 - # output: - # location: local-file-system - # path: 
../../../sdks/waves/go - github: - repository: fern-demo/smallest-ai-go-sdk - config: - packageName: smallestwaves + - asyncapi: asyncapi/lightning-v3.1-ws.yaml + overrides: asyncapi/lightning-v3.1-ws-overrides.yml + - asyncapi: asyncapi/pulse-stt-ws.yaml + overrides: asyncapi/pulse-stt-ws-overrides.yml diff --git a/fern/apis/waves/openapi/add-voice-openapi-overrides.yaml b/fern/apis/waves/openapi/add-voice-openapi-overrides.yaml new file mode 100644 index 0000000..74644bb --- /dev/null +++ b/fern/apis/waves/openapi/add-voice-openapi-overrides.yaml @@ -0,0 +1,13 @@ +paths: + /waves/v1/lightning-large/add_voice: + post: + operationId: add_voice + x-fern-sdk-method-name: add_voice + x-fern-server-name: waves + x-fern-audiences: + - v2 + - v3 + - v4 + summary: Add your Voice + description: | + Add your voice using the Waves API. diff --git a/fern/apis/waves/openapi/add-voice-openapi.yaml b/fern/apis/waves/openapi/add-voice-openapi.yaml index 150a000..cbb51ad 100644 --- a/fern/apis/waves/openapi/add-voice-openapi.yaml +++ b/fern/apis/waves/openapi/add-voice-openapi.yaml @@ -6,11 +6,11 @@ info: Currently featuring our two models, Lightning & Lightning Large, with more models coming soon. 
version: 1.0.0 servers: - - url: https://waves-api.smallest.ai + - url: https://api.smallest.ai description: Waves API server paths: - /api/v1/lightning-large/add_voice: + /waves/v1/lightning-large/add_voice: post: tags: - Voice Cloning diff --git a/fern/apis/waves/openapi/ai_examples_override.yml b/fern/apis/waves/openapi/ai_examples_override.yml new file mode 100644 index 0000000..edf66f6 --- /dev/null +++ b/fern/apis/waves/openapi/ai_examples_override.yml @@ -0,0 +1,137 @@ +paths: + /waves/v1/lightning-large: + delete: + x-fern-examples: + - request: + voiceId: a1b2c3d4-e5f6-7890-ab12-cd34ef567890 + response: + body: + success: true + voiceId: a1b2c3d4-e5f6-7890-ab12-cd34ef567890 + /waves/v1/lightning-large/stream: + post: + x-fern-examples: + - request: + text: Welcome to the Waves API. This is a demonstration of real-time text-to-speech streaming using the Lightning Large model. + voice_id: en_us_male_01 + /waves/v1/lightning-v2/stream: + post: + x-fern-examples: + - request: + text: Welcome to the Waves Lightning v2 text-to-speech streaming API. This service provides real-time audio synthesis for your applications. + voice_id: en_us_male_01 + /waves/v1/lightning/get_speech: + post: + x-fern-examples: + - request: + text: Welcome to the Waves text-to-speech API. This is a sample sentence to demonstrate speech synthesis. + voice_id: lightning_en_us_male_01 + sample_rate: 22050 + speed: 1 + language: en + output_format: wav + /waves/v1/lightning/get_voices: + get: + x-fern-examples: + - path-parameters: + model: lightning + response: + body: + voices: + - voiceId: wave-voice-1234 + displayName: Emma + tags: + language: + - en-US + accent: American + gender: female + /waves/v1/lightning-large/add_voice: + post: + x-fern-examples: + - request: + displayName: John Doe Voice Sample + file: john_doe_sample.wav + response: + body: + message: Voice successfully added. 
+ data: + voiceId: a1b2c3d4-e5f6-7890-abcd-ef1234567890 + model: lightning-large-v1 + status: processing + /waves/v1/lightning-v2/get_speech: + post: + x-fern-examples: + - request: + text: Hello, welcome to the Waves text-to-speech API. This is a sample sentence to demonstrate speech synthesis. + voice_id: en_us_001 + sample_rate: 22050 + speed: 1.1 + consistency: 0.6 + similarity: 0.3 + enhancement: 1 + language: en + output_format: wav + pronunciation_dicts: + - dict_english_us + /waves/v1/lightning-large/get_cloned_voices: + get: + x-fern-examples: + - response: + body: + voices: + - displayName: Emma Johnson + accent: British English + tags: + - friendly + - professional + - clear + voiceId: voice_123abc456def + model: neural-tts-v2 + status: active + createdAt: '2024-01-15T09:30:00Z' + /waves/v1/lightning-large/get_speech: + post: + x-fern-examples: + - request: + text: Welcome to the Waves text-to-speech API. This is a sample sentence to demonstrate speech synthesis. + voice_id: en_us_001 + sample_rate: 22050 + speed: 1.1 + consistency: 0.6 + similarity: 0.3 + enhancement: 1 + language: en + output_format: wav + pronunciation_dicts: + - tech_terms_v1 + - brand_names_2024 + response: + body: UklGRiQAAABXQVZFZm10IBAAAAABAAEAQB8AAIA+AAACABAAZGF0YQAAAAA= + /waves/v1/pronunciation-dicts: + delete: + x-fern-examples: + - request: + id: 64f9a3b2c1d4e5f67890abcd + response: + body: + id: 64f9a3b2c1d4e5f67890abcd + deleted: true + /waves/v1/lightning-v3.1/stream: + post: + x-fern-examples: + - request: + text: Welcome to our virtual assistant. How can I help you today? + voice_id: lightning_v3_1_female_en_us_001 + /waves/v1/lightning-v3.1/get_speech: + post: + x-fern-examples: + - request: + text: Hello, welcome to our text-to-speech service. How can I assist you today? 
+ voice_id: lightning_v3.1_en_us_female_01 + response: + body: + audio_url: https://api.example.com/audio/123e4567-e89b-12d3-a456-426614174000.mp3 + format: mp3 + sample_rate: 44100 + duration_seconds: 5.2 + success: true diff --git a/fern/apis/waves/openapi/asr-openapi-overrides.yaml b/fern/apis/waves/openapi/asr-openapi-overrides.yaml index c58d1bc..c9e3713 100644 --- a/fern/apis/waves/openapi/asr-openapi-overrides.yaml +++ b/fern/apis/waves/openapi/asr-openapi-overrides.yaml @@ -1,9 +1,18 @@ # Override file for asr-openapi.yaml # Renames the 'transcription' property to 'text' in SpeechToTextResponse +# Specifies waves server for all endpoints +# Groups under Speech to Text sub-client with method name 'lightning' +# Assigns unique operationId to avoid naming conflict with pulse endpoint +# Fixes 413/429 examples to match the merged ErrorResponse schema paths: - /api/v1/lightning/get_text: + /waves/v1/lightning/get_text: post: + x-fern-server-name: waves + x-fern-audiences: + - v4 + x-fern-sdk-method-name: transcribe_lightning + operationId: transcribe_lightning responses: "200": content: @@ -12,3 +21,19 @@ paths: properties: transcription: x-fern-property-name: text + "413": + content: + application/json: + example: + error: + code: "413" + message: "File size exceeds maximum limit of 25MB" + status: error + "429": + content: + application/json: + example: + error: + code: "429" + message: "Rate limit exceeded. Please try again later." + status: error diff --git a/fern/apis/waves/openapi/asr-openapi.yaml b/fern/apis/waves/openapi/asr-openapi.yaml index 08b50fe..8335e54 100644 --- a/fern/apis/waves/openapi/asr-openapi.yaml +++ b/fern/apis/waves/openapi/asr-openapi.yaml @@ -6,11 +6,11 @@ info: Upload audio files and receive transcribed text using the Lightning model. 
version: 1.0.0 servers: - - url: https://waves-api.smallest.ai + - url: https://api.smallest.ai description: Waves API server paths: - /api/v1/lightning/get_text: + /waves/v1/lightning/get_text: post: tags: - Speech to Text @@ -41,14 +41,6 @@ paths: example: url: "https://example.com/audio.mp3" parameters: - - name: model - in: query - required: true - schema: - type: string - enum: [lightning] - example: lightning - description: The ASR model to use for transcription - name: language in: query required: false diff --git a/fern/apis/waves/openapi/delete-cloned-voice-openapi-overrides.yaml b/fern/apis/waves/openapi/delete-cloned-voice-openapi-overrides.yaml new file mode 100644 index 0000000..373438b --- /dev/null +++ b/fern/apis/waves/openapi/delete-cloned-voice-openapi-overrides.yaml @@ -0,0 +1,12 @@ +paths: + /waves/v1/lightning-large: + delete: + operationId: delete_voice + x-fern-sdk-method-name: delete_voice + x-fern-server-name: waves + x-fern-audiences: + - v2 + - v3 + - v4 + summary: Delete Cloned Voice + description: Delete a cloned voice using the new Waves API. diff --git a/fern/apis/waves/openapi/delete-cloned-voice-openapi.yaml b/fern/apis/waves/openapi/delete-cloned-voice-openapi.yaml index d152ead..6647fee 100644 --- a/fern/apis/waves/openapi/delete-cloned-voice-openapi.yaml +++ b/fern/apis/waves/openapi/delete-cloned-voice-openapi.yaml @@ -6,11 +6,11 @@ info: Currently featuring our two models, Lightning & Lightning Large, with more models coming soon. 
version: 1.0.0 servers: - - url: https://waves-api.smallest.ai + - url: https://api.smallest.ai description: Waves API server paths: - /api/v1/lightning-large: + /waves/v1/lightning-large: delete: tags: - Voice Cloning diff --git a/fern/apis/waves/openapi/get-cloned-voices-openapi-overrides.yaml b/fern/apis/waves/openapi/get-cloned-voices-openapi-overrides.yaml new file mode 100644 index 0000000..3ae712c --- /dev/null +++ b/fern/apis/waves/openapi/get-cloned-voices-openapi-overrides.yaml @@ -0,0 +1,12 @@ +paths: + /waves/v1/lightning-large/get_cloned_voices: + get: + operationId: get_cloned_voices + x-fern-sdk-method-name: get_cloned_voices + x-fern-server-name: waves + x-fern-audiences: + - v2 + - v3 + - v4 + summary: Get your cloned Voices + description: Retrieve your cloned voices. diff --git a/fern/apis/waves/openapi/get-cloned-voices-openapi.yaml b/fern/apis/waves/openapi/get-cloned-voices-openapi.yaml index b1c2081..b5d04e2 100644 --- a/fern/apis/waves/openapi/get-cloned-voices-openapi.yaml +++ b/fern/apis/waves/openapi/get-cloned-voices-openapi.yaml @@ -6,11 +6,11 @@ info: Currently featuring our voice cloning model, Lightning Large. 
version: 1.0.0 servers: - - url: https://waves-api.smallest.ai + - url: https://api.smallest.ai description: Waves API server paths: - /api/v1/lightning-large/get_cloned_voices: + /waves/v1/lightning-large/get_cloned_voices: get: tags: - Voice Cloning diff --git a/fern/apis/waves/openapi/get-voices-openapi-overrides.yaml b/fern/apis/waves/openapi/get-voices-openapi-overrides.yaml new file mode 100644 index 0000000..3a4901e --- /dev/null +++ b/fern/apis/waves/openapi/get-voices-openapi-overrides.yaml @@ -0,0 +1,25 @@ +# Override file for get-voices-openapi.yaml +# Adds lightning-v3.1 to the model enum so SDK users can discover voices for that model +# Specifies waves server for all endpoints + +paths: + /waves/v1/{model}/get_voices: + get: + operationId: get_voices + x-fern-sdk-method-name: get_voices + x-fern-server-name: waves + x-fern-audiences: + - v2 + - v3 + - v4 + description: Get voices supported for a given model using the new Waves API. + parameters: + - in: path + name: model + schema: + enum: + - lightning + - lightning-large + - lightning-v2 + - lightning-v3.1 + default: lightning-v3.1 diff --git a/fern/apis/waves/openapi/get-voices-openapi.yaml b/fern/apis/waves/openapi/get-voices-openapi.yaml index a8f3783..882515f 100644 --- a/fern/apis/waves/openapi/get-voices-openapi.yaml +++ b/fern/apis/waves/openapi/get-voices-openapi.yaml @@ -6,11 +6,11 @@ info: Currently featuring our two models, Lightning & Lightning Large, with more models coming soon. version: 1.0.0 servers: - - url: https://waves-api.smallest.ai + - url: https://api.smallest.ai description: Waves API server paths: - /api/v1/{model}/get_voices: + /waves/v1/{model}/get_voices: get: tags: - Voices @@ -23,7 +23,8 @@ paths: required: true schema: type: string - enum: [lightning, lightning-large, lightning-v2] + enum: [lightning, lightning-large, lightning-v2, lightning-v3.1] + default: lightning-v3.1 description: The model to use for speech synthesis. 
responses: '200': diff --git a/fern/apis/waves/openapi/lightning-v3.1-openapi-overrides.yaml b/fern/apis/waves/openapi/lightning-v3.1-openapi-overrides.yaml new file mode 100644 index 0000000..4a78443 --- /dev/null +++ b/fern/apis/waves/openapi/lightning-v3.1-openapi-overrides.yaml @@ -0,0 +1,44 @@ +# Override file for lightning-v3.1-openapi.yaml +# Changes tag to Text to Speech for proper grouping +# Fixes output_format enum and sample_rate enum to match backend +# Specifies waves server for all endpoints + +paths: + /waves/v1/lightning-v3.1/get_speech: + post: + operationId: synthesize_lightning_v3_1 + x-fern-sdk-method-name: synthesize_lightning_v3_1 + x-fern-server-name: waves + x-fern-audiences: + - v4 + tags: + - Text to Speech + summary: Generate speech (Lightning v3.1) + /waves/v1/lightning-v3.1/stream: + post: + operationId: synthesize_sse_lightning_v3_1 + x-fern-sdk-method-name: synthesize_sse_lightning_v3_1 + x-fern-server-name: waves + x-fern-audiences: + - v4 + tags: + - Text to Speech + summary: Stream speech (Lightning v3.1 SSE) + +components: + schemas: + LightningV31Request: + properties: + # The following are the correct enum values for the output_format + output_format: + enum: + - mp3 + - pcm + - wav + - ulaw + - alaw + sample_rate: + enum: + - 8000 + - 16000 + - 24000 diff --git a/fern/apis/waves/openapi/lightning-v3.1-openapi.yaml b/fern/apis/waves/openapi/lightning-v3.1-openapi.yaml new file mode 100644 index 0000000..1b66e79 --- /dev/null +++ b/fern/apis/waves/openapi/lightning-v3.1-openapi.yaml @@ -0,0 +1,236 @@ +openapi: 3.0.1 +info: + title: Lightning V3.1 API + description: | + API for the Lightning V3.1 text-to-speech model. + Features improved speech synthesis with support for English and Hindi languages. 
+ version: 3.1.0 +servers: + - url: https://api.smallest.ai + description: Waves API server + +paths: + /waves/v1/lightning-v3.1/get_speech: + post: + tags: + - Lightning V3.1 + operationId: synthesizeLightningV31Speech + summary: Generate speech from text (Lightning V3.1) + description: | + Converts provided text to speech using the Lightning V3.1 model. + Send `Accept: audio/wav` to receive binary WAV audio. Omitting it may result in empty or unplayable responses. + parameters: + - name: Accept + in: header + required: true + schema: + type: string + enum: [audio/wav] + default: audio/wav + description: Must be `audio/wav` to receive binary audio. Required for proper playback. + security: + - bearerAuth: [] + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/LightningV31Request" + example: + text: "Hey i am your a text to speech model" + voice_id: "daniel" + output_format: "wav" + sample_rate: 44100 + speed: 1.0 + responses: + "200": + description: Synthesized speech retrieved successfully. + content: + audio/wav: + schema: + type: string + format: binary + description: A PCM int16 WAV file at the specified sample rate. + "400": + description: Bad request. + content: + application/json: + schema: + type: object + properties: + error: + type: string + description: Error type. + message: + type: string + description: Error message. + example: + error: "InvalidRequest" + message: "The 'text' field is required." + "401": + description: Unauthorized. + content: + application/json: + schema: + type: object + properties: + error: + type: string + description: Error type. + message: + type: string + description: Error message. + example: + error: "Unauthorized" + message: "Bearer token is missing or invalid." + "500": + description: Server error occurred. + content: + application/json: + schema: + type: object + properties: + error: + type: string + description: Error type. 
+ message: + type: string + description: Error message. + example: + error: "InternalServerError" + message: "An unexpected error occurred." + + /waves/v1/lightning-v3.1/stream: + post: + tags: + - Lightning V3.1 + operationId: streamLightningV31Speech + summary: Stream speech from text (Lightning V3.1) + description: Converts provided text to speech using the Lightning V3.1 model with streaming response. + security: + - bearerAuth: [] + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/LightningV31Request" + responses: + "200": + description: Synthesized speech retrieved successfully. + content: + text/event-stream: + example: + data: | + event: chunk + data: + done: false + + "400": + description: Bad request. + content: + application/json: + schema: + type: object + properties: + error: + type: string + description: Error type. + message: + type: string + description: Error message. + example: + error: "InvalidRequest" + message: "The 'text' field is required." + "401": + description: Unauthorized. + content: + application/json: + schema: + type: object + properties: + error: + type: string + description: Error type. + message: + type: string + description: Error message. + example: + error: "Unauthorized" + message: "Bearer token is missing or invalid." + "500": + description: Server error occurred. + content: + application/json: + schema: + type: object + properties: + error: + type: string + description: Error type. + message: + type: string + description: Error message. + example: + error: "InternalServerError" + message: "An unexpected error occurred." + +components: + schemas: + LightningV31Request: + type: object + required: + - text + - voice_id + properties: + text: + type: string + description: The text to convert to speech. + default: "Hey i am your a text to speech model" + voice_id: + type: string + description: The voice identifier to use for speech generation. 
+ default: "daniel" + sample_rate: + type: integer + description: The sample rate for the generated audio. + enum: + - 8000 + - 16000 + - 24000 + - 44100 + default: 44100 + speed: + type: number + description: The speed of the generated speech. + minimum: 0.5 + maximum: 2 + default: 1.0 + language: + type: string + description: Determines how numbers are spelled out. If set to 'en', numbers will be read in English. If set to 'hi', numbers will be read in Hindi. + default: "en" + enum: + - en + - hi + output_format: + type: string + description: The format of the output audio. + default: "pcm" + enum: + - pcm + - mp3 + - wav + - mulaw + pronunciation_dicts: + type: array + items: + type: string + description: The ID of the pronunciation dictionary to use for speech generation. + description: The IDs of the pronunciation dictionaries to use for speech generation. + + securitySchemes: + bearerAuth: + type: http + scheme: bearer + bearerFormat: JWT diff --git a/fern/apis/waves/openapi/pulse-stt-openapi-overrides.yaml b/fern/apis/waves/openapi/pulse-stt-openapi-overrides.yaml new file mode 100644 index 0000000..893d46f --- /dev/null +++ b/fern/apis/waves/openapi/pulse-stt-openapi-overrides.yaml @@ -0,0 +1,36 @@ +# Override file for pulse-stt-openapi.yaml +# Ensures proper grouping under Speech to Text section +# Specifies waves server for all endpoints +# Assigns unique operationId to avoid naming conflict with lightning endpoint +# Fixes 413/429 examples to match the merged ErrorResponse schema +# (the REST ErrorResponse gets merged with the AsyncAPI ErrorResponse, +# so error must be an object with code/message, not a plain string) + +paths: + /waves/v1/pulse/get_text: + post: + x-fern-server-name: waves + x-fern-audiences: + - v4 + x-fern-sdk-method-name: transcribe_pulse + operationId: transcribe_pulse + tags: + - Speech to Text + summary: Pulse (Pre-Recorded) + responses: + "413": + content: + application/json: + example: + error: + code: "413" + message: "File 
size exceeds maximum limit of 25MB" + status: error + "429": + content: + application/json: + example: + error: + code: "429" + message: "Rate limit exceeded. Please try again later." + status: error diff --git a/fern/apis/waves/openapi/pulse-stt-openapi.yaml b/fern/apis/waves/openapi/pulse-stt-openapi.yaml new file mode 100644 index 0000000..a0153dc --- /dev/null +++ b/fern/apis/waves/openapi/pulse-stt-openapi.yaml @@ -0,0 +1,337 @@ +openapi: 3.0.1 +info: + title: Pulse ASR API + description: | + API for speech-to-text conversion using the Pulse ASR model. + Upload audio files and receive transcribed text using the Pulse model. + version: 1.0.0 +servers: + - url: https://api.smallest.ai + description: Waves API server + +paths: + /waves/v1/pulse/get_text: + post: + tags: + - Speech to Text + summary: Convert speech to text + description: Convert speech to text using the Pulse ASR model. Supports two input methods - raw audio bytes (application/octet-stream) with query parameters, or audio URL (application/json) with URL in body. + operationId: speechToText + security: + - BearerAuth: [] + requestBody: + required: true + content: + application/octet-stream: + schema: + type: string + format: binary + description: Raw audio bytes. Content-Type header should specify the audio format (e.g., audio/wav, audio/mp3). All parameters are passed as query parameters. + application/json: + schema: + type: object + properties: + url: + type: string + format: uri + description: URL to the audio file to transcribe. 
Must be publicly accessible + example: "https://example.com/audio.mp3" + required: + - url + example: + url: "https://example.com/audio.mp3" + parameters: + - name: language + in: query + required: false + schema: + type: string + enum: + [ + "it", + "es", + "en", + "pt", + "hi", + "de", + "fr", + "uk", + "ru", + "kn", + "ml", + "pl", + "mr", + "gu", + "cs", + "sk", + "te", + "or", + "nl", + "bn", + "lv", + "et", + "ro", + "pa", + "fi", + "sv", + "bg", + "ta", + "hu", + "da", + "lt", + "mt", + "multi", + ] + default: en + description: Language of the audio file. Use `multi` for automatic language detection + - name: webhook_url + in: query + required: false + schema: + type: string + format: uri + description: URL to the webhook to receive the transcription results + example: "https://example.com/webhook" + - name: webhook_extra + in: query + required: false + schema: + type: string + description: Extra parameters to pass to the transcription. These will be added to the request body as a JSON object. Add comma separated key-value pairs to the query string. 
eg "custom_key:custom_value,custom_key2:custom_value2" + example: "custom_key:custom_value,custom_key2:custom_value2" + - name: word_timestamps + in: query + required: false + schema: + type: boolean + default: false + description: Whether to include word and utterance level timestamps in the response + - name: diarize + in: query + required: false + schema: + type: boolean + default: false + description: Whether to perform speaker diarization + - name: age_detection + in: query + required: false + schema: + type: string + enum: ["true", "false"] + default: "false" + description: Whether to predict age group of the speaker + - name: gender_detection + in: query + required: false + schema: + type: string + enum: ["true", "false"] + default: "false" + description: Whether to predict the gender of the speaker + - name: emotion_detection + in: query + required: false + schema: + type: string + enum: ["true", "false"] + default: "false" + description: Whether to predict speaker emotions + responses: + "200": + description: Speech transcribed successfully + content: + application/json: + schema: + type: object + properties: + status: + type: string + description: Status of the transcription request + example: success + transcription: + type: string + description: The transcribed text from the audio file + example: "Hello world." + audio_length: + type: number + description: Duration of the audio file in seconds + example: 1.7 + words: + type: array + description: Word-level timestamps in seconds. + items: + type: object + properties: + start: + type: number + example: 0.0 + end: + type: number + example: 0.5 + speaker: + type: string + description: Speaker if diarization is enabled + example: "speaker_0" + word: + type: string + example: "Hello" + utterances: + type: array + description: List of utterances with start and end times + items: + type: object + properties: + text: + type: string + example: "Hello world." 
+ start: + type: number + example: 0.0 + end: + type: number + example: 0.9 + speaker: + type: string + description: Speaker if diarization is enabled + example: "speaker_0" + age: + type: string + description: Predicted age group of the speaker (e.g., infant, teenager, adult, old) + example: adult + enum: [infant, teenager, adult, old] + gender: + type: string + description: Predicted gender of the speaker if requested + example: male + enum: [male, female] + emotions: + type: object + description: Predicted emotions of the speaker if requested + properties: + happiness: + type: number + format: float + example: 0.8 + sadness: + type: number + format: float + example: 0.15 + disgust: + type: number + format: float + example: 0.02 + fear: + type: number + format: float + example: 0.03 + anger: + type: number + format: float + example: 0.05 + metadata: + type: object + description: Metadata about the transcription + properties: + filename: + type: string + description: Name of the audio file + example: "audio.mp3" + duration: + type: number + description: Duration of the audio file in minutes + example: 1.7 + fileSize: + type: number + description: Size of the audio file in bytes + example: 1000000 + example: + status: "success" + transcription: "Hello world." + words: + - start: 0.0 + end: 0.5 + speaker: "speaker_0" + word: "Hello" + - start: 0.6 + end: 0.9 + speaker: "speaker_0" + word: "world." + utterances: + - text: "Hello world." + start: 0.0 + end: 0.9 + speaker: "speaker_0" + age: adult + gender: "male" + emotions: + happiness: 0.8 + sadness: 0.15 + disgust: 0.02 + fear: 0.03 + anger: 0.05 + metadata: + filename: "audio.mp3" + duration: 1.7 + fileSize: 1000000 + "400": + description: Bad request - Invalid parameters or file format + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + example: + error: "Invalid file format. 
Supported formats: audio/*" + "401": + description: Unauthorized - Invalid or missing authentication + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + example: + error: "Unauthorized - Invalid API key" + "413": + description: Payload too large - File size exceeds limit + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + example: + error: "File size exceeds maximum limit of 25MB" + "429": + description: Too many requests - Rate limit exceeded + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + example: + error: "Rate limit exceeded. Please try again later." + "500": + description: Internal server error + content: + application/json: + schema: + $ref: "#/components/schemas/ErrorResponse" + example: + error: "Internal server error" + +components: + securitySchemes: + BearerAuth: + type: http + scheme: bearer + bearerFormat: JWT + description: | + API key authentication using Bearer token format. 
+ Include your API key in the Authorization header as: `Bearer YOUR_API_KEY` + + schemas: + ErrorResponse: + type: object + properties: + error: + type: string + description: Error message describing what went wrong + required: + - error + diff --git a/fern/apis/waves/openapi/waves-api-overrides.yaml b/fern/apis/waves/openapi/waves-api-overrides.yaml new file mode 100644 index 0000000..58fe400 --- /dev/null +++ b/fern/apis/waves/openapi/waves-api-overrides.yaml @@ -0,0 +1,190 @@ +# Override file for waves-api.yaml +# Reorganizes tags to match desired section structure +# Fixes output_format enum to match backend (pcm, wav, ulaw, alaw instead of pcm, mp3, wav, mulaw) +# Specifies waves server for all endpoints + +paths: + /waves/v1/pronunciation-dicts: + get: + operationId: get_pronunciation_dicts + x-fern-sdk-method-name: get_pronunciation_dicts + x-fern-server-name: waves + x-fern-audiences: + - v4 + tags: + - Pronunciation Dictionaries + post: + operationId: create_pronunciation_dict + x-fern-sdk-method-name: create_pronunciation_dict + x-fern-server-name: waves + x-fern-audiences: + - v4 + tags: + - Pronunciation Dictionaries + put: + operationId: update_pronunciation_dict + x-fern-sdk-method-name: update_pronunciation_dict + x-fern-server-name: waves + x-fern-audiences: + - v4 + tags: + - Pronunciation Dictionaries + delete: + operationId: delete_pronunciation_dict + x-fern-sdk-method-name: delete_pronunciation_dict + x-fern-server-name: waves + x-fern-audiences: + - v4 + tags: + - Pronunciation Dictionaries + /waves/v1/lightning/get_speech: + post: + operationId: synthesize_lightning + x-fern-sdk-method-name: synthesize_lightning + x-fern-server-name: waves + x-fern-audiences: + - v2 + - v3 + - v4 + tags: + - Lightning + summary: Text to speech + description: Get speech for given text using the Waves API + requestBody: + content: + application/json: + schema: + properties: + output_format: + enum: + - pcm + - wav + - ulaw + - alaw + 
/waves/v1/lightning-large/get_speech: + post: + operationId: synthesize_lightning_large + x-fern-sdk-method-name: synthesize_lightning_large + x-fern-server-name: waves + x-fern-audiences: + - v2 + - v3 + - v4 + tags: + - Lightning Large + summary: Text to Speech + description: Get speech for given text using the Waves API + requestBody: + content: + application/json: + schema: + properties: + output_format: + enum: + - pcm + - wav + - ulaw + - alaw + /waves/v1/lightning-large/stream: + post: + operationId: synthesize_sse_lightning_large + x-fern-sdk-method-name: synthesize_sse_lightning_large + x-fern-server-name: waves + x-fern-audiences: + - v2 + - v3 + - v4 + tags: + - Lightning Large + summary: Text to Speech (SSE) + description: | + The Lightning-Large SSE API provides real-time text-to-speech streaming capabilities with high-quality voice synthesis. This API uses Server-Sent Events (SSE) to deliver audio chunks as they're generated, enabling low-latency audio playback without waiting for the entire audio file to process. + + ## When to Use + + - **Interactive Applications**: Perfect for chatbots, virtual assistants, and other applications requiring immediate voice responses + - **Long-Form Content**: Efficiently stream audio for articles, stories, or other long-form content without buffering delays + - **Voice User Interfaces**: Create natural-sounding voice interfaces with minimal perceived latency + - **Accessibility Solutions**: Provide real-time audio versions of written content for users with visual impairments + + ## How It Works + + 1. **Make a POST Request**: Send your text and voice settings to the API endpoint + 2. **Receive Audio Chunks**: The API processes your text and streams audio back as base64-encoded chunks with 1024 byte size + 3. **Process the Stream**: Handle the SSE events to decode and play audio chunks sequentially + 4. 
**End of Stream**: The API sends a completion event when all audio has been delivered + requestBody: + content: + application/json: + schema: + properties: + output_format: + enum: + - pcm + - wav + - ulaw + - alaw + /waves/v1/lightning-v2/get_speech: + post: + operationId: synthesize_lightning_v2 + x-fern-sdk-method-name: synthesize_lightning_v2 + x-fern-server-name: waves + x-fern-audiences: + - v2 + - v3 + - v4 + tags: + - Lightning v2 + summary: Text to Speech + description: Get speech for given text using the Waves API + requestBody: + content: + application/json: + schema: + properties: + output_format: + enum: + - pcm + - wav + - ulaw + - alaw + /waves/v1/lightning-v2/stream: + post: + operationId: synthesize_sse_lightning_v2 + x-fern-sdk-method-name: synthesize_sse_lightning_v2 + x-fern-server-name: waves + x-fern-audiences: + - v2 + - v3 + - v4 + tags: + - Lightning v2 + summary: Text to Speech (SSE) + description: | + The Lightning v2 SSE API provides real-time text-to-speech streaming capabilities with high-quality voice synthesis. This API uses Server-Sent Events (SSE) to deliver audio chunks as they're generated, enabling low-latency audio playback without waiting for the entire audio file to process. + For an end-to-end example of how to use the Lightning v2 SSE API, check out [Text to Speech (SSE) Example](https://github.com/smallest-inc/waves-examples/blob/main/lightning_v2/http_streaming/http_streaming_api.py) + + ## When to Use + + - **Interactive Applications**: Perfect for chatbots, virtual assistants, and other applications requiring immediate voice responses + - **Long-Form Content**: Efficiently stream audio for articles, stories, or other long-form content without buffering delays + - **Voice User Interfaces**: Create natural-sounding voice interfaces with minimal perceived latency + - **Accessibility Solutions**: Provide real-time audio versions of written content for users with visual impairments + + ## How It Works + + 1. 
**Make a POST Request**: Send your text and voice settings to the API endpoint + 2. **Receive Audio Chunks**: The API processes your text and streams audio back as base64-encoded chunks with 1024 byte size + 3. **Process the Stream**: Handle the SSE events to decode and play audio chunks sequentially + 4. **End of Stream**: The API sends a completion event when all audio has been delivered + requestBody: + content: + application/json: + schema: + properties: + output_format: + enum: + - pcm + - wav + - ulaw + - alaw diff --git a/fern/apis/waves/openapi/waves-api.yaml b/fern/apis/waves/openapi/waves-api.yaml index 3d1eba2..b0d0660 100644 --- a/fern/apis/waves/openapi/waves-api.yaml +++ b/fern/apis/waves/openapi/waves-api.yaml @@ -6,11 +6,11 @@ info: Currently featuring our two models, Lightning & Lightning Large, with more models coming soon. version: 1.0.0 servers: - - url: https://waves-api.smallest.ai + - url: https://api.smallest.ai description: Waves API server paths: - /api/v1/pronunciation-dicts: + /waves/v1/pronunciation-dicts: get: summary: Get pronunciation dictionaries description: Retrieve all pronunciation dictionaries for the authenticated user @@ -217,7 +217,7 @@ paths: $ref: "#/components/schemas/ErrorResponse" example: error: "Internal server error" - /api/v1/lightning/get_speech: + /waves/v1/lightning/get_speech: post: tags: - Lightning @@ -290,7 +290,7 @@ paths: error: "InternalServerError" message: "An unexpected error occurred." - /api/v1/lightning-large/get_speech: + /waves/v1/lightning-large/get_speech: post: tags: - Lightning Large @@ -363,7 +363,7 @@ paths: error: "InternalServerError" message: "An unexpected error occurred." - /api/v1/lightning-v2/get_speech: + /waves/v1/lightning-v2/get_speech: post: tags: - Lightning v2 @@ -436,7 +436,7 @@ paths: error: "InternalServerError" message: "An unexpected error occurred." 
- /api/v1/lightning-large/stream: + /waves/v1/lightning-large/stream: post: tags: - Lightning Large @@ -511,7 +511,7 @@ paths: error: "InternalServerError" message: "An unexpected error occurred." - /api/v1/lightning-v2/stream: + /waves/v1/lightning-v2/stream: post: tags: - Lightning v2 @@ -828,9 +828,11 @@ components: text: type: string description: The text to convert to speech. + default: "Hey i am your a text to speech model" voice_id: type: string description: The voice identifier to use for speech generation. + default: "malcom" sample_rate: type: integer description: The sample rate for the generated audio. diff --git a/fern/docs.yml b/fern/docs.yml index 3258f8e..f3765e4 100644 --- a/fern/docs.yml +++ b/fern/docs.yml @@ -2,6 +2,11 @@ instances: - url: smallest-ai.docs.buildwithfern.com + custom-domain: docs.smallest.ai + +redirects: + - source: "/context7.json" + destination: "https://raw.githubusercontent.com/fern-demo/smallest-ai-fern-config/main/fern/docs/assets/context7.json" title: Smallest AI Docs @@ -9,35 +14,27 @@ ai-search: location: - docs +css: + - docs/assets/styles/global-styling.css + layout: searchbar-placement: header page-width: full tabs-placement: header -# Product switcher configuration products: - - display-name: Atoms + - display-name: Voice Agents path: ./products/atoms.yml icon: fa-solid fa-robot slug: atoms - subtitle: AI Voice Agents Platform - - - display-name: Waves + - display-name: Models path: ./products/waves/versions/v4.0.0.yml icon: fa-solid fa-waveform-lines slug: waves - subtitle: Text-to-Speech & Speech-to-Text APIs - versions: - - display-name: v4.0.0 - path: ./products/waves/versions/v4.0.0.yml - - display-name: v3.0.1 - path: ./products/waves/versions/v3.0.1.yml - - display-name: v2.2.0 - path: ./products/waves/versions/v2.2.0.yml colors: accentPrimary: - dark: '#083b4d' + dark: '#2A9D8F' light: '#083b4d' background: dark: '#000000' @@ -46,10 +43,14 @@ colors: theme: page-actions: toolbar footer-nav: minimal + 
product-switcher: toggle + logo: dark: docs/assets/logo_light.png light: docs/assets/logo_dark.png - height: 30 + height: 51 favicon: docs/assets/favicon.svg + +default-language: python diff --git a/fern/docs/assets/context7.json b/fern/docs/assets/context7.json new file mode 100644 index 0000000..0575c0c --- /dev/null +++ b/fern/docs/assets/context7.json @@ -0,0 +1,4 @@ +{ + "url": "https://context7.com/llmstxt/smallest_ai_llms_txt", + "public_key": "pk_58nlANd9CKqswE2DosbzA" +} diff --git a/fern/docs/assets/images/agent-dashboard-conversions.png b/fern/docs/assets/images/agent-dashboard-conversions.png new file mode 100644 index 0000000..edf540d Binary files /dev/null and b/fern/docs/assets/images/agent-dashboard-conversions.png differ diff --git a/fern/docs/assets/images/agent-dashboard.png b/fern/docs/assets/images/agent-dashboard.png new file mode 100644 index 0000000..888291a Binary files /dev/null and b/fern/docs/assets/images/agent-dashboard.png differ diff --git a/fern/docs/assets/images/checks-passed.png b/fern/docs/assets/images/checks-passed.png new file mode 100644 index 0000000..3303c77 Binary files /dev/null and b/fern/docs/assets/images/checks-passed.png differ diff --git a/fern/docs/assets/images/conversions-list.png b/fern/docs/assets/images/conversions-list.png new file mode 100644 index 0000000..aa46a94 Binary files /dev/null and b/fern/docs/assets/images/conversions-list.png differ diff --git a/fern/docs/assets/images/create-audience.png b/fern/docs/assets/images/create-audience.png new file mode 100644 index 0000000..e1e062d Binary files /dev/null and b/fern/docs/assets/images/create-audience.png differ diff --git a/fern/docs/assets/images/create-campaign.png b/fern/docs/assets/images/create-campaign.png new file mode 100644 index 0000000..a7efb9d Binary files /dev/null and b/fern/docs/assets/images/create-campaign.png differ diff --git a/fern/docs/assets/images/create-conversion.png b/fern/docs/assets/images/create-conversion.png new file 
mode 100644 index 0000000..2e95316 Binary files /dev/null and b/fern/docs/assets/images/create-conversion.png differ diff --git a/fern/docs/assets/images/download.svg b/fern/docs/assets/images/download.svg new file mode 100644 index 0000000..f41f94e --- /dev/null +++ b/fern/docs/assets/images/download.svg @@ -0,0 +1,3 @@ + + + diff --git a/fern/docs/assets/images/hero-dark.svg b/fern/docs/assets/images/hero-dark.svg new file mode 100644 index 0000000..c6a30e8 --- /dev/null +++ b/fern/docs/assets/images/hero-dark.svg @@ -0,0 +1,161 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/fern/docs/assets/images/hero-light.svg b/fern/docs/assets/images/hero-light.svg new file mode 100644 index 0000000..297d68f --- /dev/null +++ b/fern/docs/assets/images/hero-light.svg @@ -0,0 +1,155 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/fern/docs/assets/images/ivc-image-1.png b/fern/docs/assets/images/ivc-image-1.png new file mode 100644 index 0000000..5c18503 Binary files /dev/null and b/fern/docs/assets/images/ivc-image-1.png differ diff --git a/fern/docs/assets/images/ivc-image-2.png b/fern/docs/assets/images/ivc-image-2.png new file mode 100644 index 0000000..b4d3f32 Binary files /dev/null and b/fern/docs/assets/images/ivc-image-2.png differ diff --git a/fern/docs/assets/images/ivc-image-3.png b/fern/docs/assets/images/ivc-image-3.png new file mode 100644 index 0000000..0cf238e Binary files /dev/null and 
b/fern/docs/assets/images/ivc-image-3.png differ diff --git a/fern/docs/assets/images/ivc-image-4.png b/fern/docs/assets/images/ivc-image-4.png new file mode 100644 index 0000000..5ecf5d6 Binary files /dev/null and b/fern/docs/assets/images/ivc-image-4.png differ diff --git a/fern/docs/assets/images/lightning_cover.png b/fern/docs/assets/images/lightning_cover.png new file mode 100644 index 0000000..acaebb8 Binary files /dev/null and b/fern/docs/assets/images/lightning_cover.png differ diff --git a/fern/docs/assets/images/pvc_page.png b/fern/docs/assets/images/pvc_page.png new file mode 100644 index 0000000..8bf1c5e Binary files /dev/null and b/fern/docs/assets/images/pvc_page.png differ diff --git a/fern/docs/assets/images/save-campaign.png b/fern/docs/assets/images/save-campaign.png new file mode 100644 index 0000000..972b0df Binary files /dev/null and b/fern/docs/assets/images/save-campaign.png differ diff --git a/fern/docs/assets/images/smallest_cover.jpeg b/fern/docs/assets/images/smallest_cover.jpeg new file mode 100644 index 0000000..2f584f4 Binary files /dev/null and b/fern/docs/assets/images/smallest_cover.jpeg differ diff --git a/fern/docs/assets/images/started-campaign.png b/fern/docs/assets/images/started-campaign.png new file mode 100644 index 0000000..0c1857a Binary files /dev/null and b/fern/docs/assets/images/started-campaign.png differ diff --git a/fern/docs/assets/images/test-agent.png b/fern/docs/assets/images/test-agent.png new file mode 100644 index 0000000..f90fbec Binary files /dev/null and b/fern/docs/assets/images/test-agent.png differ diff --git a/fern/docs/assets/images/thunder.png b/fern/docs/assets/images/thunder.png new file mode 100644 index 0000000..2a64ee9 Binary files /dev/null and b/fern/docs/assets/images/thunder.png differ diff --git a/fern/docs/assets/images/thunder.svg b/fern/docs/assets/images/thunder.svg new file mode 100644 index 0000000..35f6a0e --- /dev/null +++ b/fern/docs/assets/images/thunder.svg @@ -0,0 +1,26 @@ + + + 
+ + + + + + + image/svg+xml + + + + + Openclipart + + + + + + + + + + + diff --git a/fern/docs/assets/logo.png b/fern/docs/assets/logo.png deleted file mode 100644 index bd387f8..0000000 Binary files a/fern/docs/assets/logo.png and /dev/null differ diff --git a/fern/docs/assets/logo_dark.png b/fern/docs/assets/logo_dark.png index 09db854..d32834a 100644 Binary files a/fern/docs/assets/logo_dark.png and b/fern/docs/assets/logo_dark.png differ diff --git a/fern/docs/assets/logo_light.png b/fern/docs/assets/logo_light.png index e2d546b..1434d9f 100644 Binary files a/fern/docs/assets/logo_light.png and b/fern/docs/assets/logo_light.png differ diff --git a/fern/docs/assets/platform/building-agents/images/README.md b/fern/docs/assets/platform/building-agents/images/README.md new file mode 100644 index 0000000..bac4bf9 --- /dev/null +++ b/fern/docs/assets/platform/building-agents/images/README.md @@ -0,0 +1,178 @@ +# Building Agents — Image Guide + +Drop screenshots and diagrams here. Name them exactly as listed below. + +--- + +## Overview Page + +| Filename | What to capture | +|----------|-----------------| +| `create-agent-modal.png` | The "Create a new agent" modal showing all 3 options (Start from scratch, Start with Template, Create with AI) | + +--- + +## Single Prompt Section + +| Filename | What to capture | +|----------|-----------------| +| `sp-editor-full.png` | Full Single Prompt editor with a sample prompt filled in. Show top bar, prompt area, right sidebar. 
| +| `sp-editor-blank.png` | Blank Single Prompt editor (fresh agent) | +| `sp-test-modal.png` | "Test Agent" modal showing the 3 test modes | +| `sp-voice-picker.png` | Voice picker panel with filters visible | +| `sp-config-panel.png` | Right sidebar configuration panel with toggles visible | + +--- + +## Conversational Flow Section + +| Filename | What to capture | +|----------|-----------------| +| `cf-builder-full.png` | Workflow builder with a sample flow (5-7 nodes connected) | +| `cf-builder-blank.png` | Empty workflow builder canvas | +| `cf-node-palette.png` | Left panel node palette showing all node types | +| `cf-node-config.png` | A node selected with its configuration panel open | +| `cf-branch-example.png` | A node with multiple branches/conditions visible | + +--- + +## Create with AI + +| Filename | What to capture | +|----------|-----------------| +| `ai-create-full.png` | Full Create with AI page (left config panel + right prompts) | +| `ai-create-loading.png` | The loading modal ("Hold up, your agent is getting ready") | +| `ai-templates.png` | Template tabs visible (Real Estate, Credit Card, etc.) 
| + +--- + +## Configuring Section + +| Filename | What to capture | +|----------|-----------------| +| `config-editor-overview.png` | Full editor with all areas labeled/annotated | +| `config-model-dropdown.png` | Model selection dropdown expanded | +| `config-voice-picker.png` | Voice picker panel open | +| `end-call-modal.png` | Add End Call modal (Name + Description fields) | +| `transfer-call-modal.png` | Transfer Call configuration modal | +| `transfer-call-warm.png` | Warm transfer options (expanded modal) | +| `variables-user.png` | User Defined variables tab | +| `variables-system.png` | System variables tab | +| `variables-api.png` | API variables tab | +| `api-call-basic.png` | API Call modal - top section (Name, Description, LLM Params, URL, Method, Timeout) | +| `api-call-advanced.png` | API Call modal - bottom section (Headers, Query Params, Response Variable Extraction) | +| `config-webhook-modal.png` | Webhook configuration modal | + +### Voice Settings + +| Filename | What to capture | +|----------|-----------------| +| `voice-settings.png` | Full Voice tab with all sections visible | +| `add-pronunciation.png` | Add Pronunciation modal (Word + Pronunciation fields) | + +### Model Settings + +| Filename | What to capture | +|----------|-----------------| +| `model-settings-sp.png` | Model tab for Single Prompt agent | +| `model-settings-cf.png` | Model tab for Conversational Flow agent (shows Global Prompt + KB) | + +### Phone Number + +| Filename | What to capture | +|----------|-----------------| +| `phone-number-settings.png` | Phone Number tab showing dropdown | + +### General Settings + +| Filename | What to capture | +|----------|-----------------| +| `general-settings.png` | General tab showing 3 timeout fields | + +### Widget + +| Filename | What to capture | +|----------|-----------------| +| `widget-embed.png` | Embed code section | +| `widget-mode.png` | Mode selection (Chat/Voice) | +| `widget-allowlist.png` | Allowlist configuration | 
+| `widget-tiny.png` | Widget in Tiny variant | +| `widget-compact.png` | Widget in Compact variant | +| `widget-full.png` | Widget in Full variant | + +### Post-Call Metrics + +| Filename | What to capture | +|----------|-----------------| +| `post-call-metrics-list.png` | Landing page showing list of configured metrics | +| `post-call-disposition.png` | Disposition Metrics form (creating from scratch) | +| `post-call-templates.png` | Templates selection panel | + +--- + +## Testing & Launch + +| Filename | What to capture | +|----------|-----------------| +| `test-webcall.png` | Web Call test interface | +| `test-telephony.png` | Telephony test interface | +| `test-chat.png` | Chat test interface | +| `lock-agent.png` | Lock Agent toggle | +| `convo-logs-list.png` | Call Logs landing page with list of calls | +| `convo-logs-overview.png` | Call details - Overview tab | +| `convo-logs-transcript.png` | Call details - Transcript tab | +| `convo-logs-events.png` | Call details - Events tab | +| `convo-logs-metrics.png` | Call details - Metrics tab | + +--- + +## Deployment — Phone Numbers + +| Filename | What to capture | +|----------|-----------------| +| `phone-numbers.png` | Main Phone Numbers page with list | +| `rent-number.png` | Rent Number modal | +| `import-sip.png` | Import SIP Number modal | + +--- + +## Deployment — Audiences + +| Filename | What to capture | +|----------|-----------------| +| `audiences.png` | Main Audiences list page | +| `upload-csv.png` | Step 1: Upload CSV with consent warning | +| `map-phone.png` | Step 2: Map Phone Number with dropdown + CSV preview | +| `add-contacts.png` | Step 3: Audience Name and Description fields | +| `audience-members.png` | Individual audience view with members table | +| `add-manually.png` | Add members modal — "Add Manually" tab | +| `import-csv.png` | Add members modal — "Import CSV" tab | + +--- + +## Deployment — Campaigns + +| Filename | What to capture | +|----------|-----------------| +| 
`campaigns.png` | Main Campaigns list page | +| `create-campaign.png` | Create Campaign form with all fields | +| `campaign-call-logs.png` | Campaign detail — Call Logs tab | +| `campaign-events.png` | Campaign detail — Campaign Events tab | +| `campaign-executions.png` | Campaign detail — Executions tab | + +--- + +## Analytics + +| Filename | What to capture | +|----------|-----------------| +| `analytics.png` | Full Analytics dashboard with filters, summary cards, charts, and Most Called Agents table | + +--- + +## Notes + +- **Format:** PNG preferred, keep under 500KB each +- **Size:** Capture at 2x resolution if possible for retina displays +- **Annotations:** If you want callouts/arrows, add them before saving +- **Sensitive data:** Blur or use fake data for any real customer info diff --git "a/fern/docs/assets/platform/building-agents/images/Screenshot 2026-01-28 at 9.04.27\342\200\257AM.png" "b/fern/docs/assets/platform/building-agents/images/Screenshot 2026-01-28 at 9.04.27\342\200\257AM.png" new file mode 100644 index 0000000..229cbdd Binary files /dev/null and "b/fern/docs/assets/platform/building-agents/images/Screenshot 2026-01-28 at 9.04.27\342\200\257AM.png" differ diff --git a/fern/docs/assets/platform/building-agents/images/add-contacts.png b/fern/docs/assets/platform/building-agents/images/add-contacts.png new file mode 100644 index 0000000..3bfe0d1 Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/add-contacts.png differ diff --git a/fern/docs/assets/platform/building-agents/images/add-manually.png b/fern/docs/assets/platform/building-agents/images/add-manually.png new file mode 100644 index 0000000..b65b0c7 Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/add-manually.png differ diff --git a/fern/docs/assets/platform/building-agents/images/add-pronunciation.png b/fern/docs/assets/platform/building-agents/images/add-pronunciation.png new file mode 100644 index 0000000..8e89a3c Binary files 
/dev/null and b/fern/docs/assets/platform/building-agents/images/add-pronunciation.png differ diff --git a/fern/docs/assets/platform/building-agents/images/analytics.png b/fern/docs/assets/platform/building-agents/images/analytics.png new file mode 100644 index 0000000..f1916c0 Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/analytics.png differ diff --git a/fern/docs/assets/platform/building-agents/images/api-call-advanced.png b/fern/docs/assets/platform/building-agents/images/api-call-advanced.png new file mode 100644 index 0000000..23fa11c Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/api-call-advanced.png differ diff --git a/fern/docs/assets/platform/building-agents/images/api-call-basic.png b/fern/docs/assets/platform/building-agents/images/api-call-basic.png new file mode 100644 index 0000000..9c08aa0 Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/api-call-basic.png differ diff --git a/fern/docs/assets/platform/building-agents/images/audience-members.png b/fern/docs/assets/platform/building-agents/images/audience-members.png new file mode 100644 index 0000000..2c0b26b Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/audience-members.png differ diff --git a/fern/docs/assets/platform/building-agents/images/audiences.png b/fern/docs/assets/platform/building-agents/images/audiences.png new file mode 100644 index 0000000..5239ddc Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/audiences.png differ diff --git a/fern/docs/assets/platform/building-agents/images/campaign-call-logs.png b/fern/docs/assets/platform/building-agents/images/campaign-call-logs.png new file mode 100644 index 0000000..5fdfd8b Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/campaign-call-logs.png differ diff --git a/fern/docs/assets/platform/building-agents/images/campaign-events.png 
b/fern/docs/assets/platform/building-agents/images/campaign-events.png new file mode 100644 index 0000000..6cf0688 Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/campaign-events.png differ diff --git a/fern/docs/assets/platform/building-agents/images/campaign-executions.png b/fern/docs/assets/platform/building-agents/images/campaign-executions.png new file mode 100644 index 0000000..b9ea357 Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/campaign-executions.png differ diff --git a/fern/docs/assets/platform/building-agents/images/campaigns.png b/fern/docs/assets/platform/building-agents/images/campaigns.png new file mode 100644 index 0000000..81f65c4 Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/campaigns.png differ diff --git a/fern/docs/assets/platform/building-agents/images/cf-add-pronunciation.png b/fern/docs/assets/platform/building-agents/images/cf-add-pronunciation.png new file mode 100644 index 0000000..8e89a3c Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/cf-add-pronunciation.png differ diff --git a/fern/docs/assets/platform/building-agents/images/cf-branching-example.png b/fern/docs/assets/platform/building-agents/images/cf-branching-example.png new file mode 100644 index 0000000..d7f0cf3 Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/cf-branching-example.png differ diff --git a/fern/docs/assets/platform/building-agents/images/cf-editor-settings-tab.png b/fern/docs/assets/platform/building-agents/images/cf-editor-settings-tab.png new file mode 100644 index 0000000..7744c07 Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/cf-editor-settings-tab.png differ diff --git a/fern/docs/assets/platform/building-agents/images/cf-editor-workflow-tab.png b/fern/docs/assets/platform/building-agents/images/cf-editor-workflow-tab.png new file mode 100644 index 0000000..98417e1 Binary files 
/dev/null and b/fern/docs/assets/platform/building-agents/images/cf-editor-workflow-tab.png differ diff --git a/fern/docs/assets/platform/building-agents/images/cf-general-settings.png b/fern/docs/assets/platform/building-agents/images/cf-general-settings.png new file mode 100644 index 0000000..76f653e Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/cf-general-settings.png differ diff --git a/fern/docs/assets/platform/building-agents/images/cf-languages-tab.png b/fern/docs/assets/platform/building-agents/images/cf-languages-tab.png new file mode 100644 index 0000000..045fd96 Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/cf-languages-tab.png differ diff --git a/fern/docs/assets/platform/building-agents/images/cf-model-settings.png b/fern/docs/assets/platform/building-agents/images/cf-model-settings.png new file mode 100644 index 0000000..0d7f85d Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/cf-model-settings.png differ diff --git a/fern/docs/assets/platform/building-agents/images/cf-node-api-call.png b/fern/docs/assets/platform/building-agents/images/cf-node-api-call.png new file mode 100644 index 0000000..26f6a36 Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/cf-node-api-call.png differ diff --git a/fern/docs/assets/platform/building-agents/images/cf-node-default.png b/fern/docs/assets/platform/building-agents/images/cf-node-default.png new file mode 100644 index 0000000..969e042 Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/cf-node-default.png differ diff --git a/fern/docs/assets/platform/building-agents/images/cf-node-end-call.png b/fern/docs/assets/platform/building-agents/images/cf-node-end-call.png new file mode 100644 index 0000000..faafda4 Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/cf-node-end-call.png differ diff --git 
a/fern/docs/assets/platform/building-agents/images/cf-node-post-call-api.png b/fern/docs/assets/platform/building-agents/images/cf-node-post-call-api.png new file mode 100644 index 0000000..7694840 Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/cf-node-post-call-api.png differ diff --git a/fern/docs/assets/platform/building-agents/images/cf-node-pre-call-api.png b/fern/docs/assets/platform/building-agents/images/cf-node-pre-call-api.png new file mode 100644 index 0000000..a438661 Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/cf-node-pre-call-api.png differ diff --git a/fern/docs/assets/platform/building-agents/images/cf-node-transfer-call.png b/fern/docs/assets/platform/building-agents/images/cf-node-transfer-call.png new file mode 100644 index 0000000..c5d1310 Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/cf-node-transfer-call.png differ diff --git a/fern/docs/assets/platform/building-agents/images/cf-phone-number.png b/fern/docs/assets/platform/building-agents/images/cf-phone-number.png new file mode 100644 index 0000000..0042961 Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/cf-phone-number.png differ diff --git a/fern/docs/assets/platform/building-agents/images/cf-variables-panel.png b/fern/docs/assets/platform/building-agents/images/cf-variables-panel.png new file mode 100644 index 0000000..969e042 Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/cf-variables-panel.png differ diff --git a/fern/docs/assets/platform/building-agents/images/cf-voice-settings.png b/fern/docs/assets/platform/building-agents/images/cf-voice-settings.png new file mode 100644 index 0000000..3b5d8ac Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/cf-voice-settings.png differ diff --git a/fern/docs/assets/platform/building-agents/images/cf-workflow-builder.png 
b/fern/docs/assets/platform/building-agents/images/cf-workflow-builder.png new file mode 100644 index 0000000..98417e1 Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/cf-workflow-builder.png differ diff --git a/fern/docs/assets/platform/building-agents/images/convo-logs-events.png b/fern/docs/assets/platform/building-agents/images/convo-logs-events.png new file mode 100644 index 0000000..d3ab6d7 Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/convo-logs-events.png differ diff --git a/fern/docs/assets/platform/building-agents/images/convo-logs-list.png b/fern/docs/assets/platform/building-agents/images/convo-logs-list.png new file mode 100644 index 0000000..6adfe14 Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/convo-logs-list.png differ diff --git a/fern/docs/assets/platform/building-agents/images/convo-logs-metrics.png b/fern/docs/assets/platform/building-agents/images/convo-logs-metrics.png new file mode 100644 index 0000000..483f774 Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/convo-logs-metrics.png differ diff --git a/fern/docs/assets/platform/building-agents/images/convo-logs-overview.png b/fern/docs/assets/platform/building-agents/images/convo-logs-overview.png new file mode 100644 index 0000000..63e7247 Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/convo-logs-overview.png differ diff --git a/fern/docs/assets/platform/building-agents/images/convo-logs-transcript.png b/fern/docs/assets/platform/building-agents/images/convo-logs-transcript.png new file mode 100644 index 0000000..27848a5 Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/convo-logs-transcript.png differ diff --git a/fern/docs/assets/platform/building-agents/images/create-agent-modal.png b/fern/docs/assets/platform/building-agents/images/create-agent-modal.png new file mode 100644 index 0000000..00e52d2 Binary files 
/dev/null and b/fern/docs/assets/platform/building-agents/images/create-agent-modal.png differ diff --git a/fern/docs/assets/platform/building-agents/images/create-campaign.png b/fern/docs/assets/platform/building-agents/images/create-campaign.png new file mode 100644 index 0000000..c9b2158 Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/create-campaign.png differ diff --git a/fern/docs/assets/platform/building-agents/images/create-with-ai.png b/fern/docs/assets/platform/building-agents/images/create-with-ai.png new file mode 100644 index 0000000..efe2dcc Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/create-with-ai.png differ diff --git a/fern/docs/assets/platform/building-agents/images/dashboard.png b/fern/docs/assets/platform/building-agents/images/dashboard.png new file mode 100644 index 0000000..18bf541 Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/dashboard.png differ diff --git a/fern/docs/assets/platform/building-agents/images/end-call-modal.png b/fern/docs/assets/platform/building-agents/images/end-call-modal.png new file mode 100644 index 0000000..f22e0f7 Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/end-call-modal.png differ diff --git a/fern/docs/assets/platform/building-agents/images/general-settings.png b/fern/docs/assets/platform/building-agents/images/general-settings.png new file mode 100644 index 0000000..93b4afc Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/general-settings.png differ diff --git a/fern/docs/assets/platform/building-agents/images/import-csv.png b/fern/docs/assets/platform/building-agents/images/import-csv.png new file mode 100644 index 0000000..c9c6250 Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/import-csv.png differ diff --git a/fern/docs/assets/platform/building-agents/images/import-sip.png 
b/fern/docs/assets/platform/building-agents/images/import-sip.png new file mode 100644 index 0000000..4f7593c Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/import-sip.png differ diff --git a/fern/docs/assets/platform/building-agents/images/integrations.png b/fern/docs/assets/platform/building-agents/images/integrations.png new file mode 100644 index 0000000..23c3ee0 Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/integrations.png differ diff --git a/fern/docs/assets/platform/building-agents/images/kb-add-document.png b/fern/docs/assets/platform/building-agents/images/kb-add-document.png new file mode 100644 index 0000000..cc9245a Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/kb-add-document.png differ diff --git a/fern/docs/assets/platform/building-agents/images/kb-create-modal.png b/fern/docs/assets/platform/building-agents/images/kb-create-modal.png new file mode 100644 index 0000000..c490b73 Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/kb-create-modal.png differ diff --git a/fern/docs/assets/platform/building-agents/images/kb-with-documents.png b/fern/docs/assets/platform/building-agents/images/kb-with-documents.png new file mode 100644 index 0000000..a45c1f5 Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/kb-with-documents.png differ diff --git a/fern/docs/assets/platform/building-agents/images/lock-agent.png b/fern/docs/assets/platform/building-agents/images/lock-agent.png new file mode 100644 index 0000000..4553f60 Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/lock-agent.png differ diff --git a/fern/docs/assets/platform/building-agents/images/map-phone.png b/fern/docs/assets/platform/building-agents/images/map-phone.png new file mode 100644 index 0000000..44a82ca Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/map-phone.png differ diff --git 
a/fern/docs/assets/platform/building-agents/images/model-dropdown.png b/fern/docs/assets/platform/building-agents/images/model-dropdown.png new file mode 100644 index 0000000..915ccbf Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/model-dropdown.png differ diff --git a/fern/docs/assets/platform/building-agents/images/model-settings-cf.png b/fern/docs/assets/platform/building-agents/images/model-settings-cf.png new file mode 100644 index 0000000..f17300b Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/model-settings-cf.png differ diff --git a/fern/docs/assets/platform/building-agents/images/model-settings-sp.png b/fern/docs/assets/platform/building-agents/images/model-settings-sp.png new file mode 100644 index 0000000..2f7f993 Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/model-settings-sp.png differ diff --git a/fern/docs/assets/platform/building-agents/images/phone-number-settings.png b/fern/docs/assets/platform/building-agents/images/phone-number-settings.png new file mode 100644 index 0000000..447ae5a Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/phone-number-settings.png differ diff --git a/fern/docs/assets/platform/building-agents/images/phone-numbers.png b/fern/docs/assets/platform/building-agents/images/phone-numbers.png new file mode 100644 index 0000000..3d17608 Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/phone-numbers.png differ diff --git a/fern/docs/assets/platform/building-agents/images/post-call-disposition.png b/fern/docs/assets/platform/building-agents/images/post-call-disposition.png new file mode 100644 index 0000000..57c1ee0 Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/post-call-disposition.png differ diff --git a/fern/docs/assets/platform/building-agents/images/post-call-metrics-list.png 
b/fern/docs/assets/platform/building-agents/images/post-call-metrics-list.png new file mode 100644 index 0000000..192b8cb Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/post-call-metrics-list.png differ diff --git a/fern/docs/assets/platform/building-agents/images/post-call-templates.png b/fern/docs/assets/platform/building-agents/images/post-call-templates.png new file mode 100644 index 0000000..404477e Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/post-call-templates.png differ diff --git a/fern/docs/assets/platform/building-agents/images/prompt-editor.png b/fern/docs/assets/platform/building-agents/images/prompt-editor.png new file mode 100644 index 0000000..9aa3bfa Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/prompt-editor.png differ diff --git a/fern/docs/assets/platform/building-agents/images/rent-number.png b/fern/docs/assets/platform/building-agents/images/rent-number.png new file mode 100644 index 0000000..7d08718 Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/rent-number.png differ diff --git a/fern/docs/assets/platform/building-agents/images/salesforce-connect.png b/fern/docs/assets/platform/building-agents/images/salesforce-connect.png new file mode 100644 index 0000000..b7e7046 Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/salesforce-connect.png differ diff --git a/fern/docs/assets/platform/building-agents/images/sp-editor.png b/fern/docs/assets/platform/building-agents/images/sp-editor.png new file mode 100644 index 0000000..9aa3bfa Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/sp-editor.png differ diff --git a/fern/docs/assets/platform/building-agents/images/template-gallery.png b/fern/docs/assets/platform/building-agents/images/template-gallery.png new file mode 100644 index 0000000..a0496be Binary files /dev/null and 
b/fern/docs/assets/platform/building-agents/images/template-gallery.png differ diff --git a/fern/docs/assets/platform/building-agents/images/test-chat.png b/fern/docs/assets/platform/building-agents/images/test-chat.png new file mode 100644 index 0000000..e61207d Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/test-chat.png differ diff --git a/fern/docs/assets/platform/building-agents/images/test-telephony.png b/fern/docs/assets/platform/building-agents/images/test-telephony.png new file mode 100644 index 0000000..81d904d Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/test-telephony.png differ diff --git a/fern/docs/assets/platform/building-agents/images/test-webcall.png b/fern/docs/assets/platform/building-agents/images/test-webcall.png new file mode 100644 index 0000000..166166f Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/test-webcall.png differ diff --git a/fern/docs/assets/platform/building-agents/images/transfer-call-modal.png b/fern/docs/assets/platform/building-agents/images/transfer-call-modal.png new file mode 100644 index 0000000..835c0a5 Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/transfer-call-modal.png differ diff --git a/fern/docs/assets/platform/building-agents/images/transfer-call-warm.png b/fern/docs/assets/platform/building-agents/images/transfer-call-warm.png new file mode 100644 index 0000000..ffdabbe Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/transfer-call-warm.png differ diff --git a/fern/docs/assets/platform/building-agents/images/upload-csv.png b/fern/docs/assets/platform/building-agents/images/upload-csv.png new file mode 100644 index 0000000..919f358 Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/upload-csv.png differ diff --git a/fern/docs/assets/platform/building-agents/images/variables-api.png 
b/fern/docs/assets/platform/building-agents/images/variables-api.png new file mode 100644 index 0000000..0cea222 Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/variables-api.png differ diff --git a/fern/docs/assets/platform/building-agents/images/variables-system.png b/fern/docs/assets/platform/building-agents/images/variables-system.png new file mode 100644 index 0000000..9336235 Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/variables-system.png differ diff --git a/fern/docs/assets/platform/building-agents/images/variables-user.png b/fern/docs/assets/platform/building-agents/images/variables-user.png new file mode 100644 index 0000000..1883754 Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/variables-user.png differ diff --git a/fern/docs/assets/platform/building-agents/images/voice-picker.png b/fern/docs/assets/platform/building-agents/images/voice-picker.png new file mode 100644 index 0000000..c7fe423 Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/voice-picker.png differ diff --git a/fern/docs/assets/platform/building-agents/images/voice-settings.png b/fern/docs/assets/platform/building-agents/images/voice-settings.png new file mode 100644 index 0000000..4c6b20f Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/voice-settings.png differ diff --git a/fern/docs/assets/platform/building-agents/images/webhook-agent.png b/fern/docs/assets/platform/building-agents/images/webhook-agent.png new file mode 100644 index 0000000..5cf932b Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/webhook-agent.png differ diff --git a/fern/docs/assets/platform/building-agents/images/webhook-create.png b/fern/docs/assets/platform/building-agents/images/webhook-create.png new file mode 100644 index 0000000..f227cf1 Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/webhook-create.png 
differ diff --git a/fern/docs/assets/platform/building-agents/images/webhook-detail.png b/fern/docs/assets/platform/building-agents/images/webhook-detail.png new file mode 100644 index 0000000..683d4b9 Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/webhook-detail.png differ diff --git a/fern/docs/assets/platform/building-agents/images/webhook-modal.png b/fern/docs/assets/platform/building-agents/images/webhook-modal.png new file mode 100644 index 0000000..03eaca7 Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/webhook-modal.png differ diff --git a/fern/docs/assets/platform/building-agents/images/webhooks-dashboard.png b/fern/docs/assets/platform/building-agents/images/webhooks-dashboard.png new file mode 100644 index 0000000..747d8dd Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/webhooks-dashboard.png differ diff --git a/fern/docs/assets/platform/building-agents/images/widget-compact.png b/fern/docs/assets/platform/building-agents/images/widget-compact.png new file mode 100644 index 0000000..5cbcc0d Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/widget-compact.png differ diff --git a/fern/docs/assets/platform/building-agents/images/widget-full-variant.png b/fern/docs/assets/platform/building-agents/images/widget-full-variant.png new file mode 100644 index 0000000..c2398b4 Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/widget-full-variant.png differ diff --git a/fern/docs/assets/platform/building-agents/images/widget-full.png b/fern/docs/assets/platform/building-agents/images/widget-full.png new file mode 100644 index 0000000..e68035a Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/widget-full.png differ diff --git a/fern/docs/assets/platform/building-agents/images/widget-tiny.png b/fern/docs/assets/platform/building-agents/images/widget-tiny.png new file mode 100644 index 
0000000..b5d95ed Binary files /dev/null and b/fern/docs/assets/platform/building-agents/images/widget-tiny.png differ diff --git a/fern/docs/assets/styles/global-styling.css b/fern/docs/assets/styles/global-styling.css new file mode 100644 index 0000000..0cc115d --- /dev/null +++ b/fern/docs/assets/styles/global-styling.css @@ -0,0 +1,19 @@ +/* Shrinks the product toggle */ +.product-dropdown-trigger { + font-size: var(--text-xs) !important; + padding: 2px 8px !important; +} + +@media (min-width: 1024px) { + + #fern-search-button { + margin-left: 1rem !important; + + } + + .fern-product-selector { + + margin-left: 1rem !important; + margin-right: 0.5rem !important; + } +} \ No newline at end of file diff --git a/fern/docs/assets/video/angry_gen_t.mp4 b/fern/docs/assets/video/angry_gen_t.mp4 new file mode 100644 index 0000000..0b3628e Binary files /dev/null and b/fern/docs/assets/video/angry_gen_t.mp4 differ diff --git a/fern/docs/assets/video/angry_ref_t.mp4 b/fern/docs/assets/video/angry_ref_t.mp4 new file mode 100644 index 0000000..aca4571 Binary files /dev/null and b/fern/docs/assets/video/angry_ref_t.mp4 differ diff --git a/fern/docs/assets/video/bg_ref_t.mp4 b/fern/docs/assets/video/bg_ref_t.mp4 new file mode 100644 index 0000000..7d1d723 Binary files /dev/null and b/fern/docs/assets/video/bg_ref_t.mp4 differ diff --git a/fern/docs/assets/video/fast_gen_t.mp4 b/fern/docs/assets/video/fast_gen_t.mp4 new file mode 100644 index 0000000..9c66deb Binary files /dev/null and b/fern/docs/assets/video/fast_gen_t.mp4 differ diff --git a/fern/docs/assets/video/fast_ref_t.mp4 b/fern/docs/assets/video/fast_ref_t.mp4 new file mode 100644 index 0000000..1be0d9c Binary files /dev/null and b/fern/docs/assets/video/fast_ref_t.mp4 differ diff --git a/fern/docs/assets/video/good_ref_t.mp4 b/fern/docs/assets/video/good_ref_t.mp4 new file mode 100644 index 0000000..8b16dec Binary files /dev/null and b/fern/docs/assets/video/good_ref_t.mp4 differ diff --git 
a/fern/docs/assets/video/inconsistent_ref_t.mp4 b/fern/docs/assets/video/inconsistent_ref_t.mp4 new file mode 100644 index 0000000..dec6109 Binary files /dev/null and b/fern/docs/assets/video/inconsistent_ref_t.mp4 differ diff --git a/fern/docs/assets/video/overlap_ref_t.mp4 b/fern/docs/assets/video/overlap_ref_t.mp4 new file mode 100644 index 0000000..ebc3285 Binary files /dev/null and b/fern/docs/assets/video/overlap_ref_t.mp4 differ diff --git a/fern/docs/assets/video/whisper_gen_t.mp4 b/fern/docs/assets/video/whisper_gen_t.mp4 new file mode 100644 index 0000000..2c6188a Binary files /dev/null and b/fern/docs/assets/video/whisper_gen_t.mp4 differ diff --git a/fern/docs/assets/video/whisper_ref_t.mp4 b/fern/docs/assets/video/whisper_ref_t.mp4 new file mode 100644 index 0000000..6a2edeb Binary files /dev/null and b/fern/docs/assets/video/whisper_ref_t.mp4 differ diff --git a/fern/fern.config.json b/fern/fern.config.json index 08cdf6f..ac9bf87 100644 --- a/fern/fern.config.json +++ b/fern/fern.config.json @@ -1,4 +1,4 @@ { "organization": "smallest-ai", - "version": "3.29.1" + "version": "4.31.1" } diff --git a/fern/intro/admin/images/api-keys.png b/fern/intro/admin/images/api-keys.png new file mode 100644 index 0000000..6435671 Binary files /dev/null and b/fern/intro/admin/images/api-keys.png differ diff --git a/fern/intro/admin/images/settings.png b/fern/intro/admin/images/settings.png new file mode 100644 index 0000000..1bb3e78 Binary files /dev/null and b/fern/intro/admin/images/settings.png differ diff --git a/fern/intro/admin/images/subscription.png b/fern/intro/admin/images/subscription.png new file mode 100644 index 0000000..add5bdd Binary files /dev/null and b/fern/intro/admin/images/subscription.png differ diff --git a/fern/products/atoms.yml b/fern/products/atoms.yml index 1127296..1c93cdd 100644 --- a/fern/products/atoms.yml +++ b/fern/products/atoms.yml @@ -1,41 +1,402 @@ tabs: - docs: - display-name: Docs - icon: home + platform: + display-name: Atoms 
Platform + icon: fa-solid fa-desktop + introduction: + display-name: Product Overview + icon: fa-solid fa-house + developer: + display-name: Developer Guide + icon: fa-solid fa-code api-reference: display-name: API Reference - icon: puzzle + icon: fa-solid fa-puzzle-piece + mcp: + display-name: MCP + icon: fa-solid fa-plug navigation: - - tab: docs + - tab: platform layout: - section: Get Started contents: - - page: Introduction - path: ./atoms/pages/introduction.mdx - - page: Quickstart - path: ./atoms/pages/quickstart.mdx - - section: Deep Dive - contents: - - page: Audience - path: ./atoms/pages/deep-dive/audience/audience.mdx - - page: Call Logs - path: ./atoms/pages/deep-dive/call-logs/what-is-a-call-log.mdx - - page: Campaign - path: ./atoms/pages/deep-dive/campaign/campaign.mdx + - page: Quick start + path: ./atoms/pages/platform/introduction/quick-start.mdx + icon: fa-solid fa-rocket + - page: Platform overview + path: ./atoms/pages/platform/introduction/intro.mdx + icon: fa-regular fa-book-open + - section: Single Prompt Agents + contents: + - page: Overview + path: ./atoms/pages/platform/single-prompt/overview.mdx + icon: fa-solid fa-circle-info + - section: Creating Your Agent + contents: + - page: Manual Setup + path: ./atoms/pages/platform/single-prompt/manual-setup.mdx + - page: From Template + path: ./atoms/pages/platform/single-prompt/from-template.mdx + - page: AI Assisted + path: ./atoms/pages/platform/single-prompt/ai-assisted.mdx + - section: Prompt Section + contents: + - page: Writing Prompts + path: ./atoms/pages/platform/single-prompt/writing-prompts.mdx + - page: Model Selection + path: ./atoms/pages/platform/single-prompt/config/model-selection.mdx + - page: Voice Selection + path: ./atoms/pages/platform/single-prompt/config/voice-selection.mdx + - section: Configuration Panel + contents: + - page: End Call + path: ./atoms/pages/platform/single-prompt/config/end-call.mdx + - page: Transfer Call + path: 
./atoms/pages/platform/single-prompt/config/transfer-call.mdx + - page: Knowledge Base + path: ./atoms/pages/platform/single-prompt/config/knowledge-base.mdx + - page: Variables + path: ./atoms/pages/platform/single-prompt/config/variables.mdx + - page: API Calls + path: ./atoms/pages/platform/single-prompt/config/api-calls.mdx + - section: Agent Settings + contents: + - page: General Settings + path: ./atoms/pages/platform/single-prompt/config/general-settings.mdx + - page: Language Selection + path: ./atoms/pages/platform/single-prompt/config/language-selection.mdx + - page: Voice Settings + path: ./atoms/pages/platform/single-prompt/config/voice-settings.mdx + - page: Model Settings + path: ./atoms/pages/platform/single-prompt/config/model-settings.mdx + - page: Phone Number + path: ./atoms/pages/platform/single-prompt/config/phone-number.mdx + - page: Webhooks + path: ./atoms/pages/platform/single-prompt/config/webhooks.mdx + - section: Conversational Flow Agents + availability: deprecated + contents: + - page: Overview + path: ./atoms/pages/platform/convo-flow/overview.mdx + icon: fa-solid fa-circle-info + - section: Creating Your Agent + contents: + - page: Manual Setup + path: ./atoms/pages/platform/convo-flow/manual-setup.mdx + - page: From Template + path: ./atoms/pages/platform/convo-flow/from-template.mdx + - section: Workflow Tab + contents: + - page: Workflow Builder + path: ./atoms/pages/platform/convo-flow/workflow-builder.mdx + - page: Node Types + path: ./atoms/pages/platform/convo-flow/node-types.mdx + - page: Conditions + path: ./atoms/pages/platform/convo-flow/conditions.mdx + - page: Variables + path: ./atoms/pages/platform/convo-flow/config/variables.mdx + - section: Agent Settings + contents: + - page: General Settings + path: ./atoms/pages/platform/convo-flow/config/general-settings.mdx + - page: Languages + path: ./atoms/pages/platform/convo-flow/config/languages.mdx + - page: Voice Settings + path: 
./atoms/pages/platform/convo-flow/config/voice-settings.mdx + - page: Model Settings + path: ./atoms/pages/platform/convo-flow/config/model-settings.mdx + - page: Phone Number + path: ./atoms/pages/platform/convo-flow/config/phone-number.mdx + - page: Webhooks + path: ./atoms/pages/platform/convo-flow/config/webhooks.mdx + - section: Features + contents: - page: Knowledge Base - path: ./atoms/pages/deep-dive/knowledge-base/knowledge-base.mdx - - page: Phone Number - path: ./atoms/pages/deep-dive/phone-number/phone-number.mdx + path: ./atoms/pages/platform/features/knowledge-base.mdx + icon: fa-solid fa-database - page: Webhooks - path: ./atoms/pages/deep-dive/webhooks/webhooks.mdx + path: ./atoms/pages/platform/features/webhooks.mdx + icon: fa-solid fa-bolt - page: Widget - path: ./atoms/pages/deep-dive/widget/widget.mdx - - section: FAQs + path: ./atoms/pages/platform/features/widget.mdx + icon: fa-solid fa-window-maximize + - page: Integrations + path: ./atoms/pages/platform/features/integrations.mdx + icon: fa-solid fa-plug + - page: Post-Call Metrics + path: ./atoms/pages/platform/features/post-call-metrics.mdx + icon: fa-solid fa-chart-line + - page: Variables + path: ./atoms/pages/platform/features/variables-reference.mdx + icon: fa-solid fa-code + - page: API Calls + path: ./atoms/pages/platform/features/api-calls-reference.mdx + icon: fa-solid fa-server + - section: Deployment + contents: + - page: Phone Numbers + path: ./atoms/pages/platform/deployment/phone-numbers.mdx + icon: fa-solid fa-phone + - page: Audiences + path: ./atoms/pages/platform/deployment/audiences.mdx + icon: fa-solid fa-users + - page: Campaigns + path: ./atoms/pages/platform/deployment/campaigns.mdx + icon: fa-solid fa-bullhorn + - section: Analytics & Logs + contents: + - page: Overview + path: ./atoms/pages/platform/analytics/overview.mdx + icon: fa-solid fa-chart-pie + - page: Testing + path: ./atoms/pages/platform/analytics/testing.mdx + icon: fa-solid fa-flask + - page: 
Conversation Logs + path: ./atoms/pages/platform/analytics/conversation-logs.mdx + icon: fa-solid fa-comments + - page: Locking + path: ./atoms/pages/platform/analytics/locking.mdx + icon: fa-solid fa-lock + - section: Cookbooks + contents: + - page: Using Cookbooks + path: ./atoms/pages/platform/cookbooks/using-cookbooks.mdx + icon: fa-solid fa-book + - section: Reference contents: - - page: Frequently Asked Questions - path: ./atoms/pages/faqs/frequently-asked-questions.mdx + - page: Quick Reference + path: ./atoms/pages/platform/introduction/quick-ref.mdx + icon: fa-solid fa-list + - page: Glossary + path: ./atoms/pages/platform/troubleshooting/glossary.mdx + icon: fa-solid fa-book + - section: Troubleshooting + contents: + - section: Common Issues + contents: + - page: Agent Issues + path: ./atoms/pages/platform/troubleshooting/common-issues/agent-issues.mdx + - page: Call Quality + path: ./atoms/pages/platform/troubleshooting/common-issues/call-quality.mdx + - page: Configuration + path: ./atoms/pages/platform/troubleshooting/common-issues/configuration.mdx + - page: FAQ + path: ./atoms/pages/platform/troubleshooting/faq.mdx + icon: fa-solid fa-circle-question + - page: Getting Help + path: ./atoms/pages/platform/troubleshooting/getting-help.mdx + icon: fa-solid fa-life-ring + + - tab: introduction + layout: + - section: Capabilities + contents: + - page: Welcome + path: ./atoms/pages/intro/welcome.mdx + icon: fa-regular fa-hand-wave + - page: Agents + path: ./atoms/pages/intro/capabilities/agents-explained.mdx + icon: fa-solid fa-robot + - page: Knowledge base + path: ./atoms/pages/intro/capabilities/knowledge-base.mdx + icon: fa-solid fa-database + - page: Voice and speech + path: ./atoms/pages/intro/capabilities/voice-speech.mdx + icon: fa-solid fa-microphone + - page: Campaigns + path: ./atoms/pages/intro/capabilities/campaigns.mdx + icon: fa-solid fa-bullhorn + - page: Telephony + path: ./atoms/pages/intro/capabilities/telephony.mdx + icon: fa-solid 
fa-phone + - page: Integrations + path: ./atoms/pages/intro/capabilities/integrations.mdx + icon: fa-solid fa-plug + - section: Admin + contents: + - page: Subscription & Plans + path: ./atoms/pages/intro/admin/billing.mdx + icon: fa-solid fa-credit-card + - page: API Keys + path: ./atoms/pages/intro/admin/api-keys.mdx + icon: fa-solid fa-key + - page: Settings & Team + path: ./atoms/pages/intro/admin/account-team.mdx + icon: fa-solid fa-users-gear + - section: Reference + contents: + - page: Changelog + path: ./atoms/pages/intro/reference/changelog.mdx + icon: fa-solid fa-clock-rotate-left + - page: Support + path: ./atoms/pages/intro/reference/support.mdx + icon: fa-solid fa-life-ring + + - tab: developer + layout: + - section: Get Started + contents: + - page: Quickstart + path: ./atoms/pages/dev/introduction/quickstart.mdx + icon: fa-solid fa-rocket + - page: Overview + path: ./atoms/pages/dev/introduction/overview.mdx + icon: fa-regular fa-book-open + - page: CLI + path: ./atoms/pages/dev/introduction/cli.mdx + icon: fa-solid fa-terminal + - section: Core Concepts + contents: + - page: Overview + path: ./atoms/pages/dev/introduction/core-concepts/overview.mdx + - page: Nodes + path: ./atoms/pages/dev/introduction/core-concepts/nodes.mdx + - page: Events + path: ./atoms/pages/dev/introduction/core-concepts/events.mdx + - page: Graphs + path: ./atoms/pages/dev/introduction/core-concepts/graphs.mdx + - page: Sessions + path: ./atoms/pages/dev/introduction/core-concepts/sessions.mdx + - section: Build + contents: + - section: Agents + contents: + - page: Overview + path: ./atoms/pages/dev/build/agents/overview.mdx + - section: LLM + contents: + - page: Overview + path: ./atoms/pages/dev/build/agents/agent-configuration/overview.mdx + - page: LLM Settings + path: ./atoms/pages/dev/build/agents/agent-configuration/llm-settings.mdx + - page: Prompts + path: ./atoms/pages/dev/build/agents/agent-configuration/prompts.mdx + - page: BYOM + path: 
./atoms/pages/dev/build/agents/agent-configuration/byom.mdx + - section: Tools + contents: + - page: Overview + path: ./atoms/pages/dev/build/agents/tools-functions/overview.mdx + - page: Defining Tools + path: ./atoms/pages/dev/build/agents/tools-functions/defining-tools.mdx + - page: Executing Tools + path: ./atoms/pages/dev/build/agents/tools-functions/executing-tools.mdx + - page: Built-in Tools + path: ./atoms/pages/dev/build/agents/tools-functions/built-in-tools.mdx + - section: Patterns + contents: + - page: Streaming + path: ./atoms/pages/dev/build/agents/agent-patterns/streaming.mdx + - page: Interruption Handling + path: ./atoms/pages/dev/build/agents/agent-patterns/interruption-handling.mdx + - page: State Management + path: ./atoms/pages/dev/build/agents/agent-patterns/state-management.mdx + - page: Conversation Flow Design + path: ./atoms/pages/dev/build/agents/agent-patterns/conversation-flow-design.mdx + - page: Multi-Agent Orchestration + path: ./atoms/pages/dev/build/agents/agent-patterns/multi-agent-orchestration.mdx + - section: Knowledge Base + contents: + - page: Overview + path: ./atoms/pages/dev/build/knowledge-base/overview.mdx + - page: Usage + path: ./atoms/pages/dev/build/knowledge-base/usage.mdx + - section: Calling + contents: + - page: Overview + path: ./atoms/pages/dev/build/phone-calling/overview.mdx + - page: Outbound Calls + path: ./atoms/pages/dev/build/phone-calling/outbound-calls.mdx + - page: Call Control + path: ./atoms/pages/dev/build/phone-calling/call-control.mdx + - section: Campaigns + contents: + - page: Overview + path: ./atoms/pages/dev/build/campaigns/overview.mdx + - page: Managing Audiences + path: ./atoms/pages/dev/build/campaigns/managing-audiences.mdx + - page: Creating Campaigns + path: ./atoms/pages/dev/build/campaigns/creating-campaigns.mdx + - section: Operate + contents: + - section: Testing & Debugging + contents: + - page: Overview + path: ./atoms/pages/dev/build/agents/debugging-testing/overview.mdx + - 
page: Logging & Observability + path: ./atoms/pages/dev/build/agents/debugging-testing/logging-observability.mdx + - page: Common Issues + path: ./atoms/pages/dev/build/agents/debugging-testing/common-issues.mdx + - section: Analytics + contents: + - page: Overview + path: ./atoms/pages/dev/build/analytics/overview.mdx + - page: Call Metrics + path: ./atoms/pages/dev/build/analytics/call-metrics.mdx + - page: Post-Call Analytics + path: ./atoms/pages/dev/build/analytics/post-call-analytics.mdx + - page: SSE for Live Transcripts + path: ./atoms/pages/dev/build/analytics/sse-live-transcripts.mdx + - section: Examples + contents: + - page: Examples + path: ./atoms/pages/dev/cookbooks/examples.mdx + icon: fa-solid fa-lightbulb + - tab: api-reference layout: - api: API Reference api-name: atoms + snippets: + python: smallest-ai + typescript: smallest-ai + flattened: true + layout: + - User: + title: User + - Organization: + title: Organization + - Agent Templates: + title: Agent templates + - Agents: + title: Agents + - Workflows: + title: Workflows + - Logs: + title: Logs + - Calls: + title: Calls + - Campaigns: + title: Campaigns + - Knowledge Base: + title: Knowledge base + - Phone Numbers: + title: Phone numbers + - Webhooks: + title: Webhooks + - Audience: + title: Audience + + - tab: mcp + layout: + - section: Getting Started + contents: + - page: Quick Start + path: ./atoms/pages/dev/introduction/mcp/overview.mdx + icon: fa-solid fa-rocket + - page: Manual Setup + path: ./atoms/pages/dev/introduction/mcp/manual-setup.mdx + icon: fa-solid fa-wrench + - section: Using the MCP + contents: + - page: Available Tools + path: ./atoms/pages/dev/introduction/mcp/tools.mdx + icon: fa-solid fa-toolbox + - page: Prompt Cookbook + path: ./atoms/pages/dev/introduction/mcp/prompt-cookbook.mdx + icon: fa-solid fa-book + - section: Help + contents: + - page: Troubleshooting + path: ./atoms/pages/dev/introduction/mcp/troubleshooting.mdx + icon: fa-solid fa-circle-question diff 
--git a/fern/products/atoms/pages/api-reference/openapi.yaml b/fern/products/atoms/pages/api-reference/openapi.yaml index ab5a6df..16c1d90 100644 --- a/fern/products/atoms/pages/api-reference/openapi.yaml +++ b/fern/products/atoms/pages/api-reference/openapi.yaml @@ -207,8 +207,8 @@ paths: name: offset schema: type: integer - default: 5 - description: Number of items to return + default: 10 + description: Number of items to return per page - in: query name: search schema: @@ -340,8 +340,8 @@ paths: properties: enabled: type: string - enum: [en, hi, ta, kn] - description: The language of the agent. You can choose from the list of supported languages. + enum: [en, hi, ta] + description: "The language of the agent. Supported: 'en' (English), 'hi' (Hindi), 'ta' (Tamil)." default: en switching: type: object @@ -635,9 +635,293 @@ paths: "500": $ref: "#/components/responses/InternalServerErrorResponse" + /conversation: + get: + summary: Get all conversation logs + description: Retrieve paginated conversation logs with support for various filters. Returns call logs for agents belonging to the authenticated user's organization. 
+ tags: + - Logs + security: + - BearerAuth: [] + parameters: + - in: query + name: page + schema: + type: integer + default: 1 + minimum: 1 + description: Page number for pagination + example: 1 + - in: query + name: limit + schema: + type: integer + default: 5 + minimum: 1 + description: Number of items per page + example: 10 + - in: query + name: agentIds + schema: + type: string + description: Comma-separated list of agent IDs to filter by + example: "60d0fe4f5311236168a109ca,60d0fe4f5311236168a109cb" + - in: query + name: campaignIds + schema: + type: string + description: Comma-separated list of campaign IDs to filter by + example: "60d0fe4f5311236168a109ca,60d0fe4f5311236168a109cb" + - in: query + name: callTypes + schema: + type: string + enum: + - telephony_inbound + - telephony_outbound + - chat + description: Comma-separated list of call types to filter by + example: "telephony_outbound,telephony_inbound" + - in: query + name: search + schema: + type: string + description: Search query to filter by callId, fromNumber, or toNumber + example: "+1234567890" + - in: query + name: statusFilter + schema: + type: string + description: | + Comma-separated list of call statuses to filter by. + Available statuses: pending, in_progress, completed, failed, no_answer, cancelled, busy + example: "completed,failed" + - in: query + name: disconnectReasonFilter + schema: + type: string + description: | + Comma-separated list of disconnect reasons to filter by. + Available reasons: user_hangup, agent_hangup, connection_error, timeout, system_error, transfer_complete + example: "user_hangup,agent_hangup" + - in: query + name: callAttemptFilter + schema: + type: string + description: | + Comma-separated list of call attempt types to filter by. 
+ Available filters: initial (first attempt calls), retry (retry attempt calls), all (all calls) + example: "initial" + - in: query + name: durationFilter + schema: + type: string + description: | + Comma-separated list of duration ranges to filter by. + Available ranges: 0-30 (0-30 seconds), 30-60 (30-60 seconds), 1-5 (1-5 minutes), 5+ (more than 5 minutes) + example: "0-30,30-60" + responses: + "200": + description: Successful response + content: + application/json: + schema: + type: object + properties: + status: + type: boolean + example: true + data: + type: object + properties: + logs: + type: array + items: + type: object + properties: + _id: + type: string + description: The database ID of the call log + example: "60d0fe4f5311236168a109ca" + callId: + type: string + description: The unique call identifier + example: "CALL-1737000000000-abc123" + status: + type: string + description: The status of the call + enum: [pending, in_progress, completed, failed, no_answer, cancelled, busy] + example: "completed" + duration: + type: number + description: The duration of the call in seconds + example: 120 + from: + type: string + description: The phone number the call was made from + example: "+15551234567" + to: + type: string + description: The phone number the call was made to + example: "+15559876543" + type: + type: string + description: The type of call + enum: [telephony_inbound, telephony_outbound, chat] + example: "telephony_outbound" + agentId: + type: string + description: The ID of the agent that handled the call + example: "60d0fe4f5311236168a109ca" + agentName: + type: string + description: The name of the agent + example: "Sales Agent" + recordingUrl: + type: string + description: URL to the call recording (if available) + example: "https://storage.example.com/recordings/call-123.mp3" + recordingDualUrl: + type: string + description: URL to the dual-channel call recording (if available) + example: 
"https://storage.example.com/recordings/call-123-dual.mp3" + disconnectionReason: + type: string + description: The reason the call was disconnected + example: "user_hangup" + retryCount: + type: integer + description: Number of retry attempts for this call + example: 0 + createdAt: + type: string + format: date-time + description: When the call was created + example: "2025-01-15T10:30:00.000Z" + dispositionMetrics: + type: object + description: Custom disposition metrics for the call + additionalProperties: + type: string + example: + interested: "yes" + follow_up_needed: "no" + agentDispositionConfig: + type: array + description: Configuration for disposition metrics + items: + type: object + properties: + identifier: + type: string + type: + type: string + pagination: + type: object + properties: + total: + type: integer + description: Total number of matching call logs + example: 150 + page: + type: integer + description: Current page number + example: 1 + limit: + type: integer + description: Number of items per page (page size) + example: 10 + hasMore: + type: boolean + description: Whether there are more pages available + example: true + totalPages: + type: integer + description: Total number of pages + example: 15 + dispositionMetricsConfig: + type: array + description: Global disposition metrics configuration + items: + type: object + properties: + identifier: + type: string + type: + type: string + "400": + $ref: "#/components/responses/BadRequestError" + "401": + $ref: "#/components/responses/UnauthorizedErrorResponse" + "500": + $ref: "#/components/responses/InternalServerErrorResponse" + + /conversation/search: + post: + summary: Search conversation logs by call IDs + description: | + Fetch specific conversation logs by their callIds. This endpoint allows you to retrieve up to 100 specific calls at once. + Only returns calls that belong to agents in your organization (security check enforced). 
+ Unlike the GET /conversation endpoint, this endpoint can also return retry calls (non-root calls). + tags: + - Logs + security: + - BearerAuth: [] + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - callIds + properties: + callIds: + type: array + items: + type: string + minItems: 1 + maxItems: 100 + description: Array of callIds to fetch + example: ["CALL-1737000000000-abc123", "CALL-1737000000001-def456"] + responses: + "200": + description: Successful response + content: + application/json: + schema: + type: object + properties: + status: + type: boolean + example: true + data: + type: object + properties: + logs: + type: array + items: + type: object + description: Call log details (same structure as GET /conversation) + total: + type: integer + description: Number of logs returned + example: 2 + requestedCount: + type: integer + description: Number of callIds requested + example: 3 + "400": + $ref: "#/components/responses/BadRequestError" + "401": + $ref: "#/components/responses/UnauthorizedErrorResponse" + "500": + $ref: "#/components/responses/InternalServerErrorResponse" + /conversation/{id}: get: - summary: Get conversation logs + summary: Get conversation log by ID + description: Retrieve detailed information about a specific conversation including transcript, events, and latency metrics. tags: - Logs security: @@ -646,9 +930,10 @@ paths: - in: path name: id required: true - description: The callId of the conversation. You can get the callId from the conversation logs. + description: The callId of the conversation. You can get the callId from the conversation logs endpoint. 
schema: type: string + example: "CALL-1737000000000-abc123" responses: "200": description: Successful response @@ -2203,7 +2488,7 @@ paths: minimum: 1 default: 1 example: 1 - - name: offset + - name: limit in: query required: false description: Number of items per page (default is 5) @@ -2709,9 +2994,10 @@ components: description: type: string backgroundSound: - type: boolean - default: false - description: Whether to add ambient background sound during calls. Currently provides office ambience by default. Additional sound options available upon request. + type: string + enum: ["", "office", "cafe", "call_center", "static"] + default: "" + description: "Ambient background sound during calls. Options: '' (none), 'office', 'cafe', 'call_center', 'static'." # visibleToEveryone: # type: boolean # default: false @@ -2721,8 +3007,8 @@ components: properties: enabled: type: string - enum: [en, hi, ta, kn] - description: The language of the agent. You can choose from the list of supported languages. + enum: [en, hi, ta] + description: "The language of the agent. Supported: 'en' (English), 'hi' (Hindi), 'ta' (Tamil)." default: en switching: type: object @@ -2846,8 +3132,9 @@ components: type: string description: The description of the agent backgroundSound: - type: boolean - description: Whether ambient background sound is enabled during calls + type: string + enum: ["", "office", "cafe", "call_center", "static"] + description: "Ambient background sound during calls. Options: '' (none), 'office', 'cafe', 'call_center', 'static'." 
organization: type: string description: The organization ID of the agent @@ -2871,7 +3158,7 @@ components: enabled: type: string description: The language of the agent - enum: [en, hi, ta, kn] + enum: [en, hi, ta] switching: type: object description: Language switching configuration for the agent @@ -3450,8 +3737,10 @@ tags: description: Operations related to agents - name: Workflows description: Operations related to workflow configuration and management - - name: Call Logs - description: Operations related to agent call logs + - name: Logs + description: Operations related to conversation logs, call history, and recordings. Supports filtering by agents, campaigns, call IDs, status, duration, and more. + - name: Calls + description: Operations related to initiating and managing calls - name: Phone Numbers description: Operations related to phone numbers - name: Webhooks @@ -3460,5 +3749,5 @@ tags: description: Operations related to audience management and CSV uploads servers: - - url: https://atoms-api.smallest.ai/api/v1 + - url: https://api.smallest.ai/atoms/v1 description: Production server diff --git a/fern/products/atoms/pages/client-libraries/overview.mdx b/fern/products/atoms/pages/client-libraries/overview.mdx index 0728e49..85e00ea 100644 --- a/fern/products/atoms/pages/client-libraries/overview.mdx +++ b/fern/products/atoms/pages/client-libraries/overview.mdx @@ -339,8 +339,8 @@ textChat.sendMessage("Hello!"); To use the SDK, you'll need to call our backend API from your server to get the access token and connection details. Your backend should: -1. Have an existing agent created using the [Atoms API](/api-reference) -2. Call `POST atoms-api.smallest.ai/api/v1/conversation/chat` or `POST atoms-api.smallest.ai/api/v1/conversation/webcall` with the `agentId` +1. Have an existing agent created using the [Atoms API](/atoms/api-reference) +2. 
Call `POST api.smallest.ai/atoms/v1/conversation/chat` or `POST api.smallest.ai/atoms/v1/conversation/webcall` with the `agentId` 3. Return the token, host, and other session details to your frontend 4. Your frontend uses these details in the `startSession()` call diff --git a/fern/products/atoms/pages/components/CapabilityCard.jsx b/fern/products/atoms/pages/components/CapabilityCard.jsx new file mode 100644 index 0000000..cdfc0c7 --- /dev/null +++ b/fern/products/atoms/pages/components/CapabilityCard.jsx @@ -0,0 +1,93 @@ +import React from 'react'; + +export const CapabilityCard = ({ + title, + description, + href, + icon = null +}) => { + const CardWrapper = href ? 'a' : 'div'; + + return ( + + {icon &&
{icon}
} +

{title}

+

{description}

+ +
+ ); +}; + +export const CapabilityGrid = ({ children, cols = 3 }) => { + return ( +
+ {children} + +
+ ); +}; + +export default CapabilityCard; diff --git a/fern/products/atoms/pages/components/FeatureHighlight.jsx b/fern/products/atoms/pages/components/FeatureHighlight.jsx new file mode 100644 index 0000000..87d1e35 --- /dev/null +++ b/fern/products/atoms/pages/components/FeatureHighlight.jsx @@ -0,0 +1,79 @@ +import React from 'react'; + +export const FeatureHighlight = ({ + title, + features = [], + badge = null, + gradient = "from-teal-600 to-cyan-600" +}) => { + return ( +
+
+ {badge && {badge}} +

{title}

+
    + {features.map((feature, index) => ( +
  • + + {feature} +
  • + ))} +
+
+ +
+ ); +}; + +export default FeatureHighlight; diff --git a/fern/products/atoms/pages/components/Hero.jsx b/fern/products/atoms/pages/components/Hero.jsx new file mode 100644 index 0000000..6c227c4 --- /dev/null +++ b/fern/products/atoms/pages/components/Hero.jsx @@ -0,0 +1,94 @@ +import React from 'react'; + +export const Hero = ({ + title = "Atoms Documentation", + subtitle = "Build enterprise-grade voice AI agents in minutes", + badge = null +}) => { + return ( +
+
+
+ {badge && {badge}} +

{title}

+

{subtitle}

+
+ +
+ ); +}; + +export default Hero; diff --git a/fern/products/atoms/pages/components/PathSelector.jsx b/fern/products/atoms/pages/components/PathSelector.jsx new file mode 100644 index 0000000..ba1ef01 --- /dev/null +++ b/fern/products/atoms/pages/components/PathSelector.jsx @@ -0,0 +1,98 @@ +import React from 'react'; + +export const PathSelector = ({ paths }) => { + return ( +
+ {paths.map((path, index) => ( + +
{path.icon}
+
+

{path.title}

+

{path.description}

+
+
+
+ ))} + +
+ ); +}; + +export default PathSelector; diff --git a/fern/products/atoms/pages/components/ProductCard.jsx b/fern/products/atoms/pages/components/ProductCard.jsx new file mode 100644 index 0000000..73e8d35 --- /dev/null +++ b/fern/products/atoms/pages/components/ProductCard.jsx @@ -0,0 +1,140 @@ +import React from 'react'; + +export const ProductCard = ({ + title, + description, + href, + image = null, + gradient = "from-teal-500 to-cyan-500", + badge = null +}) => { + const CardWrapper = href ? 'a' : 'div'; + + return ( + +
+ {image && {title}} + {!image && ( +
+ + + + + + +
+ )} + {badge && {badge}} +
+
+

{title}

+

{description}

+
+ +
+ ); +}; + +export const ProductCardGrid = ({ children, cols = 3 }) => { + return ( +
+ {children} + +
+ ); +}; + +export default ProductCard; diff --git a/fern/products/atoms/pages/components/index.js b/fern/products/atoms/pages/components/index.js new file mode 100644 index 0000000..6f51635 --- /dev/null +++ b/fern/products/atoms/pages/components/index.js @@ -0,0 +1,5 @@ +export { Hero } from './Hero'; +export { ProductCard, ProductCardGrid } from './ProductCard'; +export { CapabilityCard, CapabilityGrid } from './CapabilityCard'; +export { PathSelector } from './PathSelector'; +export { FeatureHighlight } from './FeatureHighlight'; diff --git a/fern/products/atoms/pages/custom.css b/fern/products/atoms/pages/custom.css new file mode 100644 index 0000000..afd8040 --- /dev/null +++ b/fern/products/atoms/pages/custom.css @@ -0,0 +1,46 @@ +/* Custom Atoms Documentation Styles - ElevenLabs Inspired */ + +/* Gradient utilities for product cards */ +.from-teal-500 { + --tw-gradient-from: #14b8a6; +} + +.to-cyan-500 { + --tw-gradient-to: #06b6d4; +} + +.to-cyan-600 { + --tw-gradient-to: #0891b2; +} + +.from-blue-500 { + --tw-gradient-from: #3b82f6; +} + +.to-indigo-600 { + --tw-gradient-to: #4f46e5; +} + +.from-purple-500 { + --tw-gradient-from: #a855f7; +} + +.to-pink-600 { + --tw-gradient-to: #db2777; +} + +.from-teal-600 { + --tw-gradient-from: #0d9488; +} + +.bg-gradient-to-br { + background: linear-gradient(to bottom right, var(--tw-gradient-from), var(--tw-gradient-to)); +} + +/* Typography improvements */ +h1, +h2, +h3, +h4 { + letter-spacing: -0.025em; +} \ No newline at end of file diff --git a/fern/products/atoms/pages/deep-dive/campaign/campaign.mdx b/fern/products/atoms/pages/deep-dive/campaign/campaign.mdx index ebf0721..f443f58 100644 --- a/fern/products/atoms/pages/deep-dive/campaign/campaign.mdx +++ b/fern/products/atoms/pages/deep-dive/campaign/campaign.mdx @@ -40,7 +40,7 @@ After creating your campaign, you'll see it listed in the Campaigns dashboard. 
C To begin making calls, click the "Start Campaign" button on the right side of the campaign details page. Your campaign will immediately begin dialing numbers from your audience list. -The system makes multiple concurrent calls based on your subscription's rate limits. More information about limits can be found [here](https://console.smallest.ai/subscription). +The system makes multiple concurrent calls based on your subscription's rate limits. More information about limits can be found [here](https://console.smallest.ai/subscription?utm_source=documentation&utm_medium=docs). ### Pausing and Resuming diff --git a/fern/products/atoms/pages/deep-dive/phone-number/phone-number.mdx b/fern/products/atoms/pages/deep-dive/phone-number/phone-number.mdx index f8b482b..a1077b7 100644 --- a/fern/products/atoms/pages/deep-dive/phone-number/phone-number.mdx +++ b/fern/products/atoms/pages/deep-dive/phone-number/phone-number.mdx @@ -19,7 +19,7 @@ The purchased number will appear in the [Your Numbers](https://atoms.smallest.ai The maximum number of phone numbers you can acquire depends on your current subscription plan. Check your - limits and available quota on the [Subscription page](https://console.smallest.ai/subscription). + limits and available quota on the [Subscription page](https://console.smallest.ai/subscription?utm_source=documentation&utm_medium=docs). ## Importing Numbers via SIP Trunking @@ -45,7 +45,7 @@ To import a phone number using SIP Trunking: The imported number will appear in the [Your Numbers](https://atoms.smallest.ai/dashboard/phone-numbers/your-numbers) tab and will be ready to assign to any agent. - + For detailed setup instructions, technical specifications, and provider-specific guides, see our comprehensive SIP Trunking documentation. 
diff --git a/fern/products/atoms/pages/deep-dive/phone-number/sip-trunking.mdx b/fern/products/atoms/pages/deep-dive/phone-number/sip-trunking.mdx index a6e9e75..e0aea12 100644 --- a/fern/products/atoms/pages/deep-dive/phone-number/sip-trunking.mdx +++ b/fern/products/atoms/pages/deep-dive/phone-number/sip-trunking.mdx @@ -200,7 +200,7 @@ We support all major telephony providers that offer SIP trunking. Below are deta 4. Import numbers on Smallest AI with your trunk credentials 5. Test inbound and outbound calls - + View the detailed step-by-step guide with troubleshooting and advanced configuration @@ -217,7 +217,7 @@ We support all major telephony providers that offer SIP trunking. Below are deta 4. Assign your phone numbers to the connection 5. Import numbers on Smallest AI with your connection details - + View the comprehensive guide with authentication options and monitoring setup @@ -234,7 +234,7 @@ We support all major telephony providers that offer SIP trunking. Below are deta 4. Link your phone numbers to the application 5. 
Import numbers on Smallest AI with your API credentials - + View the detailed guide with NCCO configuration and webhook setup @@ -278,4 +278,4 @@ If you need assistance setting up SIP trunking: - Email: support@smallest.ai - Join our [Discord community](https://smallest.ai/discord) -- Check our [FAQ section](/faqs/frequently-asked-questions) for common questions +- Check our [FAQ section](/atoms/atoms-platform/troubleshooting/faq) for common questions diff --git a/fern/products/atoms/pages/deep-dive/phone-number/telnyx-sip-setup.mdx b/fern/products/atoms/pages/deep-dive/phone-number/telnyx-sip-setup.mdx index 320c207..02fd6c1 100644 --- a/fern/products/atoms/pages/deep-dive/phone-number/telnyx-sip-setup.mdx +++ b/fern/products/atoms/pages/deep-dive/phone-number/telnyx-sip-setup.mdx @@ -413,6 +413,6 @@ If you encounter issues not covered in this guide: ## Related Documentation -- [SIP Trunking Overview](/deep-dive/phone-number/sip-trunking) - General SIP trunking concepts -- [Phone Numbers](/deep-dive/phone-number/phone-number) - Managing phone numbers on Smallest AI +- [SIP Trunking Overview](/atoms/atoms-platform/deployment/phone-numbers) - General SIP trunking concepts +- [Phone Numbers](/atoms/atoms-platform/deployment/phone-numbers) - Managing phone numbers on Smallest AI - [Telnyx Documentation](https://developers.telnyx.com/docs) - Official Telnyx developer docs diff --git a/fern/products/atoms/pages/deep-dive/phone-number/twilio-sip-setup.mdx b/fern/products/atoms/pages/deep-dive/phone-number/twilio-sip-setup.mdx index bdf0465..24c8d25 100644 --- a/fern/products/atoms/pages/deep-dive/phone-number/twilio-sip-setup.mdx +++ b/fern/products/atoms/pages/deep-dive/phone-number/twilio-sip-setup.mdx @@ -314,6 +314,6 @@ If you encounter issues not covered in this guide: ## Related Documentation -- [SIP Trunking Overview](/deep-dive/phone-number/sip-trunking) - General SIP trunking concepts -- [Phone Numbers](/deep-dive/phone-number/phone-number) - Managing phone 
numbers on Smallest AI +- [SIP Trunking Overview](/atoms/atoms-platform/deployment/phone-numbers) - General SIP trunking concepts +- [Phone Numbers](/atoms/atoms-platform/deployment/phone-numbers) - Managing phone numbers on Smallest AI - [Twilio Elastic SIP Trunking Docs](https://www.twilio.com/docs/sip-trunking) - Official Twilio documentation diff --git a/fern/products/atoms/pages/deep-dive/phone-number/vonage-sip-setup.mdx b/fern/products/atoms/pages/deep-dive/phone-number/vonage-sip-setup.mdx index be50e50..fb0c9fc 100644 --- a/fern/products/atoms/pages/deep-dive/phone-number/vonage-sip-setup.mdx +++ b/fern/products/atoms/pages/deep-dive/phone-number/vonage-sip-setup.mdx @@ -454,7 +454,7 @@ If you encounter issues not covered in this guide: ## Related Documentation -- [SIP Trunking Overview](/deep-dive/phone-number/sip-trunking) - General SIP trunking concepts -- [Phone Numbers](/deep-dive/phone-number/phone-number) - Managing phone numbers on Smallest AI +- [SIP Trunking Overview](/atoms/atoms-platform/deployment/phone-numbers) - General SIP trunking concepts +- [Phone Numbers](/atoms/atoms-platform/deployment/phone-numbers) - Managing phone numbers on Smallest AI - [Vonage Voice API Docs](https://developer.vonage.com/voice/voice-api/overview) - Official Vonage documentation - [Vonage NCCO Reference](https://developer.vonage.com/voice/voice-api/ncco-reference) - NCCO action reference diff --git a/fern/products/atoms/pages/deep-dive/realtime-events/events.mdx b/fern/products/atoms/pages/deep-dive/realtime-events/events.mdx index 6d4e090..14e527d 100644 --- a/fern/products/atoms/pages/deep-dive/realtime-events/events.mdx +++ b/fern/products/atoms/pages/deep-dive/realtime-events/events.mdx @@ -53,7 +53,7 @@ Authorization: Bearer your_api_token ### cURL ```bash -curl 'https://atoms-api.smallest.ai/api/v1/events?callId=CALL-1758124225863-80752e' \ +curl 'https://api.smallest.ai/atoms/v1/events?callId=CALL-1758124225863-80752e' \ -H 'Authorization: Bearer 
your_token' \ -H 'Accept: text/event-stream' ``` @@ -64,7 +64,7 @@ curl 'https://atoms-api.smallest.ai/api/v1/events?callId=CALL-1758124225863-8075 const EventSource = require("eventsource"); const eventSource = new EventSource( - "https://atoms-api.smallest.ai/api/v1/events?callId=CALL-1758124225863-80752e", + "https://api.smallest.ai/atoms/v1/events?callId=CALL-1758124225863-80752e", { headers: { Authorization: "Bearer your_token", @@ -436,7 +436,7 @@ Authorization: Bearer your_api_token ### cURL ```bash -curl 'https://atoms-api.smallest.ai/api/v1/events?callId=CALL-1758124225863-80752e' \ +curl 'https://api.smallest.ai/atoms/v1/events?callId=CALL-1758124225863-80752e' \ -H 'Authorization: Bearer your_token' \ -H 'Accept: text/event-stream' ``` @@ -447,7 +447,7 @@ curl 'https://atoms-api.smallest.ai/api/v1/events?callId=CALL-1758124225863-8075 const EventSource = require("eventsource"); const eventSource = new EventSource( - "https://atoms-api.smallest.ai/api/v1/events?callId=CALL-1758124225863-80752e", + "https://api.smallest.ai/atoms/v1/events?callId=CALL-1758124225863-80752e", { headers: { Authorization: "Bearer your_token", diff --git a/fern/products/atoms/pages/deep-dive/single-prompt/overview.mdx b/fern/products/atoms/pages/deep-dive/single-prompt/overview.mdx index 357225c..1efca07 100644 --- a/fern/products/atoms/pages/deep-dive/single-prompt/overview.mdx +++ b/fern/products/atoms/pages/deep-dive/single-prompt/overview.mdx @@ -7,7 +7,7 @@ icon: circle-info ## 🎯 Single Prompt Flow The single prompt flow is the **simplest** way to create an AI agent. You are supposed to describe -the entire expected behavior in a single prompt - you can check out our [guide for best practices](/deep-dive/single-prompt/prompting-best-practices) 📚. +the entire expected behavior in a single prompt - you can check out our [guide for best practices](/atoms/atoms-platform/single-prompt-agents/prompt-section/writing-prompts) 📚. 
For example - the below images shows the prompt for a very simple hotel booking agent 🏨: @@ -29,11 +29,11 @@ forward corporate booking requests to a human agent 👤. The next sections describe all the tool types currently supported: -- 📞 [End Call](/deep-dive/single-prompt/tool-calls#end-call-tool) -- 🔄 [Transfer Call](/deep-dive/single-prompt/tool-calls#transfer-call-tool) -- 🌐 [API Call](/deep-dive/single-prompt/tool-calls#api-call-tool) -- 📊 [Dynamic Variable Extraction](/deep-dive/single-prompt/tool-calls#dynamic-variable-extraction-tool) -- 🔍 [Knowledge Base Search](/deep-dive/single-prompt/tool-calls#knowledge-base-search-tool) +- 📞 [End Call](/atoms/developer-guide/build/agents/tools/defining-tools#end-call-tool) +- 🔄 [Transfer Call](/atoms/developer-guide/build/agents/tools/defining-tools#transfer-call-tool) +- 🌐 [API Call](/atoms/developer-guide/build/agents/tools/defining-tools#api-call-tool) +- 📊 [Dynamic Variable Extraction](/atoms/developer-guide/build/agents/tools/defining-tools#dynamic-variable-extraction-tool) +- 🔍 [Knowledge Base Search](/atoms/developer-guide/build/agents/tools/defining-tools#knowledge-base-search-tool) diff --git a/fern/products/atoms/pages/deep-dive/voice-config/background-sound.mdx b/fern/products/atoms/pages/deep-dive/voice-config/background-sound.mdx index 4d67eb6..10bbe3f 100644 --- a/fern/products/atoms/pages/deep-dive/voice-config/background-sound.mdx +++ b/fern/products/atoms/pages/deep-dive/voice-config/background-sound.mdx @@ -42,11 +42,12 @@ Background sound can be particularly useful in scenarios where: ## Available Sound Options -Currently, a default office ambience sound is available out of the box. This provides a natural background noise that simulates a professional office environment. - - -Additional sound options (like cafe ambience, outdoor environment, etc.) can be made available upon request. Contact our support team to discuss custom sound requirements. 
- +You can choose from: +- `""` (none) +- `"office"` +- `"cafe"` +- `"call_center"` +- `"static"` ## API Support @@ -54,8 +55,8 @@ The background sound setting can be configured via our API: ```json { - "backgroundSound": true + "backgroundSound": "office" } ``` -For more details, see our [API Reference](/api-reference/). \ No newline at end of file +For more details, see our [API Reference](/atoms/api-reference). \ No newline at end of file diff --git a/fern/products/atoms/pages/deep-dive/voice-config/types-of-voice-models.mdx b/fern/products/atoms/pages/deep-dive/voice-config/types-of-voice-models.mdx index 1d9c1a5..8b30f63 100644 --- a/fern/products/atoms/pages/deep-dive/voice-config/types-of-voice-models.mdx +++ b/fern/products/atoms/pages/deep-dive/voice-config/types-of-voice-models.mdx @@ -10,7 +10,7 @@ icon: palette Looking for the right AI voice model? Below is a **detailed comparison** of various models based on pricing, latency, and features. -🔗 **Jump to [Available Voice Models](https://waves-docs.smallest.ai/content/getting-started/models)** +🔗 **Jump to [Available Voice Models](/waves/documentation/getting-started/models)** --- @@ -20,5 +20,5 @@ Want to create a custom AI voice? Instantly clone your voice with less than 15 s Both options provide realistic, high-quality voice synthesis, allowing you to create personalized, scalable, and expressive AI voices tailored to your needs. 
-🔗 **Jump to [Creating a Custom Voice](https://waves-docs.smallest.ai/content/voice-cloning/types-of-clone)** +🔗 **Jump to [Creating a Custom Voice](/waves/documentation/voice-cloning/types-of-clone)** diff --git a/fern/products/atoms/pages/deep-dive/workflow/types-of-nodes/api-call.mdx b/fern/products/atoms/pages/deep-dive/workflow/types-of-nodes/api-call.mdx index 072aa9f..5ca1159 100644 --- a/fern/products/atoms/pages/deep-dive/workflow/types-of-nodes/api-call.mdx +++ b/fern/products/atoms/pages/deep-dive/workflow/types-of-nodes/api-call.mdx @@ -155,4 +155,4 @@ Remember that all variable mappings must start with `$.` to properly access the The **API Call Node** is a powerful tool that allows your workflow to **communicate with external systems**. Whether sending data or fetching information, API Call Nodes enable **real-time, automated interactions**. -**Next Steps:** Learn how to use the [Transfer Call Node](/deep-dive/workflow/types-of-nodes/transfer-call) to add a Human into the loop! +**Next Steps:** Learn how to use the [Transfer Call Node](/atoms/atoms-platform/conversational-flow-agents/workflow-tab/node-types) to add a Human into the loop! diff --git a/fern/products/atoms/pages/deep-dive/workflow/types-of-nodes/default.mdx b/fern/products/atoms/pages/deep-dive/workflow/types-of-nodes/default.mdx index 54e563f..efd4aca 100644 --- a/fern/products/atoms/pages/deep-dive/workflow/types-of-nodes/default.mdx +++ b/fern/products/atoms/pages/deep-dive/workflow/types-of-nodes/default.mdx @@ -80,4 +80,4 @@ After configuring these parameters, clicking "Save" finalizes the node setup. The **Default Node** is the foundation of any workflow in Atoms. It represents a **single conversational goal**, guiding the AI agent's responses. By structuring conversations with well-defined Default Nodes, you can create **flexible and natural AI interactions**. 
-Next Steps: Explore [Webhook Nodes](/deep-dive/workflow/types-of-nodes/webhook) to extend your workflow with real-time data exchange. \ No newline at end of file +Next Steps: Explore [Webhook Nodes](/atoms/atoms-platform/conversational-flow-agents/workflow-tab/node-types) to extend your workflow with real-time data exchange. \ No newline at end of file diff --git a/fern/products/atoms/pages/deep-dive/workflow/types-of-nodes/end-call.mdx b/fern/products/atoms/pages/deep-dive/workflow/types-of-nodes/end-call.mdx index e17cb4c..fe574b1 100644 --- a/fern/products/atoms/pages/deep-dive/workflow/types-of-nodes/end-call.mdx +++ b/fern/products/atoms/pages/deep-dive/workflow/types-of-nodes/end-call.mdx @@ -78,4 +78,4 @@ Once these parameters are set, clicking **Save** finalizes the End Call Node set The **End Call Node** ensures that conversations in Atoms workflows conclude smoothly and naturally. It helps create a **structured, user-friendly experience** by closing interactions **at the right moments**. -**Next Steps:** Learn about [Best Practices for Prompting](/deep-dive/workflow/best-practices-for-prompting) to refine how your AI generates responses! +**Next Steps:** Learn about [Best Practices for Prompting](/atoms/atoms-platform/conversational-flow-agents/overview) to refine how your AI generates responses! diff --git a/fern/products/atoms/pages/deep-dive/workflow/types-of-nodes/pre-call-api.mdx b/fern/products/atoms/pages/deep-dive/workflow/types-of-nodes/pre-call-api.mdx index c106ecd..55137d3 100644 --- a/fern/products/atoms/pages/deep-dive/workflow/types-of-nodes/pre-call-api.mdx +++ b/fern/products/atoms/pages/deep-dive/workflow/types-of-nodes/pre-call-api.mdx @@ -145,4 +145,4 @@ Agent: "I see you're interested in {{productCategory}}. The **Pre Call API Node** is a powerful tool for enriching conversations with external data before they begin. By fetching relevant information upfront, you can create more personalized and efficient conversational experiences. 
-**Next Steps:** Learn about [API Call Node](/deep-dive/workflow/types-of-nodes/api-call) to understand how to make API calls during the conversation! +**Next Steps:** Learn about [API Call Node](/atoms/atoms-platform/conversational-flow-agents/workflow-tab/node-types) to understand how to make API calls during the conversation! diff --git a/fern/products/atoms/pages/deep-dive/workflow/types-of-nodes/webhook.mdx b/fern/products/atoms/pages/deep-dive/workflow/types-of-nodes/webhook.mdx index 097c173..052b40b 100644 --- a/fern/products/atoms/pages/deep-dive/workflow/types-of-nodes/webhook.mdx +++ b/fern/products/atoms/pages/deep-dive/workflow/types-of-nodes/webhook.mdx @@ -95,4 +95,4 @@ Once these parameters are set, clicking **Save** finalizes the Webhook Node setu The **Webhook Node** is a powerful tool that allows your workflow to **communicate with external systems**. Whether sending data or fetching information, Webhook Nodes enable **real-time, automated interactions**. -**Next Steps:** Learn how to use the [Transfer Call Node](/deep-dive/workflow/types-of-nodes/transfer-call) to add a Human into the loop! +**Next Steps:** Learn how to use the [Transfer Call Node](/atoms/atoms-platform/conversational-flow-agents/workflow-tab/node-types) to add a Human into the loop! diff --git a/fern/products/atoms/pages/deep-dive/workflow/what-are-variables.mdx b/fern/products/atoms/pages/deep-dive/workflow/what-are-variables.mdx index 1100281..18aa5b2 100644 --- a/fern/products/atoms/pages/deep-dive/workflow/what-are-variables.mdx +++ b/fern/products/atoms/pages/deep-dive/workflow/what-are-variables.mdx @@ -54,7 +54,7 @@ When a user says, "My name is Sarah," the agent can automatically store "Sarah" ### 3. Audience Variables -Audience variables are created automatically from the columns in your uploaded [Audience CSV](/deep-dive/audience/audience) file. Each column header in your CSV becomes a variable name that's accessible throughout your workflow. 
+Audience variables are created automatically from the columns in your uploaded [Audience CSV](/atoms/atoms-platform/deployment/audiences) file. Each column header in your CSV becomes a variable name that's accessible throughout your workflow. **How Audience Variables Work:** diff --git a/fern/products/atoms/pages/deep-dive/workflow/what-is-a-workflow.mdx b/fern/products/atoms/pages/deep-dive/workflow/what-is-a-workflow.mdx index 1effa43..225e3f7 100644 --- a/fern/products/atoms/pages/deep-dive/workflow/what-is-a-workflow.mdx +++ b/fern/products/atoms/pages/deep-dive/workflow/what-is-a-workflow.mdx @@ -28,8 +28,8 @@ into sub-goals or **nodes**, and based on the intent or **branches** move to the The above image describes a simple workflow of atoms. -It contains 3 [default nodes](/deep-dive/workflow/types-of-nodes/default), 2 [end call nodes](/deep-dive/workflow/types-of-nodes/end-call) -and 5 [branches](/deep-dive/workflow/what-is-a-branch.mdx). +It contains 3 [default nodes](/atoms/atoms-platform/conversational-flow-agents/workflow-tab/node-types), 2 [end call nodes](/atoms/atoms-platform/conversational-flow-agents/workflow-tab/node-types) +and 5 [branches](/atoms/atoms-platform/conversational-flow-agents/workflow-tab/conditions). In this example, we show a simple Insurance lead generation workflow. 
diff --git a/fern/products/atoms/pages/dev/build/agents/agent-configuration/llm-settings.mdx b/fern/products/atoms/pages/dev/build/agents/agent-configuration/llm-settings.mdx index 54a7e9f..4b2a152 100644 --- a/fern/products/atoms/pages/dev/build/agents/agent-configuration/llm-settings.mdx +++ b/fern/products/atoms/pages/dev/build/agents/agent-configuration/llm-settings.mdx @@ -147,13 +147,13 @@ agent = CreateAgentRequest( For the complete Waves voice library with audio samples: - → [Waves Voice Models](https://waves-docs.smallest.ai/content/getting-started/models) + → [Waves Voice Models](/waves/documentation/getting-started/models) ### Voice Cloning Create custom voices from audio samples: -→ [Waves Voice Cloning Guide](https://waves-docs.smallest.ai/content/voice-cloning/types-of-clone) +→ [Waves Voice Cloning Guide](/waves/documentation/voice-cloning/types-of-clone) ### Third-Party Providers diff --git a/fern/products/atoms/pages/dev/build/agents/agent-configuration/overview.mdx b/fern/products/atoms/pages/dev/build/agents/agent-configuration/overview.mdx index 461bcba..df62c46 100644 --- a/fern/products/atoms/pages/dev/build/agents/agent-configuration/overview.mdx +++ b/fern/products/atoms/pages/dev/build/agents/agent-configuration/overview.mdx @@ -27,13 +27,13 @@ The SDK provides sensible defaults, so you can start simple and tune later. ## What's Next - + Craft effective system prompts that define behavior. - + Model selection, temperature, and streaming. - + Run local models with Ollama, vLLM, or custom servers. diff --git a/fern/products/atoms/pages/dev/build/agents/agent-configuration/prompts.mdx b/fern/products/atoms/pages/dev/build/agents/agent-configuration/prompts.mdx index da3ab1e..2c15d4a 100644 --- a/fern/products/atoms/pages/dev/build/agents/agent-configuration/prompts.mdx +++ b/fern/products/atoms/pages/dev/build/agents/agent-configuration/prompts.mdx @@ -388,3 +388,8 @@ Summarize what was done, confirm next steps, thank them. 
The best prompts feel like onboarding a new teammate. You're not giving them a script; you're explaining who they are, what they're trying to achieve, how they should behave, and what tools they have. Then you trust them to handle the conversation. **Clear structure + specific guidance + room for adaptation = an agent users enjoy talking to.** + + + Want an exhaustive, line-by-line prompt breakdown? See the dedicated guide in our cookbook: + [Voice Agent Prompting Guide](https://github.com/smallest-inc/cookbook/blob/main/best-practices/voice_agent_prompting_guide.md). + diff --git a/fern/products/atoms/pages/dev/build/agents/agent-configuration/voice-settings.mdx b/fern/products/atoms/pages/dev/build/agents/agent-configuration/voice-settings.mdx index 7e38731..5b1dbfc 100644 --- a/fern/products/atoms/pages/dev/build/agents/agent-configuration/voice-settings.mdx +++ b/fern/products/atoms/pages/dev/build/agents/agent-configuration/voice-settings.mdx @@ -193,10 +193,10 @@ async def start(self, init_event, task_manager): ## Next Steps - + Give your agent capabilities beyond conversation. - + Learn advanced behavior patterns. 
diff --git a/fern/products/atoms/pages/dev/build/agents/agent-patterns/interruption-handling.mdx b/fern/products/atoms/pages/dev/build/agents/agent-patterns/interruption-handling.mdx index 78d416c..baaf04e 100644 --- a/fern/products/atoms/pages/dev/build/agents/agent-patterns/interruption-handling.mdx +++ b/fern/products/atoms/pages/dev/build/agents/agent-patterns/interruption-handling.mdx @@ -210,7 +210,7 @@ class ToolAgent(OutputAgentNode): ], }, *[ - {"role": "tool", "tool_call_id": tc.id, "content": str(result)} + {"role": "tool", "tool_call_id": tc.id, "content": result.content} for tc, result in zip(tool_calls, results) ], ]) diff --git a/fern/products/atoms/pages/dev/build/agents/building-first-agent/complete-walkthrough.mdx b/fern/products/atoms/pages/dev/build/agents/building-first-agent/complete-walkthrough.mdx index 094c1f7..86b9db6 100644 --- a/fern/products/atoms/pages/dev/build/agents/building-first-agent/complete-walkthrough.mdx +++ b/fern/products/atoms/pages/dev/build/agents/building-first-agent/complete-walkthrough.mdx @@ -170,13 +170,29 @@ class OrderAssistant(OutputAgentNode): if tool_calls: results = await self.tool_registry.execute(tool_calls, parallel=True) - # Add tool results to context - for result in results: - self.context.add_message({ - "role": "tool", - "tool_call_id": result.tool_call_id, - "content": result.content - }) + # Add assistant tool_calls and results to context + self.context.add_messages([ + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": tc.id, + "type": "function", + "function": { + "name": tc.name, + "arguments": str(tc.arguments), + }, + } + for tc in tool_calls + ], + }, + *[ + {"role": "tool", "tool_call_id": tc.id, "content": result.content} + for tc, result in zip(tool_calls, results) + ], + ]) + followup = await self.llm.chat( messages=self.context.messages, stream=True @@ -318,15 +334,31 @@ class OrderAssistant(OutputAgentNode): if chunk.tool_calls: tool_calls.extend(chunk.tool_calls) + 
if tool_calls: results = await self.tool_registry.execute(tool_calls, parallel=True) - # Add tool results to context - for result in results: - self.context.add_message({ - "role": "tool", - "tool_call_id": result.tool_call_id, - "content": result.content - }) + # Add assistant tool_calls and results to context + self.context.add_messages([ + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": tc.id, + "type": "function", + "function": { + "name": tc.name, + "arguments": str(tc.arguments), + }, + } + for tc in tool_calls + ], + }, + *[ + {"role": "tool", "tool_call_id": tc.id, "content": result.content} + for tc, result in zip(tool_calls, results) + ], + ]) followup = await self.llm.chat( messages=self.context.messages, @@ -364,10 +396,10 @@ In this guide, you: ## Next Steps - + Craft better system prompts and agent personas. - + Learn advanced tool patterns and best practices. diff --git a/fern/products/atoms/pages/dev/build/agents/debugging-testing/common-issues.mdx b/fern/products/atoms/pages/dev/build/agents/debugging-testing/common-issues.mdx index f017894..6dfb87c 100644 --- a/fern/products/atoms/pages/dev/build/agents/debugging-testing/common-issues.mdx +++ b/fern/products/atoms/pages/dev/build/agents/debugging-testing/common-issues.mdx @@ -220,7 +220,7 @@ async def generate_response(self): If you cannot resolve an issue: 1. Check the [Discord](https://discord.gg/smallest) for community help -2. Search existing [GitHub issues](https://github.com/smallest-ai/smallest-python-sdk/issues) +2. Search existing [GitHub issues](https://github.com/smallest-inc/smallest-python-sdk/issues) 3. Open a new issue with: - Python version - SDK version (`pip show smallestai`) @@ -230,10 +230,10 @@ If you cannot resolve an issue: ## Next Steps - + See complete working examples. - + Get help from the community. 
diff --git a/fern/products/atoms/pages/dev/build/agents/debugging-testing/logging-observability.mdx b/fern/products/atoms/pages/dev/build/agents/debugging-testing/logging-observability.mdx index 2a7b93b..05e668b 100644 --- a/fern/products/atoms/pages/dev/build/agents/debugging-testing/logging-observability.mdx +++ b/fern/products/atoms/pages/dev/build/agents/debugging-testing/logging-observability.mdx @@ -262,10 +262,10 @@ DEBUG=true python agent.py ## Next Steps - + Troubleshoot common problems. - + See complete working examples. diff --git a/fern/products/atoms/pages/dev/build/agents/debugging-testing/overview.mdx b/fern/products/atoms/pages/dev/build/agents/debugging-testing/overview.mdx index 4617ef3..996661a 100644 --- a/fern/products/atoms/pages/dev/build/agents/debugging-testing/overview.mdx +++ b/fern/products/atoms/pages/dev/build/agents/debugging-testing/overview.mdx @@ -24,10 +24,10 @@ The SDK gives you visibility into what's happening at every step. ## What's Next - + Trace execution flow and debug issues. - + Troubleshooting FAQ and fixes. diff --git a/fern/products/atoms/pages/dev/build/agents/overview.mdx b/fern/products/atoms/pages/dev/build/agents/overview.mdx index 53395c1..4e05842 100644 --- a/fern/products/atoms/pages/dev/build/agents/overview.mdx +++ b/fern/products/atoms/pages/dev/build/agents/overview.mdx @@ -27,19 +27,35 @@ This happens continuously, creating a natural back-and-forth conversation. | **Multi-Provider LLM** | Use OpenAI, Anthropic, or bring your own model. | | **Production Ready** | Deploy with one command. Handle thousands of concurrent calls. 
| +## Node Types + +The SDK provides three node types for building agents: + +| Node | Purpose | +|------|---------| +| `Node` | Base primitive for routing, logging, and custom logic | +| `OutputAgentNode` | Conversational agent that speaks to users | +| `BackgroundAgentNode` | Silent observer for analytics and monitoring | + + + Deep dive into node architecture, when to use each type, and how to build custom nodes. + + +--- + ## What's Next - + Set up prompts and LLM settings. - + Give your agent actions and data access. - + Conversation flows, interruptions, multi-agent. - + Test locally and debug issues. diff --git a/fern/products/atoms/pages/dev/build/agents/tools-functions/built-in-tools.mdx b/fern/products/atoms/pages/dev/build/agents/tools-functions/built-in-tools.mdx index 95d6a20..6018b9d 100644 --- a/fern/products/atoms/pages/dev/build/agents/tools-functions/built-in-tools.mdx +++ b/fern/products/atoms/pages/dev/build/agents/tools-functions/built-in-tools.mdx @@ -133,7 +133,7 @@ class SupportAgent(OutputAgentNode): for tc in tool_calls ] }, - *[{"role": "tool", "tool_call_id": tc.id, "content": str(result)} + *[{"role": "tool", "tool_call_id": tc.id, "content": result.content} for tc, result in zip(tool_calls, results)] ]) diff --git a/fern/products/atoms/pages/dev/build/agents/tools-functions/executing-tools.mdx b/fern/products/atoms/pages/dev/build/agents/tools-functions/executing-tools.mdx index 1ad8c94..3997c22 100644 --- a/fern/products/atoms/pages/dev/build/agents/tools-functions/executing-tools.mdx +++ b/fern/products/atoms/pages/dev/build/agents/tools-functions/executing-tools.mdx @@ -104,7 +104,7 @@ if tool_calls: ], }, *[ - {"role": "tool", "tool_call_id": tc.id, "content": str(result)} + {"role": "tool", "tool_call_id": tc.id, "content": result.content} for tc, result in zip(tool_calls, results) ], ]) diff --git a/fern/products/atoms/pages/dev/build/agents/tools-functions/overview.mdx 
b/fern/products/atoms/pages/dev/build/agents/tools-functions/overview.mdx index 215085b..f7053c3 100644 --- a/fern/products/atoms/pages/dev/build/agents/tools-functions/overview.mdx +++ b/fern/products/atoms/pages/dev/build/agents/tools-functions/overview.mdx @@ -33,13 +33,13 @@ The LLM never runs your code directly—it just tells you what to run and with w ## What's Next - + The `@function_tool` decorator, docstrings, and type hints. - + ToolRegistry, handling calls, and feeding results back. - + SDK-provided actions: ending calls, transferring to humans. diff --git a/fern/products/atoms/pages/dev/build/analytics/call-metrics.mdx b/fern/products/atoms/pages/dev/build/analytics/call-metrics.mdx index 9ae337f..c320186 100644 --- a/fern/products/atoms/pages/dev/build/analytics/call-metrics.mdx +++ b/fern/products/atoms/pages/dev/build/analytics/call-metrics.mdx @@ -1,166 +1,227 @@ --- title: "Call Metrics" sidebarTitle: "Call Metrics" -description: "Track call duration, status, and transcripts." +description: "Retrieve call details, transcripts, recordings, and performance data." --- -Retrieve detailed metrics for individual calls. +Access detailed information about every call through the SDK. 
-## Getting Call Details +## Getting Recent Calls + +Fetch a paginated list of calls: ```python -import requests -import os +from smallestai.atoms.call import Call -API_KEY = os.getenv("SMALLEST_API_KEY") -call_id = "CALL-1768155029217-0bae45" +call = Call() -response = requests.get( - f"https://atoms.smallest.ai/api/v1/conversation/{call_id}", - headers={"Authorization": f"Bearer {API_KEY}"} -) -data = response.json()["data"] +calls = call.get_calls(limit=5) -print(f"Call ID: {data['callId']}") -print(f"Duration: {data['duration']} seconds") -print(f"Status: {data['status']}") +for log in calls["data"]["logs"]: + print(f"{log['callId']}: {log['status']} ({log['duration']}s)") ``` -## Call Duration - -```python -duration_seconds = data["duration"] -duration_minutes = duration_seconds / 60 - -print(f"Call lasted {duration_minutes:.1f} minutes") +The response includes pagination info: + +```json +{ + "status": true, + "data": { + "logs": [...], + "pagination": { + "total": 94, + "page": 1, + "hasMore": true, + "limit": 5 + } + } +} ``` -## Call Status +--- + +## Filtering Calls -Check call outcomes: +Narrow results using filter parameters: ```python -status = data["status"] - -if status == "completed": - print("Call completed successfully") -elif status == "in_progress": - print("Call is ongoing") -elif status == "failed": - print(f"Call failed: {data.get('callFailureReason', 'Unknown')}") -``` +from smallestai.atoms.call import Call -## Call Types +call = Call() -```python -call_type = data["type"] +# By status +completed = call.get_calls(status="completed", limit=10) -# Possible values: -# - 'telephony_inbound' - Incoming call -# - 'telephony_outbound' - Outgoing call -# - 'chat' - Text conversation -``` +# By agent +agent_calls = call.get_calls(agent_id="696ddd281ea16a73cb8aafbe", limit=10) -## Phone Numbers +# By campaign +campaign_calls = call.get_calls(campaign_id="696ddd2a04ff172dbd8eddad", limit=10) -```python -caller = data["from"] # Who initiated -callee = 
data["to"] # Who received +# By call type +outbound = call.get_calls(call_type="telephony_outbound", limit=10) -print(f"From: {caller}") -print(f"To: {callee}") +# By phone number +found = call.get_calls(search="+916366821717", limit=10) ``` -## Transcripts +### Filter Parameters + +| Parameter | Type | Description | +|-----------|------|-------------| +| `agent_id` | string | Filter by agent ID | +| `campaign_id` | string | Filter by campaign ID | +| `page` | int | Page number (default: 1) | +| `limit` | int | Results per page (default: 10) | +| `status` | string | completed, failed, in_progress, no_answer, busy | +| `call_type` | string | telephony_inbound, telephony_outbound, chat | +| `search` | string | Match callId, fromNumber, or toNumber | + +--- + +## Getting Single Call Details -Get the full conversation: +Retrieve complete details for one call: ```python -transcript = data["transcript"] +from smallestai.atoms.call import Call + +call = Call() + +call_id = "CALL-1768807723178-4561d0" +details = call.get_call(call_id) -for message in transcript: - role = message.get("role", "unknown") - content = message.get("content", "") - print(f"{role}: {content}") +data = details["data"] +print(f"Status: {data['status']}") +print(f"Duration: {data['duration']} seconds") +print(f"From: {data['from']} → To: {data['to']}") ``` -## Recording URL +### Response Fields + +| Field | Description | +|-------|-------------| +| `callId` | Unique call identifier | +| `status` | completed, failed, in_progress, no_answer | +| `duration` | Length in seconds | +| `type` | telephony_outbound, telephony_inbound, chat | +| `from` / `to` | Phone numbers | +| `transcript` | Array of conversation messages | +| `recordingUrl` | Mono audio file URL | +| `recordingDualUrl` | Stereo audio file URL | +| `callCost` | Cost in credits | +| `disconnectionReason` | user_hangup, agent_hangup, timeout | +| `postCallAnalytics` | AI summary and extracted metrics | -Access the call recording: +--- + +## 
Accessing Transcripts + +The transcript is an array of messages with speaker roles: ```python -recording_url = data["recordingUrl"] -dual_url = data["recordingDualUrl"] # Stereo version +details = call.get_call(call_id) +transcript = details["data"].get("transcript", []) -if recording_url: - print(f"Recording: {recording_url}") +for msg in transcript: + print(f"{msg['role']}: {msg['content']}") ``` -## Call Cost +--- + +## Accessing Recordings + +Recordings are available after the call completes: ```python -cost = data["callCost"] -print(f"Call cost: {cost} credits") +details = call.get_call(call_id) +data = details["data"] + +if data.get("recordingUrl"): + print(f"Mono: {data['recordingUrl']}") + +if data.get("recordingDualUrl"): + print(f"Stereo: {data['recordingDualUrl']}") ``` -## Call Events +--- + +## Batch Search -View the call lifecycle: +Fetch multiple calls at once with `search_calls()`: ```python -events = data["events"] +call_ids = ["CALL-1768807723178-4561d0", "CALL-1768807723177-4561cd"] +result = call.search_calls(call_ids) -for event in events: - event_type = event["eventType"] - event_time = event["eventTime"] - print(f"{event_time}: {event_type}") +print(f"Found {result['data']['total']} of {len(call_ids)} calls") ``` -Common event types: -- `call_pending` - Call initiated -- `call_queued` - Waiting for agent -- `in_progress` - Call connected -- `completed` - Call ended normally -- `failed` - Call failed + +Maximum 100 call IDs per request. 
+ -## Batch Analysis +--- + +## Performance Metrics -Analyze multiple calls: +Each call includes latency breakdowns: ```python -def analyze_calls(call_ids): - total_duration = 0 - completed = 0 - - for call_id in call_ids: - resp = requests.get( - f"https://atoms.smallest.ai/api/v1/conversation/{call_id}", - headers={"Authorization": f"Bearer {API_KEY}"} - ) - data = resp.json()["data"] - - total_duration += data.get("duration", 0) - if data.get("status") == "completed": - completed += 1 - - avg_duration = total_duration / len(call_ids) if call_ids else 0 - completion_rate = completed / len(call_ids) * 100 if call_ids else 0 - - print(f"Average duration: {avg_duration:.1f}s") - print(f"Completion rate: {completion_rate:.1f}%") +details = call.get_call(call_id) +data = details["data"] + +print(f"Transcriber: {data.get('average_transcriber_latency')}ms") +print(f"Agent (LLM): {data.get('average_agent_latency')}ms") +print(f"Synthesizer: {data.get('average_synthesizer_latency')}ms") ``` -## Export Data +| Metric | Description | +|--------|-------------| +| `average_transcriber_latency` | Speech-to-text processing time | +| `average_agent_latency` | LLM response generation time | +| `average_synthesizer_latency` | Text-to-speech processing time | -```python -import json +--- -# Export to JSON -with open("call_log.json", "w") as f: - json.dump(data, f, indent=2) +## SDK Reference -# Export transcript to text -with open("transcript.txt", "w") as f: - for msg in data.get("transcript", []): - f.write(f"{msg.get('role', '')}: {msg.get('content', '')}\n") -``` +| Method | Description | +|--------|-------------| +| `get_calls(...)` | List calls with optional filters | +| `get_call(call_id)` | Get single call with all details | +| `search_calls(call_ids)` | Batch fetch by call IDs | + +--- + +## Tips + + + + `status` is the outcome (completed, failed). 
`disconnectionReason` explains *why* it ended: + - `user_hangup` — Caller hung up + - `agent_hangup` — Agent ended the call + - `dial_no_answer` — No pickup + - `timeout` — Call timeout + + + Recordings generate after the call ends. They may take a few seconds to appear. Check if `recordingUrl` is non-empty before accessing. + + + Fetch calls with `get_calls()`, then calculate: + ```python + calls = call.get_calls(status="completed", limit=100) + durations = [log["duration"] for log in calls["data"]["logs"]] + avg = sum(durations) / len(durations) + ``` + + + Use the `search` parameter: + ```python + calls = call.get_calls(search="+916366821717") + ``` + + + Currently, use pagination and filter client-side by `createdAt`. Date range filters are coming soon. + + diff --git a/fern/products/atoms/pages/dev/build/analytics/overview.mdx b/fern/products/atoms/pages/dev/build/analytics/overview.mdx index 65b6401..39c7b52 100644 --- a/fern/products/atoms/pages/dev/build/analytics/overview.mdx +++ b/fern/products/atoms/pages/dev/build/analytics/overview.mdx @@ -1,56 +1,48 @@ --- title: "Analytics" sidebarTitle: "Overview" -description: "Track call metrics and agent performance." +description: "Track every call, measure performance, and extract insights." --- -Retrieve conversation logs to analyze call metrics, view transcripts, and monitor your agents. 
- -## Quick Example - -```python -import requests -import os - -API_KEY = os.getenv("SMALLEST_API_KEY") - -# Get conversation details -response = requests.get( - f"https://atoms.smallest.ai/api/v1/conversation/{call_id}", - headers={"Authorization": f"Bearer {API_KEY}"} -) -data = response.json()["data"] - -print(f"Duration: {data['duration']} seconds") -print(f"Status: {data['status']}") -print(f"Type: {data['type']}") -print(f"Recording: {data['recordingUrl']}") -``` - -## Available Metrics - -| Metric | Field | Description | -|--------|-------|-------------| -| Call ID | `callId` | Unique call identifier | -| Status | `status` | in_progress, completed, failed | -| Duration | `duration` | Length in seconds | -| Type | `type` | telephony_outbound, telephony_inbound, chat | -| Transcript | `transcript` | Array of conversation messages | -| Recording | `recordingUrl` | Link to mono audio | -| Dual Recording | `recordingDualUrl` | Link to stereo audio | -| Cost | `callCost` | Call cost in credits | -| From | `from` | Caller phone number | -| To | `to` | Recipient phone number | -| Events | `events` | Call lifecycle events | -| Agent | `agent` | Agent configuration used | +**Analytics** gives you complete visibility into what happens during and after every call your agents handle. + +## What Can You Track? + +Every call generates rich data that you can access programmatically: + +| Category | Data Available | +|----------|----------------| +| **Call Details** | Duration, status, phone numbers, timestamps | +| **Transcript** | Full conversation text with speaker labels | +| **Recordings** | Mono and stereo audio files | +| **AI Summaries** | Auto-generated call summaries | +| **Disposition Metrics** | Extracted data points you define | +| **Performance** | Latency breakdowns per component | +| **Cost** | Credits consumed per call | + +## How It Works + +1. **During the call** — Events are logged in real-time (status changes, transcription, speech) +2. 
**After the call** — AI processes the transcript to generate summaries and extract configured metrics +3. **On demand** — SDK methods let you query, filter, and search call data + +## SDK Methods + +| Method | Purpose | +|--------|---------| +| `get_calls()` | List calls with filters | +| `get_call(id)` | Get single call details | +| `search_calls(ids)` | Batch fetch by call IDs | +| `get_post_call_config(agent_id)` | Get agent's analytics config | +| `set_post_call_config(agent_id, ...)` | Configure post-call extraction | ## What's Next - - Track call duration, status, and transcripts. + + Retrieve calls, transcripts, recordings, and performance data. - - Monitor latency and identify bottlenecks. + + Configure AI summaries and disposition metrics. diff --git a/fern/products/atoms/pages/dev/build/analytics/post-call-analytics.mdx b/fern/products/atoms/pages/dev/build/analytics/post-call-analytics.mdx new file mode 100644 index 0000000..a311b15 --- /dev/null +++ b/fern/products/atoms/pages/dev/build/analytics/post-call-analytics.mdx @@ -0,0 +1,252 @@ +--- +title: "Post-Call Analytics" +sidebarTitle: "Post-Call Analytics" +description: "Configure AI summaries and disposition metrics for automated call insights." +--- + +After each call, the platform automatically generates analytics. You can configure what data to extract. 
+ +## What Gets Generated + +Every completed call includes `postCallAnalytics`: + +| Field | Description | +|-------|-------------| +| `summary` | AI-generated call summary | +| `dispositionMetrics` | Extracted data points you configure | + +--- + +## Accessing Post-Call Data + +```python +from smallestai.atoms.call import Call + +call = Call() + +# Get call details +details = call.get_call("CALL-1768842587790-69eb58") +data = details["data"] + +# Access analytics +analytics = data.get("postCallAnalytics", {}) + +print(f"Summary: {analytics.get('summary')}") + +for metric in analytics.get("dispositionMetrics", []): + print(f" {metric['identifier']}: {metric['value']}") + print(f" Confidence: {metric['confidence']}") + print(f" Reasoning: {metric['reasoning']}") +``` + +### Example Output + +``` +Summary: The call involved an agent reaching out to discuss AI products. +The user expressed interest and provided their name. + + user_interested: yes + Confidence: 1 + Reasoning: The user explicitly stated 'I am interested'. + user_name: John + Confidence: 1 + Reasoning: The user provided their name directly. +``` + +--- + +## Configuring Disposition Metrics + +Use `set_post_call_config()` to define what data to extract: + +```python +from smallestai.atoms.call import Call + +call = Call() + +call.set_post_call_config( + agent_id="696e655577e1d88ff54b4fbf", + summary_prompt="Summarize this sales call briefly.", + disposition_metrics=[ + { + "identifier": "user_interested", + "dispositionMetricPrompt": "Was the user interested? yes, no, or unclear", + "dispositionValues": {"type": "ENUM"}, + "choices": ["yes", "no", "unclear"] + }, + { + "identifier": "user_name", + "dispositionMetricPrompt": "What is the user's name? 
Return 'unknown' if not mentioned.", + "dispositionValues": {"type": "STRING"} + } + ] +) +``` + +--- + +## Disposition Metric Types + +| Type | Description | Requires `choices` | +|------|-------------|-------------------| +| `STRING` | Free text (names, notes) | No | +| `BOOLEAN` | Yes/No values | No | +| `INTEGER` | Numeric values (ratings) | No | +| `ENUM` | Selection from predefined list | Yes | +| `DATETIME` | Date/time values | No | + +--- + +## Metric Configuration Schema + +Each disposition metric requires: + +| Field | Required | Description | +|-------|----------|-------------| +| `identifier` | Yes | Unique ID (e.g., `customer_status`) | +| `dispositionMetricPrompt` | Yes | Question to extract this data | +| `dispositionValues.type` | Yes | STRING, BOOLEAN, INTEGER, ENUM, DATETIME | +| `choices` | For ENUM | List of allowed values | + +--- + +## Getting Current Configuration + +```python +config = call.get_post_call_config("696e655577e1d88ff54b4fbf") + +print("Configured metrics:") +for metric in config["data"].get("dispositionMetrics", []): + print(f" {metric['identifier']}: {metric['dispositionMetricType']}") +``` + +--- + +## Complete Example: Sales Call Analytics + +```python +import time +from smallestai.atoms import AtomsClient +from smallestai.atoms.call import Call +from smallestai.atoms.audience import Audience +from smallestai.atoms.campaign import Campaign + +client = AtomsClient() +call = Call() +audience = Audience() +campaign = Campaign() + +# 1. Create agent +agent = client.new_agent( + name=f"Sales Agent {int(time.time())}", + prompt="You are a sales agent. Ask if interested and get their name.", + description="Testing disposition metrics" +) +agent_id = agent.data + +# 2. Configure disposition metrics +call.set_post_call_config( + agent_id=agent_id, + summary_prompt="Summarize this sales call briefly.", + disposition_metrics=[ + { + "identifier": "user_interested", + "dispositionMetricPrompt": "Was the user interested? 
yes, no, or unclear", + "dispositionValues": {"type": "ENUM"}, + "choices": ["yes", "no", "unclear"] + }, + { + "identifier": "user_name", + "dispositionMetricPrompt": "What is the user's name? Return 'unknown' if not mentioned.", + "dispositionValues": {"type": "STRING"} + } + ] +) + +# 3. Create audience and campaign +phones = client.get_phone_numbers() +phone_id = phones["data"][0]["_id"] + +aud = audience.create( + name=f"Test Audience {int(time.time())}", + phone_numbers=["+916366821717"], + names=[("Test", "User")] +) +audience_id = aud["data"]["_id"] + +camp = campaign.create( + name=f"Analytics Test {int(time.time())}", + agent_id=agent_id, + audience_id=audience_id, + phone_ids=[phone_id] +) +campaign_id = camp["data"]["_id"] + +# 4. Start campaign +campaign.start(campaign_id) +print("Call in progress...") + +# 5. Wait for completion +time.sleep(60) + +# 6. Get call with analytics +calls = call.get_calls(agent_id=agent_id, limit=1) +call_id = calls["data"]["logs"][0]["callId"] + +details = call.get_call(call_id) +data = details["data"] + +# 7. Display results +print(f"\nCall Status: {data['status']}") +print(f"Duration: {data['duration']}s") + +print("\nTranscript:") +for line in data.get("transcript", []): + print(f" [{line['role'].upper()}]: {line['content']}") + +analytics = data.get("postCallAnalytics", {}) +if analytics: + print(f"\nSummary: {analytics.get('summary')}") + print("\nDisposition Metrics:") + for m in analytics.get("dispositionMetrics", []): + print(f" {m['identifier']}: {m['value']}") + print(f" Confidence: {m['confidence']}") + print(f" Reasoning: {m['reasoning']}") + +# 8. 
Cleanup +campaign.delete(campaign_id) +audience.delete(audience_id) +client.delete_agent(id=agent_id) +``` + +--- + +## SDK Reference + +| Method | Description | +|--------|-------------| +| `call.get_post_call_config(agent_id)` | Get agent's analytics config | +| `call.set_post_call_config(agent_id, ...)` | Configure summary and disposition metrics | +| `call.get_call(call_id)` | Get call details with analytics | +| `call.get_calls(agent_id=..., limit=...)` | List calls with optional filters | + +--- + +## Tips + + + + Disposition metrics are extracted after the call ends, typically within 10-30 seconds. The AI analyzes the transcript based on your configured prompts. + + + Be specific and direct. Instead of "What happened?", use: + - "Did the customer agree to schedule a follow-up? Answer yes or no." + - "What is the customer's email? Return 'not provided' if not mentioned." + + + Yes. New calls use the updated config. Existing calls keep their original analytics. + + + The metric will have an empty or null value. Specify fallback behavior in your prompts, like "Return 'unknown' if not mentioned." + + diff --git a/fern/products/atoms/pages/dev/build/analytics/sse-live-transcripts.mdx b/fern/products/atoms/pages/dev/build/analytics/sse-live-transcripts.mdx new file mode 100644 index 0000000..8326a7f --- /dev/null +++ b/fern/products/atoms/pages/dev/build/analytics/sse-live-transcripts.mdx @@ -0,0 +1,227 @@ +--- +title: "SSE for Live Transcripts" +sidebarTitle: "SSE for Live Transcripts" +description: "Real-time streaming of user speech (STT) and agent speech (TTS) events for an active call via Server-Sent Events." +--- + +Real-time streaming of user speech (STT) and agent speech (TTS) events for an active call via Server-Sent Events. 
+ +## Endpoint + +**GET** `/api/v1/events?callId={callId}` + +| Detail | Value | +|--------|--------| +| **Protocol** | Server-Sent Events (SSE) | +| **Authentication** | Cookie-based (logged-in dashboard user) | +| **Authorization** | User must belong to the org that owns the agent | +| **Content-Type** | `text/event-stream` | + +### Query Parameters + +| Parameter | Type | Required | Description | +|-----------|------|----------|-------------| +| `callId` | string | Yes | The call ID to subscribe events for | + +### Errors + +| Status | Condition | +|--------|-----------| +| 400 | Missing or invalid callId | +| 400 | Call is already completed | +| 404 | Not authorized / org mismatch | +| 404 | Call log or agent not found | + +## Event Types (Transcript-related) + +All events are sent as `data: \n\n`. The three transcript-relevant event types are: + +### 1. user_interim_transcription + +Fired as the user is speaking. Contains partial, in-progress transcription text. These events are emitted frequently and may change as more audio is processed. + +```json +{ + "event_type": "user_interim_transcription", + "event_id": "evt_abc123", + "timestamp": "2026-03-02T10:00:01.123Z", + "call_id": "CALL-1758124225863-80752e", + "interim_transcription_text": "I wanted to ask about my" +} +``` + +### 2. user_transcription + +Fired when the user finishes a speech segment. Contains the final transcription for that turn. + +```json +{ + "event_type": "user_transcription", + "event_id": "evt_abc456", + "timestamp": "2026-03-02T10:00:02.456Z", + "call_id": "CALL-1758124225863-80752e", + "user_transcription_text": "I wanted to ask about my recent order" +} +``` + +### 3. tts_completed + +Fired when the agent finishes speaking a TTS segment. Contains the text that was spoken and optionally the TTS latency. 
+ +```json +{ + "event_type": "tts_completed", + "event_id": "evt_abc789", + "timestamp": "2026-03-02T10:00:03.789Z", + "call_id": "CALL-1758124225863-80752e", + "tts_latency": 245, + "tts_text": "Sure, I can help you with your recent order. Could you provide your order number?" +} +``` + +## Lifecycle Events + +### sse_init + +Sent immediately when the SSE connection is established. + +```json +{ + "event_type": "sse_init", + "event_time": "2026-03-02T10:00:00.000Z" +} +``` + +### sse_close + +Sent when the call ends, right before the server closes the connection. + +```json +{ + "event_type": "sse_close", + "event_time": "2026-03-02T10:05:00.000Z" +} +``` + +## Usage Examples + +### cURL + +```bash +curl -N 'https://api.smallest.ai/atoms/v1/events?callId=CALL-1758124225863-80752e' \ + -H 'Cookie: your_session_cookie' \ + -H 'Accept: text/event-stream' +``` + +### JavaScript (Browser) + +```javascript +const callId = "CALL-1758124225863-80752e"; +const evtSource = new EventSource( + `/api/v1/events?callId=${callId}`, + { withCredentials: true } +); + +evtSource.onmessage = (event) => { + const data = JSON.parse(event.data); + + switch (data.event_type) { + case "user_interim_transcription": + // Update live transcription preview (partial, will change) + console.log("[STT interim]", data.interim_transcription_text); + break; + + case "user_transcription": + // Final user speech for this turn + console.log("[STT final]", data.user_transcription_text); + break; + + case "tts_completed": + // Agent finished speaking this segment + console.log("[TTS]", data.tts_text); + break; + + case "sse_close": + // Call ended, clean up + evtSource.close(); + break; + } +}; + +evtSource.onerror = (err) => { + console.error("SSE connection error:", err); + evtSource.close(); +}; +``` + +### Node.js + +```javascript +import EventSource from "eventsource"; + +const BASE_URL = "https://api.smallest.ai/atoms/v1"; +const callId = "CALL-1758124225863-80752e"; + +const es = new 
EventSource( + `${BASE_URL}/events?callId=${callId}`, + { headers: { Cookie: "your_session_cookie" } } +); + +es.onmessage = (event) => { + const data = JSON.parse(event.data); + + if (data.event_type === "user_transcription") { + console.log(`User said: ${data.user_transcription_text}`); + } + + if (data.event_type === "tts_completed") { + console.log(`Agent said: ${data.tts_text}`); + } + + if (data.event_type === "sse_close") { + es.close(); + } +}; +``` + +### Python + +```python +import json +import requests + +url = "https://api.smallest.ai/atoms/v1/events" +params = {"callId": "CALL-1758124225863-80752e"} +headers = { + "Cookie": "your_session_cookie", + "Accept": "text/event-stream", +} + +with requests.get(url, params=params, headers=headers, stream=True) as resp: + for line in resp.iter_lines(decode_unicode=True): + if not line or not line.startswith("data: "): + continue + + data = json.loads(line[len("data: "):]) + + if data["event_type"] == "user_interim_transcription": + print(f"[STT interim] {data['interim_transcription_text']}") + + elif data["event_type"] == "user_transcription": + print(f"[STT final] {data['user_transcription_text']}") + + elif data["event_type"] == "tts_completed": + print(f"[TTS] {data['tts_text']}") + + elif data["event_type"] == "sse_close": + print("Call ended.") + break +``` + +## Notes + +- The connection is **real-time** — events stream directly from the call runtime as they are produced. +- **Interim transcriptions** are partial and will be superseded by the final `user_transcription` event. Use them for live preview UI only. +- The SSE connection **auto-closes** when the call ends (`sse_close` event). The server will also terminate the connection. +- **Only active calls** can be subscribed to. Completed calls return a 400 error. +- Other event types (e.g. `call_start`, `call_end`, `turn_latency`, metrics) are also sent on this stream but are not covered in this doc. 
diff --git a/fern/products/atoms/pages/dev/build/campaigns/creating-campaigns.mdx b/fern/products/atoms/pages/dev/build/campaigns/creating-campaigns.mdx index 5598f9f..644af29 100644 --- a/fern/products/atoms/pages/dev/build/campaigns/creating-campaigns.mdx +++ b/fern/products/atoms/pages/dev/build/campaigns/creating-campaigns.mdx @@ -1,6 +1,212 @@ --- -title: "Creating Campaigns" -description: "Create outbound campaigns" +title: "Running Campaigns" +sidebarTitle: "Running Campaigns" +description: "Create, start, monitor, and manage outbound call campaigns." --- -# Creating Campaigns +This guide walks through the complete campaign lifecycle—from setup to cleanup. + +## Prerequisites + +Before creating a campaign, you need: + +- **Agent ID** — Create one with `new_agent()` or use an existing agent +- **Audience ID** — Create one with `create_audience()` ([see Audiences](/atoms/developer-guide/build/campaigns/managing-audiences)) +- **Phone Number ID** — Retrieve available numbers with `get_phone_numbers()` + +--- + +## Getting Your Phone Number + +`get_phone_numbers()` returns all outbound phone numbers available for campaigns: + +```python +from smallestai.atoms import AtomsClient +from smallestai.atoms.campaign import Campaign + +client = AtomsClient() +campaign = Campaign() + +phones = client.get_phone_numbers() +phone_id = phones["data"][0]["_id"] +``` + +Each phone includes provider details: + +```json +{ + "status": true, + "data": [ + { + "_id": "6963d3a8862e1cb702da7244", + "attributes": { + "provider": "plivo", + "phoneNumber": "+912268093560" + }, + "isActive": true + } + ] +} +``` + +--- + +## Creating a Campaign + +`create()` creates a campaign linking your agent, audience, and phone number: + +```python +response = campaign.create( + name="January Outreach", + agent_id=agent_id, + audience_id=audience_id, + phone_ids=[phone_id], + max_retries=2, + retry_delay=30 +) + +campaign_id = response["data"]["_id"] +``` + +The response includes all campaign 
details: + +```json +{ + "status": true, + "data": { + "name": "January Outreach", + "agentId": "696ddd281ea16a73cb8aafbe", + "audienceId": "696ddd287f45bf7b27344e7c", + "participantsCount": 2, + "maxRetries": 2, + "retryDelay": 30, + "status": "draft", + "_id": "696ddd2a04ff172dbd8eddad" + // ... + } +} +``` + +### Campaign Parameters + +| Parameter | Type | Description | +|-----------|------|-------------| +| `name` | string | Campaign name (required) | +| `agent_id` | string | Agent to handle calls (required) | +| `audience_id` | string | Contacts to dial (required) | +| `phone_ids` | list | Outbound phone number IDs (required) | +| `max_retries` | int | Retry attempts for failed calls (0–10, default: 3) | +| `retry_delay` | int | Minutes between retries (1–1440, default: 15) | + +--- + +## Starting a Campaign + +`start()` begins dialing contacts in the audience: + +```python +result = campaign.start(campaign_id) +``` + +The response confirms the campaign is processing: + +```json +{ + "status": true, + "data": { + "message": "Campaign is being processed", + "taskId": "081a5402-3f1f-4447-a257-6dd7bc6bad60", + "campaignId": "696ddd2a04ff172dbd8eddad" + } +} +``` + +--- + +## Monitoring Progress + +`get()` returns the campaign status, execution history, and metrics: + +```python +status = campaign.get(campaign_id) + +data = status["data"]["campaign"] +metrics = status["data"]["metrics"] + +print(f"Status: {data['status']}") +print(f"Called: {metrics['contacts_called']}/{metrics['total_participants']}") +print(f"Connected: {metrics['contacts_connected']}") +``` + +Full status response: + +```json +{ + "status": true, + "data": { + "campaign": { + "status": "running", + "executions": [ + { + "executionNumber": 1, + "status": "completed", + "totalMembers": 2, + "processedMembers": 2 + } + ] + }, + // ... 
+ "metrics": { + "total_participants": 2, + "contacts_called": 2, + "contacts_connected": 1 + } + } +} +``` + +--- + +## Cleaning Up + +`delete()` removes the campaign and its execution history: + +```python +campaign.delete(campaign_id) +``` + + +Running campaigns should be paused before deletion. Use `pause(id)` first if needed. + + +--- + +## SDK Reference + +| Method | Description | +|--------|-------------| +| `get_phone_numbers()` | List available outbound numbers (via generic client) | +| `create(...)` | Create a campaign | +| `start(id)` | Begin dialing | +| `get(id)` | Get status and metrics | +| `pause(id)` | Pause a running campaign | +| `delete(id)` | Remove a campaign | + +--- + +## Tips + + + + `contacts_called` counts dial attempts. `contacts_connected` counts answered calls. The difference represents voicemail, busy signals, and no-answers. + + + Failed calls enter the retry queue based on your `max_retries` and `retry_delay` settings. After all retries are exhausted, the contact is marked as failed. + + + Yes. Each campaign operates independently. Just ensure you have sufficient phone capacity for parallel dialing. + + + Campaigns cannot change agents after creation. Create a new campaign with the desired agent and audience. + + diff --git a/fern/products/atoms/pages/dev/build/campaigns/managing-audiences.mdx b/fern/products/atoms/pages/dev/build/campaigns/managing-audiences.mdx index d975206..3edbc37 100644 --- a/fern/products/atoms/pages/dev/build/campaigns/managing-audiences.mdx +++ b/fern/products/atoms/pages/dev/build/campaigns/managing-audiences.mdx @@ -1,6 +1,177 @@ --- -title: "Managing Audiences" -description: "Audience lists and segmentation" +title: "Audiences" +sidebarTitle: "Audiences" +description: "Create and manage contact lists for outbound campaigns." --- -# Managing Audiences +An **Audience** is a curated list of phone numbers your campaigns will dial. 
Think of it as a segment—leads to call, customers to survey, appointments to confirm.
+
+## Creating an Audience
+
+Use `create()` with a list of phone numbers and optional contact names:
+
+```python
+from smallestai.atoms.audience import Audience
+
+audience = Audience()
+
+response = audience.create(
+    name="Q1 Sales Leads",
+    phone_numbers=["+916366821717", "+919353662554"],
+    names=[("John", "Doe"), ("Jane", "Smith")]
+)
+
+audience_id = response["data"]["_id"]
+```
+
+The response confirms creation with the audience ID:
+
+```json
+{
+  "status": true,
+  "data": {
+    "name": "Q1 Sales Leads",
+    "phoneNumberColumnName": "phoneNumber",
+    "_id": "696ddd56b905442f52b71392"
+  }
+}
+```
+
+
+Phone numbers must use E.164 format: `+` followed by country code and number. Example: `+916366821717`
+
+
+---
+
+## Listing Audiences
+
+`list()` returns all audiences with their campaign associations:
+
+```python
+audiences = audience.list()
+```
+
+Each audience includes campaign associations and member counts:
+
+```json
+{
+  "status": true,
+  "data": [
+    {
+      "_id": "696ddd56b905442f52b71392",
+      "name": "Q1 Sales Leads",
+      "memberCount": 2,
+      "hasCampaigns": true,
+      "campaigns": [
+        {"_id": "...", "name": "January Outreach", "status": "completed"}
+      ]
+    }
+  ]
+}
+```
+
+---
+
+## Viewing Members
+
+`get_members()` returns a paginated list of contacts in the audience:
+
+```python
+members = audience.get_members(
+    audience_id=audience_id,
+    page=1,
+    offset=10
+)
+```
+
+Each member stores their contact data:
+
+```json
+{
+  "status": true,
+  "data": {
+    "members": [
+      {
+        "_id": "696ddd56b905442f52b71394",
+        "data": {
+          "firstName": "John",
+          "lastName": "Doe",
+          "phoneNumber": "+916366821717"
+        }
+      }
+    ],
+    "totalCount": 2,
+    "totalPages": 1,
+    "hasMore": false
+  }
+}
+```
+
+---
+
+## Adding Members
+
+`add_contacts()` appends new contacts to an existing audience:
+
+```python
+audience.add_contacts(
+    audience_id=audience_id,
+    phone_numbers=["+919876543210"],
+    
names=[("New", "Contact")] +) +``` + +The response reports how many were added: + +```json +{ + "status": true, + "data": [{"message": "1 members added successfully", "data": {"added": 1, "skipped": 0}}] +} +``` + +--- + +## Deleting an Audience + +`delete()` removes the audience and all its members: + +```python +audience.delete(audience_id=audience_id) +``` + + +Audiences with active campaigns cannot be deleted. Complete or delete the campaigns first. + + +--- + +## SDK Reference + +| Method | Description | +|--------|-------------| +| `create(name, phone_numbers, names)` | Create a new audience | +| `list()` | List all audiences | +| `get(id)` | Get a single audience | +| `get_members(id, page, offset)` | Paginated member list | +| `add_contacts(id, phone_numbers, names)` | Append contacts | +| `delete(id)` | Remove an audience | + +--- + +## Tips + + + + E.164 format is required: `+` followed by country code and number with no spaces or dashes. Example: `+14155551234` for US, `+916366821717` for India. + + + Currently, you can add new members but not update existing ones. To change a contact's info, delete and recreate the audience or add the corrected record (duplicates are skipped). + + + Audiences can scale to thousands of contacts. For very large lists, consider splitting into segments for easier management. + + + Members with duplicate phone numbers within the same audience are skipped. Check that each phone number is unique. + + diff --git a/fern/products/atoms/pages/dev/build/campaigns/overview.mdx b/fern/products/atoms/pages/dev/build/campaigns/overview.mdx new file mode 100644 index 0000000..a47b59f --- /dev/null +++ b/fern/products/atoms/pages/dev/build/campaigns/overview.mdx @@ -0,0 +1,84 @@ +--- +title: "Campaigns" +sidebarTitle: "Overview" +description: "Run outbound call campaigns at scale with the Atoms SDK." +--- + +Campaigns let your agents call through a list of contacts automatically. 
Upload an audience, assign an agent and phone number, then start the campaign — Atoms handles dialing, retries, and tracking.
+
+---
+
+## What You Can Do
+
+| Action | Method | Description |
+|--------|--------|-------------|
+| **Create** | `campaign.create()` | Set up a new campaign with agent, audience, and phone number |
+| **Start** | `campaign.start()` | Begin dialing contacts in the audience |
+| **Pause** | `campaign.pause()` | Temporarily stop dialing (resume later) |
+| **Monitor** | `campaign.get()` | Check progress, call counts, and status |
+| **Delete** | `campaign.delete()` | Remove a campaign and its data |
+
+---
+
+## How It Works
+
+```
+Audience (contacts) + Agent + Phone Number
+            ↓
+     Create Campaign
+            ↓
+      Start Campaign
+            ↓
+Atoms dials each contact → Agent handles the call
+            ↓
+Monitor progress → Pause/Resume as needed
+```
+
+---
+
+## Prerequisites
+
+Before running a campaign, you need three things:
+
+1. **An agent** — the AI that handles each call
+2. **An audience** — the list of phone numbers to dial
+3. **A phone number** — the outbound number your agent calls from
+
+```python
+from smallestai.atoms import AtomsClient
+from smallestai.atoms.campaign import Campaign
+
+client = AtomsClient()
+campaign = Campaign()
+
+# Get available phone numbers
+phones = client.get_phone_numbers()
+phone_id = phones["data"][0]["_id"]
+```
+
+---
+
+## Campaign Lifecycle
+
+Campaigns move through these statuses:
+
+| Status | Meaning |
+|--------|---------|
+| **Draft** | Created but not started |
+| **Active** | Currently dialing contacts |
+| **Paused** | Temporarily stopped |
+| **Completed** | All contacts have been called |
+| **Failed** | Encountered an error |
+
+---
+
+## What's Next
+
+
+
+  Create and update contact lists for your campaigns.
+
+
+  Full walkthrough: create, start, monitor, and clean up. 
+ + diff --git a/fern/products/atoms/pages/dev/build/knowledge-base/overview.mdx b/fern/products/atoms/pages/dev/build/knowledge-base/overview.mdx index 07253cc..2ee0610 100644 --- a/fern/products/atoms/pages/dev/build/knowledge-base/overview.mdx +++ b/fern/products/atoms/pages/dev/build/knowledge-base/overview.mdx @@ -1,35 +1,69 @@ --- title: "Knowledge Base" sidebarTitle: "Overview" -description: "Give your agents access to your documents and data." +description: "Give your agents accurate, domain-specific knowledge." --- -A **Knowledge Base** (KB) is a searchable container for your content. Upload documents, FAQs, and product information, and your agent can answer questions based on that data automatically. +A **Knowledge Base** is a collection of documents your agent can reference during conversations. Instead of relying solely on the LLM's training data, agents with a knowledge base provide answers grounded in *your* information. -When a user asks a question, the platform searches your KB, finds relevant content, and includes it in the LLM's context—so your agent answers using real information instead of making things up. +## Why Knowledge Bases Matter -## What We Offer +LLMs are powerful but imperfect. They hallucinate facts, go out of date, and know nothing about your specific business. Knowledge bases solve this. 
-| Feature | Description | -|---------|-------------| -| **Document Upload** | Upload PDFs to be indexed and searchable | -| **Automatic Retrieval** | Platform searches KB when users ask questions | -| **Agent Integration** | Assign KBs to agents via `global_knowledge_base_id` | -| **Multiple KBs** | Create separate KBs for different purposes | +| Problem | Solution | +|---------|----------| +| **Hallucination** | Agent answers from verified documents, not imagination | +| **Stale data** | Update your KB anytime; agent always has current info | +| **Domain gaps** | Upload product specs, policies, FAQs—whatever your domain needs | +| **Consistency** | Every agent using the same KB gives the same answers | + +## How It Works + +When a user asks a question: + +1. The agent searches the knowledge base for relevant content +2. Matching passages are injected into the LLM context +3. The agent responds using this grounded information + +This is called **Retrieval-Augmented Generation (RAG)**—your agent retrieves before it generates. + +--- + +## Use Cases + +- **Product Support** — Upload manuals, troubleshooting guides, warranty policies. Agents answer product questions accurately. +- **Sales Enablement** — Add pricing sheets, feature comparisons, objection handlers. Agents sell with confidence. +- **HR & Onboarding** — Store employee handbooks, benefits info, org charts. New hire questions handled instantly. +- **Legal & Compliance** — Include policies, regulations, approved language. Agents stay within bounds. + +--- + +## Key Concepts + +### One Agent, One KB + +Each agent can link to one knowledge base via the `globalKnowledgeBaseId` field. This becomes the default source for all conversations with that agent. + +### KB Independence + +Knowledge bases exist independently of agents. 
You can: +- Create a KB first, then link agents later +- Reuse one KB across multiple agents +- Update KB content without touching agent configuration + +### Document Types + +Currently supported: +- **PDF files** (preferred) via the upload endpoint +- **URL scraping** (fetch and ingest page content) +- **Text content** (when the text upload endpoint is deployed, will be available soon) + +--- ## What's Next - - - Create and manage knowledge bases. - - - Add documents to your KB. - - - Best practices and troubleshooting. - - - Connect KB to your agents. + + + Create a knowledge base, upload content, and link it to an agent. diff --git a/fern/products/atoms/pages/dev/build/knowledge-base/usage.mdx b/fern/products/atoms/pages/dev/build/knowledge-base/usage.mdx index f99a0fd..f3d0c8d 100644 --- a/fern/products/atoms/pages/dev/build/knowledge-base/usage.mdx +++ b/fern/products/atoms/pages/dev/build/knowledge-base/usage.mdx @@ -1,191 +1,169 @@ --- title: "Using Knowledge Bases" sidebarTitle: "Usage" -description: "Create, populate, and connect Knowledge Bases to your agents." +description: "Create, upload content, and connect knowledge bases to your agents." --- -Knowledge Bases give your agents context from your documents. Upload PDFs, and the platform automatically retrieves relevant information during conversations. +This guide walks through the complete knowledge base workflow—from creation to agent integration. -## Complete Workflow +## Creating a Knowledge Base + +Every KB starts with a name and optional description. Use the `create` method: ```python +from smallestai.atoms.kb import KB from smallestai.atoms import AtomsClient -from smallestai.atoms.models import KnowledgebasePostRequest, CreateAgentRequest -import time +# Initialize separate managers +kb = KB() client = AtomsClient() -# 1. 
Create Knowledge Base
-kb = client.create_knowledge_base(
-    knowledgebase_post_request=KnowledgebasePostRequest(
-        name="ProductDocs",
-        description="Product documentation"
-    )
-)
-kb_id = kb.data
-
-# 2. Upload Documents
-with open("manual.pdf", "rb") as f:
-    client.upload_media_to_knowledge_base(id=kb_id, media=("manual.pdf", f.read()))
-
-# 3. Wait for Processing
-for _ in range(10):
-    items = client.get_knowledge_base_items(id=kb_id).data
-    if all(item.processing_status == "completed" for item in items):
-        print("Ready!")
-        break
-    time.sleep(2)
-
-# 4. Create Agent with KB
-agent = client.create_agent(
-    create_agent_request=CreateAgentRequest(
-        name="SupportBot",
-        global_knowledge_base_id=kb_id,
-        global_prompt="Answer using the knowledge base.",
-        slm_model="gpt-4o"
-    )
+response = kb.create(
+    name="Product Documentation",
+    description="Technical specs and troubleshooting guides"
 )
-```

-## Creating a Knowledge Base
+kb_id = response["data"]
+```

-```python
-from smallestai.atoms.models import KnowledgebasePostRequest
+The response contains the KB ID you'll use for all subsequent operations:

-kb = client.create_knowledge_base(
-    knowledgebase_post_request=KnowledgebasePostRequest(
-        name="CompanyKB",
-        description="Internal documentation"
-    )
-)
-kb_id = kb.data
+```json
+{
+  "status": true,
+  "data": "696ddd64b9f099f0679fdb41"
+}
 ```

-## Uploading Documents
+---

-### PDF Files
+## Retrieving KB Details

-```python
-with open("manual.pdf", "rb") as f:
-    client.upload_media_to_knowledge_base(
-        id=kb_id,
-        media=("manual.pdf", f.read())
-    )
-```
+### Single Knowledge Base

-### Multiple Files
+`get()` retrieves the full details of a specific KB:

 ```python
-import os
+details = kb.get(kb_id)
+```

-for filename in os.listdir("./documents"):
-    if filename.endswith(".pdf"):
-        with open(f"./documents/{filename}", "rb") as f:
-            client.upload_media_to_knowledge_base(id=kb_id, media=(filename, f.read()))
+Returns the full KB object:
+
+```json
+{
+  
"status": true, + "data": { + "_id": "696ddd64b9f099f0679fdb41", + "name": "Product Documentation", + "description": "Technical specs and troubleshooting guides", + "organization": "693abd625a5f74726c0450a4", + "createdBy": "693abd625a5f74726c0450a1" + } +} ``` -## Checking Processing Status +### All Knowledge Bases -Documents are processed asynchronously. Poll until complete: +`list()` returns every KB in your organization: ```python -import time - -def wait_for_processing(client, kb_id, timeout=60): - start = time.time() - - while time.time() - start < timeout: - items = client.get_knowledge_base_items(id=kb_id).data - - if all(item.processing_status == "completed" for item in items): - return True - - time.sleep(2) - - return False +all_kbs = kb.list() + +for item in all_kbs["data"]: + print(f"{item['name']}: {item['_id']}") ``` -| Status | Meaning | -|--------|---------| -| `pending` | Queued | -| `processing` | Being indexed | -| `completed` | Ready | -| `failed` | Error | +--- -## Connecting to Agents +## Linking a KB to an Agent -### At Creation +The connection happens at agent creation time. Pass `kb_id` to the `new_agent` helper: ```python -from smallestai.atoms.models import CreateAgentRequest - -agent = client.create_agent( - create_agent_request=CreateAgentRequest( - name="SupportBot", - global_knowledge_base_id=kb_id, - global_prompt="Answer using the knowledge base.", - slm_model="gpt-4o" - ) +agent = client.new_agent( + name="Support Agent", + prompt="You are a helpful support agent. Use your knowledge base to answer product questions accurately.", + description="Agent with product KB", + kb_id=kb_id ) -``` -### Update Existing Agent +agent_id = agent.data +``` -```python -from smallestai.atoms.models import AgentIdPatchRequest +The agent now has access to all content in that knowledge base during conversations. 
-# Connect KB -client.update_agent( - id=agent_id, - agent_id_patch_request=AgentIdPatchRequest(global_knowledge_base_id=kb_id) -) - -# Disconnect KB -client.update_agent( - id=agent_id, - agent_id_patch_request=AgentIdPatchRequest(global_knowledge_base_id="") -) -``` +--- -## Managing Documents +## Verifying the Link -### List Items +`get_agent_by_id()` returns the agent with its `globalKnowledgeBaseId` field: ```python -items = client.get_knowledge_base_items(id=kb_id).data +agent_details = client.get_agent_by_id(id=agent_id) -for item in items: - print(f"{item.id}: {item.file_name or item.title}") +linked_kb = agent_details["data"]["globalKnowledgeBaseId"] +print(f"Agent is linked to KB: {linked_kb}") ``` -### Delete KB +The response includes the linked KB ID: + +```json +{ + "status": true, + "data": { + "_id": "696ddd6593f50590da907bcf", + "name": "Support Agent", + "globalKnowledgeBaseId": "696ddd64b9f099f0679fdb41", + "globalPrompt": "You are a helpful support agent..." + } +} +``` + +--- + +## Deleting a Knowledge Base - Disconnect the KB from agents before deleting. +A knowledge base cannot be deleted while it's connected to an agent. Delete or archive the agent first. +Use `delete_agent()` to archive the agent, then `delete()` to remove the KB: + ```python -from smallestai.atoms.models import AgentIdPatchRequest +# First, remove the agent +client.delete_agent(id=agent_id) -client.update_agent( - id=agent_id, - agent_id_patch_request=AgentIdPatchRequest(global_knowledge_base_id="") -) -client.delete_knowledge_base(id=kb_id) +# Then delete the KB +kb.delete(kb_id) ``` -## Best Practices +--- + +## SDK Reference + +| Method | Description | +|--------|-------------| +| `kb.create(name, description)` | Create a new knowledge base | +| `kb.list()` | List all KBs in your organization | +| `kb.get(id)` | Retrieve a specific KB | +| `kb.delete(id)` | Delete a KB (must be unlinked first) | + +--- + +## Tips - - Upload content relevant to your agent's purpose. 
Quality over quantity. + + Currently, each agent supports one knowledge base via `globalKnowledgeBaseId`. For multiple knowledge sources, combine them into a single KB before linking. + + + PDF files are fully supported. Text upload is available but may require backend deployment. Check with your administrator. - - - Always confirm `completed` status before using with an agent. + + Upload new documents to the same KB. The agent will automatically use the updated content in future conversations. - - - Make test calls to verify the agent correctly uses KB content. + + The error "This knowledge base is connected to an agent" means you must delete or archive the linked agent first. KBs with active connections cannot be removed. diff --git a/fern/products/atoms/pages/dev/build/phone-calling/call-control.mdx b/fern/products/atoms/pages/dev/build/phone-calling/call-control.mdx index 825a454..01bc49e 100644 --- a/fern/products/atoms/pages/dev/build/phone-calling/call-control.mdx +++ b/fern/products/atoms/pages/dev/build/phone-calling/call-control.mdx @@ -11,19 +11,20 @@ Control the call flow from within your agent. End calls gracefully or transfer t Use `SDKAgentEndCallEvent` to end the call: ```python -from smallestai.atoms.agent.nodes.output_agent import OutputAgentNode +from smallestai.atoms.agent.nodes import OutputAgentNode from smallestai.atoms.agent.events import SDKAgentEndCallEvent from smallestai.atoms.agent.tools import function_tool class MyAgent(OutputAgentNode): @function_tool() - def end_call(self) -> SDKAgentEndCallEvent: + async def end_call(self) -> None: """End the call gracefully.""" - return SDKAgentEndCallEvent() + await self.send_event(SDKAgentEndCallEvent()) + return None ``` - `SDKAgentEndCallEvent` has no parameters. + Tools that send SDK events must use `await self.send_event()` and return `None`. Simply returning the event object won't work. 
## Transferring to Humans @@ -40,15 +41,18 @@ from smallestai.atoms.agent.tools import function_tool class MyAgent(OutputAgentNode): @function_tool() - def transfer_to_human(self) -> SDKAgentTransferConversationEvent: + async def transfer_to_human(self) -> None: """Transfer to a human agent.""" - return SDKAgentTransferConversationEvent( + await self.send_event( + SDKAgentTransferConversationEvent( transfer_call_number="+14155551234", transfer_options=TransferOption( type=TransferOptionType.COLD_TRANSFER ), on_hold_music="relaxing_sound" ) + ) + return None ``` ### Transfer Parameters @@ -62,7 +66,12 @@ class MyAgent(OutputAgentNode): ### Transfer Types ```python -from smallestai.atoms.agent.events import TransferOption, TransferOptionType +from smallestai.atoms.agent.events import ( + TransferOption, + TransferOptionType, + WarmTransferPrivateHandoffOption, + WarmTransferHandoffOptionType, +) # Cold transfer (immediate handoff) cold = TransferOption(type=TransferOptionType.COLD_TRANSFER) @@ -70,10 +79,10 @@ cold = TransferOption(type=TransferOptionType.COLD_TRANSFER) # Warm transfer (agent briefs human first) warm = TransferOption( type=TransferOptionType.WARM_TRANSFER, - private_handoff_option={ - "type": "prompt", - "prompt": "Customer calling about billing" - } + private_handoff_option=WarmTransferPrivateHandoffOption( + type=WarmTransferHandoffOptionType.PROMPT, + prompt="Customer calling about billing" + ) ) ``` diff --git a/fern/products/atoms/pages/dev/build/phone-calling/campaigns.mdx b/fern/products/atoms/pages/dev/build/phone-calling/campaigns.mdx index b618c1c..eedbc4f 100644 --- a/fern/products/atoms/pages/dev/build/phone-calling/campaigns.mdx +++ b/fern/products/atoms/pages/dev/build/phone-calling/campaigns.mdx @@ -18,173 +18,240 @@ Create → Start → Running → Pause/Complete → Delete Before creating a campaign, you need: -1. **Agent** — With a phone number linked (configured in Dashboard) -2. **Audience** — A contact list uploaded in Dashboard -3. 
**Phone Number** — Assigned to your agent in Dashboard → Phone Numbers +1. **Agent** — Created via SDK or Dashboard +2. **Audience** — Contact list with phone numbers +3. **Phone Number ID** — From `get_phone_numbers()` - Link a phone number to your agent in the [Dashboard](https://atoms.smallest.ai) under **Phone Numbers**. Campaigns use the phone number assigned to the agent as the caller ID. + Get your phone number ID using `client.get_phone_numbers()` — the `_id` field is what you pass to campaigns. - - Create audiences by uploading a CSV with columns: - `phoneNumber`, `Name`, `Address`, `Email`, `CustomerID` - - -## Creating a Campaign - -### Basic Campaign +## Quick Start ```python from smallestai.atoms import AtomsClient +from smallestai.atoms.audience import Audience +from smallestai.atoms.campaign import Campaign client = AtomsClient() +audience = Audience() +campaign = Campaign() + +# 1. Get phone number +phones = client.get_phone_numbers() +phone_id = phones["data"][0]["_id"] + +# 2. Create agent +agent = client.new_agent( + name="Campaign Agent", + prompt="You are a friendly assistant.", + description="Agent for outreach" +) +agent_id = agent.data + +# 3. Create audience +aud = audience.create( + name="My Contacts", + phone_numbers=["+916366821717", "+919353662554"], + names=[("John", "Doe"), ("Jane", "Smith")] +) +audience_id = aud["data"]["_id"] + +# 4. Create campaign +camp = campaign.create( + name="Outreach Campaign", + agent_id=agent_id, + audience_id=audience_id, + phone_ids=[phone_id], + description="Follow-up calls", + max_retries=2, + retry_delay=15 +) +campaign_id = camp["data"]["_id"] + +# 5. 
Start campaign +campaign.start(campaign_id) +print(f"Campaign started: {campaign_id}") +``` + +## Creating an Audience + +```python +from smallestai.atoms.audience import Audience -campaign = client.create_campaign( - campaign_post_request={ - "name": "January Lead Follow-up", - "description": "Follow up with leads from webinar", - "agentId": "your-agent-id", # Must have phone linked - "audienceId": "your-audience-id" - } +audience = Audience() + +# Create with phone numbers and names +result = audience.create( + name="January Leads", + phone_numbers=["+916366821717", "+919353662554"], + names=[("John", "Doe"), ("Jane", "Smith")] ) -campaign_id = campaign.data.get('_id') -print(f"Created campaign: {campaign_id}") +audience_id = result["data"]["_id"] +print(f"Created audience: {audience_id}") ``` -### With Retry and Schedule Settings +### Adding Contacts Later ```python -import requests -import os -from datetime import datetime, timedelta, timezone - -API_KEY = os.getenv("SMALLEST_API_KEY") - -# Schedule for tomorrow at 9 AM UTC -scheduled_time = datetime.now(timezone.utc).replace( - hour=9, minute=0, second=0 -) + timedelta(days=1) - -response = requests.post( - "https://atoms.smallest.ai/api/v1/campaign", - headers={ - "Authorization": f"Bearer {API_KEY}", - "Content-Type": "application/json" - }, - json={ - "name": "Summer Sale Campaign", - "description": "Promotional calls", - "agentId": "agent-with-phone", - "audienceId": "summer-leads", - "maxRetries": 3, # Retry unanswered (0-10) - "retryDelay": 15, # Minutes between retries (1-1440) - "scheduledAt": scheduled_time.isoformat() - } +# Add more contacts to existing audience +audience.add_contacts( + audience_id=audience_id, + phone_numbers=["+919876543210"], + names=[("New", "User")] +) +``` + +## Creating a Campaign + +```python +from smallestai.atoms.campaign import Campaign + +campaign = Campaign() + +result = campaign.create( + name="Follow-up Campaign", + agent_id="your-agent-id", + 
audience_id="your-audience-id", + phone_ids=["your-phone-id"], + description="Following up with leads", + max_retries=3, + retry_delay=15 ) -result = response.json() campaign_id = result["data"]["_id"] -print(f"Scheduled campaign: {campaign_id}") ``` ### Parameters | Parameter | Type | Required | Description | |-----------|------|----------|-------------| -| `name` | string | Yes | Campaign name (max 40 chars) | -| `description` | string | No | Notes about the campaign | -| `agentId` | string | Yes | Agent ID (must have phone linked) | -| `audienceId` | string | Yes | Contact list to dial | -| `maxRetries` | int | No | Retry unanswered (0-10, default: 3) | -| `retryDelay` | int | No | Minutes between retries (1-1440, default: 15) | -| `scheduledAt` | string | No | ISO timestamp to auto-start | +| `name` | string | Yes | Campaign name | +| `agent_id` | string | Yes | Agent ID | +| `audience_id` | string | Yes | Contact list ID | +| `phone_ids` | list | Yes | Phone number IDs (from `get_phone_numbers()`) | +| `description` | string | No | Campaign description | +| `max_retries` | int | No | Retry unanswered (0-10, default: 3) | +| `retry_delay` | int | No | Minutes between retries (default: 15) | ## Starting a Campaign ```python -client.start_campaign(id=campaign_id) -print("Campaign started") +campaign.start(campaign_id) +print("Campaign started!") ``` -Scheduled campaigns start automatically at the specified time. 
- -## Checking Campaign Status +## Monitoring Progress ```python -import requests -import os - -API_KEY = os.getenv("SMALLEST_API_KEY") +import time -response = requests.get( - f"https://atoms.smallest.ai/api/v1/campaign/{campaign_id}", - headers={"Authorization": f"Bearer {API_KEY}"} -) -data = response.json() +for i in range(10): + time.sleep(10) + + status = campaign.get(campaign_id) + campaign_data = status["data"]["campaign"] + metrics = status["data"]["metrics"] + + print(f"Status: {campaign_data['status']}") + print(f"Called: {metrics['contacts_called']}/{metrics['total_participants']}") + + if campaign_data["status"] in ["completed", "failed"]: + break +``` -campaign = data["data"]["campaign"] -metrics = data["data"]["metrics"] +## Stopping a Running Campaign -print(f"Status: {campaign['status']}") -print(f"Total: {metrics['total_participants']}") -print(f"Called: {metrics['contacts_called']}") -print(f"Connected: {metrics['contacts_connected']}") +```python +# Only works for running campaigns +status = campaign.get(campaign_id) +if status["data"]["campaign"]["status"] in ["running", "active"]: + campaign.stop(campaign_id) + print("Campaign stopped") +else: + print("Campaign already completed") ``` -## Pausing a Campaign +## Listing Campaigns ```python -client.pause_campaign(id=campaign_id) +campaigns = campaign.list() + +for c in campaigns["data"]["campaigns"]: + print(f"{c['name']}: {c['status']}") ``` ## Deleting a Campaign ```python -client.delete_campaign(id=campaign_id) +campaign.delete(campaign_id) ``` ## Complete Example ```python -import requests -import os import time from smallestai.atoms import AtomsClient +from smallestai.atoms.audience import Audience +from smallestai.atoms.campaign import Campaign -API_KEY = os.getenv("SMALLEST_API_KEY") -BASE_URL = "https://atoms.smallest.ai/api/v1" -headers = {"Authorization": f"Bearer {API_KEY}", "Content-Type": "application/json"} - -# 1. 
Create -resp = requests.post(f"{BASE_URL}/campaign", headers=headers, json={ - "name": f"Outreach {int(time.time())}", - "agentId": "your-agent-id", - "audienceId": "your-audience-id", - "maxRetries": 2, - "retryDelay": 30 -}) -campaign_id = resp.json()["data"]["_id"] -print(f"Created: {campaign_id}") - -# 2. Start client = AtomsClient() -client.start_campaign(id=campaign_id) -print("Started") - -# 3. Monitor -for _ in range(10): - time.sleep(30) - resp = requests.get(f"{BASE_URL}/campaign/{campaign_id}", headers=headers) - status = resp.json()["data"]["campaign"]["status"] - print(f"Status: {status}") - if status in ["completed", "failed"]: +audience = Audience() +campaign = Campaign() + +# 1. Get phone number +phones = client.get_phone_numbers() +phone_id = phones["data"][0]["_id"] +print(f"Using phone: {phones['data'][0]['attributes']['phoneNumber']}") + +# 2. Create agent +agent = client.new_agent( + name=f"Campaign Agent {int(time.time())}", + prompt="You are a friendly survey agent. Ask about interest and get their name.", + description="Test agent" +) +agent_id = agent.data +print(f"Agent: {agent_id}") + +# 3. Create audience +aud = audience.create( + name=f"Test Audience {int(time.time())}", + phone_numbers=["+916366821717"], + names=[("Test", "User")] +) +audience_id = aud["data"]["_id"] +print(f"Audience: {audience_id}") + +# 4. Create campaign +camp = campaign.create( + name=f"Test Campaign {int(time.time())}", + agent_id=agent_id, + audience_id=audience_id, + phone_ids=[phone_id], + max_retries=1, + retry_delay=5 +) +campaign_id = camp["data"]["_id"] +print(f"Campaign: {campaign_id}") + +# 5. Start +campaign.start(campaign_id) +print("Campaign started!") + +# 6. Monitor +for i in range(6): + time.sleep(10) + status = campaign.get(campaign_id) + print(f"Status: {status['data']['campaign']['status']}") + if status['data']['campaign']['status'] in ['completed', 'failed']: break -# 4. Delete -client.delete_campaign(id=campaign_id) -print("Deleted") +# 7. 
Cleanup +campaign.delete(campaign_id) +audience.delete(audience_id) +client.delete_agent(id=agent_id) +print("Cleanup complete") ``` ## Campaign States @@ -202,23 +269,20 @@ print("Deleted") | Error | Solution | |-------|----------| -| "No phone numbers available" | Link a phone number to your agent in Dashboard → Phone Numbers | -| Campaign stays "draft" | Call `start_campaign()` | -| Campaign fails immediately | Check agent has valid phone and audience has contacts | +| "No phone numbers available" | Pass `phone_ids` from `get_phone_numbers()` | +| Campaign stays "draft" | Call `campaign.start(campaign_id)` | +| Campaign fails immediately | Verify agent and audience IDs are valid | ## Best Practices - - In Dashboard → Phone Numbers, link a phone to your agent before creating campaigns. - - Start with 5-10 contacts to verify before scaling. + Start with 1-2 contacts to verify before scaling. 2-3 retries with 15-30 minute delays balances persistence and respect. - - Call during 9am-8pm local time. + + Monitor with `campaign.get()` and stop if needed. diff --git a/fern/products/atoms/pages/dev/build/phone-calling/overview.mdx b/fern/products/atoms/pages/dev/build/phone-calling/overview.mdx index 73bf55f..0865bde 100644 --- a/fern/products/atoms/pages/dev/build/phone-calling/overview.mdx +++ b/fern/products/atoms/pages/dev/build/phone-calling/overview.mdx @@ -38,10 +38,10 @@ Before making calls: ## What's Next - + Make individual calls with code examples. - + End calls and transfer to humans. diff --git a/fern/products/atoms/pages/dev/build/voice-speech/voice-models/available-voices.mdx b/fern/products/atoms/pages/dev/build/voice-speech/voice-models/available-voices.mdx index 7d66c87..75a07ca 100644 --- a/fern/products/atoms/pages/dev/build/voice-speech/voice-models/available-voices.mdx +++ b/fern/products/atoms/pages/dev/build/voice-speech/voice-models/available-voices.mdx @@ -108,7 +108,7 @@ Create custom voices with ElevenLabs: 3. 
Use the generated voice ID in your agent - Voice cloning requires appropriate consent and permissions. See [Voice Cloning](/dev/build/voice-speech/advanced-voice/voice-cloning) for guidelines. + Voice cloning requires appropriate consent and permissions. See [Voice Cloning](/atoms/product-overview/capabilities/voice-and-speech) for guidelines. ### Request New Voices diff --git a/fern/products/atoms/pages/dev/cookbooks/examples.mdx b/fern/products/atoms/pages/dev/cookbooks/examples.mdx index 53d2d7f..d6a5336 100644 --- a/fern/products/atoms/pages/dev/cookbooks/examples.mdx +++ b/fern/products/atoms/pages/dev/cookbooks/examples.mdx @@ -1,11 +1,196 @@ --- title: "Examples" sidebarTitle: "Examples" -description: "Full end-to-end code examples." +description: "Full end-to-end code examples from the cookbook." --- -| Example | Description | Concepts | Link | -|---------|-------------|----------|------| -| **Getting Started** | SDK setup, outbound calls | AtomsClient, start_outbound_call | [View →](https://github.com/smallest-inc/cookbook/tree/main/atoms/getting_started) | -| **Agent with Tools** | Custom function tools | function_tool, OutputAgentNode | [View →](https://github.com/smallest-inc/cookbook/tree/main/atoms/agent_with_tools) | -| **Call Control** | End calls, transfer to humans | SDKAgentEndCallEvent, TransferConversationEvent | [View →](https://github.com/smallest-inc/cookbook/tree/main/atoms/call_control) | +Complete, runnable examples you can deploy in minutes. Each example includes full source code, setup instructions, and deployment guide. + + + All examples are available in the [cookbook](https://github.com/smallest-inc/cookbook) repository. + + +--- + +## Getting Started + + + + **5 min** · Build and deploy a basic conversational agent. + + `OutputAgentNode` · `OpenAIClient` · `AtomsApp` · CLI deployment + + + +--- + +## Agent Development + + + + **10 min** · Give your agent actions it can perform. 
+ + `@function_tool` · `ToolRegistry` · Tool execution · LLM function calling + + + + **15 min** · Real-time sentiment analysis alongside your main agent. + + `BackgroundAgentNode` · Multi-node architecture · Shared state · Silent processing + + + + **10 min** · Configure how your agent handles user interruptions. + + `is_interruptible` · Dynamic settings · Barge-in handling + + + + **15 min** · Multilingual agent with language detection. + + Language detection · Dynamic prompts · Multi-language support + + + +--- + +## Calling + + + + **10 min** · End calls and transfer to human agents. + + `SDKAgentEndCallEvent` · `SDKAgentTransferConversationEvent` · Cold/warm transfers · Hold music + + + + **15 min** · Build an interactive voice response menu. + + Menu navigation · DTMF handling · Department routing + + + +--- + +## Platform Features + + + + **15 min** · Connect your agent to a knowledge base for accurate answers. + + Knowledge base setup · RAG retrieval · Document upload + + + + **20 min** · Run outbound calling campaigns at scale. + + `AtomsClient` · Audience management · Campaign creation · Batch calling + + + +--- + +## Analytics + + + + **10 min** · Retrieve call metrics, transcripts, and post-call analysis. + + Call details · Transcript export · Post-call configuration · Metrics API + + + +--- + +## End-to-End Projects + + + + **30 min** · Production-grade voice banking agent with real database queries, identity verification, and compliance logging. + + `BackgroundAgentNode` · SQL execution · Multi-round tool chaining · KBA verification · Audit logging · Call transfers + + + + **30 min** · AI leasing agent with property search, tour scheduling, maintenance requests, and lead tracking — backed by a real SQLite database. 
+ + `BackgroundAgentNode` · SQL execution · Multi-round tool chaining · Lead tracking · Tour scheduling + + + + **20 min** · Fun voice order-taker with menu browsing, pizza customization, cart management, combo deals, and deterministic pricing — zero external deps. + + Multi-round tool chaining · In-memory state · Deterministic pricing · Upselling + + + + **20 min** · Voice-based clinic receptionist that checks real Cal.com availability, negotiates time slots, and books appointments. + + Cal.com API · Slot negotiation · `@function_tool` · Real calendar booking + + + + **20 min** · State-machine-driven voice data collection with typed validation, backtracking, and native Jotform submission. + + `FormEngine` · Step-by-step collection · Field validation · Jotform integration + + + +--- + +## Observability & Integrations + + + + **15 min** · Real-time voice agent observability with Langfuse via a BackgroundAgentNode — tool calls, LLM generations, and transcripts stream to your dashboard with zero latency impact. + + `BackgroundAgentNode` · Langfuse traces · LLM generation tracking · Tool call spans + + + + **15 min** · Error tracking and performance monitoring with Sentry via a BackgroundAgentNode — tool failures, conversation breadcrumbs, and performance metrics stream to Sentry with zero latency impact. + + `BackgroundAgentNode` · Sentry transactions · Error capture · Performance spans · Breadcrumbs + + + +--- + +## Running Examples + +Each example follows the same pattern: + +```bash +# Clone the cookbook +git clone https://github.com/smallest-inc/cookbook +cd cookbook/voice-agents/ + +# Install dependencies +pip install -e . 
+ +# Set environment variables +export OPENAI_API_KEY="your-key" + +# Run locally +python app.py + +# Test with CLI (in another terminal) +smallestai agent chat +``` + +## Deploying Examples + +```bash +# Login to Smallest AI +smallestai auth login + +# Link to your agent +smallestai agent init + +# Deploy +smallestai agent deploy --entry-point app.py + +# Make live +smallestai agent builds +``` + diff --git a/fern/products/atoms/pages/dev/introduction/core-concepts/nodes.mdx b/fern/products/atoms/pages/dev/introduction/core-concepts/nodes.mdx index 44bed77..20cf719 100644 --- a/fern/products/atoms/pages/dev/introduction/core-concepts/nodes.mdx +++ b/fern/products/atoms/pages/dev/introduction/core-concepts/nodes.mdx @@ -15,7 +15,7 @@ In the conceptual graph, a Node is a vertex that performs three key actions: ## Abstracted Nodes -To help you get started quickly, we have abstracted two common node patterns for you. You can use these out of the box or build your own custom nodes from scratch. +To help you get started quickly, we have abstracted three common node patterns for you. You can use these out of the box or build your own custom nodes from scratch. ### 1. The Base Node (`Node`) @@ -77,6 +77,36 @@ class MyAgent(OutputAgentNode): yield chunk.content ``` +### 3. The Background Agent (`BackgroundAgentNode`) + +A silent observer node that processes events without producing audio output. + +**Key Features:** +* **Silent Processing**: Receives all events but doesn't speak. +* **Parallel Execution**: Runs alongside your main agent. +* **State Sharing**: Main agent can query its state. + +**Use Case**: Sentiment analysis, call quality monitoring, analytics, webhooks. 
+ +```python +from smallestai.atoms.agent.nodes import BackgroundAgentNode +from smallestai.atoms.agent.events import SDKEvent, SDKAgentTranscriptUpdateEvent + +class SentimentAnalyzer(BackgroundAgentNode): + def __init__(self): + super().__init__(name="sentiment-analyzer") + self.current_sentiment = "neutral" + + async def process_event(self, event: SDKEvent): + if isinstance(event, SDKAgentTranscriptUpdateEvent): + if event.role == "user": + self.current_sentiment = await self._analyze(event.content) +``` + + + See [Background Agent](/atoms/developer-guide/build/agents/overview) for a complete guide. + + --- ## How to Write a Custom Node diff --git a/fern/products/atoms/pages/dev/introduction/core-concepts/overview.mdx b/fern/products/atoms/pages/dev/introduction/core-concepts/overview.mdx index afd27d5..6dbc3cb 100644 --- a/fern/products/atoms/pages/dev/introduction/core-concepts/overview.mdx +++ b/fern/products/atoms/pages/dev/introduction/core-concepts/overview.mdx @@ -19,16 +19,16 @@ The system manages all the complexity of real-time streaming, interruptions, and ## The Four Building Blocks - + Processing units that handle events. The brain of your agent logic. - + Messages flowing through the system. Audio, text, and control signals. - + Connect nodes into pipelines. Build complex multi-agent flows. - + Manage conversation state and lifecycle. One session per call. diff --git a/fern/products/atoms/pages/dev/introduction/mcp/manual-setup.mdx b/fern/products/atoms/pages/dev/introduction/mcp/manual-setup.mdx new file mode 100644 index 0000000..616f091 --- /dev/null +++ b/fern/products/atoms/pages/dev/introduction/mcp/manual-setup.mdx @@ -0,0 +1,155 @@ +--- +title: Manual Setup +subtitle: Step-by-step config for Cursor and Claude Desktop. +--- + + + Need an API key first? Follow the steps below to create one from the Atoms platform. 
+ + +## Get Your API Key + + + + Click your profile in the top-left of [app.smallest.ai](https://app.smallest.ai?utm_source=documentation&utm_medium=docs) and select **Settings**. + + + Atoms platform profile dropdown showing the Settings option in the left sidebar + + + + + Click **API Keys** in the left sidebar, then click **Create API Key** in the top-right. + + + Atoms platform API Keys settings page showing existing keys and the Create API Key button + + + + + Give your key a descriptive name (e.g., `atoms-mcp-server`) and click **Create API Key**. Copy and save it — you won't see it again. + + + Create New API Key dialog with a name input field and Create API Key button + + + + +--- + +## Install + + + + Zero dependencies — no Node.js, no npm. + + + + ```bash + curl -fsSL https://raw.githubusercontent.com/smallest-inc/mcp-server/main/install.sh | bash + ``` + + + ```powershell + irm https://raw.githubusercontent.com/smallest-inc/mcp-server/main/install.ps1 | iex + ``` + + + + It prompts for your API key and writes the config for both Cursor and Claude Desktop automatically. + + Skip to [Restart your editor](#restart-your-editor) below. + + + + Requires **Node.js 18+** installed on your machine. + + + Open the MCP config file for your editor: + + + + | OS | Path | + |---|---| + | **Mac / Linux** | `~/.cursor/mcp.json` | + | **Windows** | `%USERPROFILE%\.cursor\mcp.json` | + + If the file doesn't exist, create it. + + + Open **Claude Desktop** → **Settings** (gear icon) → **Developer** → **Edit Config** + + Or open the file manually: + + | OS | Path | + |---|---| + | **Mac** | `~/Library/Application Support/Claude/claude_desktop_config.json` | + | **Windows** | `%APPDATA%\Claude\claude_desktop_config.json` | + + + + Paste this config. If you already have other MCP servers, just add the `"atoms"` entry inside `"mcpServers"`. 
+ + ```json + { + "mcpServers": { + "atoms": { + "command": "npx", + "args": ["-y", "@developer-smallestai/atoms-mcp-server"], + "env": { + "ATOMS_API_KEY": "sk_your_api_key_here" + } + } + } + } + ``` + + **Replace** `sk_your_api_key_here` with your actual API key. + + + Don't share your API key or commit it to git. + + + + +--- + +## Restart your editor + + + + `Cmd+Shift+P` (Mac) or `Ctrl+Shift+P` (Windows) → **Developer: Reload Window** + + + Fully quit and reopen the app. + + + +--- + +## Verify + + + + Go to **Cursor Settings** → **MCP**. You should see `atoms` listed with a **green dot**. + + Open a new chat (**make sure you're in Agent mode**, not Ask mode) and type: + + ``` + List all my agents + ``` + + + Start a new conversation and type: + + ``` + List all my agents + ``` + + + +You should see your Atoms agents listed back. If you do — you're done! + +## Next steps + +Head to the **[Prompt Cookbook](/atoms/mcp/using-the-mcp/prompt-cookbook)** — it has copy-paste prompts for everything you can do. \ No newline at end of file diff --git a/fern/products/atoms/pages/dev/introduction/mcp/overview.mdx b/fern/products/atoms/pages/dev/introduction/mcp/overview.mdx new file mode 100644 index 0000000..6cd73c9 --- /dev/null +++ b/fern/products/atoms/pages/dev/introduction/mcp/overview.mdx @@ -0,0 +1,135 @@ +--- +title: Quick Start +subtitle: Talk to your Atoms voice agents, debug calls, and view analytics — all without leaving your editor. +--- + +The Atoms MCP Server connects your AI editor to the Atoms platform. No console tab-switching — just type what you want in natural language. + +--- + +## Setup + + + + Open [app.smallest.ai](https://app.smallest.ai?utm_source=documentation&utm_medium=docs), click your profile in the top-left, and go to **Settings**. + + + Atoms platform profile dropdown showing the Settings option + + + Click **API Keys** in the sidebar, then **Create API Key**. 
+ + + Atoms platform API Keys page with the Create API Key button highlighted + + + Name your key and click **Create API Key**. Copy it — it starts with `sk_`. + + + Create New API Key dialog with name input and Create button + + + + + Open a chat in **Cursor** or **Claude Desktop** and paste one of these: + + + + Zero dependencies — no Node.js needed. + + + + ``` + Set up the Atoms MCP server for me using the curl installer. + Run: curl -fsSL https://raw.githubusercontent.com/smallest-inc/mcp-server/main/install.sh | bash + My API key is: sk_paste_your_key_here + ``` + + + ``` + Set up the Atoms MCP server for me using the PowerShell installer. + Run: irm https://raw.githubusercontent.com/smallest-inc/mcp-server/main/install.ps1 | iex + My API key is: sk_paste_your_key_here + ``` + + + + + Requires **Node.js 18+**. + + ``` + Set up the Atoms MCP server for me. + The npm package is @developer-smallestai/atoms-mcp-server. + My API key is: sk_paste_your_key_here + ``` + + + + Your assistant configures everything for you. + + + + + + `Cmd+Shift+P` (Mac) or `Ctrl+Shift+P` (Windows) → **Developer: Reload Window** + + + Fully quit and reopen the app. + + + + + + Type this in chat: + + ``` + List all my agents + ``` + + You should see your agents listed back. You're set up. + + + +Prefer to set things up yourself? See the **[Manual Setup](manual-setup)** guide. + +--- + +## What can you do? + +Just type in your editor's chat — the AI picks the right tool: + + + + ``` + Show me all failed calls from this week + ``` + ``` + What's our total call spend this month? + ``` + ``` + List all my agents + ``` + + + ``` + Debug call CALL-1234567890-abc123 + ``` + Returns the full transcript, errors, timing, costs, and post-call analytics. + + + ``` + Create a new agent called "Support Bot" + ``` + ``` + Update its prompt to: "You are a helpful support agent..." 
+ ``` + + + ``` + Call +14155551234 using the "Sales" agent + ``` + Triggers a real outbound call — the agent follows its configured prompt. + + + +See the full list in the **[Prompt Cookbook](/atoms/mcp/using-the-mcp/prompt-cookbook)**. \ No newline at end of file diff --git a/fern/products/atoms/pages/dev/introduction/mcp/prompt-cookbook.mdx b/fern/products/atoms/pages/dev/introduction/mcp/prompt-cookbook.mdx new file mode 100644 index 0000000..61173ad --- /dev/null +++ b/fern/products/atoms/pages/dev/introduction/mcp/prompt-cookbook.mdx @@ -0,0 +1,180 @@ +--- +title: Prompt Cookbook +subtitle: Copy-paste prompts for everything the MCP can do. Just type them in your editor's chat. +--- + +No commands to memorize. Just ask in plain English — the AI picks the right tool. + +--- + +## Agents + + + + ``` + List all my agents + ``` + ``` + Show me agents with "support" in the name + ``` + + + + ``` + Create a new agent called "Customer Support" + ``` + ``` + Create an agent called "Survey Bot" with Hindi as the default language + ``` + + + + ``` + Update the prompt for agent "Support Bot" to: + "You are a helpful customer support agent for Acme Corp. + Be polite, concise, and always confirm before taking action." + ``` + + + + ``` + Change the first message of "Support Bot" to "Hi, thanks for calling Acme support! How can I help?" + ``` + ``` + Update "Support Bot" to support English and Hindi + ``` + + + + ``` + Archive the agent called "Old Test Bot" + ``` + + + +--- + +## Call logs + + + + ``` + Show me all calls from the last 24 hours + ``` + ``` + Show me calls from this week + ``` + + + + ``` + Show me failed calls from this week + ``` + ``` + Get all completed calls for agent "Support Bot" + ``` + + + + ``` + Find all calls to +14155551234 + ``` + + + + ``` + Show me all inbound calls this month + ``` + ``` + List outbound calls from the last 7 days + ``` + + + +--- + +## Debugging + + + Call IDs look like `CALL-1234567890-abc123`. 
You can find them in call log responses or in the Atoms Console. + + +``` +Debug call CALL-1234567890-abc123 +``` + +This returns everything about a single call: + +- **Full transcript** — complete conversation between agent and caller +- **Errors** — any errors that occurred during the call +- **Cost breakdown** — LLM, TTS, STT, telephony, and platform costs +- **Post-call analytics** — disposition metrics extracted from the conversation + +--- + +## Usage & costs + +``` +What's our total call spend this week? +``` +``` +Show usage stats for the "Sales Outreach" agent this month +``` +``` +How many calls did we make in January? +``` + +--- + +## Making calls + + + These trigger **real phone calls**. The agent will call the number and follow its configured prompt. + + +``` +Call +14155551234 using the "Sales Outreach" agent +``` +``` +Call +916366821717 using agent "Support Bot" +``` + +--- + +## Campaigns & phone numbers + + + + ``` + Show me all running campaigns + ``` + ``` + List completed campaigns for the "Survey" agent + ``` + + + ``` + List all phone numbers in my organization + ``` + + + +--- + +## Chaining actions + +You can combine multiple steps in a single prompt — the AI executes them in sequence: + +``` +Create a new agent called "Demo Bot", +set its prompt to "You are a friendly demo agent that introduces +our product in under 30 seconds", +then call +14155551234 with it +``` + +This will: +1. Create the agent +2. Update its prompt +3. Make the call + +All in one go. diff --git a/fern/products/atoms/pages/dev/introduction/mcp/tools.mdx b/fern/products/atoms/pages/dev/introduction/mcp/tools.mdx new file mode 100644 index 0000000..a00483d --- /dev/null +++ b/fern/products/atoms/pages/dev/introduction/mcp/tools.mdx @@ -0,0 +1,44 @@ +--- +title: Available Tools +subtitle: Everything the MCP server can do, under the hood. +--- + +You never call these directly — just describe what you want and the AI picks the right tool. This page is a reference. 
+ +--- + +## Read + +| Tool | What it does | +|---|---| +| `get_call_logs` | Query call logs with filters for status, type, date range, agent name, or phone number | +| `debug_call` | Deep-dive into a single call — full transcript, errors, timing, cost breakdown, post-call analytics | +| `get_agents` | List agents with their configuration, voice, LLM model, and call stats | +| `get_usage_stats` | Usage statistics — total calls, duration, costs, status breakdown | +| `get_campaigns` | List outbound calling campaigns with status and progress | +| `get_phone_numbers` | List phone numbers owned by your organization | + +## Write + +| Tool | What it does | +|---|---| +| `create_agent` | Create a new AI voice agent | +| `update_agent_prompt` | Update an agent's system prompt / instructions | +| `update_agent_config` | Update agent settings — name, language, voice, first message, etc. | +| `delete_agent` | Archive (soft-delete) an agent | + +## Act + +| Tool | What it does | +|---|---| +| `make_call` | Initiate an outbound phone call using a specific agent — triggers a real call | + +--- + +## Resources + +The MCP server also exposes a **Platform Overview** resource that gives the AI context about Atoms terminology, call types, statuses, and cost breakdowns. This helps the AI give more accurate answers without you having to explain the domain. + +| Resource | URI | +|---|---| +| Platform Overview | `atoms://docs/platform-overview` | diff --git a/fern/products/atoms/pages/dev/introduction/mcp/troubleshooting.mdx b/fern/products/atoms/pages/dev/introduction/mcp/troubleshooting.mdx new file mode 100644 index 0000000..685eec7 --- /dev/null +++ b/fern/products/atoms/pages/dev/introduction/mcp/troubleshooting.mdx @@ -0,0 +1,113 @@ +--- +title: Troubleshooting +subtitle: Common issues and how to fix them. 
+--- + +## Server not showing up + + + + After editing `~/.cursor/mcp.json`, you need to reload: + + `Cmd+Shift+P` → **Developer: Reload Window** + + For Claude Desktop, fully **quit and reopen** the app. + + + + A missing comma or bracket will silently break the config. Paste your `mcp.json` into [jsonlint.com](https://jsonlint.com) to validate. + + Common mistakes: + - Trailing comma after the last entry + - Missing closing brace `}` + - Using single quotes instead of double quotes + + + + If you used the one-line installer, verify the binary is in place: + + ```bash + ls ~/.atoms/atoms-mcp + ``` + + If not, re-run the installer: + + ```bash + curl -fsSL https://raw.githubusercontent.com/smallest-inc/mcp-server/main/install.sh | bash + ``` + + + + The npm method requires **Node.js 18+**. Check with: + + ```bash + node --version + ``` + + If not installed, download from [nodejs.org](https://nodejs.org), or use the one-line installer instead (no Node.js needed). + + + +--- + +## API key errors + + + + Your `ATOMS_API_KEY` is either incorrect or has been revoked. + + + ### Go to the Atoms Console + + Open **[console.smallest.ai](https://console.smallest.ai?utm_source=documentation&utm_medium=docs)** → **API Keys** + + ### Create a new key + + Click **Create API Key** and copy it. + + ### Update your config + + Replace the old key in `~/.cursor/mcp.json` or `claude_desktop_config.json`. + + ### Reload + + Reload Cursor (`Cmd+Shift+P` → Reload Window) or restart Claude Desktop. + + + + + Your API key is valid but isn't linked to an organization. This usually means the account was just created. Ask your team admin to add you to the org. + + + +--- + +## Tools not being called + + + In Cursor, make sure you're in **Agent mode** (not Ask mode). Ask mode is read-only and won't trigger tools. 
+ + +If the AI still doesn't pick up the tools, be more explicit: + +``` +Use the atoms MCP to list my agents +``` + +--- + +## Calls not going through + +When using `make_call`, check these: + +| Requirement | Example | +|---|---| +| Phone number in **E.164 format** | `+14155551234` (with `+` and country code) | +| Agent exists and is **not archived** | Check with `List all my agents` | +| Org has a **phone number assigned** | Check with `List all phone numbers` | + +--- + +## Still stuck? + +Reach out to the Atoms team on your internal Slack channel or email **support@smallest.ai**. diff --git a/fern/products/atoms/pages/dev/introduction/overview.mdx b/fern/products/atoms/pages/dev/introduction/overview.mdx index 359a272..cb38096 100644 --- a/fern/products/atoms/pages/dev/introduction/overview.mdx +++ b/fern/products/atoms/pages/dev/introduction/overview.mdx @@ -3,20 +3,20 @@ title: "What is Atoms SDK?" description: "Build real-time voice AI agents in Python." --- -The Atoms SDK is the Python framework for building voice agents on the [Smallest AI platform](https://smallest.ai). It handles streaming audio, conversation state, and tool coordination so you can focus on your agent's logic. +The Atoms SDK is the Python framework for building voice agents on the Smallest AI platform. It handles streaming audio, conversation state, and tool coordination so you can focus on your agent's logic. --- ## Get Started - + Build your first agent in minutes. - + Nodes, Sessions, Events. - + Full code samples. @@ -26,16 +26,16 @@ The Atoms SDK is the Python framework for building voice agents on the [Smallest ## Build - + Voice agent architecture. - + Outbound calls. - + End calls, transfers. - + Call metrics. 
diff --git a/fern/products/atoms/pages/dev/introduction/python-sdk.mdx b/fern/products/atoms/pages/dev/introduction/python-sdk.mdx index 6f16d24..16eb631 100644 --- a/fern/products/atoms/pages/dev/introduction/python-sdk.mdx +++ b/fern/products/atoms/pages/dev/introduction/python-sdk.mdx @@ -18,10 +18,10 @@ pip install smallestai | Component | Description | Learn More | |-----------|-------------|------------| -| **Nodes** | Processing units that handle events | [Nodes →](/dev/introduction/core-concepts/nodes) | -| **Events** | Messages flowing through the system | [Events →](/dev/introduction/core-concepts/events) | -| **Graphs** | Connect nodes into pipelines | [Graphs →](/dev/introduction/core-concepts/graphs) | -| **Sessions** | Manage conversation state and lifecycle | [Sessions →](/dev/introduction/core-concepts/sessions) | +| **Nodes** | Processing units that handle events | [Nodes →](/atoms/developer-guide/introduction/sdk/nodes) | +| **Events** | Messages flowing through the system | [Events →](/atoms/developer-guide/introduction/sdk/events) | +| **Graphs** | Connect nodes into pipelines | [Graphs →](/atoms/developer-guide/introduction/sdk/graphs) | +| **Sessions** | Manage conversation state and lifecycle | [Sessions →](/atoms/developer-guide/introduction/sdk/sessions) | --- @@ -29,7 +29,7 @@ pip install smallestai | Import | Purpose | |--------|---------| -| `smallestai.atoms.agent.nodes` | `Node`, `OutputAgentNode` base classes | +| `smallestai.atoms.agent.nodes` | `Node`, `OutputAgentNode`, `BackgroundAgentNode` base classes | | `smallestai.atoms.agent.events` | Event types (`SDKEvent`, etc.) | | `smallestai.atoms.agent.tools` | `@function_tool` decorator, registry | | `smallestai.atoms.agent.clients` | LLM clients (OpenAI-compatible) | @@ -41,10 +41,10 @@ pip install smallestai ## Next Steps - + Understand nodes, sessions, and events. - + Development and deployment tools. 
diff --git a/fern/products/atoms/pages/dev/introduction/quickstart.mdx b/fern/products/atoms/pages/dev/introduction/quickstart.mdx index f03f8f6..2f92ae4 100644 --- a/fern/products/atoms/pages/dev/introduction/quickstart.mdx +++ b/fern/products/atoms/pages/dev/introduction/quickstart.mdx @@ -1,9 +1,10 @@ --- -title: "Getting Started" -description: "From zero to a running AI agent." +title: "Quick Start" +sidebarTitle: "Quick Start" +description: "Install the SDK, build an agent, and run it." --- -This guide walks you through installing the SDK, writing your first intelligent agent, and running it. +Install the SDK, write your first agent, and test it — locally or deployed to the cloud. ## Prerequisites @@ -42,7 +43,11 @@ Create two files: one for the agent logic, and one to run the application. ) async def generate_response(self): - async for chunk in await self.llm.chat(self.context.messages, stream=True): + response = await self.llm.chat( + messages=self.context.messages, + stream=True + ) + async for chunk in response: if chunk.content: yield chunk.content ``` @@ -69,6 +74,18 @@ Create two files: one for the agent logic, and one to run the application. Your entry point can be named anything (`app.py`, `run.py`, etc.). When deploying, specify it with `--entry-point your_file.py`. + + + **Want a greeting?** Use `@session.on_event` to speak when the user joins: + ```python + @session.on_event("on_event_received") + async def on_event(_, event): + if isinstance(event, SDKSystemUserJoinedEvent): + agent.context.add_message({"role": "assistant", "content": "Hello!"}) + await agent.speak("Hello! How can I help?") + ``` + Adding the greeting to context ensures the LLM knows the conversation has started. + @@ -97,7 +114,7 @@ Once your files are ready, you have two options: To have Smallest AI host your agent in the cloud (for production, API access, or phone calls): - **Prerequisite:** You must first create an agent on the [Atoms platform](https://atoms.smallest.ai). 
The `agent init` command links your local code to that agent. + **Prerequisite:** You must first create an agent on the [Atoms platform](https://app.smallest.ai?utm_source=documentation&utm_medium=docs). The `agent init` command links your local code to that agent. @@ -128,10 +145,16 @@ Once your files are ready, you have two options: ## What's Next? - + Give your agent calculators, search, and APIs. - + Connect multiple agents for complex workflows. + +### Need Help? + + + Ask questions, share what you're building, and get help from other developers. + \ No newline at end of file diff --git a/fern/products/atoms/pages/docs.json b/fern/products/atoms/pages/docs.json index 13aa960..9077154 100644 --- a/fern/products/atoms/pages/docs.json +++ b/fern/products/atoms/pages/docs.json @@ -8,109 +8,166 @@ "dark": "#004D4D" }, "favicon": "logo/favi.svg", + "modeToggle": { + "default": "light" + }, "navigation": { "tabs": [ { - "tab": "Documentation", + "tab": "Introduction", "groups": [ { - "group": "Get Started", + "group": "Capabilities", "pages": [ - "introduction", - "quickstart" + "intro/welcome", + "intro/capabilities/agents-explained", + "intro/capabilities/knowledge-base", + "intro/capabilities/voice-speech", + "intro/capabilities/campaigns", + "intro/capabilities/telephony", + "intro/capabilities/integrations" ] }, { - "group": "Deep Dive", + "group": "Reference", + "pages": ["intro/reference/changelog", "intro/reference/support"] + } + ] + }, + { + "tab": "Atoms Platform", + "groups": [ + { + "group": "Introduction", "pages": [ + "platform/introduction/intro", + "platform/introduction/quick-start", + "platform/introduction/quick-ref" + ] + }, + { + "group": "Single Prompt Agents", + "pages": [ + "platform/single-prompt/overview", { - "group": "Workflow", + "group": "Creating Your Agent", "pages": [ - "deep-dive/workflow/what-is-a-workflow", - "deep-dive/workflow/what-is-a-node", - "deep-dive/workflow/what-is-a-branch", - "deep-dive/workflow/what-are-variables", - 
{ - "group": "Types of Nodes", - "pages": [ - "deep-dive/workflow/types-of-nodes/default", - "deep-dive/workflow/types-of-nodes/api-call", - "deep-dive/workflow/types-of-nodes/transfer-call", - "deep-dive/workflow/types-of-nodes/end-call", - "deep-dive/workflow/types-of-nodes/pre-call-api", - "deep-dive/workflow/types-of-nodes/post-call-api" - ] - }, - "deep-dive/workflow/best-practices-for-prompting", - "deep-dive/workflow/workflow-vs-single-prompt" + "platform/single-prompt/manual-setup", + "platform/single-prompt/from-template", + "platform/single-prompt/ai-assisted" ] }, { - "group": "Single Prompt", + "group": "Prompt Section", "pages": [ - "deep-dive/single-prompt/overview", - "deep-dive/single-prompt/tool-calls", - "deep-dive/single-prompt/prompting-best-practices" + "platform/single-prompt/writing-prompts", + "platform/single-prompt/config/model-selection", + "platform/single-prompt/config/voice-selection", + "platform/single-prompt/config/language-selection" ] }, { - "group": "Voice Config", + "group": "Configuration Panel", "pages": [ - "deep-dive/voice-config/types-of-voice-models", - "deep-dive/voice-config/understanding-voice-parameters", - "deep-dive/voice-config/pronunciation-dictionaries", - "deep-dive/voicemail-detection/voicemail-detection" + "platform/single-prompt/config/end-call", + "platform/single-prompt/config/transfer-call", + "platform/single-prompt/config/knowledge-base", + "platform/single-prompt/config/variables", + "platform/single-prompt/config/api-calls" ] }, { - "group": "LLM Config", + "group": "Agent Settings", "pages": [ - "deep-dive/llm-config/llm-config" + "platform/single-prompt/config/voice-settings", + "platform/single-prompt/config/model-settings", + "platform/single-prompt/config/phone-number", + "platform/single-prompt/config/webhooks", + "platform/single-prompt/config/general-settings" ] - }, + } + ] + }, + { + "group": "Conversational Flow Agents", + "pages": [ + "platform/convo-flow/overview", { - "group": "Call Logs", 
+ "group": "Creating Your Agent", "pages": [ - "deep-dive/call-logs/what-is-a-call-log" + "platform/convo-flow/manual-setup", + "platform/convo-flow/from-template" ] }, { - "group": "Phone Number", + "group": "Workflow Tab", "pages": [ - "deep-dive/phone-number/phone-number", - "deep-dive/phone-number/sip-trunking", - { - "group": "Provider Setup Guides", - "pages": [ - "deep-dive/phone-number/twilio-sip-setup", - "deep-dive/phone-number/telnyx-sip-setup", - "deep-dive/phone-number/vonage-sip-setup" - ] - } + "platform/convo-flow/workflow-builder", + "platform/convo-flow/node-types", + "platform/convo-flow/conditions", + "platform/convo-flow/config/variables" ] }, - "deep-dive/audience/audience", - "deep-dive/campaign/campaign", - "deep-dive/webhooks/webhooks", - "deep-dive/widget/widget", - "deep-dive/realtime-events/events" + { + "group": "Agent Settings", + "pages": [ + "platform/convo-flow/config/languages", + "platform/convo-flow/config/voice-settings", + "platform/convo-flow/config/model-settings", + "platform/convo-flow/config/phone-number", + "platform/convo-flow/config/webhooks", + "platform/convo-flow/config/general-settings" + ] + } ] - } - ] - }, - { - "tab": "Usecases", - "groups": [ + }, + { + "group": "Features", + "pages": [ + "platform/features/knowledge-base", + "platform/features/webhooks", + "platform/features/widget", + "platform/features/integrations", + "platform/features/post-call-metrics", + "platform/features/variables-reference", + "platform/features/api-calls-reference" + ] + }, + { + "group": "Deployment", + "pages": [ + "platform/deployment/phone-numbers", + "platform/deployment/audiences", + "platform/deployment/campaigns" + ] + }, { - "group": "Top Industries", + "group": "Analytics & Logs", "pages": [ - "usecases/banking", - "usecases/e-commerce", - "usecases/logistics", - "usecases/healthcare", - "usecases/recruitment", - "usecases/small-business", - "usecases/startups" + "platform/analytics/overview", + 
"platform/analytics/testing", + "platform/analytics/conversation-logs", + "platform/analytics/locking" + ] + }, + { + "group": "Cookbooks", + "pages": ["platform/cookbooks/using-cookbooks"] + }, + { + "group": "Troubleshooting", + "pages": [ + { + "group": "Common Issues", + "pages": [ + "platform/troubleshooting/common-issues/agent-issues", + "platform/troubleshooting/common-issues/call-quality", + "platform/troubleshooting/common-issues/configuration" + ] + }, + "platform/troubleshooting/faq", + "platform/troubleshooting/glossary", + "platform/troubleshooting/getting-help" ] } ] @@ -180,6 +237,13 @@ } ] }, + { + "group": "Knowledge Base", + "pages": [ + "dev/build/knowledge-base/overview", + "dev/build/knowledge-base/usage" + ] + }, { "group": "Calling", "pages": [ @@ -188,71 +252,36 @@ "dev/build/phone-calling/call-control" ] }, + { + "group": "Campaigns", + "pages": [ + "dev/build/campaigns/overview", + "dev/build/campaigns/managing-audiences", + "dev/build/campaigns/creating-campaigns" + ] + }, { "group": "Analytics", "pages": [ "dev/build/analytics/overview", "dev/build/analytics/call-metrics", - "dev/build/analytics/agent-performance" + "dev/build/analytics/post-call-analytics" ] } ] }, { "group": "Examples", - "pages": [ - "dev/cookbooks/examples" - ] + "pages": ["dev/cookbooks/examples"] } ] }, { - "tab": "API Reference V1", + "tab": "API Reference", "href": "docs/api-reference/", "openapi": "/api-reference/openapi.yaml" - }, - { - "tab": "Client Libraries", - "groups": [ - { - "group": "Client Libraries", - "pages": [ - "client-libraries/overview" - ] - } - ] - }, - { - "tab": "FAQs", - "groups": [ - { - "group": "Frequently Asked Questions", - "pages": [ - "faqs/frequently-asked-questions" - ] - } - ] } - ], - "global": { - "anchors": [ - { - "anchor": "Documentation", - "href": "https://atoms-docs.smallest.ai/", - "icon": "book-open-cover" - }, - { - "anchor": "Community", - "href": "https://smallest.ai/discord", - "icon": "discord" - }, - { - "anchor": 
"Blog", - "href": "https://smallest.ai/blog", - "icon": "newspaper" - } - ] - } + ] }, "logo": { "light": "/logo/dark.png", @@ -279,4 +308,4 @@ "youtube": "https://www.youtube.com/@smallest_ai" } } -} \ No newline at end of file +} diff --git a/fern/products/atoms/pages/faqs/frequently-asked-questions.mdx b/fern/products/atoms/pages/faqs/frequently-asked-questions.mdx index 2f26d0f..42849c2 100644 --- a/fern/products/atoms/pages/faqs/frequently-asked-questions.mdx +++ b/fern/products/atoms/pages/faqs/frequently-asked-questions.mdx @@ -5,24 +5,27 @@ icon: circle-question --- ### 1. How many languages do you support? -We primarily support English, Hindi for enterprises with extremely robust controls on quality. + +We primarily support English, Hindi for enterprises with extremely robust controls on quality. Apart from that, we also have support for 30+ languages such as French, Spanish, German, Italian in Europe; Marathi, Tamil, Telegu, Kannada, Gujarati, Malayalam in India; Arabic for the middle-east and Chinese, Japanese, Korean in Asia. However, the accuracy for these languages is limited at the moment. ### 2. Can we integrate our systems with Atoms? -Yes, atoms has a very robust API that can be integrated into any software. You can get the full list of APIs [here](/api-reference). + +Yes, atoms has a very robust API that can be integrated into any software. You can get the full list of APIs [here](/atoms/api-reference). ### 3. What integrations are supported for Atoms? + Almost every single software can be integrated in an Atom - before, during and after a call is complete. -We suggest using tools like [Zapier](https://zapier.com/), [Make.com](https://www.make.com/), [N8N](https://n8n.io/) etc. that provide easy integrations support to +We suggest using tools like [Zapier](https://zapier.com/), [Make.com](https://www.make.com/), [N8N](https://n8n.io/) etc. that provide easy integrations support to 1000s of third party softwares. 
-To get the full list of integrations, please check [here](/developer-guide/integrations). +To get the full list of integrations, please check the [API Reference](/atoms/api-reference). -Apart from the ones mentioned above, you can also use the [API](/api-reference) to integrate Atoms into any software. +Apart from the ones mentioned above, you can also use the [API](/atoms/api-reference) to integrate Atoms into any software. ### 3. Do you provide developer support for integration? @@ -34,21 +37,21 @@ In case your account size is smaller, feel free to reach out to our development You can expect a sub 1 second latency unless you are doing actions in your system that are taking longer to return. -For example, if you are doing a long running data base call, then the latency might increase. However, there are tricks to +For example, if you are doing a long running data base call, then the latency might increase. However, there are tricks to overcome this by adding filler words in between as shown [here](deep-dive/filler-words) ### 5. Can I use Atoms over chat? -Yes, you can use atoms over chat as well. Please refer to the [API documentation](/api-reference) to know more. +Yes, you can use atoms over chat as well. Please refer to the [API documentation](/atoms/api-reference) to know more. ### 6. How much does it cost? The cost depends on the volume of usage. It starts at 0.08 USD/min and goes 2-3x lower as you scale in volumes. -You can get the detailed pricing [here](https://smallest.ai/pricing) and it is also explained in depth [here](/pricing/introduction]) +You can get the detailed pricing [here](https://smallest.ai/pricing) and it is also explained in depth [here](/atoms/product-overview/capabilities/welcome) ### 7. Do you provide on-premise deployments? Yes we provide on-premise deployments both in private clouds and on physical hardware for annual account sizes larger than 100,000 USD. 
-Please reach out to [support@smallest.ai](mailto:support@smallest.ai) if this fits your profile. \ No newline at end of file +Please reach out to [support@smallest.ai](mailto:support@smallest.ai) if this fits your profile. diff --git a/fern/products/atoms/pages/images/api-keys-page.png b/fern/products/atoms/pages/images/api-keys-page.png new file mode 100644 index 0000000..d7ffbcb Binary files /dev/null and b/fern/products/atoms/pages/images/api-keys-page.png differ diff --git a/fern/products/atoms/pages/images/app.smallest.ai_sign_up_atoms-platform.png b/fern/products/atoms/pages/images/app.smallest.ai_sign_up_atoms-platform.png new file mode 100644 index 0000000..897301e Binary files /dev/null and b/fern/products/atoms/pages/images/app.smallest.ai_sign_up_atoms-platform.png differ diff --git a/fern/products/atoms/pages/images/app.smallest.ai_welcome_screen_atoms-platform.png b/fern/products/atoms/pages/images/app.smallest.ai_welcome_screen_atoms-platform.png new file mode 100644 index 0000000..33608e0 Binary files /dev/null and b/fern/products/atoms/pages/images/app.smallest.ai_welcome_screen_atoms-platform.png differ diff --git a/fern/products/atoms/pages/images/create-api-key-dialog.png b/fern/products/atoms/pages/images/create-api-key-dialog.png new file mode 100644 index 0000000..b5f77c8 Binary files /dev/null and b/fern/products/atoms/pages/images/create-api-key-dialog.png differ diff --git a/fern/products/atoms/pages/images/platform-settings-menu.png b/fern/products/atoms/pages/images/platform-settings-menu.png new file mode 100644 index 0000000..3ffcdc4 Binary files /dev/null and b/fern/products/atoms/pages/images/platform-settings-menu.png differ diff --git a/fern/products/atoms/pages/images/test-agent-web-call.png b/fern/products/atoms/pages/images/test-agent-web-call.png new file mode 100644 index 0000000..6e26d6d Binary files /dev/null and b/fern/products/atoms/pages/images/test-agent-web-call.png differ diff --git 
a/fern/products/atoms/pages/images/testing-voice-agent.png b/fern/products/atoms/pages/images/testing-voice-agent.png new file mode 100644 index 0000000..560f3c8 Binary files /dev/null and b/fern/products/atoms/pages/images/testing-voice-agent.png differ diff --git a/fern/products/atoms/pages/intro/admin/account-team.mdx b/fern/products/atoms/pages/intro/admin/account-team.mdx new file mode 100644 index 0000000..4700e3b --- /dev/null +++ b/fern/products/atoms/pages/intro/admin/account-team.mdx @@ -0,0 +1,51 @@ +--- +title: "Settings & Team" +description: "Manage your profile, organization, and team members" +--- + +**Location:** [app.smallest.ai](https://app.smallest.ai/dashboard/settings?utm_source=documentation&utm_medium=docs) → Settings + + + ![Settings](/intro/admin/images/settings.png) + + +--- + +## Profile + +The Settings page shows your account details: + +| Field | Description | +|-------|-------------| +| **User Name** | Your display name | +| **Organization ID** | Unique identifier for your organization (click to copy) | + +--- + +## Organization Members + +Manage who has access to your organization. You can invite new members and see the status of existing ones. + +### Inviting a Member + +1. Go to [app.smallest.ai](https://app.smallest.ai/dashboard/settings?utm_source=documentation&utm_medium=docs) → **Settings** +2. Click **+ Invite Member** +3. Enter the team member's email address +4. 
The invitee will receive an email to join your organization + +### Member Details + +The members table shows: + +| Column | Description | +|--------|-------------| +| **Email** | The member's email address | +| **Status** | Active or **Pending** (invitation sent but not yet accepted) | +| **Role** | **Admin** or **Member** | + +### Roles + +| Role | Access | +|------|--------| +| **Admin** | Full access — manage agents, campaigns, settings, billing, and team members | +| **Member** | Standard access — work with agents and campaigns, no team or billing management | diff --git a/fern/products/atoms/pages/intro/admin/api-keys.mdx b/fern/products/atoms/pages/intro/admin/api-keys.mdx new file mode 100644 index 0000000..ac26124 --- /dev/null +++ b/fern/products/atoms/pages/intro/admin/api-keys.mdx @@ -0,0 +1,86 @@ +--- +title: "API Keys" +description: "Create and manage API keys for programmatic access" +--- + +**Location:** [app.smallest.ai](https://app.smallest.ai/dashboard/settings?utm_source=documentation&utm_medium=docs) → API Keys + + + ![API Keys](/intro/admin/images/api-keys.png) + + +--- + +## Creating an API Key + +1. Go to [app.smallest.ai](https://app.smallest.ai/dashboard/settings?utm_source=documentation&utm_medium=docs) → Settings → **API Keys** +2. Click **Create API Key** +3. Give it a name (e.g. `production`, `staging`, `dev-team`) +4. **Copy immediately** — the key is only shown once + + + Your API key provides access to your account. Never share it publicly, commit it to version control, or expose it in client-side code. + + +--- + +## Managing Keys + +The API Keys page shows all your keys with their name and creation date. 
+ +| Action | How | +|--------|-----| +| **Search** | Use the search bar to find a key by name | +| **Copy** | Click the copy icon next to any key | +| **Delete** | Click the delete icon to revoke a key | + +Create separate keys for different environments (production, staging, development) so you can rotate or revoke them independently. + +--- + +## Using Your API Key + +### Environment Variable (Recommended) + +```bash +export SMALLEST_API_KEY="your-api-key-here" +``` + +### In API Requests + +Include the key in the `Authorization` header as a Bearer token: + +```bash +curl -X GET "https://api.smallest.ai/atoms/v1/agent" \ + -H "Authorization: Bearer YOUR_API_KEY" +``` + +### In Code + + +```python Python +from smallestai.atoms import AtomsClient + +# Automatically reads SMALLEST_API_KEY from environment +client = AtomsClient() + +# Or pass explicitly +client = AtomsClient(api_key="your-api-key") +``` + +```javascript Node.js +import { AtomsClient } from '@smallest/atoms'; + +// Automatically reads SMALLEST_API_KEY from environment +const client = new AtomsClient(); +``` + + +--- + +## Best Practices + +- **Use environment variables** — never hardcode keys in source code +- **One key per environment** — separate keys for dev, staging, production +- **Rotate regularly** — rotate keys periodically and after any team changes +- **Revoke unused keys** — delete keys that are no longer needed diff --git a/fern/products/atoms/pages/intro/admin/billing.mdx b/fern/products/atoms/pages/intro/admin/billing.mdx new file mode 100644 index 0000000..48d9f08 --- /dev/null +++ b/fern/products/atoms/pages/intro/admin/billing.mdx @@ -0,0 +1,110 @@ +--- +title: "Subscription & Plans" +description: "Manage your subscription, compare plans, and track credits" +--- + +**Location:** [app.smallest.ai](https://app.smallest.ai/dashboard/settings?utm_source=documentation&utm_medium=docs) → Subscription + + + ![Subscription](/intro/admin/images/subscription.png) + + +--- + +## Manage 
Subscription + +The Subscription page shows your current plan details at a glance: + +| Field | Description | +|-------|-------------| +| **Current Plan & Cost** | Your active plan and billing cycle | +| **Billing Date** | Next billing date | +| **Credits Remaining** | Remaining credits out of your plan's total | + +Click **Manage Subscription** to upgrade, downgrade, or update payment details. + +--- + +## Plans + +Plans are available on **Monthly** or **Annual** billing. All plans include access to the no-code Agent Builder and Agentic Graph Builder. + +### Overview + +| | Pay As You Go | Personal | Business | Enterprise | +|---|---|---|---|---| +| **Price** | Usage Based | $49/month | $1,999/month | Custom | +| **Best For** | Pay only for what you use. No monthly commitment. | Individual developers or early-stage teams building with voice AI | Growing teams deploying voice AI across products or workflows | High-volume, regulated, or OEM deployments requiring full control | + +--- + +### AI Agents + +| Feature | Pay As You Go | Personal | Business | Enterprise | +|---|---|---|---|---| +| No. 
of AI Agents | 5 | 5 | 20 | Unlimited | +| Agentic Graph Builder / No-code Agent Builder | ✅ | ✅ | ✅ | ✅ | +| Prompt Engineering Support | — | — | 1 agent free setup | Custom | +| Extra Custom Agent Setup | — | — | $2,000 per agent | Custom support | +| Voice Cloned Calls | — | — | — | Custom | +| Analytics (Coming soon) | — | — | — | Custom support | + +--- + +### Calling & Campaigns + +| Feature | Pay As You Go | Personal | Business | Enterprise | +|---|---|---|---|---| +| Cost/min (India) | ~$0.09 | ~$0.09 | ~$0.07 | Custom | +| Cost/min (US) | ~$0.15 | ~$0.15 | ~$0.12 | Custom | +| Campaigns | 60 | 60 | 1,000 | Custom | +| Parallel Calls | 1 | 1 | 10 | Custom | +| Add-On Phone Numbers | — | $10/number/month | $10/number/month | Custom | +| Phone Number Rotation | — | — | — | Contact Sales | + +--- + +### Our Models + +| Feature | Pay As You Go | Personal | Business | Enterprise | +|---|---|---|---|---| +| Speech Model Access | All models | All models | All models | All models | +| Lightning / 10K chars (USD) | ~$0.08 | ~$0.08 | ~$0.05 | Custom | +| Lightning V2 / 10K chars (USD) | ~$0.20 | ~$0.20 | ~$0.10 | Custom | +| No. of Voice Clones | 5 | 5 | 15 | Unlimited | +| Professional Voice Clone Support | — | — | — | Unlimited | +| Custom Model Support | — | — | — | ✅ | +| Concurrency on WebSocket | 3 requests | 3 requests | 15 requests | Custom | +| RPM for TTS APIs | 100 | 100 | 1,000 | Custom | + +--- + +### Support & Security + +| Feature | Pay As You Go | Personal | Business | Enterprise | +|---|---|---|---|---| +| API Access | Limited | Limited | Full access | Full access | +| CRM Integrations | — | — | ✅ | ✅ | +| On-premise Deployment | — | — | — | ✅ | +| Compliance (HIPAA, SOC2, etc.) 
| — | — | — | ✅ | +| SIP Server Access | — | — | — | ✅ | +| KYC Requirement | — | — | — | ✅ | +| SLAs & Support | Email support | Email support | Slack + Priority Support | Custom | + +--- + +### Tools / Products + +| Feature | Pay As You Go | Personal | Business | Enterprise | +|---|---|---|---|---| +| TTS Studio | ✅ | ✅ | ✅ | ✅ | +| AI Script Assistant | ✅ | ✅ | ✅ | ✅ | +| No. of TTS Projects | 10 | 10 | 500 | Custom | + +--- + +## Questions? + + + Custom pricing for enterprise or high-volume needs + diff --git a/fern/products/atoms/pages/intro/admin/data-privacy.mdx b/fern/products/atoms/pages/intro/admin/data-privacy.mdx new file mode 100644 index 0000000..9e476a4 --- /dev/null +++ b/fern/products/atoms/pages/intro/admin/data-privacy.mdx @@ -0,0 +1,147 @@ +--- +title: "Data & Privacy" +description: "How we handle your data and protect privacy" +--- + +# Data & Privacy + +Understand how Atoms handles data, what we collect, and how we protect privacy. + +--- + +## Data We Collect + +| Data Type | Purpose | Retention | +|-----------|---------|-----------| +| **Account info** | User management | Account lifetime | +| **Call metadata** | Analytics, billing | 90 days default | +| **Call recordings** | Quality, compliance | Configurable | +| **Transcripts** | Analytics, training | Configurable | +| **Knowledge base** | Agent intelligence | Until deleted | + +--- + +## Data Ownership + +**Your data belongs to you.** + +- You own all content you upload +- You own all call recordings and transcripts +- We don't use your data to train AI models (unless you opt in) +- You can export or delete your data at any time + +--- + +## Data Retention + +### Default Retention + +| Data | Default Retention | +|------|-------------------| +| Call metadata | 90 days | +| Call recordings | 30 days | +| Transcripts | 90 days | +| Analytics | 1 year | + +### Custom Retention + +Enterprise customers can configure: +- Extended retention (up to 7 years) +- Reduced retention (for privacy 
compliance) +- Automatic deletion rules + +--- + +## Data Export + +Export your data at any time: + +1. Go to **Settings** → **Data Management** +2. Click **Export Data** +3. Select what to export (agents, calls, contacts) +4. Download as JSON or CSV + +--- + +## Data Deletion + +### Delete Specific Data +- Delete individual call recordings +- Remove contacts from campaigns +- Clear knowledge base content + +### Delete Everything +Request full account deletion: +1. Contact support@smallest.ai +2. We'll verify your identity +3. All data deleted within 30 days + +--- + +## GDPR Compliance + +For EU users and data subjects: + +| Right | How to Exercise | +|-------|-----------------| +| **Access** | Export your data via dashboard | +| **Rectification** | Edit your data in dashboard | +| **Erasure** | Delete data or request full deletion | +| **Portability** | Export in standard formats | +| **Objection** | Contact privacy@smallest.ai | + +**Data Processing Agreement (DPA)** available for Enterprise customers. + +--- + +## HIPAA Compliance + +For healthcare data: + +- **BAA required** — Sign a Business Associate Agreement +- **PHI handling** — Special protections for patient data +- **Audit logging** — Enhanced logging for compliance + + + Contact us to sign a Business Associate Agreement + + +--- + +## PII Handling + +### Automatic Protection +- Phone numbers masked in debug logs +- PII detection in transcripts +- Secure credential storage + +### Configurable +- Enable/disable call recording +- Redact sensitive info from transcripts +- Set data retention policies + +--- + +## Third-Party Processors + +We use trusted partners for infrastructure: + +| Partner | Purpose | Compliance | +|---------|---------|------------| +| **AWS** | Cloud infrastructure | SOC 2, ISO 27001 | +| **Google Cloud** | AI/ML services | SOC 2, ISO 27001 | +| **Stripe** | Payment processing | PCI DSS Level 1 | + +Full list of sub-processors available on request. 
+ +--- + +## Contact + +For privacy questions or data requests: +- **Email:** privacy@smallest.ai +- **Address:** [Company address] + + + Read our full Privacy Policy + diff --git a/fern/products/atoms/pages/intro/admin/security.mdx b/fern/products/atoms/pages/intro/admin/security.mdx new file mode 100644 index 0000000..99862df --- /dev/null +++ b/fern/products/atoms/pages/intro/admin/security.mdx @@ -0,0 +1,115 @@ +--- +title: "Security Overview" +description: "How Atoms protects your data and calls" +--- + +# Security Overview + +Atoms is built with enterprise security in mind. Here's how we protect your data, calls, and infrastructure. + +--- + +## Infrastructure Security + +| Layer | Protection | +|-------|------------| +| **Network** | All traffic encrypted via TLS 1.3 | +| **Data at rest** | AES-256 encryption | +| **Infrastructure** | SOC 2 Type II compliant cloud | +| **Access** | Role-based access control (RBAC) | + +--- + +## Data Protection + +### Encryption + +- **In transit** — TLS 1.3 for all API and voice traffic +- **At rest** — AES-256 for stored data +- **Call recordings** — Encrypted with customer-specific keys + +### Data Residency + +Choose where your data is stored: +- 🇺🇸 United States +- 🇪🇺 European Union +- 🇦🇺 Australia +- More regions available for Enterprise + +--- + +## Access Control + +### Authentication +- **API Keys** — Secure token-based access +- **SSO/SAML** — Enterprise single sign-on +- **MFA** — Multi-factor authentication supported + +### Authorization +- **Role-based** — Owner, Admin, Editor, Viewer roles +- **Workspace isolation** — Data separated between workspaces +- **Audit logs** — Track all user actions + +--- + +## Compliance + +| Standard | Status | +|----------|--------| +| **SOC 2 Type II** | ✅ Certified | +| **GDPR** | ✅ Compliant | +| **HIPAA** | ✅ Available (Enterprise) | +| **PCI DSS** | ✅ Level 1 | +| **ISO 27001** | 🔄 In progress | + +Request our SOC 2 report or sign a BAA by contacting security@smallest.ai + 
+--- + +## Call Security + +### Voice Traffic +- End-to-end encryption for voice data +- No call content stored unless recording is enabled +- Recordings encrypted and access-controlled + +### PII Handling +- Phone numbers masked in logs by default +- PII redaction available for transcripts +- Configurable data retention policies + +--- + +## Penetration Testing + +- Annual third-party penetration tests +- Continuous vulnerability scanning +- Bug bounty program (coming soon) + +--- + +## Incident Response + +| SLA | Response Time | +|-----|---------------| +| Critical (P1) | < 1 hour | +| High (P2) | < 4 hours | +| Medium (P3) | < 24 hours | +| Low (P4) | < 72 hours | + +Report security issues to: **security@smallest.ai** + +--- + +## Enterprise Security Features + +Available on Enterprise plans: +- Dedicated infrastructure +- Custom data retention policies +- Private cloud deployment +- Advanced audit logging +- Custom SSO/SCIM + + + Discuss enterprise security requirements + diff --git a/fern/products/atoms/pages/intro/capabilities/agents-explained.mdx b/fern/products/atoms/pages/intro/capabilities/agents-explained.mdx new file mode 100644 index 0000000..5a2eba9 --- /dev/null +++ b/fern/products/atoms/pages/intro/capabilities/agents-explained.mdx @@ -0,0 +1,76 @@ +--- +title: "Voice AI Agents" +sidebarTitle: "Agents" +icon: "robot" +description: "AI-powered voice assistants that handle real conversations" +--- + +An agent is an AI that talks on the phone. It understands what callers say, reasons through the conversation, takes actions, and responds naturally — all in real-time. 
+ +--- + +## What Agents Can Do + +| Capability | Description | +|------------|-------------| +| **Answer questions** | Pull from knowledge bases, policies, and FAQs | +| **Take actions** | Book appointments, update records, process payments | +| **Route calls** | Transfer to humans, escalate issues, route to departments | +| **Collect data** | Qualify leads, gather feedback, complete intake forms | + +--- + +## Two Ways to Build + + + + **One prompt, infinite flexibility.** You define personality and rules — the AI handles the flow. Best for support, FAQs, open-ended conversations. + + + Deprecated + + **Visual workflow builder.** You design the exact path — nodes, branches, conditions. Best for qualification, booking, structured processes. + + + +| Aspect | Single Prompt | Conversational Flow | +|--------|---------------|---------------------| +| **Setup** | Fast — write one prompt | Longer — design the flow | +| **Flexibility** | High — handles unexpected topics | Structured — follows your design | +| **Control** | AI decides the path | You dictate the path | +| **Best for** | Support, FAQs, advisory | Booking, intake, qualification | + +--- + +## How It Works + +1. **Caller speaks** → Real-time speech-to-text +2. **AI processes** → Understands intent, retrieves context +3. **Takes action** → Calls APIs, queries knowledge base +4. **Responds** → Natural text-to-speech, sub-second latency + +The entire turn happens in under a second. Callers experience natural, human-like conversation. 
+ +--- + +## Use Cases + +- **Customer Support** — Handle FAQs, troubleshoot, escalate to humans +- **Sales** — Qualify leads, book demos, follow up on interest +- **Scheduling** — Book, reschedule, confirm appointments +- **Collections** — Friendly payment reminders +- **Surveys** — Gather feedback through natural conversation +- **Healthcare** — Patient intake, reminders, triage + +--- + +## Get Started + + + + Build agents visually — no code required + + + Full SDK access for custom implementations + + diff --git a/fern/products/atoms/pages/intro/capabilities/campaigns.mdx b/fern/products/atoms/pages/intro/capabilities/campaigns.mdx new file mode 100644 index 0000000..49a0a67 --- /dev/null +++ b/fern/products/atoms/pages/intro/capabilities/campaigns.mdx @@ -0,0 +1,70 @@ +--- +title: "Campaigns" +sidebarTitle: "Campaigns" +icon: "megaphone" +description: "Outbound calling at scale" +--- + +Campaigns let you make thousands of calls automatically. Upload contacts, assign an agent, schedule the timing — Atoms handles the rest. + +--- + +## Use Cases + +| Campaign Type | Example | +|---------------|---------| +| **Lead outreach** | Initial contact with new leads | +| **Appointment reminders** | Confirm upcoming bookings | +| **Payment reminders** | Friendly collection calls | +| **Surveys** | Gather feedback at scale | +| **Re-engagement** | Reach dormant customers | +| **Event invitations** | Promote webinars, events | + +--- + +## How It Works + +1. **Upload contacts** — CSV with phone numbers and personalization data +2. **Select agent** — Choose which agent handles the calls +3. **Configure** — Set calling hours, timezone, retry rules +4. **Launch** — Start the campaign +5. 
**Monitor** — Real-time dashboard shows progress + +--- + +## Features + +| Feature | Description | +|---------|-------------| +| **Scheduling** | Business hours only, timezone-aware | +| **Rate limiting** | Control calls per minute | +| **Retry logic** | Automatic retries for no-answers | +| **DNC compliance** | Do Not Call list handling | +| **Personalization** | Use contact data in conversations | +| **Analytics** | Success rates, durations, outcomes | + +--- + +## Personalization + +Pass contact data to your agent: + +```csv +phone,first_name,appointment_date ++12025551234,John,January 15th +``` + +Agent says: *"Hi John, I'm calling to confirm your appointment on January 15th..."* + +--- + +## Get Started + + + + Launch campaigns from the dashboard + + + Programmatic campaign management + + diff --git a/fern/products/atoms/pages/intro/capabilities/integrations.mdx b/fern/products/atoms/pages/intro/capabilities/integrations.mdx new file mode 100644 index 0000000..9058823 --- /dev/null +++ b/fern/products/atoms/pages/intro/capabilities/integrations.mdx @@ -0,0 +1,70 @@ +--- +title: "Integrations" +sidebarTitle: "Integrations" +icon: "plug" +description: "Connect Atoms to your existing tools" +--- + +Atoms connects to your existing systems — CRMs, calendars, databases, custom APIs. Your agent reads data, takes actions, and keeps everything in sync. 
+ +--- + +## Integration Types + +| Type | Examples | +|------|----------| +| **CRM** | Salesforce, HubSpot, Zoho — log calls, update records | +| **Calendar** | Google Calendar, Outlook — book appointments | +| **Payments** | Stripe, Square — process transactions | +| **Messaging** | Twilio SMS, Slack — send notifications | +| **Custom APIs** | Any REST endpoint | +| **Webhooks** | Push events to your systems | + +--- + +## Pre-built Integrations + +| Integration | What Your Agent Can Do | +|-------------|------------------------| +| **Salesforce** | Create leads, log calls, update contacts | +| **HubSpot** | Create contacts, log activities, update deals | +| **Google Calendar** | Check availability, book, reschedule, cancel | +| **Outlook** | Check availability, book, reschedule, cancel | +| **Stripe** | Create payment links, check status | + +--- + +## Custom APIs + +Connect any REST API: + +1. Define the endpoint +2. Map parameters from conversation +3. Handle response in agent logic + +Your agent can query order status, verify accounts, check inventory — anything your APIs support. + +--- + +## Webhooks + +Push call events to your systems in real-time: + +| Event | When | +|-------|------| +| `call.started` | Call begins | +| `call.ended` | Call completes | +| `analytics.ready` | Post-call analysis done | + +--- + +## Get Started + + + + Configure integrations visually + + + Build custom integrations with the SDK + + diff --git a/fern/products/atoms/pages/intro/capabilities/knowledge-base.mdx b/fern/products/atoms/pages/intro/capabilities/knowledge-base.mdx new file mode 100644 index 0000000..fee5781 --- /dev/null +++ b/fern/products/atoms/pages/intro/capabilities/knowledge-base.mdx @@ -0,0 +1,64 @@ +--- +title: "Knowledge Base" +sidebarTitle: "Knowledge Base" +icon: "book" +description: "Give agents your company's knowledge" +--- + +A knowledge base lets your agent answer questions using your content — product docs, FAQs, policies, pricing. 
Instead of generic AI responses, your agent provides accurate, sourced answers from your actual documents. + +--- + +## How It Works + +1. **Upload** — PDFs, docs, CSVs, web pages +2. **Customer asks** — Agent searches your content +3. **Finds match** — Retrieves relevant sections +4. **Responds** — Answers grounded in your data + +This is called Retrieval-Augmented Generation (RAG) — combining search with AI for accurate, hallucination-free and grounded answers. + +--- + +## What You Can Upload + +| Format | Examples | +|--------|----------| +| **Documents** | PDF, DOCX, TXT | +| **Spreadsheets** | CSV, XLSX | +| **Web content** | HTML, Markdown, URLs | +| **Structured data** | JSON | + +--- + +## Why Use It + +| Without KB | With KB | +|------------|---------| +| Agent guesses or hallucinates | Agent cites your documents | +| "I'm not sure about that" | "According to our policy..." | +| Generic responses | Company-specific expertise | +| Outdated information | Always current | + +--- + +## Example + +**Upload:** Return policy PDF + +**Customer:** "What's your return policy for electronics?" + +**Agent:** "You can return electronics within 30 days with original packaging. Would you like me to start a return for you?" + +--- + +## Get Started + + + + Create and manage knowledge bases visually + + + Programmatic knowledge base management + + diff --git a/fern/products/atoms/pages/intro/capabilities/telephony.mdx b/fern/products/atoms/pages/intro/capabilities/telephony.mdx new file mode 100644 index 0000000..23b3ce0 --- /dev/null +++ b/fern/products/atoms/pages/intro/capabilities/telephony.mdx @@ -0,0 +1,82 @@ +--- +title: "Phone Numbers" +sidebarTitle: "Phone Numbers" +icon: "phone" +description: "Inbound, outbound, and phone infrastructure" +--- + +Atoms provides full telephony infrastructure — buy phone numbers, receive inbound calls, make outbound calls, and integrate with existing phone systems. 
+ +--- + +## Capabilities + +| Type | Description | +|------|-------------| +| **Inbound** | Customers call your number, reach your agent | +| **Outbound** | Your agent calls customers (single or campaigns) | +| **Transfer** | Agent connects to human agents when needed | +| **SIP** | Connect to existing phone infrastructure | + +--- + +## Getting Numbers + +### Buy from Atoms + +Available in 40+ countries with instant provisioning. + +| Type | Best For | +|------|----------| +| **Local** | Regional presence, lower cost | +| **Toll-free** | National reach, professional image | +| **Mobile** | SMS capability | + +### Import Your Own Number (SIP) + +Already have phone numbers? Import them via SIP trunking. Provide your number and SIP termination URL, and Atoms sets up both inbound and outbound trunks so your number works for making and receiving calls through the platform. + +| You Provide | Description | +|-------------|-------------| +| **Phone Number** | Your existing number in E.164 format (e.g., `+14155551234`) | +| **SIP Termination URL** | Where Atoms routes outbound calls (your SIP provider) | +| **Username / Password** | Optional, only if your trunk requires authentication | + +Atoms gives you a **SIP Origination URL** to configure in your provider so inbound calls route to your agent. + +Works with any SIP-compatible provider: Twilio, Telnyx, Vonage, or your own infrastructure. + +> Set this up in the [Platform UI](/atoms/atoms-platform/deployment/phone-numbers) or via the [API](/atoms/api-reference). + +--- + +## Call Controls + +During calls, agents can: + +| Action | Description | +|--------|-------------| +| **Transfer** | Connect to human or external number | +| **Hold** | Place caller on hold with music | +| **End** | Gracefully terminate call | +| **DTMF** | Detect keypad presses | +| **Record** | Start/stop recording | + +--- + +## SIP Integration + +Connect Atoms to your existing phone system. 
Any provider that supports standard SIP trunking works — Twilio, Telnyx, Vonage, or your own PBX/VoIP setup. + +--- + +## Get Started + + + + Buy and manage phone numbers + + + Programmatic telephony control + + diff --git a/fern/products/atoms/pages/intro/capabilities/voice-speech.mdx b/fern/products/atoms/pages/intro/capabilities/voice-speech.mdx new file mode 100644 index 0000000..2a36949 --- /dev/null +++ b/fern/products/atoms/pages/intro/capabilities/voice-speech.mdx @@ -0,0 +1,79 @@ +--- +title: "Voice & Speech" +sidebarTitle: "Voice & Speech" +icon: "microphone" +description: "Industry-leading voice AI with Lightning, Pulse, and Electron" +--- + +Atoms is powered by our proprietary voice AI stack — the fastest, most accurate, and most natural-sounding in the industry. Three models working together in sequence: transcribe speech, reason through the response, synthesize voice. Sub-800ms end-to-end. + +--- + +## Pulse — Speech-to-Text + +High-accuracy, low-latency ASR built for real-time transcription. 32 languages with automatic detection. + +| Spec | Performance | +|------|-------------| +| **Latency (TTFT)** | 64ms | +| **English WER** | 4.5% | +| **Best WER** | 3.0% (Italian), 3.2% (Spanish) | +| **Languages** | 32 supported | +| **Concurrency** | 100 requests per GPU | + +**Key strengths:** +- 64ms time-to-first-transcript +- Industry-leading accuracy for Romance and Indic languages +- PII/PCI redaction built-in +- Handles accents and noisy environments + +### Supported Languages + +English, Hindi, Spanish, Portuguese, Italian, French, German, Dutch, Russian, Ukrainian, Polish, Czech, Slovak, Romanian, Bulgarian, Hungarian, Finnish, Swedish, Danish, Lithuanian, Latvian, Estonian, Maltese, Kannada, Malayalam, Telugu, Tamil, Marathi, Gujarati, Bengali, Punjabi, Oriya + +--- + +## Electron — Small Language Model + +Our optimized SLM for voice AI. Fast reasoning with low latency, purpose-built for conversational agents. 
+ +| Spec | Performance | +|------|-------------| +| **Optimized for** | Voice conversations | +| **Latency** | Sub-500ms responses | +| **Context handling** | Multi-turn conversations | + +Electron understands conversational context, handles interruptions gracefully, and generates responses optimized for spoken delivery — not just text. + +--- + +## Lightning v3.1 — Text-to-Speech + +The fastest high-fidelity TTS model available. 44kHz native resolution with ultra-low latency. + +| Spec | Performance | +|------|-------------| +| **Latency** | 175ms @ 20 concurrency | +| **Sample Rate** | 44,100 Hz native | +| **Speed Control** | 0.5x to 2.0x | +| **Languages** | English, Hindi (more coming) | +| **Voice Cloning** | Instant (5-15s) and Professional (45min+) | + +**Key strengths:** +- Studio-grade 44kHz audio clarity +- Natural prosody and intonation +- Real-time streaming (HTTP, SSE, WebSocket) +- Voice cloning for custom brand voices + +--- + +## Get Started + + + + Configure voice settings visually + + + Programmatic voice configuration + + diff --git a/fern/products/atoms/pages/intro/getting-started/quickstart-business.mdx b/fern/products/atoms/pages/intro/getting-started/quickstart-business.mdx new file mode 100644 index 0000000..d36a935 --- /dev/null +++ b/fern/products/atoms/pages/intro/getting-started/quickstart-business.mdx @@ -0,0 +1,88 @@ +--- +title: "Quickstart for Business Users" +description: "Build your first voice AI agent in 5 minutes — no coding required" +--- + +# Quickstart for Business Users + +Get your first AI voice agent up and running in just 5 minutes using our no-code platform. + + + + 1. Go to [atoms.smallest.ai](https://atoms.smallest.ai) + 2. Create your account or sign in + 3. You'll land on your **Dashboard** + + New accounts get free credits to test your first agents! + + + + 1. Click **Create Agent** in the dashboard + 2. 
Choose a template or start from scratch: + - **Customer Support** — Handle inquiries 24/7 + - **Appointment Booking** — Schedule meetings + - **Lead Qualification** — Screen inbound calls + - **Blank** — Build from scratch + 3. Give your agent a name + + Templates come pre-configured with prompts, voices, and settings optimized for each use case. + + + + **Choose Agent Type:** + - **Single Prompt** — One system prompt, agent decides flow + - **Convo Flow** — Step-by-step conversation script + + **Set the Basics:** + - Write your agent's **Role & Objective** + - Select a **Voice** (preview to find the right one) + - Choose your **LLM** (GPT-4.1, Albert, or custom) + - Set the **Language** + + + Understanding Single Prompt vs Convo Flow + + + + + 1. Click the **Test Agent** button (top right) + 2. A test call will connect you to your agent + 3. Have a conversation — try different scenarios + 4. Review the **Conversation Logs** to see what happened + + Always test thoroughly before going live! Try edge cases and unexpected inputs. + + + + **Option A: Get a Phone Number** + 1. Go to **Phone Numbers** → **Buy Number** + 2. Search by country/area code + 3. Purchase and assign to your agent + 4. Your agent now answers calls to that number! + + **Option B: Widget for Website** + 1. Go to **Widget** settings + 2. Copy the embed code + 3. Add to your website + 4. 
Visitors can call your agent from the browser + + + +--- + +## Next Steps + + + + Give your agent company-specific knowledge + + + Make outbound calls at scale + + + Connect to Salesforce, HubSpot, and more + + + Track call performance and insights + + diff --git a/fern/products/atoms/pages/intro/getting-started/quickstart-developers.mdx b/fern/products/atoms/pages/intro/getting-started/quickstart-developers.mdx new file mode 100644 index 0000000..caa08b5 --- /dev/null +++ b/fern/products/atoms/pages/intro/getting-started/quickstart-developers.mdx @@ -0,0 +1,180 @@ +--- +title: "Quickstart for Developers" +description: "Get API access and make your first programmatic call in minutes" +--- + +# Quickstart for Developers + +Get your API keys, install the SDK, and create your first voice agent programmatically. + +## Prerequisites + +- Python 3.8+ or Node.js 16+ +- An Atoms account ([sign up here](https://atoms.smallest.ai)) + +--- + +## Step 1: Get Your API Key + +1. Log in to your [Atoms Dashboard](https://atoms.smallest.ai) +2. Go to **Settings** → **API Keys** +3. Click **Generate New Key** +4. Copy and save your key securely + +Never commit your API key to version control. Use environment variables. + +```bash +export SMALLEST_API_KEY="your-api-key-here" +``` + +--- + +## Step 2: Install the SDK + + +```bash Python +pip install smallestai +``` + +```bash Node.js +npm install @smallest/atoms +``` + + +--- + +## Step 3: Create Your First Agent + + +```python Python +from smallestai.atoms import AtomsClient + +client = AtomsClient() + +# Create a new agent +agent = client.new_agent( + name="My First Agent", + system_prompt="""You are a helpful customer support agent for Acme Corp. + Be friendly, professional, and helpful. + If you don't know something, offer to transfer to a human.""", + voice_id="emily", # Choose a voice + language="en-US" +) + +print(f"Agent created! 
ID: {agent.id}") +``` + +```javascript Node.js +import { AtomsClient } from '@smallest/atoms'; + +const client = new AtomsClient(); + +const agent = await client.newAgent({ + name: "My First Agent", + systemPrompt: `You are a helpful customer support agent for Acme Corp. + Be friendly, professional, and helpful. + If you don't know something, offer to transfer to a human.`, + voiceId: "emily", + language: "en-US" +}); + +console.log(`Agent created! ID: ${agent.id}`); +``` + + +--- + +## Step 4: Make an Outbound Call + + +```python Python +# Start an outbound call +call = client.start_outbound_call( + agent_id=agent.id, + phone_number="+1234567890", + from_number="+0987654321" # Your Atoms number +) + +print(f"Call started! Call ID: {call.id}") +``` + +```javascript Node.js +const call = await client.startOutboundCall({ + agentId: agent.id, + phoneNumber: "+1234567890", + fromNumber: "+0987654321" +}); + +console.log(`Call started! Call ID: ${call.id}`); +``` + + +--- + +## Step 5: Get Call Results + + +```python Python +from smallestai.atoms import Call + +call_manager = Call() + +# Get call details +call_data = call_manager.get_call(call_id=call.id) + +print(f"Status: {call_data['status']}") +print(f"Duration: {call_data['duration']}s") +print(f"Transcript: {call_data['transcript']}") +``` + +```javascript Node.js +const callData = await client.getCall(call.id); + +console.log(`Status: ${callData.status}`); +console.log(`Duration: ${callData.duration}s`); +console.log(`Transcript: ${callData.transcript}`); +``` + + +--- + +## Next Steps + + + + Deep dive into SDK concepts + + + Full REST API documentation + + + Add RAG capabilities to agents + + + Scale to thousands of calls + + + +--- + +## Common Patterns + +```python +# List all agents +agents = client.get_agents() + +# Update an agent +client.update_agent(agent_id=agent.id, system_prompt="New prompt...") + +# Delete an agent +client.delete_agent(agent_id=agent.id) + +# Search call logs +from smallestai.atoms 
import Call +call_manager = Call() +calls = call_manager.search_calls(agent_id=agent.id, limit=100) +``` + + + Check out our [Cookbooks](/atoms/developer-guide/examples/examples) for complete working examples of common use cases. + diff --git a/fern/products/atoms/pages/intro/reference/changelog.mdx b/fern/products/atoms/pages/intro/reference/changelog.mdx new file mode 100644 index 0000000..8a1308b --- /dev/null +++ b/fern/products/atoms/pages/intro/reference/changelog.mdx @@ -0,0 +1,8 @@ +--- +title: "Changelog" +sidebarTitle: "Changelog" +icon: "clock-rotate-left" +description: "What's new in Atoms" +--- + +Coming soon. diff --git a/fern/products/atoms/pages/intro/reference/support.mdx b/fern/products/atoms/pages/intro/reference/support.mdx new file mode 100644 index 0000000..4ca14dc --- /dev/null +++ b/fern/products/atoms/pages/intro/reference/support.mdx @@ -0,0 +1,42 @@ +--- +title: "Support" +sidebarTitle: "Support" +icon: "life-ring" +description: "Get help with Atoms" +--- + +## Contact + + + + General questions and issues + + + Payment and subscription help + + + Report security concerns + + + Enterprise inquiries + + + +--- + +## Resources + + + + Guides, tutorials, and references + + + Community chat and quick answers + + + System status and incidents + + + SDKs and sample code + + diff --git a/fern/products/atoms/pages/intro/welcome.mdx b/fern/products/atoms/pages/intro/welcome.mdx new file mode 100644 index 0000000..9062c51 --- /dev/null +++ b/fern/products/atoms/pages/intro/welcome.mdx @@ -0,0 +1,212 @@ +--- +title: " " +sidebarTitle: "Welcome" +icon: "house" +description: "Build, test, and deploy voice AI agents in minutes." +mode: "wide" +"og:title": "Welcome to Atoms - Voice AI Platform" +--- + + + +{/* Animated gradient border card hero */} +
+ + + {/* Card wrapper */} +
+ {/* Blur glow layer */} +
+ + {/* Animated gradient border layer */} +
+ + {/* The actual card */} +
+ {/* Background gradient blob - like smallest logo */} +
+
+ + {/* Main title */} + Welcome to Atoms + + {/* Description */} +

Voice AI agents that sound human. Build, deploy, and scale intelligent conversations in minutes.

+
+
+
+ +Atoms is a voice AI platform that turns phone calls into outcomes. Customers get instant help. Leads get qualified. Appointments get booked — all in real-time, at any scale, around the clock. + +We built the entire stack from scratch. Pulse transcribes speech with 64ms time-to-first-transcript. Electron processes intent and generates responses in under 500ms. Lightning synthesizes studio-quality voice at 175ms latency. End-to-end, sub-800ms turn times — indistinguishable from human conversation. + +### Our Models + +| Model | What It Does | Performance | +|-------|--------------|-------------| +| **[Pulse](/atoms/product-overview/capabilities/voice-and-speech)** | Speech-to-Text | 32 languages, 64ms latency, 4.5% English WER | +| **[Electron](/atoms/product-overview/capabilities/voice-and-speech)** | Language Model | Voice-optimized SLM, sub-500ms reasoning | +| **[Lightning v3.1](/atoms/product-overview/capabilities/voice-and-speech)** | Text-to-Speech | 44kHz studio-grade, 175ms latency, voice cloning | + +--- + +## Choose Your Path + + + + **No-code visual builder** — Build your first agent in 5 minutes, no code required. + + + **Full SDK access** — Build your first agent in Python with the Atoms SDK. + + + +--- + +## What You Can Build + + + + AI that understands speech, processes intent, and responds naturally. + + + Give agents your docs, PDFs, and URLs to reference. + + + Outbound calling at scale with automated management. + + + +--- + +## Capabilities + + + + One prompt, infinite flexibility. Agent adapts dynamically. + + + Deprecated + + Visual workflows for structured, predictable conversations. + + + Lightning + Pulse + Electron. Sub-800ms total turn time. + + + Numbers in 40+ countries. Inbound and outbound. + + + Connect CRMs, calendars, and custom APIs. + + + Call metrics and conversation insights. + + + +--- + +## Resources + + + + Code examples and templates on GitHub. + + + See what others have built with Atoms. 
+ + + Join the community — ask questions, share projects. + + diff --git a/fern/products/atoms/pages/introduction.mdx b/fern/products/atoms/pages/introduction.mdx index 4a67a32..850cd64 100644 --- a/fern/products/atoms/pages/introduction.mdx +++ b/fern/products/atoms/pages/introduction.mdx @@ -17,14 +17,10 @@ who are great at having conversations. All you have to do is plug in your busine Check the links below to get started. - + For business representatives who wants to set this up. - + For developers who want to get their hands dirty. @@ -32,16 +28,11 @@ Check the links below to get started. ## Integration Options - - Integrate real-time voice and text chat with AI agents directly in your web - applications using our multimodal SDK. + + Integrate real-time voice and text chat with AI agents directly in your web applications using our + multimodal SDK. - - Build powerful backend integrations with our Python and Node.js SDKs for - programmatic agent management. + + Build powerful backend integrations with our Python and Node.js SDKs for programmatic agent management. diff --git a/fern/products/atoms/pages/platform-api/agents.mdx b/fern/products/atoms/pages/platform-api/agents.mdx new file mode 100644 index 0000000..67067e5 --- /dev/null +++ b/fern/products/atoms/pages/platform-api/agents.mdx @@ -0,0 +1,63 @@ +--- +title: "Agents" +description: "Manage agents, templates, and webhooks." +--- + +The `AgentsApi` allows you to programmatically create agents, manage their configuration, and handle webhooks. + +## Clients + +```python +from smallestai.atoms.api_client import ApiClient +from smallestai.atoms.api.agents_api import AgentsApi + +client = ApiClient() +api = AgentsApi(client) +``` + +## Common Operations + +### Get Agent Details + +Retrieve configuration for a specific agent. 
+ +```python +agent = api.agent_id_get(agent_id="your-agent-id") +print(f"Agent Name: {agent.name}") +``` + +### Create Agent from Template + +Spin up a new agent using a pre-defined template. + +```python +from smallestai.atoms.api import AgentTemplatesApi +from smallestai.atoms.models import CreateAgentFromTemplateRequest + +# Initialize Templates API +templates_api = AgentTemplatesApi() + +new_agent = templates_api.agent_from_template_post( + create_agent_from_template_request=CreateAgentFromTemplateRequest( + templateId="template-id", + name="My New Agent" + ) +) +print(f"Created Agent ID: {new_agent.id}") +``` + +### Manage Webhooks + +Subscribe to events for your agent. + +```python +from smallestai.atoms.models import AgentAgentIdWebhookSubscriptionsPostRequest + +api.agent_agent_id_webhook_subscriptions_post( + agent_id="your-agent-id", + agent_agent_id_webhook_subscriptions_post_request=AgentAgentIdWebhookSubscriptionsPostRequest( + url="https://your-server.com/webhook", + events=["call.completed", "transcript.ready"] + ) +) +``` diff --git a/fern/products/atoms/pages/platform-api/calls.mdx b/fern/products/atoms/pages/platform-api/calls.mdx new file mode 100644 index 0000000..29ab4f6 --- /dev/null +++ b/fern/products/atoms/pages/platform-api/calls.mdx @@ -0,0 +1,43 @@ +--- +title: "Calls" +description: "Retrieve call history and transcripts." +--- + +The `Call` module provides access to your agent's call history, transcripts, and analytics. + +## Clients + +```python +from smallestai.atoms.call import Call + +call = Call() +``` + +## Operations + +### List Calls + +Fetch a list of recent calls. + +```python +calls = call.get_calls(limit=10) + +for log in calls["data"]["logs"]: + print(f"Call ID: {log['callId']} | Status: {log['status']}") +``` + +### Get Call Details + +Get detailed information, including transcripts, for a specific call. 
+ +```python +details = call.get_call("call-id-uuid") +data = details["data"] + +print(f"Duration: {data['duration']}s") +print(f"Status: {data['status']}") + +# Transcript +for msg in data.get("transcript", []): + print(f"{msg['role']}: {msg['content']}") +``` diff --git a/fern/products/atoms/pages/platform-api/campaigns.mdx b/fern/products/atoms/pages/platform-api/campaigns.mdx new file mode 100644 index 0000000..7264050 --- /dev/null +++ b/fern/products/atoms/pages/platform-api/campaigns.mdx @@ -0,0 +1,49 @@ +--- +title: "Campaigns" +description: "Manage outbound calling campaigns." +--- + +The `Campaign` module allows you to create and manage bulk outbound calling campaigns easily. + +## Clients + +```python +from smallestai.atoms.campaign import Campaign +from smallestai.atoms.audience import Audience + +campaign = Campaign() +audience = Audience() +``` + +## Operations + +### Create a Campaign + +Launch a new outbound campaign. + +```python +# 1. Create an audience first +aud_response = audience.create( + name="Q3 Outreach List", + phone_numbers=["+14155551234"], + names=[("John", "Doe")] +) +audience_id = aud_response["data"]["_id"] + +# 2. Create the campaign +camp_response = campaign.create( + name="Q3 Outreach", + agent_id="your-agent-id", + audience_id=audience_id, + phone_ids=["your-phone-id"], # Get from client.get_phone_numbers() + description="Sales outreach", + max_retries=2, + retry_delay=15 +) +campaign_id = camp_response["data"]["_id"] +print(f"Campaign Created: {campaign_id}") + +# 3. Start the campaign +campaign.start(campaign_id) +print("Campaign Started") +``` diff --git a/fern/products/atoms/pages/platform-api/logs.mdx b/fern/products/atoms/pages/platform-api/logs.mdx new file mode 100644 index 0000000..922d15b --- /dev/null +++ b/fern/products/atoms/pages/platform-api/logs.mdx @@ -0,0 +1,39 @@ +--- +title: 'Logs' +description: 'Retrieve conversation logs and details.' 
+--- + +The `Call` module allows you to access detailed logs and analytics for your conversations. + +## Clients + +```python +from smallestai.atoms.call import Call + +call = Call() +``` + +## Get Call Logs + +Retrieve details and transcript for a specific conversation using its `call_id`. + +```python +# specific_call_id is a string, usually a UUID +call_id = "your_call_id_here" + +try: + # Get conversation details + details = call.get_call(call_id) + data = details["data"] + + print(f"Status: {data['status']}") + print(f"Transcript lines: {len(data.get('transcript', []))}") + + # Access post-call analytics if available + analytics = data.get("postCallAnalytics", {}) + if analytics: + print(f"Summary: {analytics.get('summary')}") + +except Exception as e: + print(f"Error fetching logs: {e}") +``` diff --git a/fern/products/atoms/pages/platform-api/organization.mdx b/fern/products/atoms/pages/platform-api/organization.mdx new file mode 100644 index 0000000..7128b6d --- /dev/null +++ b/fern/products/atoms/pages/platform-api/organization.mdx @@ -0,0 +1,28 @@ +--- +title: 'Organization' +description: 'Manage and retrieve organization details.' +--- + +The `OrganizationApi` provides methods to access information about your organization within the Smallest.ai platform. + +## Initialize Organization API + +```python +from smallestai.atoms.api import OrganizationApi + +# Initialize the API client +api = OrganizationApi() +``` + +## Get Organization Details + +Retrieve the current organization's profile and settings. 
+ +```python +try: + # Get organization info + org_details = api.organization_get() + print(f"Organization Details: {org_details}") +except Exception as e: + print(f"Error fetching organization details: {e}") +``` diff --git a/fern/products/atoms/pages/platform-api/overview.mdx b/fern/products/atoms/pages/platform-api/overview.mdx new file mode 100644 index 0000000..1928cfc --- /dev/null +++ b/fern/products/atoms/pages/platform-api/overview.mdx @@ -0,0 +1,54 @@ +--- +title: "Overview" +description: "Programmatically manage your Atoms platform resources." +--- + +The Platform API allows you to manage agents, calls, campaigns, knowledge bases, and more programmatically. + +## The Atoms Client + +The `AtomsClient` is the main entry point for interacting with the Smallest.ai Platform API. It wraps all specific API clients (Agents, Calls, etc.) into a single, cohesive interface. + +### Initialization + +```python +from smallestai.atoms import AtomsClient + +# Initialize the client (picks up ATOMS_API_KEY from environment) +client = AtomsClient() +``` + +### Accessing APIs + +You can access specific functionality through the client's properties or convenience methods: + +```python +# Create an agent using the convenience method +client.create_agent(create_agent_request=...) + +# Or access the specific API directly +client.agents_api.agent_post(...) + +# Managing Knowledge Bases +client.get_knowledge_bases() + +# Checking Logs +client.get_conversation_logs(id="call_id") +``` + +## Explore the APIs + + + + Create and manage AI agents. + + + Run outbound calling campaigns. + + + Access call history and transcripts. + + + Debug with conversation logs. + + diff --git a/fern/products/atoms/pages/platform-api/user.mdx b/fern/products/atoms/pages/platform-api/user.mdx new file mode 100644 index 0000000..de56d5c --- /dev/null +++ b/fern/products/atoms/pages/platform-api/user.mdx @@ -0,0 +1,28 @@ +--- +title: 'User' +description: 'Manage and retrieve user details.'
+--- + +The `UserApi` allows you to access information about the authenticated user. + +## Initialize User API + +```python +from smallestai.atoms.api import UserApi + +# Initialize the API client +api = UserApi() +``` + +## Get User Details + +Retrieve the current user's profile information. + +```python +try: + # Get user info + user_details = api.user_get() + print(f"User Details: {user_details}") +except Exception as e: + print(f"Error fetching user details: {e}") +``` diff --git a/fern/products/atoms/pages/platform/analytics/conversation-logs.mdx b/fern/products/atoms/pages/platform/analytics/conversation-logs.mdx new file mode 100644 index 0000000..779faf7 --- /dev/null +++ b/fern/products/atoms/pages/platform/analytics/conversation-logs.mdx @@ -0,0 +1,142 @@ +--- +title: "Conversation Logs" +sidebarTitle: "Conversation Logs" +description: "Review transcripts and debug agent behavior." +--- + +Conversation Logs are your window into every call. See exactly what was said, when events occurred, and how metrics were captured — everything you need to understand and improve your agent. 
+ +**Location:** Top right → **Convo Logs** button + +--- + +## Call Logs List + + + ![Call logs list](../building-agents/images/convo-logs-list.png) + + +The main view shows all calls with key information at a glance: + +| Column | Description | +|--------|-------------| +| **Call Date** | When the call occurred | +| **Call ID** | Unique identifier (click to copy) | +| **From / To** | Phone numbers or sources | +| **Duration** | How long the call lasted | +| **Hangup Cause** | Why the call ended | +| **Status** | Current state of the call | +| **Retries** | Number of retry attempts | +| **Details** | Click to view full conversation | + +--- + +## Filtering + +Click **Filter By** to narrow down your logs: + +| Filter | Options | +|--------|---------| +| **Conversation Type** | Inbound Calls, Outbound Calls, Web Calls, Chat | +| **Status** | Pending, In Queue, In Progress, Active, Completed, Failed, Cancelled, No Answer, Processing | +| **End Reason** | Dial No Answer, User Hangup, Agent Hangup, Busy, Timeout, Error, Voicemail | +| **Call Type** | All Attempts, Retry Attempts, Initial Attempts | +| **Call Duration** | 0-30 seconds, 30-60 seconds, 1-5 minutes, 5+ minutes | + +Use **Search Logs** to find specific calls by ID or content. + +--- + +## Call Details + +Click **Details** on any call to open the full conversation view. You'll see tabs for different types of information: + + + + + ![Overview tab](../building-agents/images/convo-logs-overview.png) + + + High-level summary of the call: + + | Field | Description | + |-------|-------------| + | **Call Summary** | AI-generated summary of what happened | + | **Agent** | Which agent handled the call | + | **Call ID** | Unique identifier | + | **Model** | AI model used | + | **Voice** | Voice used | + | **Date & Time** | When the call occurred | + | **Cost (Credits)** | Credit usage | + | **Disconnection Reason** | Why the call ended | + + Play the audio recording using the waveform player at the top. 
+ + + + + ![Transcript tab](../building-agents/images/convo-logs-transcript.png) + + + The full conversation, word for word. Each message shows: + - Who spoke (Agent or User) + - What was said + - Timestamp + + Use this to understand exactly how the conversation flowed. + + + + + ![Events tab](../building-agents/images/convo-logs-events.png) + + + Timeline of everything that happened: + - Agent responses + - Call End triggers + - Transfers + - API calls + - Other system events + + Each event shows the timestamp and what occurred. + + + + + ![Metrics tab](../building-agents/images/convo-logs-metrics.png) + + + Post-call metrics you've configured, with their values: + + Each metric shows: + - Name + - Data type (Integer, String, Boolean, etc.) + - Value extracted from this call + + Click the dropdown on any metric to see details. + + + +--- + +## Exporting + +Click the **Export** button to download your logs: + +| Format | Best for | +|--------|----------| +| **JSON** | Developers, programmatic analysis | +| **CSV** | Spreadsheets, reporting | + +--- + +## Related + + + + Define what to track from each call + + + Test your agent + + diff --git a/fern/products/atoms/pages/platform/analytics/dashboard-overview.mdx b/fern/products/atoms/pages/platform/analytics/dashboard-overview.mdx new file mode 100644 index 0000000..897f12d --- /dev/null +++ b/fern/products/atoms/pages/platform/analytics/dashboard-overview.mdx @@ -0,0 +1,98 @@ +--- +title: "Analytics Dashboard Overview" +sidebarTitle: "Dashboard Overview" +description: "Monitor your voice AI performance with comprehensive analytics." +--- + +The Analytics dashboard provides a unified view of how your voice AI agents are performing. Track call volumes, success rates, and key metrics across all your agents. 
+ +--- + +## Location + +**Left Sidebar → Observe → Analytics** + +--- + +## Dashboard Layout + +→ **NEEDS PLATFORM INFO:** Analytics dashboard interface + +The dashboard typically includes: + +| Section | Content | +|---------|---------| +| **Summary Cards** | Key metrics at a glance | +| **Charts** | Trends over time | +| **Agent Breakdown** | Per-agent performance | +| **Recent Activity** | Latest calls | + +--- + +## Key Metrics + +### Volume Metrics + +| Metric | Description | +|--------|-------------| +| **Total Calls** | Number of calls handled | +| **Inbound Calls** | Calls received | +| **Outbound Calls** | Calls made (campaigns) | +| **Active Agents** | Agents currently live | + +### Performance Metrics + +| Metric | Description | +|--------|-------------| +| **Avg Call Duration** | Average length of calls | +| **Completion Rate** | % of calls that completed normally | +| **Success Rate** | % achieving desired outcome | +| **Transfer Rate** | % transferred to human | + +### Quality Metrics + +| Metric | Description | +|--------|-------------| +| **Customer Satisfaction** | From post-call metrics | +| **Resolution Rate** | Issues resolved on first call | +| **Escalation Rate** | Calls requiring escalation | + +--- + +## Time Filters + +View data for different periods: + +| Period | Use Case | +|--------|----------| +| Today | Real-time monitoring | +| Last 7 days | Weekly trends | +| Last 30 days | Monthly overview | +| Custom range | Specific analysis | + +--- + +## Agent Comparison + +Compare performance across agents: + +| Agent | Calls | Avg Duration | Success Rate | +|-------|-------|--------------|--------------| +| Support Bot | 1,234 | 3:45 | 87% | +| Sales Agent | 567 | 5:12 | 42% | +| Reminder Bot | 2,890 | 1:23 | 95% | + +Identify top performers and areas for improvement. 
+ +--- + +## What's Next + + + + Deep dive into metrics + + + Optimization tips + + diff --git a/fern/products/atoms/pages/platform/analytics/improving-performance.mdx b/fern/products/atoms/pages/platform/analytics/improving-performance.mdx new file mode 100644 index 0000000..02a0a6b --- /dev/null +++ b/fern/products/atoms/pages/platform/analytics/improving-performance.mdx @@ -0,0 +1,177 @@ +--- +title: "Improving Performance" +sidebarTitle: "Improving Performance" +description: "Use analytics insights to optimize your voice AI agents." +--- + +Analytics tell you what's happening. This page shows you how to turn insights into improvements. + +--- + +## Common Issues and Solutions + +### Low Success Rate + +**Symptoms:** +- Fewer calls achieve desired outcome +- High "unsuccessful" disposition + +**Investigate:** +1. Review conversation logs for failed calls +2. Identify where conversations break down +3. Check if callers are asking unexpected questions + +**Solutions:** +| Finding | Action | +|---------|--------| +| Agent misunderstands requests | Improve prompt clarity | +| Missing information | Expand Knowledge Base | +| Wrong branch taken (Convo Flow) | Refine conditions | +| Caller frustration | Improve tone/empathy in prompt | + +--- + +### High Transfer Rate + +**Symptoms:** +- Many calls end in transfer +- Humans are overloaded + +**Investigate:** +1. What triggers transfers? +2. Are they appropriate escalations? +3. Could the agent handle these? + +**Solutions:** +| Finding | Action | +|---------|--------| +| Legitimate escalations | Transfer rate is appropriate | +| Agent can't answer questions | Add to Knowledge Base | +| Agent gives up too easily | Adjust prompt instructions | +| Caller requests human | Improve agent experience | + +--- + +### Short Call Duration + +**Symptoms:** +- Calls end very quickly +- Low engagement + +**Investigate:** +1. Are callers hanging up? +2. Are end conditions triggering too early? +3. Is the greeting off-putting? 
+ +**Solutions:** +| Finding | Action | +|---------|--------| +| Poor greeting | Improve opening | +| Immediate hang-up | Check voice/audio quality | +| Misunderstood intent | Improve understanding | +| Too aggressive end conditions | Relax end call triggers | + +--- + +### Very Long Calls + +**Symptoms:** +- Calls exceeding expected duration +- Rambling conversations + +**Investigate:** +1. Is the agent getting stuck in loops? +2. Is it asking too many questions? +3. Is caller unable to get answers? + +**Solutions:** +| Finding | Action | +|---------|--------| +| Loops in Convo Flow | Fix workflow logic | +| Agent too verbose | Shorten responses | +| Caller can't get answer | Improve Knowledge Base | +| No clear path to resolution | Add more direct paths | + +--- + +### Low Satisfaction Scores + +**Symptoms:** +- Post-call metrics show unhappy callers + +**Investigate:** +1. Read transcripts of low-scoring calls +2. Identify frustration points +3. Note specific complaints + +**Solutions:** +| Finding | Action | +|---------|--------| +| Agent sounds robotic | Improve prompt for naturalness | +| Answers are wrong | Fix Knowledge Base | +| Process is frustrating | Simplify workflow | +| Agent doesn't empathize | Add empathy instructions | + +--- + +## Optimization Cycle + +Follow this continuous improvement process: + +``` +1. Monitor Analytics + ↓ +2. Identify Issues + ↓ +3. Review Conversation Logs + ↓ +4. Diagnose Root Cause + ↓ +5. Make Changes + ↓ +6. Test Changes + ↓ +7. Deploy and Monitor + ↓ +(Repeat) +``` + +--- + +## A/B Testing + +To test improvements: + +1. Create a new agent version with changes +2. Split traffic between old and new +3. Compare metrics +4. 
Roll out winner + +→ **NEEDS PLATFORM INFO:** A/B testing capabilities + +--- + +## Quick Wins + +Improvements that often help: + +| Quick Win | Impact | +|-----------|--------| +| Add more FAQ to KB | Better answers | +| Shorten agent responses | Faster calls | +| Add empathy phrases | Better satisfaction | +| Fix pronunciation issues | Clearer communication | +| Improve greeting | Better engagement | + +--- + +## What's Next + + + + Dig into specific calls + + + See optimized examples + + diff --git a/fern/products/atoms/pages/platform/analytics/locking.mdx b/fern/products/atoms/pages/platform/analytics/locking.mdx new file mode 100644 index 0000000..3e36b2c --- /dev/null +++ b/fern/products/atoms/pages/platform/analytics/locking.mdx @@ -0,0 +1,45 @@ +--- +title: "Locking Your Agent" +sidebarTitle: "Locking" +description: "Protect production agents from accidental changes." +--- + +Once your agent is working well, lock it. Locking prevents accidental edits that could break a production agent. + +**Location:** Top right → **Lock Agent** toggle + + + ![Lock agent toggle](../building-agents/images/lock-agent.png) + + +--- + +## How It Works + +Toggle **Lock Agent** to ON — all editing becomes disabled. + +**What's blocked:** +- Prompt editing +- Configuration changes +- Settings modifications +- Node changes (Convo Flow) + +**What still works:** +- Test Agent +- View Conversation Logs +- Live calls continue normally + +To make changes, toggle it back to OFF, edit, test, then re-lock. 
+ +--- + +## Related + + + + Test before locking + + + Deploy to production + + diff --git a/fern/products/atoms/pages/platform/analytics/overview.mdx b/fern/products/atoms/pages/platform/analytics/overview.mdx new file mode 100644 index 0000000..02f994b --- /dev/null +++ b/fern/products/atoms/pages/platform/analytics/overview.mdx @@ -0,0 +1,81 @@ +--- +title: "Analytics" +description: "Monitor call performance across your agents and campaigns" +--- + +The Analytics dashboard gives you a bird's-eye view of all your voice AI agents' activity — call volumes, connection rates, costs, and agent performance. + +**Location:** Left Sidebar → Observe → Analytics + + + Analytics dashboard + + +--- + +## Filters + +Narrow down your data with the filters at the top: + +| Filter | Options | +|--------|---------| +| **Date Range** | Select start and end dates | +| **Filter by Agent** | View specific agent's calls | +| **Filter by Campaign** | View specific campaign's calls | +| **Filter by Call** | Filter by call type or status | + +Click **Refresh** to update the dashboard with your selections. + +--- + +## Summary Cards + +Quick metrics at the top of the dashboard: + +| Metric | Description | +|--------|-------------| +| **Call Counts** | Total number of calls | +| **Avg. Call Duration** | Average length of calls | +| **Avg. Call Latency** | Average response time | +| **Total Cost** | Credits spent on calls | + +--- + +## Charts + +### Call Connected Percentage + +Pie chart showing the connection rate by agent—how many calls connected vs. didn't connect. 
+ +### Disconnection Reason + +Breakdown of why calls ended: +- **Dial No Answer** — Contact didn't pick up +- **User Hangup** — Contact ended the call +- **Agent Hangup** — Agent ended the call + +--- + +## Most Called Agents + +Table showing which agents handled the most calls: + +| Column | Description | +|--------|-------------| +| Agent Name | The agent | +| Number of calls | Total calls handled | +| Call Minutes | Total talk time | +| Credits Spent | Cost for this agent | + +--- + +## Related + + + + Drill into individual call details + + + View campaign-specific analytics + + diff --git a/fern/products/atoms/pages/platform/analytics/testing.mdx b/fern/products/atoms/pages/platform/analytics/testing.mdx new file mode 100644 index 0000000..0f5cabb --- /dev/null +++ b/fern/products/atoms/pages/platform/analytics/testing.mdx @@ -0,0 +1,58 @@ +--- +title: "Testing Your Agent" +sidebarTitle: "Testing" +description: "Validate your agent before deploying to production." +--- + +Testing is the difference between a great agent and a frustrating one. Atoms gives you three ways to test, so you can catch issues before real callers do. + +**Location:** Top right → **Test Agent** button + +--- + +## Three Test Modes + + + + + ![Web call testing](../building-agents/images/test-webcall.png) + + + **Voice call in your browser.** Quick and convenient — no phone needed. + + Best for rapid testing during development. + + + + + ![Telephony testing](../building-agents/images/test-telephony.png) + + + **Real phone call.** The authentic experience — exactly what your callers will hear. + + Best for final validation before launch. + + + + + ![Chat testing](../building-agents/images/test-chat.png) + + + **Text-only conversation.** Test conversation logic without voice. + + Best for testing branching and prompts quickly. 
+ + + +--- + +## Related + + + + Review call transcripts + + + Protect production agents + + diff --git a/fern/products/atoms/pages/platform/analytics/understanding-metrics.mdx b/fern/products/atoms/pages/platform/analytics/understanding-metrics.mdx new file mode 100644 index 0000000..d2d6772 --- /dev/null +++ b/fern/products/atoms/pages/platform/analytics/understanding-metrics.mdx @@ -0,0 +1,174 @@ +--- +title: "Understanding Metrics" +sidebarTitle: "Understanding Metrics" +description: "Learn what each metric means and how to interpret it." +--- + +The analytics dashboard shows many metrics. This page explains what each one means, how it's calculated, and what insights you can draw. + +--- + +## Call Volume Metrics + +### Total Calls + +The number of calls handled in the selected period. + +**Includes:** +- Inbound calls answered +- Outbound calls connected +- Test calls (if enabled) + +**Use it to:** +- Track overall usage +- Identify busy periods +- Plan capacity + +### Inbound vs Outbound + +| Type | Source | +|------|--------| +| **Inbound** | Customers calling your numbers | +| **Outbound** | Campaigns calling customers | + +**Ratio insight:** +- High inbound = reactive support +- High outbound = proactive engagement + +--- + +## Duration Metrics + +### Average Call Duration + +Total call time ÷ Number of calls + +**What it indicates:** +- Short calls → Quick resolutions or early hang-ups +- Long calls → Complex issues or engaged conversations +- Very long calls → Possible stuck conversations + +**Benchmarks vary by use case:** +- Simple FAQ: 1-2 minutes +- Support: 3-5 minutes +- Sales qualification: 5-10 minutes + +### Total Call Time + +Sum of all call durations. 
Useful for: +- Cost estimation +- Capacity planning +- Usage tracking + +--- + +## Outcome Metrics + +### Completion Rate + +Calls that ended normally ÷ Total calls + +**"Normally" means:** +- End Call function triggered +- Caller said goodbye +- Transfer completed + +**Does NOT include:** +- Caller hung up unexpectedly +- Technical failures +- Timeouts + +### Success Rate + +Calls achieving desired outcome ÷ Total calls + +Based on **disposition** or **post-call metrics**. + +**Examples:** +- Support: Issue resolved +- Sales: Lead qualified +- Reminders: Appointment confirmed + +### Transfer Rate + +Calls transferred to human ÷ Total calls + +**High transfer rate may indicate:** +- Complex issues beyond agent capability +- Prompt needs improvement +- Knowledge Base gaps +- Appropriate escalation (not always bad) + +--- + +## Quality Metrics + +### Customer Satisfaction + +From post-call metrics (if configured). + +**Common scales:** +- 1-5 rating +- NPS (0-10) +- Thumbs up/down + +**Use it to:** +- Track service quality +- Identify problem areas +- Measure improvements + +### Resolution Rate + +Issues resolved on first call. + +**High resolution rate = efficient agent** + +Low resolution may indicate: +- Incomplete knowledge +- Process issues +- Complex problems + +--- + +## Time-Based Analysis + +### Peak Hours + +When do most calls occur? + +**Use it to:** +- Staff humans appropriately +- Schedule campaigns effectively +- Expect higher/lower volumes + +### Day-of-Week Patterns + +Some days are busier than others. + +**Use it to:** +- Plan maintenance windows +- Schedule campaigns +- Set expectations + +### Trends Over Time + +Is performance improving? 
+ +| Trend | Meaning | +|-------|---------| +| Success rate increasing | Agent improving | +| Duration decreasing | More efficient | +| Volume increasing | Growing usage | + +--- + +## What's Next + + + + Act on your insights + + + Configure custom metrics + + diff --git a/fern/products/atoms/pages/platform/building-agents/configuration/agent-settings/language-settings.mdx b/fern/products/atoms/pages/platform/building-agents/configuration/agent-settings/language-settings.mdx new file mode 100644 index 0000000..69bfef5 --- /dev/null +++ b/fern/products/atoms/pages/platform/building-agents/configuration/agent-settings/language-settings.mdx @@ -0,0 +1,97 @@ +--- +title: "Language Settings" +sidebarTitle: "Language" +description: "Configure language and localization for your agent." +--- + +Language settings control what language your agent speaks and understands. Get this right and conversations feel natural. Get it wrong and callers struggle to communicate. + +--- + +## Primary Language + +Set the primary language during agent creation or in settings. This affects: + +**Speech Recognition** +What language the system expects to hear from callers. Optimized for the accents and vocabulary of that language. + +**Text-to-Speech** +What language the agent speaks in. Pronunciation, rhythm, and intonation are tuned for that language. + +**LLM Processing** +The language context the AI uses when understanding and generating responses. + +--- + +## Supported Languages + +Atoms supports a growing list of languages. Common options include: + +| Language | Code | Notes | +|----------|------|-------| +| English (US) | en-US | Default, most tested | +| English (UK) | en-GB | British pronunciation | +| Spanish | es | Latin American and Castilian variants | +| French | fr | France and Canadian variants | +| German | de | Standard German | +| Portuguese | pt-BR | Brazilian Portuguese | +| Hindi | hi | Indian Hindi | +| And more... 
| | Check the language dropdown for full list | + +--- + +## Multi-Language Support + +For agents that need to handle multiple languages: + +### Language Detection + +Enable automatic language detection. The agent listens to the first few seconds and switches to match the caller's language. + +### Language Switching + +You can design flows that explicitly ask about language preference: + +``` +"Hello! Para español, diga 'español'. For English, say 'English'." +``` + +Then route to language-specific nodes or prompts. + +--- + +## Language in Prompts + +Write prompts in the language your agent will speak. If your agent speaks Spanish, write your prompts in Spanish: + +``` +Hola, soy María del servicio al cliente de TechStore. +¿En qué puedo ayudarte hoy? +``` + +The LLM understands prompts in most languages and responds appropriately. + +--- + + +**Test with native speakers.** Automated testing catches errors, but native speakers catch awkward phrasing and cultural mismatches. + +**Consider regional variants.** Spanish in Mexico sounds different from Spanish in Spain. Choose the variant that matches your audience. + +**Keep translations consistent.** If you support multiple languages, maintain the same quality across all of them. A great English experience with a bad Spanish translation frustrates bilingual customers. + +**Handle code-switching.** Some callers mix languages ("Spanglish"). Decide whether your agent should match this or stick to one language. 
+ + +--- + +## Next Steps + + + + Choose a voice that matches your language + + + Configure how calls conclude + + diff --git a/fern/products/atoms/pages/platform/building-agents/configuration/agent-settings/llm-selection.mdx b/fern/products/atoms/pages/platform/building-agents/configuration/agent-settings/llm-selection.mdx new file mode 100644 index 0000000..64dfe77 --- /dev/null +++ b/fern/products/atoms/pages/platform/building-agents/configuration/agent-settings/llm-selection.mdx @@ -0,0 +1,105 @@ +--- +title: "LLM Selection" +sidebarTitle: "LLM" +description: "Choose the AI model that powers your agent's understanding and responses." +--- + +The LLM (Large Language Model) is the brain behind your agent. It's what understands caller intent, generates natural responses, and decides what to do next. Different models offer different tradeoffs between intelligence, speed, and cost. + +--- + +## Available Models + +Atoms supports multiple LLM providers and models. Choose based on your use case: + +| Model | Best For | Characteristics | +|-------|----------|-----------------| +| **GPT-4.1** | Complex conversations, nuanced understanding | Most capable, higher cost, slightly slower | +| **GPT-4o-mini** | Balanced performance | Good capability, moderate cost, fast | +| **Albert** | High-volume, simple interactions | Fast, cost-effective, handles routine conversations well | + +--- + +## How to Choose + +### Choose GPT-4.1 when: +- Conversations are complex or nuanced +- You need the best possible understanding +- Accuracy matters more than cost +- Callers may ask unexpected or difficult questions + +### Choose GPT-4o-mini when: +- You want a balance of capability and cost +- Conversations are moderately complex +- Speed matters but so does quality + +### Choose Albert when: +- You're handling high call volumes +- Conversations follow predictable patterns +- Speed and cost are primary concerns +- You're running outbound campaigns at scale + +--- + +## Model Settings + 
+Beyond choosing a model, you can configure behavior: + +### Temperature + +Controls how "creative" vs "predictable" the model behaves. + +| Value | Behavior | Best For | +|-------|----------|----------| +| **0.0 - 0.3** | Consistent, factual | Support, FAQ, compliance | +| **0.4 - 0.6** | Balanced | General conversations | +| **0.7 - 1.0** | Creative, varied | Sales, engagement | + +Lower temperature = more predictable responses. Higher temperature = more variety and creativity. + +### Max Tokens + +Limits response length. For voice agents, keep this moderate (100-300 tokens). Long responses lose caller attention. + +--- + +## Impact on Agent Behavior + +The LLM affects: + +**Understanding** +How well the agent interprets what callers mean, even when they speak unclearly or use unexpected phrasing. + +**Response Quality** +How natural, helpful, and appropriate the agent's responses are. + +**Prompt Following** +How closely the agent follows your instructions in the system prompt. + +**Edge Case Handling** +How well the agent handles unusual situations not explicitly covered in your prompt. + +--- + + +**Start with GPT-4.1, optimize later.** Build and test with the best model first. Once your agent works well, try cheaper models to see if quality holds up. + +**Match model to use case.** A complex sales conversation needs more intelligence than "Press 1 for billing, press 2 for support." + +**Monitor conversations.** Review conversation logs to see if the model is understanding callers correctly. Frequent misunderstandings might mean you need a more capable model. + +**Consider cost at scale.** For outbound campaigns with thousands of calls, model cost adds up. Test with Albert to see if it handles your specific use case. 
+ + +--- + +## Next Steps + + + + Configure language and localization + + + Choose how your agent sounds + + diff --git a/fern/products/atoms/pages/platform/building-agents/configuration/agent-settings/voice-selection.mdx b/fern/products/atoms/pages/platform/building-agents/configuration/agent-settings/voice-selection.mdx new file mode 100644 index 0000000..8850d6f --- /dev/null +++ b/fern/products/atoms/pages/platform/building-agents/configuration/agent-settings/voice-selection.mdx @@ -0,0 +1,97 @@ +--- +title: "Voice Selection" +sidebarTitle: "Voice" +description: "Choose how your agent sounds." +--- + +Voice is the first thing callers notice. Before they hear what your agent says, they hear *how* it says it. A warm, natural voice builds trust. A robotic voice makes people hang up. + +Atoms offers a range of high-quality voices optimized for real-time phone conversations. + +--- + +## Choosing a Voice + +When creating or editing an agent, you'll find voice selection in the agent settings. Click to browse available voices and preview each one before choosing. + +### What to Consider + +**Match your brand** +A luxury concierge service needs a different voice than a tech support line. Think about your brand's personality — formal or casual? Energetic or calm? + +**Match your audience** +Consider who's calling. Different demographics respond to different voice characteristics. 
+ +**Match the use case** +- **Customer support** — Calm, patient, reassuring +- **Sales** — Energetic, confident, engaging +- **Healthcare** — Warm, empathetic, professional +- **Financial services** — Authoritative, trustworthy, clear + +### Available Voice Characteristics + +| Characteristic | Options | +|----------------|---------| +| **Gender** | Male, Female | +| **Tone** | Professional, Friendly, Casual, Authoritative | +| **Energy** | Calm, Neutral, Upbeat | +| **Accent** | American, British, Australian, Indian, and more | + +--- + +## Voice Quality + +All voices on Atoms are powered by **Waves**, our text-to-speech engine optimized for real-time telephony. This means: + +**Ultra-low latency** +Responses start playing immediately. No awkward pauses while audio generates. + +**Natural prosody** +Voices emphasize words naturally, pause appropriately, and sound human — not robotic. + +**Phone-optimized** +Audio is tuned for phone call quality, not podcast production. It sounds clear over phone networks. + +--- + +## Premium Voices + +Some voices are marked as Premium and may have additional per-minute costs. Premium voices typically offer: +- More natural intonation +- Better handling of complex sentences +- Enhanced emotional range + +Check the pricing page for current Premium voice rates. + +--- + +## Voice Cloning (Enterprise) + +For enterprise customers, Atoms supports custom voice cloning. Train the system on audio samples to create a voice unique to your brand. + +Contact sales for voice cloning capabilities. + +--- + + +**Preview with your actual prompts.** The same voice sounds different saying "Welcome to Acme Support" vs "Your payment of $1,247.83 is due tomorrow." + +**Test on actual phone calls.** Browser preview sounds different than phone audio. Make test calls before deploying. + +**Consider multilingual needs.** If your agent handles multiple languages, make sure your chosen voice supports them well. 
+ +**Get feedback.** Play sample calls for colleagues or customers. What sounds good to you might not resonate with your audience. + + +--- + +## Next Steps + + + + Choose the AI model powering your agent + + + Configure language and localization + + diff --git a/fern/products/atoms/pages/platform/building-agents/configuration/call-controls/end-call.mdx b/fern/products/atoms/pages/platform/building-agents/configuration/call-controls/end-call.mdx new file mode 100644 index 0000000..ec299c5 --- /dev/null +++ b/fern/products/atoms/pages/platform/building-agents/configuration/call-controls/end-call.mdx @@ -0,0 +1,143 @@ +--- +title: "End Call Configuration" +sidebarTitle: "End Call" +description: "Configure how and when your agent ends conversations." +--- + +Every call needs to end eventually. How it ends matters — a graceful conclusion leaves callers satisfied, while an abrupt cutoff frustrates them. + +End call settings control when your agent concludes conversations and what happens when it does. + +--- + +## How End Call Works + +### In Single Prompt Agents + +End call behavior is defined in your prompt's "End Call Conditions" section. You write natural language rules: + +``` +End the call when: +- Customer confirms their issue is resolved +- Customer explicitly asks to end the call +- Customer says they have no more questions +``` + +The AI interprets these conditions and ends the call when appropriate. + +### In Convo Flow Agents + +You explicitly place **End Call** nodes in your workflow. When the conversation reaches one of these nodes, the call ends. + +``` +Confirmation → End Call (Disposition: Successful) +Not Interested → End Call (Disposition: Unsuccessful) +``` + +--- + +## Configuring End Behavior + +### Farewell Message + +What the agent says before hanging up. Make it warm and professional: + +``` +Thanks for calling TechStore! We hope we were able to help. +Have a wonderful day! 
+``` + +Avoid abrupt endings like "Goodbye" followed by immediate disconnection. + +### Disposition Tagging + +Tag call outcomes for analytics: + +| Disposition | Meaning | +|-------------|---------| +| **Successful** | Goal achieved — booking made, question answered, issue resolved | +| **Unsuccessful** | Goal not achieved — not interested, couldn't help, wrong number | +| **Transferred** | Handed off to a human agent | +| **Voicemail** | Left a message (outbound only) | +| **No Answer** | Caller didn't pick up (outbound only) | +| **Callback Requested** | Caller asked for a follow-up call | + +Dispositions appear in your analytics and help measure agent performance. + +--- + +## Automatic End Conditions + +Beyond explicit end triggers, calls can end automatically: + +### Silence Timeout + +If the caller stops responding for too long, the agent should handle it gracefully: + +``` +"Hello? Are you still there?" +[Wait 5 seconds] +"I haven't heard from you, so I'll let you go. +Feel free to call back if you need anything!" +``` + +Configure the timeout duration and the agent's response. + +### Max Call Duration + +Set a maximum call length as a safety net. Very long calls may indicate the conversation is stuck or the caller is not engaging productively. + +### Error Conditions + +What happens if something breaks — API failure, voice synthesis error, etc. Configure fallback behavior: + +``` +"I'm experiencing a technical issue. Let me transfer you to someone who can help." +``` + +--- + +## Post-Call Actions + +After a call ends, you can trigger actions: + +**CRM Updates** +Log the call outcome, duration, and summary to your CRM. + +**Follow-up Emails** +Send confirmation emails or summaries to the caller. + +**Ticket Creation** +Create support tickets for unresolved issues. + +**Analytics** +Track custom metrics and conversion events. + +In Convo Flow agents, use **Post-Call API** nodes for these actions. 
In Single Prompt agents, configure them in the integrations panel. + +--- + + +**Always give a proper goodbye.** Even if the caller says "thanks, bye" quickly, have the agent say something warm before disconnecting. + +**Don't end too early.** Make sure your conditions don't trigger on partial confirmations. "Yeah, I think so" isn't the same as "Yes, I'm all set." + +**Don't end too late.** If the caller clearly wants to go, let them. Repeated "is there anything else?" after they've said goodbye is annoying. + +**Handle silence gracefully.** Background noise, thinking pauses, and distractions happen. Give callers a chance before assuming they're gone. + +**Tag dispositions accurately.** Good analytics depend on honest categorization. Don't mark failed calls as successful. + + +--- + +## Next Steps + + + + Hand calls off to humans + + + Test your end call behavior + + diff --git a/fern/products/atoms/pages/platform/building-agents/configuration/call-controls/transfer-call.mdx b/fern/products/atoms/pages/platform/building-agents/configuration/call-controls/transfer-call.mdx new file mode 100644 index 0000000..6139b37 --- /dev/null +++ b/fern/products/atoms/pages/platform/building-agents/configuration/call-controls/transfer-call.mdx @@ -0,0 +1,161 @@ +--- +title: "Transfer Call Configuration" +sidebarTitle: "Transfer Call" +description: "Configure how your agent hands calls off to human agents." +--- + +Sometimes AI isn't enough. Complex issues, upset customers, high-value opportunities, or regulatory requirements may need a human touch. Transfer call settings control how your agent hands conversations to people. + +--- + +## When to Transfer + +Common transfer scenarios: + +**Escalation requests** +Caller explicitly asks to speak with a person: "Let me talk to a real human." + +**Complex issues** +Problems the AI can't solve: technical issues outside scope, edge cases, account-specific problems requiring manual intervention. 
+ +**High-value opportunities** +Sales conversations that need closing by a human: qualified leads, large deals, hesitant buyers who need persuasion. + +**Compliance requirements** +Certain topics may legally require human handling: financial advice, medical diagnoses, legal matters. + +**Caller frustration** +When a caller is upset and the AI isn't helping: "This is ridiculous, I've already explained this three times." + +--- + +## Configuring Transfers + +### Transfer Destination + +Where the call goes: + +| Type | Format | Use Case | +|------|--------|----------| +| **Phone Number** | +1-555-123-4567 | Direct line to support team | +| **SIP Address** | sip:support@company.com | VoIP system | +| **Queue** | sales-queue | Call center routing | + +### Hold Experience + +What the caller experiences while waiting: + +**Hold Message** +What the caller hears while waiting to connect: +``` +"I'm connecting you with a specialist now. +Please hold for just a moment." +``` + +**Hold Music** +Background audio during the transfer. Keep it professional and not too long — transfers should be quick. + +**Estimated Wait** +If known, tell callers how long they'll wait: "Your estimated wait time is about 2 minutes." + +### Whisper Message + +What the human agent hears *before* connecting (the caller doesn't hear this): + +``` +Incoming transfer from AI assistant. +Customer: John Smith +Account: Premium +Issue: Billing dispute - claims overcharge of $45 +Previous attempts: AI tried to explain charges, customer not satisfied +``` + +Whisper messages give context so humans don't start from scratch. 
+ +--- + +## Transfer in Single Prompt Agents + +Configure transfer behavior in your prompt's Dos and Don'ts section: + +``` +When to transfer: +- Caller explicitly requests a human agent +- Issue involves account security changes +- Caller mentions legal action or regulatory complaints +- You've failed to resolve the issue after 2 attempts + +When transferring: +- Apologize briefly for not being able to help directly +- Tell them what's about to happen +- Stay warm and professional +``` + +Then configure the transfer destination in the agent settings panel. + +--- + +## Transfer in Convo Flow Agents + +Use **Transfer Call** nodes in your workflow: + +1. Drag a Transfer Call node onto the canvas +2. Configure the destination, hold message, and whisper +3. Connect branches that should lead to transfer + +Create branches for different transfer scenarios: +- "User requests human" → Transfer to support +- "User mentions legal" → Transfer to compliance +- "High-value lead" → Transfer to sales + +--- + +## Transfer Failures + +What happens when transfer fails — busy line, no answer, technical issue: + +### Fallback Options + +| Option | When to Use | +|--------|-------------| +| **Voicemail** | "I'll transfer you to voicemail so you can leave a message." | +| **Callback** | "Our team is busy. Can we call you back within the hour?" | +| **Alternate Number** | "Let me try another line..." | +| **Retry** | Attempt transfer again after brief hold | + +### Graceful Handling + +``` +"I'm having trouble reaching the team right now. +Would you prefer to leave a callback number, +or should I try again in a moment?" +``` + +Never leave callers hanging. Always have a backup plan. + +--- + + +**Set expectations before transferring.** Tell callers what's happening, where they're going, and roughly how long it might take. + +**Use whisper messages effectively.** A well-crafted whisper saves time and prevents callers from repeating themselves. 
+ +**Monitor transfer rates.** If too many calls transfer, your AI might not be configured well. Investigate why and improve the prompt. + +**Keep hold times short.** Long holds after "let me transfer you" frustrate callers. If your team can't answer quickly, offer callbacks. + +**Train your human team.** Make sure they know calls are coming from an AI agent and how to pick up the context from whisper messages. + + +--- + +## Next Steps + + + + Configure call endings + + + Test your transfer flows + + diff --git a/fern/products/atoms/pages/platform/building-agents/configuring/api-calls.mdx b/fern/products/atoms/pages/platform/building-agents/configuring/api-calls.mdx new file mode 100644 index 0000000..4b24165 --- /dev/null +++ b/fern/products/atoms/pages/platform/building-agents/configuring/api-calls.mdx @@ -0,0 +1,158 @@ +--- +title: "API Calls" +sidebarTitle: "API Calls" +description: "Connect your agent to external services and data." +--- + +API Calls let your agent fetch and send data during conversations. Look up customer information, check order status, book appointments, create tickets — all in real-time while the conversation is happening. + +**Location:** Config Panel (right sidebar) → API Calls toggle + +--- + +## Setup + +1. Toggle **API Calls** ON +2. Click ⚙️ to open settings +3. Click **+ Add API Call** + +--- + +## Configuration + +The API Call modal has two main sections. 
+ +### Basic Setup & API Configuration + + + ![API call basic setup](../images/api-call-basic.png) + + +| Field | Required | Description | +|-------|----------|-------------| +| **Name** | Yes | Identifier for this API call (e.g., `lookup_customer`) | +| **Description** | Yes | Tells the AI when to trigger this call | +| **LLM Parameters** | No | Parameters the LLM can populate dynamically | +| **URL** | Yes | The API endpoint | +| **Method** | Yes | GET, POST, PUT, or DELETE | +| **Timeout (ms)** | Yes | How long to wait before failing (default: 5000) | + +### Headers, Parameters & Response Extraction + + + ![API call advanced setup](../images/api-call-advanced.png) + + +| Field | Purpose | +|-------|---------| +| **Headers** | Request headers (Authorization, Content-Type, etc.) | +| **Query Parameters** | URL parameters for GET requests | +| **Response Variable Extraction** | Map response fields to variables you can use in prompts | + +--- + +## LLM Parameters + +These are values the AI determines during conversation and passes to your API. Click **+ Add Parameter** to define what the AI should collect. + +For example, if you need an order ID to look up status, add a parameter called `order_id` — the AI will extract it from the conversation and include it in the request. + +--- + +## Response Variable Extraction + +This is where the magic happens. Map fields from the API response to variables you can use in your prompts. + +Click **+ Add Variable** and specify: +- The path in the JSON response (e.g., `customer.name`) +- The variable name (e.g., `customer_name`) + +Then use `{{api.customer_name}}` in your prompt. 
+ +--- + +## Example: Customer Lookup + + + + | Field | Value | + |-------|-------| + | **Name** | `lookup_customer` | + | **Description** | "Look up customer information when the call starts" | + | **URL** | `https://crm.example.com/api/customers` | + | **Method** | GET | + | **Query Parameters** | `phone: {{caller_phone}}` | + + + + | Response Path | Variable | + |---------------|----------| + | `customer.name` | `{{api.customer_name}}` | + | `customer.tier` | `{{api.tier}}` | + | `customer.last_order` | `{{api.last_order}}` | + + + + ``` + Hello {{api.customer_name}}! I see you're a {{api.tier}} + member. Your last order is {{api.last_order}}. + ``` + + + +--- + +## Using Variables in API Calls + +You can use variables anywhere in your API configuration: + +**In URL:** +``` +https://api.example.com/orders/{{collected.order_id}} +``` + +**In Headers:** +``` +Authorization: Bearer {{api_key}} +``` + +**In Query Parameters:** +``` +phone: {{caller_phone}} +``` + +--- + +## Tips + + + The description tells the AI when to make this call. Be specific: + + | Vague | Specific | + |-------|----------| + | "Get customer" | "Look up customer information using their phone number when the call starts" | + | "Check order" | "Fetch order status when the customer provides an order number" | + + + + APIs can fail. In your prompt, tell the agent what to do: + + "If customer data isn't available, greet generically and ask for their name. Don't mention that a lookup failed." + + + + Default is 5000ms (5 seconds). For slow APIs, increase this — but remember the caller is waiting. 
+ + +--- + +## Related + + + + Use API response data in prompts + + + Send data after calls complete + + diff --git a/fern/products/atoms/pages/platform/building-agents/configuring/editor-overview.mdx b/fern/products/atoms/pages/platform/building-agents/configuring/editor-overview.mdx new file mode 100644 index 0000000..af2182b --- /dev/null +++ b/fern/products/atoms/pages/platform/building-agents/configuring/editor-overview.mdx @@ -0,0 +1,173 @@ +--- +title: "The Agent Editor" +sidebarTitle: "Editor Overview" +description: "Your workspace for building, configuring, and testing voice agents." +--- + +The editor is where your agent comes to life. It's the workspace you'll return to every time you want to tweak behavior, adjust settings, or test a new approach. Understanding the layout means you'll always know exactly where to go. + +This page walks you through every part of the interface — what each area does, why you'd use it, and where to dive deeper. + + + ![Single Prompt editor](../images/sp-editor-full.png) + + +--- + +## The Header + +At the very top, you'll find navigation and identification. + + + ![Editor header](../images/editor-header.png) + + +**← Back arrow** takes you to your agents list. Use it when you're done editing or want to switch to a different agent. + +**Agent name** is clickable — rename your agent anytime. Pick something meaningful; you'll thank yourself when you have twenty agents. + +**Agent ID** is the unique identifier for API calls and integrations. Click to copy it. + +--- + +## The Prompt Section + +This bar runs across the top and controls the fundamentals: which AI model powers your agent, what voice it speaks with, and what language it uses. + + + ![Prompt section](../images/prompt-section.png) + + +These three choices shape the foundation of your agent's personality: + +- **Model** determines intelligence and capability. GPT-4.1 is recommended for most cases. +- **Voice** is what callers hear. 
Preview voices before choosing — the right voice builds trust instantly. +- **Language** sets the primary language for responses. Multi-language support is configured separately. + +→ [Model Selection](/atoms/atoms-platform/single-prompt-agents/prompt-section/model-selection) · [Voice Selection](/atoms/atoms-platform/single-prompt-agents/prompt-section/voice-selection) · [Language Selection](/atoms/atoms-platform/single-prompt-agents/prompt-section/language-selection) + +--- + +## The Prompt Editor + +The center of the screen is where you write. This is your agent's brain — the instructions that define who it is, what it knows, and how it behaves. + + + ![Prompt editor](../images/prompt-editor.png) + + +Everything flows from what you write here. A good prompt covers identity, knowledge, behavior guidelines, and end conditions. You'll refine it over time based on real conversations. + +→ [Writing Effective Prompts](/atoms/atoms-platform/single-prompt-agents/prompt-section/writing-prompts) + +--- + +## The Configuration Panel + +The right sidebar holds your agent's capabilities — the tools it can use beyond just talking. + + + ![Config panel](../images/config-panel.png) + + +Each toggle enables a feature: + +**End Call** defines when and how your agent hangs up. Without this, calls may never end gracefully. + +**Transfer Call** lets your agent hand off to humans — either immediately (cold) or with a briefing (warm). + +**Knowledge Base** connects reference documents your agent can search during conversations. + +**Variables** enable personalization with dynamic values — caller names, account details, collected information. + +**API Calls** let your agent fetch and send data mid-conversation — look up orders, check availability, create tickets. 
+ +→ [End Call](/atoms/atoms-platform/single-prompt-agents/configuration-panel/end-call) · [Transfer Call](/atoms/atoms-platform/single-prompt-agents/configuration-panel/transfer-call) · [Knowledge Base](/atoms/atoms-platform/single-prompt-agents/configuration-panel/knowledge-base) · [Variables](/atoms/atoms-platform/single-prompt-agents/configuration-panel/variables) · [API Calls](/atoms/atoms-platform/single-prompt-agents/configuration-panel/api-calls) + +--- + +## The Left Sidebar + +This is your navigation between different configuration areas. + + + ![Left sidebar](../images/left-sidebar.png) + + +**Prompt** brings you back to the main editor (where you are now in Single Prompt agents). + +**Agent Settings** opens detailed configuration tabs — voice tuning, model behavior, webhooks, and timeouts. This is where fine-tuning happens. + +**Widget** configures the embeddable web widget if you want callers to reach your agent from your website. + +**Integrations** connects third-party services like Salesforce directly, without custom API work. + +**Post Call Metrics** defines what data gets extracted from calls automatically — satisfaction scores, outcomes, categories. + +→ [Voice Settings](/atoms/atoms-platform/single-prompt-agents/agent-settings/voice-settings) · [Model Settings](/atoms/atoms-platform/single-prompt-agents/agent-settings/model-settings) · [Webhooks](/atoms/atoms-platform/single-prompt-agents/agent-settings/webhooks) · [General Settings](/atoms/atoms-platform/single-prompt-agents/agent-settings/general-settings) · [Widget](/atoms/atoms-platform/features/widget) · [Integrations](/atoms/atoms-platform/features/integrations) · [Post-Call Metrics](/atoms/atoms-platform/features/post-call-metrics) + +--- + +## The Action Buttons + +Top right corner — the buttons you'll use constantly. + + + ![Action buttons](../images/action-buttons.png) + + +**Convo Logs** shows transcripts of every conversation your agent has had. 
Essential for debugging and improvement. + +**Lock Agent** prevents accidental edits. Turn this on for production agents. You can still test locked agents. + +**Test Agent** is how you try your agent before deploying. Choose from phone call, web call, or text chat. + +→ [Testing Your Agent](/atoms/atoms-platform/analytics-logs/testing) · [Conversation Logs](/atoms/atoms-platform/analytics-logs/conversation-logs) · [Locking Agents](/atoms/atoms-platform/analytics-logs/locking) + +--- + +## For Conversational Flow Agents + +If you're building a Conversational Flow agent instead of Single Prompt, the layout changes slightly. + + + ![Convo Flow editor](../images/cf-editor-full.png) + + +The center becomes a visual canvas where you design workflows with nodes and branches. A **Workflow/Settings toggle** at the top switches between the flow builder and configuration options. + +The left panel becomes a **Node Palette** for dragging conversation steps onto the canvas. + +Everything else — action buttons, left sidebar navigation, agent settings — works the same way. + +→ [Workflow Builder](/atoms/atoms-platform/conversational-flow-agents/workflow-tab/workflow-builder) + +--- + +## Quick Reference + +| I want to... 
| Where to go | +|--------------|-------------| +| Change the AI model | Prompt Section → Model dropdown | +| Pick a different voice | Prompt Section → Voice dropdown | +| Set up call endings | Config Panel → End Call | +| Configure human handoffs | Config Panel → Transfer Call | +| Attach reference documents | Config Panel → Knowledge Base | +| Use dynamic values | Config Panel → Variables | +| Connect to external APIs | Config Panel → API Calls | +| Fine-tune speech speed | Left Sidebar → Agent Settings → Voice | +| Enable language switching | Left Sidebar → Agent Settings → Model | +| Set up event webhooks | Left Sidebar → Agent Settings → Webhook | +| Configure timeouts | Left Sidebar → Agent Settings → General | +| Embed on my website | Left Sidebar → Widget | +| Connect CRM | Left Sidebar → Integrations | +| Set up call analytics | Left Sidebar → Post Call Metrics | +| View past conversations | Top Right → Convo Logs | +| Test my agent | Top Right → Test Agent | +| Prevent accidental edits | Top Right → Lock Agent | + +--- + +## Saving Your Work + +Most changes save automatically as you make them. When they don't, a footer appears: **"You have unsaved changes"** with Discard and Save buttons. Always save before navigating away. diff --git a/fern/products/atoms/pages/platform/building-agents/configuring/end-call.mdx b/fern/products/atoms/pages/platform/building-agents/configuring/end-call.mdx new file mode 100644 index 0000000..50c8af1 --- /dev/null +++ b/fern/products/atoms/pages/platform/building-agents/configuring/end-call.mdx @@ -0,0 +1,116 @@ +--- +title: "End Call" +sidebarTitle: "End Call" +description: "Configure when and how your agent ends calls." +--- + +End Call tells your agent when to hang up. Without proper end conditions, calls may drag on awkwardly or never conclude. Define clear triggers so conversations wrap up naturally. + +**Location:** Config Panel (right sidebar) → End Call toggle + +--- + +## Setup + +1. Toggle **End Call** ON +2. 
Click the ⚙️ icon to open settings +3. Click **+ Add End Call** to create a function + +--- + +## Configuration + + + ![Add end call](../images/end-call-modal.png) + + +Each end call function has two fields: + +| Field | Purpose | Example | +|-------|---------|---------| +| **Name** | Identifier (used internally) | `end_call_resolved` | +| **Description** | Tells the AI when to trigger | "End the call when the customer confirms their issue is resolved" | + +The Description is critical — it's what the AI uses to decide when to end the call. Be specific. + +--- + +## Common End Call Functions + + + + | Field | Value | + |-------|-------| + | **Name** | `end_resolved` | + | **Description** | "End the call when the customer confirms their issue is completely resolved and they have no additional questions." | + + + + | Field | Value | + |-------|-------| + | **Name** | `end_goodbye` | + | **Description** | "End the call when the customer says goodbye, thank you, or indicates they're done." | + + + + | Field | Value | + |-------|-------| + | **Name** | `end_transferred` | + | **Description** | "End the AI portion of the call after successfully transferring to a human agent." | + + + + | Field | Value | + |-------|-------| + | **Name** | `end_no_response` | + | **Description** | "End the call if the caller doesn't respond after three attempts to re-engage them." | + + + +--- + +## Tips + + + | Vague | Specific | + |-------|----------| + | "When done" | "When customer confirms satisfaction" | + | "If goodbye" | "When customer says goodbye, thanks, or indicates they're finished" | + + + + Don't just handle the happy path: + + - Customer explicitly asks to end + - Customer becomes unresponsive after 3 prompts + - Customer requests callback instead + + + + If your main prompt says "end when resolved," your end call description should match that language. 
+ + + + Think through every way a call might conclude: + + | Function | Scenario | + |----------|----------| + | `end_resolved` | Issue fixed, customer happy | + | `end_goodbye` | Customer says goodbye | + | `end_transfer` | After transferring | + | `end_no_response` | Caller stopped responding | + | `end_out_of_scope` | Can't help, directed elsewhere | + + +--- + +## Related + + + + Hand off to human agents + + + Configure idle timeouts + + diff --git a/fern/products/atoms/pages/platform/building-agents/configuring/general-settings.mdx b/fern/products/atoms/pages/platform/building-agents/configuring/general-settings.mdx new file mode 100644 index 0000000..b06cc36 --- /dev/null +++ b/fern/products/atoms/pages/platform/building-agents/configuring/general-settings.mdx @@ -0,0 +1,40 @@ +--- +title: "General Settings" +sidebarTitle: "General Settings" +description: "Configure timeout behavior for idle conversations." +--- + +General Settings control how long your agent waits for a response before prompting the caller — and when to give up. + +**Location:** Left Sidebar → Agent Settings → General tab + + + ![General settings](../images/general-settings.png) + + +--- + +## LLM Idle Timeout Settings + +Configure how long the agent waits for user response before sending an inactivity message. After 3 attempts with no response, the conversation automatically ends. + +| Setting | Default | Description | +|---------|---------|-------------| +| **Chat Timeout** | 60 sec | For text chat conversations | +| **Webcall Timeout** | 20 sec | For browser-based voice calls | +| **Telephony Timeout** | 20 sec | For phone calls | + +Each timeout triggers an inactivity prompt. If the user still doesn't respond after 3 prompts, the agent ends the conversation gracefully. 
+ +--- + +## Related + + + + Speech speed and detection tuning + + + Configure call termination + + diff --git a/fern/products/atoms/pages/platform/building-agents/configuring/integrations.mdx b/fern/products/atoms/pages/platform/building-agents/configuring/integrations.mdx new file mode 100644 index 0000000..289a90e --- /dev/null +++ b/fern/products/atoms/pages/platform/building-agents/configuring/integrations.mdx @@ -0,0 +1,67 @@ +--- +title: "Integrations" +sidebarTitle: "Integrations" +description: "Connect your agent to third-party services." +--- + +Integrations let your agent connect directly to external services like CRMs and calendars — without custom API configuration. Click, authenticate, and you're connected. + +**Location:** Left Sidebar → Integrations + +--- + +## Available Integrations + +| Integration | Status | Description | +|-------------|--------|-------------| +| **Salesforce** | Available | Sync contacts, log calls, update records | +| **HubSpot** | Coming Soon | CRM integration | +| **Google Calendar** | Coming Soon | Appointment scheduling | +| **Google Sheets** | Coming Soon | Data logging and retrieval | + +--- + +## Salesforce Integration + +### Setup + +1. Click **Configure** on the Salesforce card +2. Authenticate with your Salesforce account +3. Map fields between Atoms and Salesforce +4. Enable the integration + +### What It Does + +| Action | When | +|--------|------| +| **Lookup Contact** | On call start, by phone number | +| **Log Call** | On call end, with transcript | +| **Update Record** | After call, with collected data | + +--- + +## In Conversational Flow + +Integrations also appear in the node palette for Conversational Flow agents. 
Use them to: + +- Fetch CRM data mid-conversation +- Create records at specific flow points +- Update records based on conversation outcomes + +--- + +## Coming Soon + +| Integration | Capability | +|-------------|------------| +| **HubSpot** | Full CRM sync — contacts, deals, activities | +| **Google Calendar** | Check availability and book appointments | +| **Google Sheets** | Log data and retrieve from spreadsheets | + +Contact us for enterprise integrations. + +--- + +## Need Something Else? + +For services not listed, use [API Calls](/atoms/atoms-platform/single-prompt-agents/configuration-panel/api-calls) to connect to any REST API. diff --git a/fern/products/atoms/pages/platform/building-agents/configuring/knowledge-base.mdx b/fern/products/atoms/pages/platform/building-agents/configuring/knowledge-base.mdx new file mode 100644 index 0000000..968af0c --- /dev/null +++ b/fern/products/atoms/pages/platform/building-agents/configuring/knowledge-base.mdx @@ -0,0 +1,65 @@ +--- +title: "Knowledge Base" +sidebarTitle: "Knowledge Base" +description: "Give your agent access to reference documents." +--- + +A Knowledge Base is a repository of information your agent can search during conversations. Instead of cramming everything into the prompt, upload documents — product specs, policies, FAQs — and let the agent retrieve what it needs. + +**Location:** Config Panel (right sidebar) → Knowledge Base toggle + +--- + +## Setup + +1. Toggle **Knowledge Base** ON +2. Select a KB from the dropdown + + +You need to create a Knowledge Base first before you can attach it here. See [Creating Knowledge Bases](/atoms/atoms-platform/features/knowledge-base). + + +--- + +## Local vs Global + +| Type | Scope | Best For | +|------|-------|----------| +| **Local** | This agent only | Agent-specific information | +| **Global** | Shared across agents | Company-wide policies, general FAQs | + +--- + +## How It Works + +1. **Caller asks a question** → "What's your return policy?" +2. 
**Agent searches KB** → Finds relevant documents automatically +3. **Agent responds** → Uses retrieved information in the answer + +The agent decides when to search based on the question. You don't need to configure triggers. + +--- + +## Best Practices + + + | Put in KB | Put in Prompt | + |-----------|---------------| + | Product details | Personality and tone | + | Policies | Conversation patterns | + | FAQ answers | End conditions | + | Pricing info | Behavioral guidelines | + + + + Smaller, topic-specific documents retrieve better than massive catch-all files. + + | Better | Worse | + |--------|-------| + | `returns-policy.pdf` | `everything.pdf` | + | `pricing-2024.txt` | `company-info.docx` (500 pages) | + + + + Ask your agent questions that should hit the KB. Verify it finds the right information. + diff --git a/fern/products/atoms/pages/platform/building-agents/configuring/language-selection.mdx b/fern/products/atoms/pages/platform/building-agents/configuring/language-selection.mdx new file mode 100644 index 0000000..f0959f4 --- /dev/null +++ b/fern/products/atoms/pages/platform/building-agents/configuring/language-selection.mdx @@ -0,0 +1,36 @@ +--- +title: "Language Selection" +sidebarTitle: "Language Selection" +description: "Set the primary language for your agent." +--- + +This sets the language your agent speaks by default. Simple as that. + +**Location:** Prompt Section (top bar) → Language dropdown + + +The Prompt Section is only available for **Single Prompt agents**. Conversational Flow agents configure language in [Agent Settings → Model tab](/atoms/atoms-platform/single-prompt-agents/agent-settings/model-settings). + + +--- + +## How It Works + +Click the dropdown, pick your language. The options you see depend on which voice you've selected — different voices support different languages. If you don't see the language you need, try a different voice first. 
+ + +**Need multi-language support?** If your agent should switch languages mid-conversation based on what the caller speaks, configure that in [Model Settings](/atoms/atoms-platform/single-prompt-agents/agent-settings/model-settings). + + +--- + +## Related Settings + + + + Pick your agent's voice + + + Language switching & fine-tuning + + diff --git a/fern/products/atoms/pages/platform/building-agents/configuring/model-selection.mdx b/fern/products/atoms/pages/platform/building-agents/configuring/model-selection.mdx new file mode 100644 index 0000000..0e76057 --- /dev/null +++ b/fern/products/atoms/pages/platform/building-agents/configuring/model-selection.mdx @@ -0,0 +1,54 @@ +--- +title: "Model Selection" +sidebarTitle: "Model Selection" +description: "Choose the right AI model for your agent." +--- + +The model is your agent's brain. It's what understands what callers say, figures out how to respond, and generates the words your agent speaks. Different models have different strengths — some are faster, some are more expressive, some are more affordable. + +**Location:** Prompt Section (top bar) → Model dropdown + + +The Prompt Section is only available for **Single Prompt agents**. Conversational Flow agents configure models in [Agent Settings → Model tab](/atoms/atoms-platform/single-prompt-agents/agent-settings/model-settings). + + + + ![Model dropdown](../images/model-dropdown.png) + + +--- + +## Available Models + +| Model | Provider | Type | Credit Usage | +|-------|----------|------|--------------| +| **Electron** | Smallest AI | Traditional | Lower | +| **GPT-4o** | OpenAI | Traditional | Moderate | +| **GPT-4.1** | OpenAI | Traditional | Moderate | +| **GPT Realtime** | OpenAI | Emotive | Higher | +| **GPT Realtime Mini** | OpenAI | Emotive | Higher | + +**Emotive models** have emotional awareness — they pick up on caller tone and respond with natural expression. 
+ + +**Getting started?** GPT-4o is a great all-rounder — reliable, capable, and well-tested. + + + +**Try Electron.** It's our own model, built specifically for voice. Fast, affordable, and optimized for real-time conversations. + + +You can switch models anytime — nothing breaks when you do. + +--- + +## Related Settings + + + + Pick your agent's voice + + + Language switching & fine-tuning + + diff --git a/fern/products/atoms/pages/platform/building-agents/configuring/model-settings.mdx b/fern/products/atoms/pages/platform/building-agents/configuring/model-settings.mdx new file mode 100644 index 0000000..b3157de --- /dev/null +++ b/fern/products/atoms/pages/platform/building-agents/configuring/model-settings.mdx @@ -0,0 +1,124 @@ +--- +title: "Model Settings" +sidebarTitle: "Model Settings" +description: "Configure AI model, language, and speech behavior." +--- + +Model Settings control how the AI behaves — the model powering responses, language handling, and formatting preferences. The options vary slightly depending on your agent type. + +**Location:** Left Sidebar → Agent Settings → Model tab + +--- + +## What's Different by Agent Type + + + + + ![Model settings - Single Prompt](../images/model-settings-sp.png) + + + Single Prompt agents have: + - AI Model (LLM Model + Language) + - Speech Formatting + - Language Switching + + The prompt itself lives in the Prompt Section (top of editor). + + + + + ![Model settings - Conversational Flow](../images/model-settings-cf.png) + + + Conversational Flow agents have everything above, plus: + - **Global Prompt** — Set personality and behavior instructions + - **Knowledge Base** — Connect reference material + + + +--- + +## AI Model + +Choose the LLM powering your agent and its primary language. + +| Setting | Description | +|---------|-------------| +| **LLM Model** | The AI model (Electron, GPT-4o, etc.) 
| +| **Language** | Primary language for responses | + +For Single Prompt agents, you can also set the model in the [Prompt Section](/atoms/atoms-platform/single-prompt-agents/prompt-section/model-selection) dropdown. + +--- + +## Speech Formatting + +When enabled (default: ON), Speech Formatting automatically applies additional formatting to transcripts to improve readability — including punctuation, paragraphs, and entity formatting like dates, times, and numbers. + +--- + +## Language Switching + +Enable your agent to switch languages mid-conversation based on what the caller speaks (default: ON). + +### Advanced Settings + +When Language Switching is enabled, you can fine-tune how the agent detects language changes: + +| Setting | Range | Default | What it does | +|---------|-------|---------|--------------| +| **Minimum Words for Detection** | 1-10 | 2 | How many words before considering a language switch | +| **Strong Signal Threshold** | 0-1 | 0.7 | Confidence level for immediate switch | +| **Weak Signal Threshold** | 0-1 | 0.3 | Confidence level for tentative detection | +| **Consecutive Weak Signals** | 1-8 | 2 | How many weak signals needed to switch | + +**Understanding the thresholds:** + +- **Strong Signal:** The AI is very confident the caller switched languages → switches immediately. +- **Weak Signal:** The AI is somewhat confident → waits for more evidence. +- **Higher thresholds** = More certain before switching (reduces false switches) +- **Lower thresholds** = Quicker to switch (more responsive, but may false-trigger) + +For most cases, the defaults work well. Adjust only if you're seeing unwanted switching behavior. + +--- + +## Global Prompt (Conversational Flow Only) + +Set global instructions for your agent's personality and behavior. This applies across all nodes in your flow (limit: 4,000 characters). 
+ +Use this to define: +- Personality traits ("friendly and professional") +- Behavioral guidelines ("always confirm before transferring") +- Consistent phrasing or style +- Company-specific instructions + + +This is only available for Conversational Flow agents. Single Prompt agents define everything in their [main prompt](/atoms/atoms-platform/single-prompt-agents/prompt-section/writing-prompts). + + +--- + +## Knowledge Base (Conversational Flow Only) + +Connect a knowledge base to enhance your agent's responses with reference material. + +Click the dropdown to select from your existing knowledge bases, or leave as "No Knowledge Base" if not needed. + + +Single Prompt agents connect knowledge bases via the [Configuration Panel](/atoms/atoms-platform/single-prompt-agents/configuration-panel/knowledge-base) instead. + + +--- + +## Related + + + + Speech speed, pronunciation, and detection + + + Craft effective agent instructions + + diff --git a/fern/products/atoms/pages/platform/building-agents/configuring/phone-number.mdx b/fern/products/atoms/pages/platform/building-agents/configuring/phone-number.mdx new file mode 100644 index 0000000..b496f11 --- /dev/null +++ b/fern/products/atoms/pages/platform/building-agents/configuring/phone-number.mdx @@ -0,0 +1,38 @@ +--- +title: "Phone Number" +sidebarTitle: "Phone Number" +description: "Assign a phone number to your agent." +--- + +The Phone Number tab lets you connect your agent to a phone number for inbound and outbound calls. Once assigned, callers to that number will reach this agent. + +**Location:** Left Sidebar → Agent Settings → Phone Number tab + + + ![Phone number settings](../images/phone-number-settings.png) + + +--- + +## Select Phone Numbers + +Click the dropdown to choose from your available phone numbers. If you haven't set up any numbers yet, you'll see "No phone numbers selected." + +You can assign multiple numbers to the same agent if needed. 
+ + +You need to purchase or configure phone numbers before they appear here. See [Phone Numbers](/atoms/atoms-platform/deployment/phone-numbers) to get started. + + +--- + +## Related + + + + Get and manage phone numbers + + + Set up outbound calling + + diff --git a/fern/products/atoms/pages/platform/building-agents/configuring/post-call-metrics.mdx b/fern/products/atoms/pages/platform/building-agents/configuring/post-call-metrics.mdx new file mode 100644 index 0000000..1e2d744 --- /dev/null +++ b/fern/products/atoms/pages/platform/building-agents/configuring/post-call-metrics.mdx @@ -0,0 +1,161 @@ +--- +title: "Post-Call Metrics" +sidebarTitle: "Post-Call Metrics" +description: "Extract structured data from every conversation automatically." +--- + +Post-call metrics let you pull specific insights from conversations after they end. Define what you want to know — satisfaction scores, call outcomes, issue categories — and Atoms analyzes each call to fill in the answers. + +**Location:** Left Sidebar → Post Call Metrics + + + ![Post-call metrics list](../images/post-call-metrics-list.png) + + +--- + +## How It Works + +1. **You define metrics** — What questions do you want answered about each call? +2. **Call ends** — Conversation completes normally +3. **AI analyzes** — Atoms reviews the transcript against your metrics +4. **Data populated** — Your metrics get filled in automatically +5. **Access anywhere** — View in logs, receive via webhook, export + +--- + +## Creating a New Metric + +Click the **Add Metrics +** button to open the configuration panel. You'll see two options: + + + + + ![Disposition metrics](../images/post-call-disposition.png) + + + Build a custom metric from scratch. Fill in the Identifier, Data Type, and Prompt — see details below. + + Use **Add Another +** to create multiple metrics at once. + + + Don't forget to hit **Save** in the Disposition tab once you're done. 
+ + + + + + ![Metric templates](../images/post-call-templates.png) + + + Choose from pre-built metrics for common use cases. Just select the ones you want — no manual configuration needed. + + + Don't forget to hit **Save** in the Disposition tab once you're done. + + + + +--- + +## Configuring a Metric + +Each metric needs three things: + +| Field | Required | Description | +|-------|----------|-------------| +| **Identifier** | Yes | Unique name for this metric. Lowercase, numbers, underscores only. | +| **Data Type** | Yes | What kind of value: String, Boolean, Integer, Enum, or Datetime | +| **Prompt** | Yes | The question you want answered about the call | + +### Identifier + +This is the key used to reference the metric in exports, webhooks, and the API. + +``` +customer_satisfaction +call_outcome +follow_up_needed +``` + + +**Naming rules:** Lowercase letters, numbers, and underscores only. No spaces or special characters. + + +### Data Type + +| Type | Use for | Example values | +|------|---------|----------------| +| **String** | Free text, categories | "resolved", "escalated", "billing issue" | +| **Boolean** | Yes/no questions | true, false | +| **Integer** | Whole numbers, scores | 1, 5, 10 | +| **Enum** | Fixed set of options | One of: "low", "medium", "high" | +| **Datetime** | Dates and times | "2024-01-15T10:30:00Z" | + +### Prompt + +This is the question the AI answers by analyzing the transcript. Be specific. + +**Good prompts:** +- "Did the agent acknowledge and respond to customer concerns effectively?" +- "Rate customer satisfaction from 1 to 5 based on tone and words used." +- "What was the primary reason for this call? Options: billing, technical, account, other" + +**Vague prompts to avoid:** +- "Was it good?" +- "Customer happy?" + + +**Start with 3-5 metrics.** Too many can slow analysis and clutter your data. Add more as you learn what insights matter most. 
+ + +--- + +## Example Metrics + + + + | Field | Value | + |-------|-------| + | **Identifier** | `call_outcome` | + | **Data Type** | String | + | **Prompt** | "What was the outcome of this call? Options: resolved, escalated, transferred, abandoned, callback_scheduled" | + + + + | Field | Value | + |-------|-------| + | **Identifier** | `satisfaction_score` | + | **Data Type** | Integer | + | **Prompt** | "Rate the customer's apparent satisfaction from 1 to 5, based on their tone and language throughout the call." | + + + + | Field | Value | + |-------|-------| + | **Identifier** | `follow_up_needed` | + | **Data Type** | Boolean | + | **Prompt** | "Does this call require any follow-up action from the team?" | + + + + | Field | Value | + |-------|-------| + | **Identifier** | `issue_category` | + | **Data Type** | Enum | + | **Prompt** | "What was the primary issue category? Options: billing, technical, account, product_info, complaint, other" | + + + +--- + +## Related + + + + View metrics for individual calls + + + See aggregated trends across calls + + diff --git a/fern/products/atoms/pages/platform/building-agents/configuring/transfer-call.mdx b/fern/products/atoms/pages/platform/building-agents/configuring/transfer-call.mdx new file mode 100644 index 0000000..b8e06ed --- /dev/null +++ b/fern/products/atoms/pages/platform/building-agents/configuring/transfer-call.mdx @@ -0,0 +1,164 @@ +--- +title: "Transfer Call" +sidebarTitle: "Transfer Call" +description: "Configure call transfers to human agents." +--- + +Transfer Call lets your agent hand off conversations to humans when needed. You can do a cold transfer (immediate connection) or a warm transfer (AI briefs the human first). + +**Location:** Config Panel (right sidebar) → Transfer Call toggle + +--- + +## Setup + +1. Toggle **Transfer Call** ON +2. Click ⚙️ to open settings +3. 
Click **+ Add Transfer Call** + +--- + +## Configuration + + + ![Transfer call modal](../images/transfer-call-modal.png) + + +| Field | Required | Description | +|-------|----------|-------------| +| **Name** | Yes | Identifier (e.g., `transfer_to_sales`) | +| **Description** | Yes | When to trigger this transfer | +| **Transfer Number** | Yes | Destination with country code | +| **Type** | Yes | Cold or Warm | + +--- + +## Transfer Types + + + + Immediate handoff. The caller connects directly to the destination without any briefing. + + | Pros | Cons | + |------|------| + | Fast | No context for receiving agent | + | Simple | Caller may repeat themselves | + + **Best for:** Simple escalations, high call volume, when speed matters most. + + + + AI briefs the human first. The receiving agent gets context before the caller joins. + + | Pros | Cons | + |------|------| + | Human has context | Slightly longer | + | Better experience | More configuration | + + **Best for:** Complex issues, VIP callers, when continuity matters. + + + +--- + +## Warm Transfer Options + +When you select Warm Transfer, additional settings appear: + + + ![Warm transfer options](../images/transfer-call-warm.png) + + +### During Transfer + +| Setting | Purpose | +|---------|---------| +| **On-hold Music** | What the caller hears while waiting | + +### During Agent Connection + +| Setting | Description | +|---------|-------------| +| **Whisper Message** | Message the agent hears privately before connecting | +| **Handoff Message** | What the AI says to brief the agent (can be dynamic or static) | + +### After Connection + +| Setting | Description | +|---------|-------------| +| **Three-way Message** | Message both parties hear when connected | + +--- + +## Examples + + + + | Field | Value | + |-------|-------| + | **Name** | `transfer_sales` | + | **Description** | "Transfer when the caller expresses strong purchase intent and wants to speak with sales." 
| + | **Type** | Cold | + + + + | Field | Value | + |-------|-------| + | **Name** | `transfer_support` | + | **Description** | "Transfer when the issue requires manual intervention or the caller requests a human." | + | **Type** | Warm | + + + + | Field | Value | + |-------|-------| + | **Name** | `transfer_manager` | + | **Description** | "Transfer when the caller is upset and asks for a manager or supervisor." | + | **Type** | Warm | + + + +--- + +## Tips + + + | Vague | Specific | + |-------|----------| + | "When they ask for help" | "When the caller requests to speak with a human or the issue requires manual intervention" | + | "For sales" | "When the caller expresses strong purchase intent and wants to proceed" | + + + + Only the agent hears this — give them everything they need: + + "Incoming transfer: Customer John calling about a billing dispute. He's been charged twice for order #12345. Already verified his identity." + + + + Both parties hear this — use it for smooth handoffs: + + "I've connected you with Sarah from our billing team. Sarah, John is calling about a duplicate charge on order #12345." + + + + | Transfer | Destination | When | + |----------|-------------|------| + | `transfer_sales` | Sales team | Purchase interest | + | `transfer_support` | Support team | Technical issues | + | `transfer_billing` | Billing team | Payment questions | + | `transfer_manager` | Manager | Escalations | + + +--- + +## Related + + + + Configure call termination + + + Get notified on transfers + + diff --git a/fern/products/atoms/pages/platform/building-agents/configuring/variables.mdx b/fern/products/atoms/pages/platform/building-agents/configuring/variables.mdx new file mode 100644 index 0000000..df3bb40 --- /dev/null +++ b/fern/products/atoms/pages/platform/building-agents/configuring/variables.mdx @@ -0,0 +1,154 @@ +--- +title: "Variables" +sidebarTitle: "Variables" +description: "Use dynamic values in your prompts and conversations." 
+--- + +Variables let you personalize conversations with dynamic data. Instead of static text, insert values that change based on the caller, API responses, or information collected during the call. + +**Location:** Config Panel (right sidebar) → Variables + +--- + +## Variable Types + + + + + ![User defined variables](../images/variables-user.png) + + + Variables you create with default values. + + | Example | Use Case | + |---------|----------| + | `{{company_name}}` | Your company name | + | `{{support_hours}}` | Operating hours | + | `{{promo_code}}` | Current promotion | + + + + + ![System variables](../images/variables-system.png) + + + Predefined variables provided by the platform. They are generated at runtime, read-only, and always available. + + | Variable | Description | + |----------|-------------| + | `{{caller_phone}}` | Caller's phone number | + | `{{call_time}}` | When call started | + | `{{call_duration}}` | Elapsed seconds | + | `{{call_direction}}` | "inbound" or "outbound" | + | `{{agent_id}}` | This agent's ID | + | `{{call_id}}` | Unique call identifier | + + + + + ![API variables](../images/variables-api.png) + + + Variables populated from API responses. + + | Syntax | Source | + |--------|--------| + | `{{api.customer_name}}` | API response field | + | `{{api.account_tier}}` | API response field | + + → See [API Calls](/atoms/atoms-platform/single-prompt-agents/configuration-panel/api-calls) + + + +--- + +## Syntax + +Use double curly braces: + +``` +{{variable_name}} +``` + +--- + +## Example + +**In your prompt:** + +``` +Hello {{customer_name}}! Thanks for calling {{company_name}}. + +I see you're a {{api.tier}} member. Your last order was +{{api.last_order_status}}. + +How can I help you today? +``` + +**At runtime:** + +``` +Hello Sarah! Thanks for calling Acme Corp. + +I see you're a Premium member. Your last order was +shipped on Monday. + +How can I help you today? 
+``` + +--- + +## Default Values + +Handle missing variables gracefully: + +``` +Hello {{customer_name|there}}! +``` + +If `customer_name` is empty → "Hello there!" + +--- + +## Creating User Variables + +1. Open the Variables panel +2. Go to **User Defined** tab +3. Click **+ Add Variable** +4. Enter name and default value +5. Use in prompts with `{{name}}` syntax + +--- + +## Best Practices + + + | Good | Bad | + |------|-----| + | `{{customer_first_name}}` | `{{n}}` | + | `{{appointment_date}}` | `{{d1}}` | + + + + What if the variable is empty? + + - Use default values: `{{name|there}}` + - Handle in prompt: "If customer name is unknown, greet generically" + + + + Verify variables replace correctly. Check Convo Logs to see actual values. + + +--- + +## Related + + + + Fetch data from external APIs + + + Use variables in your prompts + + diff --git a/fern/products/atoms/pages/platform/building-agents/configuring/voice-selection.mdx b/fern/products/atoms/pages/platform/building-agents/configuring/voice-selection.mdx new file mode 100644 index 0000000..193bbbe --- /dev/null +++ b/fern/products/atoms/pages/platform/building-agents/configuring/voice-selection.mdx @@ -0,0 +1,49 @@ +--- +title: "Voice Selection" +sidebarTitle: "Voice Selection" +description: "Choose and preview voices for your agent." +--- + +Your agent's voice is often the first thing callers notice. It sets the tone before a single word of your prompt matters. A voice that fits your brand builds immediate trust — one that doesn't can undermine even the best conversation design. + +**Location:** Prompt Section (top bar) → Voice dropdown + + +The Prompt Section is only available for **Single Prompt agents**. Conversational Flow agents configure voices in [Agent Settings → Voice tab](/atoms/atoms-platform/single-prompt-agents/agent-settings/voice-settings). + + + + ![Voice picker](../images/voice-picker.png) + + +--- + +## Finding the Right Voice + +Click the Voice dropdown to open the picker. 
You'll see: + +| Section | What it does | +|---------|--------------| +| **Search** | Find voices by name or characteristic | +| **Filters** | Narrow by language, accent, age, gender, or model type | +| **Currently Used** | Your selected voice at the top | +| **All Voices** | The full library to browse | + +Take your time here. Filter by the language you need, then explore. Every voice has a personality — some are warm and reassuring, others confident and direct. + + +**Always preview.** Click the ▶️ button next to any voice to hear a sample. Listen with your prompt in mind — does this voice sound like the agent you've written? + + +--- + +## Related Settings + + + + Speed, pronunciation & turn-taking + + + Set the primary language + + diff --git a/fern/products/atoms/pages/platform/building-agents/configuring/voice-settings.mdx b/fern/products/atoms/pages/platform/building-agents/configuring/voice-settings.mdx new file mode 100644 index 0000000..3092f6d --- /dev/null +++ b/fern/products/atoms/pages/platform/building-agents/configuring/voice-settings.mdx @@ -0,0 +1,166 @@ +--- +title: "Voice Settings" +sidebarTitle: "Voice Settings" +description: "Fine-tune speech behavior, pronunciation, and voice detection." +--- + +Voice Settings give you precise control over how your agent sounds and listens. From speech speed to background ambiance, pronunciation rules to turn-taking — this is where you shape the audio experience. + +**Location:** Left Sidebar → Agent Settings → Voice tab + + + ![Voice settings](../images/voice-settings.png) + + +--- + +## Voice + +Select the voice for your agent. Click the dropdown to browse available voices — you can preview each one before selecting. + +--- + +## Speech Settings + +### Speech Speed + +Control how fast your agent speaks. + +| Control | Range | Default | +|---------|-------|---------| +| Slider | Slow ↔ Fast | 1 | + +Slide left for a more measured, deliberate pace. Slide right for quicker delivery. 
Find the sweet spot that matches your use case — slower often works better for complex information, faster for simple confirmations. + +--- + +## Pronunciation & Background + +### Pronunciation Dictionaries + +Add custom pronunciations for words that aren't pronounced correctly by the default voice. + +This is especially useful for: +- Brand names +- Technical terms +- Proper nouns +- Industry-specific jargon + +**To add a pronunciation:** Click **Add Pronunciation** to open the modal. + + + ![Add pronunciation](../images/add-pronunciation.png) + + +| Field | Description | +|-------|-------------| +| **Word** | The word as written | +| **Pronunciation** | How it should sound | + +### Background Sound + +Add ambient audio behind your agent's voice for a more natural feel. + +| Option | Description | +|--------|-------------| +| **None** | Silent background (default) | +| **Office** | Subtle office ambiance | +| **Call Center** | Busy call center sounds | +| **Static** | Light static noise | +| **Cafe** | Coffee shop atmosphere | + +--- + +## Advanced Voice Settings + +### Mute User Until First Bot Response + +When enabled, the user's audio is muted until the agent's first response is complete. Useful for preventing early interruptions during the greeting. + +### Voicemail Detection + +Detects when a call goes to voicemail instead of reaching a live person. + + +Voicemail detection may not work as expected if **Release Time** is less than 0.6 seconds. + + +### Personal Info Redaction (PII) + +Automatically redacts sensitive personal information from transcripts and logs. + +### Denoising + +Filters out background noise and improves voice clarity before processing. This helps reduce false detections caused by environmental sounds — useful when callers are in noisy environments. + +--- + +## Voice Detection + +Fine-tune how your agent recognizes when someone is speaking. + +### Confidence + +Defines how strict the system is when deciding if detected sound is speech. 
+ +- **Higher values** → Less likely to trigger on background noise +- **Lower values** → More sensitive to quiet speech + +| Default | Range | +|---------|-------| +| 0.70 | 0 – 1 | + +### Min Volume + +The minimum volume level required to register as speech. + +| Default | Range | +|---------|-------| +| 0.60 | 0 – 1 | + +### Trigger Time (Seconds) + +How long the system waits after detecting the start of user speech (and after the bot has finished speaking) before processing. This helps avoid overlapping speech and false triggers. + +| Default | Range | +|---------|-------| +| 0.10 | 0 – 1 | + +### Release Time (Seconds) + +How long the system waits after the user stops speaking before the bot begins its response. This ensures the user has completely finished their thought. + +| Default | Range | +|---------|-------| +| 0.30 | 0 – 1+ | + + +**Start with defaults.** Only adjust these if you're experiencing specific issues like missed words or premature responses. + + +--- + +## Smart Turn Detection + +Intelligent detection of when the caller is done speaking. When enabled, the agent uses context and speech patterns — not just silence — to determine when it's time to respond. + +--- + +## Interruption Backoff Timer + +Time in seconds to prevent interruptions after the bot starts speaking (default: 0, disabled). + +This helps prevent conversation loops when the user and bot interrupt each other — the agent will wait this duration before allowing itself to be interrupted again. 
+ +--- + +## Related + + + + Configure AI model and language behavior + + + Choose and preview voices + + diff --git a/fern/products/atoms/pages/platform/building-agents/configuring/webhooks.mdx b/fern/products/atoms/pages/platform/building-agents/configuring/webhooks.mdx new file mode 100644 index 0000000..0530215 --- /dev/null +++ b/fern/products/atoms/pages/platform/building-agents/configuring/webhooks.mdx @@ -0,0 +1,107 @@ +--- +title: "Webhooks" +sidebarTitle: "Webhooks" +description: "Send real-time event notifications to external systems." +--- + +Webhooks push data to your systems when events happen — call starts, call ends, analytics ready. Use them to trigger workflows, update CRMs, create tickets, or feed data pipelines. + +**Location:** Left Sidebar → Agent Settings → Webhook tab + +--- + +## How It Works + +1. **Event occurs** (call starts, call ends, etc.) +2. **Atoms sends HTTP POST** to your endpoint with event data +3. **Your server processes** the data and takes action + +--- + +## Configuration + +### Select Endpoint + +Choose from your configured webhook endpoints. + + +**Create endpoints first.** Go to Features → Webhooks to set up endpoints before you can select them here. 
+ + +### Subscribe to Events + +| Event | When It Fires | What's Included | +|-------|---------------|-----------------| +| **Start** | Call begins | Caller info, agent info, timestamp | +| **End** | Call ends | Full transcript, duration, outcome | +| **Analytics Completed** | Post-call analysis done | Metrics, scores, extracted data | + +--- + +## Event Payloads + + + + ```json + { + "event": "call.start", + "call_id": "abc123", + "agent_id": "agent_xyz", + "caller_phone": "+15551234567", + "direction": "inbound", + "timestamp": "2024-01-15T10:30:00Z" + } + ``` + + + + ```json + { + "event": "call.end", + "call_id": "abc123", + "agent_id": "agent_xyz", + "duration_seconds": 180, + "transcript": [...], + "outcome": "resolved", + "timestamp": "2024-01-15T10:33:00Z" + } + ``` + + + + ```json + { + "event": "analytics.completed", + "call_id": "abc123", + "metrics": { + "sentiment": "positive", + "resolution": true, + "customer_satisfaction": 4.5 + }, + "timestamp": "2024-01-15T10:35:00Z" + } + ``` + + + +--- + +## Use Cases + +| Use Case | Description | +|----------|-------------| +| **CRM Updates** | Push call data to Salesforce, HubSpot | +| **Ticket Creation** | Auto-create support tickets after calls | +| **Notifications** | Alert teams about important calls | +| **Analytics Pipelines** | Feed data to your analytics systems | + +--- + +## Setup Steps + +1. Create an endpoint in **Features → Webhooks** +2. Return here and select your endpoint +3. Check the events you want to receive +4. Save + +→ See [Creating Webhook Endpoints](/atoms/atoms-platform/features/webhooks) for endpoint setup. 
diff --git a/fern/products/atoms/pages/platform/building-agents/configuring/widget.mdx b/fern/products/atoms/pages/platform/building-agents/configuring/widget.mdx new file mode 100644 index 0000000..44b675e --- /dev/null +++ b/fern/products/atoms/pages/platform/building-agents/configuring/widget.mdx @@ -0,0 +1,174 @@ +--- +title: "Widget" +sidebarTitle: "Widget" +description: "Embed your voice agent directly on your website." +--- + +The widget lets visitors talk to your agent without leaving your site — no phone call needed. They can either text or speak, right from their browser. + +**Location:** Left Sidebar → Widget + + + ![Widget configuration](../images/widget-full.png) + + +--- + +## Embed Code + +At the top, you'll find your embed snippet. Copy this and paste it into your website's HTML. + +```html + + +``` + +That's it — the widget will appear on your site. Everything else on this page customizes how it looks and behaves. + +--- + +## Mode + +Choose how users interact with your widget: + +| Mode | Description | +|------|-------------| +| **Chat** | Users type messages. Good for quieter environments or when voice isn't practical. | +| **Voice** | Users speak directly. The full voice agent experience in the browser. | + +Pick based on your audience. Voice feels more natural for most support scenarios. Chat works better for quick questions or when users might be in public. + +--- + +## Allowlist + +By default, any website can embed your widget. That's convenient for testing, but risky for production. + + +**Set up an allowlist** before going live. Without one, anyone could embed your agent on their site, potentially running up your usage or misrepresenting your brand. + + +Click **+ Add host** to specify which domains can use your widget: + +``` +yourdomain.com +app.yourdomain.com +``` + +Only these sites will be able to load the widget. + +--- + +## Appearance + +### Variant + +How much screen space should the widget take? 
+ + + + + ![Tiny widget](../images/widget-tiny.png) + + + Minimal footprint. Just a small button that expands when clicked. Best when you want the widget available but unobtrusive. + + + + + ![Compact widget](../images/widget-compact.png) + + + Balanced size. Visible enough to invite interaction, but doesn't dominate the page. Good default for most sites. + + + + + ![Full widget](../images/widget-full-variant.png) + + + Prominent and hard to miss. Use when the widget is a primary way users should interact with your site. + + + +### Placement + +Where the widget button appears on screen. Currently supports **Bottom-right**. + + +The preview on this page always shows bottom-right. The placement you choose applies when embedded on your actual site. + + +--- + +## Theme + +Make the widget match your brand. + +| Setting | What it affects | Default | +|---------|-----------------|---------| +| **Widget Background Color** | Main widget background | `#ffffff` | +| **Brand Accent Color** | Buttons, highlights | `#2d9d9f` | +| **Agent Message Background** | Agent response bubbles | `#f3f4f6` | +| **Text on Accent Color** | Text on accent-colored elements | `#FFFFFF` | +| **Primary Text Color** | Main text | `#111827` | +| **Secondary Text Color** | Subtitles, hints | `#6b7280` | + +Click any color to open a picker, or paste a hex code directly. + +--- + +## Avatar + +Upload an image to represent your agent in the widget. This appears in the chat interface and helps humanize the experience. + +| Spec | Value | +|------|-------| +| **Recommended size** | 172px × 172px | +| **Maximum file size** | 2MB | +| **Format** | PNG, JPG | + +Drag and drop or click to upload. + +--- + +## Text Contents + +Customize the copy users see. 
+ +| Field | What it controls | Default | +|-------|------------------|---------| +| **Start Button Text** | Button to begin conversation | "Start" | +| **End Button Text** | Button to end conversation | "End" | +| **Chat Placeholder** | Input field placeholder | "Type your message..." | +| **CTA Name** (optional) | Call-to-action text on widget button | "Talk to Atoms" | +| **Widget Name** (optional) | Name displayed in widget header | "Atoms" | + +Make these match your brand voice. "Chat with us" feels different than "Get help" or "Ask a question." + +--- + +## Terms & Conditions + +If you need users to agree to terms before using the widget, enable **Require consent**. + +When enabled, users see a checkbox they must tick before starting. This is useful for: +- GDPR compliance +- Recording consent +- Specific terms of service + +--- + +## Preview + +As you make changes, the widget preview updates in real-time in the bottom-right corner of the page. Test different settings before deploying. + +--- + +## Deploying + +Once configured, copy the embed code and add it to your website. For detailed installation instructions across different platforms: + + + Step-by-step deployment guide + diff --git a/fern/products/atoms/pages/platform/building-agents/convo-flow/ai-assisted.mdx b/fern/products/atoms/pages/platform/building-agents/convo-flow/ai-assisted.mdx new file mode 100644 index 0000000..f9b87ea --- /dev/null +++ b/fern/products/atoms/pages/platform/building-agents/convo-flow/ai-assisted.mdx @@ -0,0 +1,89 @@ +--- +title: "AI-Assisted Creation" +sidebarTitle: "AI-Assisted" +description: "Let AI design your Conversational Flow workflow." +--- + +Describing your ideal conversation is easier than building it node by node. Let AI generate the flow, then refine it to perfection. + + + [IMAGE: Create with AI interface with Conversational Flow selected] + + +--- + +## How It Works + + + + Fill in four prompts explaining what your agent should do. 
+ + + Choose voice, model, and optional knowledge base. + + + AI designs your complete flow with nodes, branches, and prompts. + + + The workflow builder opens with your generated flow. + + + + + [IMAGE: Workflow builder showing AI-generated flow with multiple nodes and branches] + + +--- + +## Conversational Flow AI Tips + +When using Create with AI for Conversational Flow agents: + + + The clearer your steps, the better the generated flow. + + **Example:** "First verify the caller's identity with their phone number. Then ask why they're calling. If it's a billing issue, collect the invoice number. If it's technical support, ask what product they're using." + + + + Tell the AI when conversations should split into different paths. + + **Example:** "If the budget is over $10,000, transfer to a senior sales rep. If under $10,000, send them to self-serve resources." + + + + Describe how each path should conclude. + + **Example:** "Qualified leads should be transferred to sales. Unqualified leads should receive a follow-up email. Support issues should end with a ticket confirmation." + + + + List what information must be gathered. + + **Example:** "Must collect: name, company, budget range, timeline, and current solution before qualifying." + + +--- + +## Learn More + +The complete Create with AI guide covers everything: + + + Full guide to AI-assisted creation + + +--- + +## After Generation + +Once AI generates your flow, it opens in the workflow builder. 
From there: + + + + Adjust nodes and branches + + + Understand what each node does + + diff --git a/fern/products/atoms/pages/platform/building-agents/convo-flow/conditions.mdx b/fern/products/atoms/pages/platform/building-agents/convo-flow/conditions.mdx new file mode 100644 index 0000000..df48f15 --- /dev/null +++ b/fern/products/atoms/pages/platform/building-agents/convo-flow/conditions.mdx @@ -0,0 +1,382 @@ +--- +title: "Conditions & Branching" +sidebarTitle: "Conditions & Branching" +description: "Create dynamic conversation paths based on caller responses." +--- + +Branching is what makes Conversational Flow powerful. Instead of a linear script, your agent can take different paths based on what callers say. + + + [IMAGE: Workflow showing one node splitting into three different paths] + + +--- + +## How Branching Works + + + [IMAGE: Diagram showing caller response → condition evaluation → path selection] + + + + + The current node poses a question or presents options. + + + The caller says something. + + + The AI checks each condition against the response. + + + The matching condition determines the next node. + + + +--- + +## Condition Types + + + + + [IMAGE: Condition panel with text match fields] + + + Match specific words or phrases in the caller's response. + + | Condition | Matches | + |-----------|---------| + | "yes" | "yes", "yeah", "yep" | + | "billing" | "I have a billing question" | + | "cancel" | "I want to cancel" | + + + The AI is smart about variations — "yes", "yeah", "sure", "absolutely" can all match a "yes" condition. + + + + + + [IMAGE: Condition panel with intent selection] + + + Match the caller's intent, not exact words. + + | Intent | Example Phrases | + |--------|-----------------| + | `wants_support` | "I need help", "something's broken" | + | `wants_sales` | "I'm interested in buying" | + | `is_frustrated` | "This is ridiculous", angry tone | + + + Intent detection uses AI understanding — callers don't need to say magic words. 
+ + + + + + [IMAGE: Condition panel with variable comparison] + + + Route based on variable values. + + | Condition | Logic | + |-----------|-------| + | `{{budget}} > 10000` | High-value path | + | `{{api.tier}} == "premium"` | VIP treatment | + | `{{attempts}} >= 3` | Escalation path | + + **Operators:** + - `==` equals + - `!=` not equals + - `>` greater than + - `<` less than + - `>=` greater or equal + - `<=` less or equal + + + + + [IMAGE: Condition panel with default/else option] + + + Catch-all for when no other condition matches. + + **Always include a default branch.** It handles: + - Unexpected responses + - Unclear answers + - Edge cases you didn't anticipate + + ``` + "I didn't quite catch that. Could you tell me again — + are you calling about billing or technical support?" + ``` + + + +--- + +## Creating Branches + + + + Open the node you want to branch from. + + + [IMAGE: Node clicked with configuration panel opening] + + + + + Find the Branches section and click **Add Branch**. + + + [IMAGE: Branch section with Add Branch button] + + + + + Set the condition type and value. + + + [IMAGE: Form showing condition type dropdown and value input] + + + | Field | What to Enter | + |-------|---------------| + | **Label** | Name shown on canvas | + | **Type** | Text match, intent, variable, default | + | **Value** | What to match | + + + + Draw connections from each branch to its destination. + + + [IMAGE: Three branch lines going to three different nodes] + + + + +--- + +## Real-World Examples + +### Support Routing + + + [IMAGE: "How can I help?" node with branches to Billing, Technical, General, Fallback] + + +``` +[Ask Issue Type] +"I'm here to help! Are you calling about billing, +technical support, or something else?" 
+ +Branches: +├── "billing" → [Billing Flow] +├── "technical" / "tech" / "support" → [Technical Flow] +├── "sales" / "buy" / "purchase" → [Sales Flow] +└── default → [Clarify and Re-ask] +``` + +### Lead Qualification + + + [IMAGE: Budget question with branches based on value ranges] + + +``` +[Ask Budget] +"What's your approximate budget for this project?" + +→ Store response as {{budget}} + +Branches: +├── {{budget}} >= 50000 → [Enterprise Path] +├── {{budget}} >= 10000 → [Professional Path] +├── {{budget}} >= 1000 → [Starter Path] +└── {{budget}} < 1000 → [Self-Serve Resources] +``` + +### Confirmation Flow + + + [IMAGE: "Is that correct?" node with Yes/No/Unclear branches] + + +``` +[Confirm Details] +"Just to confirm — you'd like to book a consultation +for Tuesday at 3pm. Is that correct?" + +Branches: +├── yes / correct / "that's right" → [Complete Booking] +├── no / "not quite" / wrong → [Correct Details] +└── default → [Re-confirm] +``` + +--- + +## Best Practices + + + No matter how many conditions you define, something unexpected will happen. + + + [IMAGE: Branches with default path highlighted] + + + Your fallback should: + - Acknowledge the response + - Re-ask the question differently + - Offer options to clarify + + ``` + "I want to make sure I understand. Are you calling about + billing, technical issues, or something else entirely?" + ``` + + + + Avoid overlapping conditions that could both match. + + | Overlapping (Bad) | Exclusive (Good) | + |-------------------|------------------| + | "support" AND "technical support" | "billing" OR "technical" OR "other" | + | `>10` AND `>50` | `10-50` AND `>50` | + + Order matters — conditions are evaluated top to bottom. + + + + It's easy to forget a branch while testing. Be systematic: + + 1. List all possible paths + 2. Test each one deliberately + 3. Try edge cases (silence, gibberish, topic changes) + 4. Review in Convo Logs + + + + Too many nested branches become impossible to manage. 
+ + + [IMAGE: Side-by-side of deeply nested vs flatter flow] + + + If your flow is getting too deep: + - Can you combine some branches? + - Should some paths be separate flows? + - Is Single Prompt better for this use case? + + + + Labels appear on the canvas — make them meaningful. + + | Bad Labels | Good Labels | + |------------|-------------| + | "Option 1" | "Wants Billing Help" | + | "Path A" | "Budget > $10k" | + | "Yes" | "Confirmed Appointment" | + + +--- + +## Complex Branching Patterns + +### Multiple Conditions (AND) + + + [IMAGE: Node checking two conditions before branching] + + +Sometimes you need multiple things to be true: + +``` +If {{api.tier}} == "premium" AND {{budget}} > 10000 + → VIP Sales Path +``` + +Combine in a single variable-based condition: +``` +{{api.tier}} == "premium" && {{budget}} > 10000 +``` + +### Loop Back + + + [IMAGE: Flow with arrow going back to earlier node] + + +For retry scenarios: + +``` +[Ask Budget] + ↓ +[Validate Budget] + ├── Valid → Continue + └── Invalid → Loop back to [Ask Budget] +``` + + +**Avoid infinite loops.** Add a counter variable and exit after N attempts. + + +### Parallel Paths That Merge + + + [IMAGE: Two branches that eventually connect to the same node] + + +Different paths can lead to the same destination: + +``` +[Qualify] +├── High Budget → [Fast Track] → [Schedule Demo] +└── Standard Budget → [Standard Process] → [Schedule Demo] +``` + +--- + +## Debugging Branches + +When branches don't work as expected: + + + [IMAGE: Convo log with branch decision highlighted] + + + + + See exactly which conditions were evaluated and which matched. + + + + Typos in variable names or operators will fail silently. + + + + If fallback triggers too often, your conditions may be too specific. + + + + Conditions are checked top to bottom — earlier matches win. 
+ + + +→ Learn more: [Conversation Logs](/atoms/atoms-platform/analytics-logs/conversation-logs) + +--- + +## Next Steps + + + + Voice, model, and features + + + Validate every branch + + diff --git a/fern/products/atoms/pages/platform/building-agents/convo-flow/creating.mdx b/fern/products/atoms/pages/platform/building-agents/convo-flow/creating.mdx new file mode 100644 index 0000000..1b40cbe --- /dev/null +++ b/fern/products/atoms/pages/platform/building-agents/convo-flow/creating.mdx @@ -0,0 +1,161 @@ +--- +title: "Creating a Conversational Flow Agent" +sidebarTitle: "Creating Your Agent" +description: "Step-by-step guide to building a Conversational Flow agent." +--- + +This guide walks you through creating a Conversational Flow agent from scratch. By the end, you'll have a working workflow ready for testing. + + + [IMAGE: Blank workflow builder showing node palette, empty canvas, and toggle tabs] + + +--- + +## Steps to Create + +### Step 1: Start Creation + +1. Click the green **"Create Agent"** button (top right of dashboard) +2. Select **"Start from scratch"** +3. Choose **"Conversational Flow"** + +The workflow builder opens. + +--- + +### Step 2: Understand the Layout + +| Area | Location | Purpose | +|------|----------|---------| +| **Toggle Tabs** | Top center | Switch between "Workflow" and "Settings" | +| **Node Palette** | Left panel | Drag-and-drop node types | +| **Canvas** | Center | Visual flow editor | +| **Variables** | Right panel | `{ }` Variables button | + +**Top Right Actions** (same as Single Prompt): +- Convo Logs +- Lock Agent +- Test Agent + +**Header:** +- Back arrow (←) — return to agents list +- Agent name — click to rename +- Agent ID — click to copy + +--- + +### Step 3: Start with the Canvas + +The canvas shows a **Start Node** (green pill) at the top. This is where every conversation begins. + +Your workflow will flow down from this starting point. + +--- + +### Step 4: Add Your First Node + +1. 
In the Node Palette (left), find **Default Node** +2. Drag it onto the canvas below the Start Node +3. A connection line appears — drop the node to connect it + +--- + +### Step 5: Configure the Node + +1. Click the node to select it +2. A configuration panel opens +3. Write the node's prompt — what should the agent say here? + +**Example for a greeting node:** +``` +Hi! Thanks for calling TechStore. I'm here to help you today. +Are you calling about an existing order, or do you have a product question? +``` + +--- + +### Step 6: Add Conditions and Branches + +From your first node, add branches for different user responses: + +1. In the node config, find the conditions/branches section +2. Add conditions like: + - "User mentions order" → connects to Order Help node + - "User asks product question" → connects to Product Info node + - "Other" → connects to Clarification node + +--- + +### Step 7: Build Out the Flow + +Continue adding nodes for each conversation path: + +1. Drag more nodes from the palette +2. Connect branches to appropriate next nodes +3. Configure each node's prompt +4. Ensure all paths eventually reach an **End Call** node + +--- + +### Step 8: Configure Settings + +Click the **"Settings"** toggle tab to access: + +- Voice settings (same as Single Prompt) +- Model configuration +- Phone number assignment +- Webhooks +- General settings + +These work identically to Single Prompt configuration. + +--- + +### Step 9: Test Your Flow + +1. Click **"Test Agent"** (top right) +2. Choose test mode +3. Walk through your conversation paths +4. Check that each branch works correctly + +**Important:** Test EVERY path in your flow. Untested branches may have issues. + +--- + +## Canvas Controls + +At the bottom of the canvas: + +| Button | Function | +|--------|----------| +| 👍 | Submit feedback | +| ↕️ | Auto-layout (organize nodes neatly) | +| + | Zoom in | +| - | Zoom out | + +**Tip:** Use auto-layout frequently to keep your flow readable. 
+ +--- + +## Your Agent is Ready + +You now have a working Conversational Flow agent. From here, you can: + +- Add more nodes and branches +- Configure API calls for data lookup +- Set up transfer nodes for human handoff +- Test thoroughly before deploying + +--- + +## What's Next + + + + Master the visual editor + + + Learn all node types + + diff --git a/fern/products/atoms/pages/platform/building-agents/convo-flow/from-template.mdx b/fern/products/atoms/pages/platform/building-agents/convo-flow/from-template.mdx new file mode 100644 index 0000000..9e1fdc6 --- /dev/null +++ b/fern/products/atoms/pages/platform/building-agents/convo-flow/from-template.mdx @@ -0,0 +1,241 @@ +--- +title: "From Template" +sidebarTitle: "From Template" +description: "Start with pre-built Conversational Flow templates." +--- + +Templates provide complete, tested workflows for common use cases. Pick one, customize it, and deploy. + + + [IMAGE: Grid of flow template cards showing different use cases] + + +--- + +## Available Templates + + + + Structured qualification flow with budget, timeline, and decision-maker questions. Branches to qualified/not qualified paths. + + + Complete booking flow with availability checking, confirmation, and rescheduling options. + + + Routes callers to appropriate department based on issue type. Handles common requests in-flow. + + + Policy lookup, claim intake, and new policy paths. Built for compliance. + + + Property interest collection, viewing scheduling, and buyer qualification. + + + Patient verification, appointment prep, and pre-visit questionnaire. + + + +--- + +## How to Use a Template + + + + From your dashboard, click the green **Create Agent** button. + + + [IMAGE: Dashboard with button highlighted] + + + + + Choose the second option. + + + [IMAGE: Modal with "Start with Template" highlighted] + + + + + Select the Conversational Flow agent type. + + + [IMAGE: Conversational Flow option selected] + + + + + Explore templates. 
Click to preview the flow. + + + [IMAGE: Template card expanded showing flow preview] + + + + + Click **Use Template** to load the flow. + + + [IMAGE: Workflow builder with complete template flow visible] + + + + +--- + +## What Templates Include + + + [IMAGE: Annotated template showing all included elements] + + +Each template comes with: + +| Component | Description | +|-----------|-------------| +| **Complete Flow** | All nodes, branches, and connections | +| **Node Prompts** | Pre-written prompts for each step | +| **Conditions** | Branch logic already configured | +| **Variables** | Relevant variables pre-defined | +| **Suggested Voice** | Voice recommendation for the use case | +| **End Nodes** | Proper endings for all paths | + +--- + +## Customizing Templates + +Templates are starting points. Always customize for your business. + + + + Click each node and update the prompts with your specifics. + + + [IMAGE: Node panel with prompt being edited] + + + ```diff + - "Thanks for calling [Company]." + + "Thanks for calling Acme Insurance." + + - "Our offices are open [hours]." + + "Our offices are open 8am-6pm Pacific." + ``` + + + + Not every node fits your process. Adjust as needed. + + + [IMAGE: User dragging new node into existing flow] + + + - **Add nodes** for extra questions + - **Remove nodes** you don't need + - **Reorganize** to match your actual process + + + + Adjust conditions to match your criteria. + + + [IMAGE: Branch condition editor with values being changed] + + + ```diff + - If budget > $5,000 → Qualified + + If budget > $10,000 → Qualified + ``` + + + + Rename or add variables for your needs. 
+ + + [IMAGE: Variables panel with renamed variables] + + + | Template Variable | Your Variable | + |-------------------|---------------| + | `{{company}}` | `{{customer_company}}` | + | `{{budget}}` | `{{project_budget}}` | + + + +--- + +## Template Example: Lead Qualification + +Here's what the Lead Qualification template includes: + + + [IMAGE: Full workflow showing the complete lead qualification flow] + + +### Flow Structure + +``` +[Start] + ↓ +[Greeting] → "Hi! Thanks for reaching out. I'd love to + learn more about your needs." + ↓ +[Company Info] → "What company are you with?" + ↓ +[Current Solution] → "What are you using today?" + ↓ +[Budget Range] → "What's your approximate budget?" + ├── > $10k → [High Value Path] + └── < $10k → [Standard Path] + ↓ +[Timeline] → "When are you looking to get started?" + ├── < 30 days → [Urgent] → [Transfer to Sales] + ├── 30-90 days → [Qualified] → [Schedule Demo] + └── > 90 days → [Nurture] → [Add to List] + ↓ +[End Call] → Appropriate closing for each path +``` + +### Included Variables + +| Variable | Purpose | +|----------|---------| +| `{{contact_name}}` | Caller's name | +| `{{company_name}}` | Their company | +| `{{current_solution}}` | What they use now | +| `{{budget_range}}` | Budget bracket | +| `{{timeline}}` | Decision timeline | +| `{{lead_score}}` | Calculated qualification score | + +--- + +## Tips for Template Success + + + Before customizing, trace every path. Understand why each node and branch exists. + + + + The template structure is battle-tested. Keep the logic, just update the specifics. + + + + Run through the template as-is first. Then test again after every significant change. + + + + It's easy to customize the main path and forget alternate branches. Test every route. 
+ + +--- + +## Next Steps + + + + Understand all available nodes + + + Validate every branch + + diff --git a/fern/products/atoms/pages/platform/building-agents/convo-flow/manual-setup.mdx b/fern/products/atoms/pages/platform/building-agents/convo-flow/manual-setup.mdx new file mode 100644 index 0000000..74431f9 --- /dev/null +++ b/fern/products/atoms/pages/platform/building-agents/convo-flow/manual-setup.mdx @@ -0,0 +1,305 @@ +--- +title: "Manual Setup" +sidebarTitle: "Manual Setup" +description: "Build a Conversational Flow agent from scratch with full control." +--- + +Starting from scratch gives you complete creative freedom. This guide walks you through creating a Conversational Flow agent step by step. + + + [IMAGE: Empty workflow builder canvas with node palette visible] + + +--- + +## Step 1: Start Creation + + + + Find the green **Create Agent** button in the top right of your dashboard. + + + [IMAGE: Dashboard showing Create Agent button location] + + + + + Choose the first option in the modal. + + + [IMAGE: Modal with "Start from scratch" highlighted] + + + + + Select the Conversational Flow agent type. + + + [IMAGE: Conversational Flow option with connected nodes icon] + + + + +The workflow builder opens with an empty canvas. 
+ +--- + +## Step 2: Meet the Workflow Builder + + + [IMAGE: Full builder screenshot with numbered callouts] + + +### Interface Layout + +| Area | Location | What It Does | +|------|----------|--------------| +| **Toggle Tabs** | Top center | Switch between Workflow and Settings | +| **Node Palette** | Left panel | Drag nodes from here | +| **Canvas** | Center | Your visual workflow | +| **Variables** | Right panel | Manage flow variables | +| **Controls** | Bottom | Zoom, auto-layout, feedback | + + + + [IMAGE: Left panel showing all node types] + + + | Section | Contents | + |---------|----------| + | **Nodes** | Default, Transfer, API Call, End Call, Pre-Call API, Post-Call API | + | **Integrations** | Salesforce (available), Google Calendar, HubSpot, Google Sheets (coming soon) | + | **Help** | Need Help?, Keyboard Shortcuts | + + + + + [IMAGE: Bottom control bar with zoom and layout buttons] + + + | Button | Function | + |--------|----------| + | 👍 | Send feedback | + | ↕️ | Auto-layout (organize nodes) | + | + | Zoom in | + | − | Zoom out | + + +--- + +## Step 3: Build Your First Flow + +Let's create a simple lead qualification flow. + +### Add the Start Node + +Every flow begins with a **Start Node** — it's added automatically. + + + [IMAGE: Canvas showing green Start node pill] + + +### Add Your First Conversation Node + + + + From the Node Palette, drag a **Default Node** onto the canvas. + + + [IMAGE: Hand cursor dragging node from palette to canvas] + + + + + Drag from the Start node's handle to your new node. + + + [IMAGE: Connection line being drawn between nodes] + + + + + Click the node to open its settings. Add your prompt: + + ``` + Hi! Thanks for calling [Company]. I'm here to help + you find the right solution. To get started, could + you tell me your name? 
+ ``` + + + [IMAGE: Node settings panel with prompt field] + + + + +### Continue the Flow + +Add more nodes for each step in your conversation: + + + [IMAGE: Canvas showing Start → Greeting → Ask Budget → Ask Timeline] + + + + + ``` + Great, {{name}}! What's your approximate budget + for this project? + ``` + + + + ``` + And when are you looking to get started? + ``` + + + + Based on responses, determine if lead is qualified. + + + +### Add an End Node + +Every path needs an ending. + + + + From the palette, add an **End Call** node. + + + [IMAGE: Red End Call node at end of flow] + + + + + Set the goodbye message: + + ``` + Thanks so much for your time! We'll be in touch + within 24 hours. Have a great day! + ``` + + + +--- + +## Step 4: Add Branches + +Branches let you route conversations based on caller responses. + + + [IMAGE: Node with two output branches going to different paths] + + + + + Open the node you want to branch from. + + + + Define what triggers each branch. + + + [IMAGE: Node settings showing condition inputs] + + + | Condition Type | Example | + |----------------|---------| + | **Text Match** | User says "not interested" | + | **Intent** | User expresses budget concern | + | **Variable** | `{{budget}}` > 10000 | + + + + Draw connections from each condition to its destination node. + + + [IMAGE: One node with three output branches to different paths] + + + + + +**Always add a default/fallback branch.** Handle unexpected responses gracefully. + + +--- + +## Step 5: Configure Settings + +Click the **Settings** tab to access agent configuration. 
+ + + [IMAGE: Top toggle showing Settings tab active] + + +The Settings tab contains the same options as Single Prompt agents: + +| Setting Area | What to Configure | +|--------------|-------------------| +| **Voice** | Speech settings, pronunciation, detection | +| **Model** | LLM selection, language switching | +| **Phone Number** | Assign a number | +| **Webhook** | Event notifications | +| **General** | Timeouts | + +→ See: [Configuring Your Agent](/atoms/atoms-platform/single-prompt-agents/overview) + +--- + +## Step 6: Test Your Flow + + + [IMAGE: Modal showing Telephony, Web Call, and Chat options] + + + + + Top right corner of the builder. + + + + - **Web Call** — Quick browser test + - **Telephony** — Real phone experience + - **Chat** — Text-only for logic testing + + + + Test every branch. Try edge cases. + + + + Check Convo Logs to see exactly which nodes were hit. + + + + +**Test every branch.** It's easy to miss an edge case that breaks the flow. + + +--- + +## Your Flow is Ready! + + + [IMAGE: Polished workflow with all nodes connected and organized] + + +From here you can: + + + + Master all available nodes + + + Complex condition logic + + + Voice, model, webhooks & more + + + Assign a real number + + diff --git a/fern/products/atoms/pages/platform/building-agents/convo-flow/node-types.mdx b/fern/products/atoms/pages/platform/building-agents/convo-flow/node-types.mdx new file mode 100644 index 0000000..f59a587 --- /dev/null +++ b/fern/products/atoms/pages/platform/building-agents/convo-flow/node-types.mdx @@ -0,0 +1,382 @@ +--- +title: "Node Types Reference" +sidebarTitle: "Node Types" +description: "Complete guide to every node type in Conversational Flow." +--- + +Nodes are the building blocks of your conversation flow. Each type serves a specific purpose in guiding the conversation. 
+ + + [IMAGE: Node palette showing all six node types with icons] + + +--- + +## Node Types at a Glance + +| Node | Icon | Purpose | When to Use | +|------|------|---------|-------------| +| [Default](#default-node) | 🔷 | Conversation step | Each conversation point | +| [Transfer Call](#transfer-call-node) | 📞 | Handoff to human | Connect to live agent | +| [API Call](#api-call-node) | ⬆️ | External data | Fetch/send mid-conversation | +| [End Call](#end-call-node) | ✂️ | Terminate call | Natural endings | +| [Pre-Call API](#pre-call-api-node) | 🔵🟠 | Load context | Get data before call starts | +| [Post-Call API](#post-call-api-node) | 🟠🔵 | Save data | Send data after call ends | + +--- + +## Default Node + +The workhorse of your flow. Each Default node represents one step in the conversation. + + + [IMAGE: Default node card showing title and connection points] + + +### Configuration + + + [IMAGE: Panel showing all Default node settings] + + +| Field | Purpose | +|-------|---------| +| **Name** | Identifier shown on canvas | +| **Prompt** | What the agent says at this step | +| **Branches** | Output paths based on caller response | + +### Example Prompts + + + + ``` + Hi! Thanks for calling Acme Support. My name is Alex. + How can I help you today? + ``` + + + + ``` + I'd be happy to help with that. First, could you tell + me your account number or the email on file? + ``` + + + + ``` + Just to confirm — you'd like to schedule a demo for + next Tuesday at 2pm. Is that correct? + ``` + + + +### Branching + +Default nodes can have multiple output branches: + + + [IMAGE: Node showing three output connections with condition labels] + + +→ See: [Conditions & Branching](/atoms/atoms-platform/conversational-flow-agents/workflow-tab/conditions) + +--- + +## Transfer Call Node + +Hands the conversation to a human agent via phone transfer. 
+ + + [IMAGE: Transfer Call node card with phone icon] + + +### Configuration + + + [IMAGE: Transfer settings panel showing all options] + + +| Field | Purpose | +|-------|---------| +| **Name** | Identifier | +| **Description** | When this transfer should trigger | +| **Phone Number** | Transfer destination (with country code) | +| **Transfer Type** | Cold or Warm | + +### Transfer Types + + + + + [IMAGE: Diagram showing immediate handoff] + + + **Immediate handoff.** The caller is connected directly without any briefing to the receiving agent. + + **Best for:** + - Simple escalations + - When context isn't needed + - Time-sensitive transfers + + + + + [IMAGE: Diagram showing AI briefing the human, then connecting caller] + + + **AI briefs the agent first.** The receiving agent gets context before the caller joins. + + **Additional Options:** + + | Option | Description | + |--------|-------------| + | On-hold Music | What caller hears while waiting | + | Transfer if Human | Skip if voicemail (coming soon) | + | Whisper Message | Private message to agent only | + | Handoff Message | What the agent hears | + | Three-way Message | What both parties hear | + + **Best for:** + - Complex issues needing context + - VIP callers + - When continuity matters + + + +--- + +## API Call Node + +Makes external API requests during the conversation. 
+ + + [IMAGE: API Call node card with arrow icon] + + +### Configuration + + + [IMAGE: API settings panel with all fields] + + +| Field | Purpose | +|-------|---------| +| **Name** | Identifier | +| **URL** | Endpoint to call | +| **Method** | GET, POST, PUT, DELETE | +| **Headers** | Request headers | +| **Body** | Request payload | +| **Response Mapping** | Map response to variables | + +### Example: Customer Lookup + + + [IMAGE: API node configured for customer lookup] + + +``` +URL: https://api.example.com/customers +Method: GET +Query: ?phone={{caller_phone}} + +Response Mapping: + data.name → {{api.customer_name}} + data.tier → {{api.account_tier}} +``` + +Then use in a later node: + +``` +"Hello {{api.customer_name}}! I see you're a {{api.account_tier}} +member. How can I help you today?" +``` + +→ Learn more: [API Calls](/atoms/atoms-platform/single-prompt-agents/configuration-panel/api-calls) + +--- + +## End Call Node + +Terminates the conversation. + + + [IMAGE: End Call node card with scissors icon] + + +### Configuration + + + [IMAGE: End Call settings panel] + + +| Field | Purpose | +|-------|---------| +| **Name** | Identifier | +| **Closing Message** | Final words before hanging up | + +### Example Closings + + + + ``` + Great! You're all set. Is there anything else I can + help you with today? ... Perfect, thank you for calling + Acme. Have a wonderful day! + ``` + + + + ``` + I'm connecting you now. Thanks for calling Acme, and + have a great day! + ``` + + + + ``` + Thanks so much for your interest. I'll have someone + send over some resources that might be helpful. Take care! + ``` + + + + +**Every path must end.** Make sure all branches eventually reach an End Call or Transfer node. + + +--- + +## Pre-Call API Node + +Fetches data before the conversation begins. + + + [IMAGE: Pre-Call API node with distinctive coloring] + + +### When It Runs + + + [IMAGE: Timeline showing API call happening before Start node] + + +The Pre-Call API runs: +1. 
Phone rings +2. **Pre-Call API executes** ← Here +3. Data available +4. Conversation starts + +### Use Cases + +| Scenario | API Call | +|----------|----------| +| **CRM Lookup** | Get caller history before greeting | +| **Account Status** | Check if caller has issues | +| **Personalization** | Load name, preferences | +| **Routing** | Determine which flow to use | + +### Example + + + [IMAGE: Pre-Call API configured for customer lookup] + + +``` +URL: https://crm.example.com/lookup?phone={{caller_phone}} + +Response: + customer_name → {{api.name}} + last_issue → {{api.last_ticket}} + tier → {{api.tier}} +``` + +First node can now say: + +``` +"Hi {{api.name}}! Thanks for calling back. Are you still +having trouble with {{api.last_ticket}}?" +``` + +--- + +## Post-Call API Node + +Sends data after the conversation ends. + + + [IMAGE: Post-Call API node with distinctive coloring] + + +### When It Runs + + + [IMAGE: Timeline showing API call happening after End Call] + + +The Post-Call API runs: +1. Conversation ends +2. Call terminates +3. **Post-Call API executes** ← Here +4. Data saved externally + +### Use Cases + +| Scenario | What to Send | +|----------|--------------| +| **CRM Update** | Call summary, outcome | +| **Ticket Creation** | Issue details, priority | +| **Lead Capture** | Collected qualification data | +| **Analytics** | Custom metrics, disposition | + +### Example + + + [IMAGE: Post-Call API configured to update CRM] + + +``` +URL: https://crm.example.com/calls +Method: POST +Body: +{ + "phone": "{{caller_phone}}", + "duration": "{{call_duration}}", + "outcome": "{{disposition}}", + "notes": "{{call_summary}}", + "collected": { + "name": "{{collected.name}}", + "budget": "{{collected.budget}}", + "timeline": "{{collected.timeline}}" + } +} +``` + +--- + +## Choosing the Right Node + + + [IMAGE: Flowchart: "What do you need?" → branches to different nodes] + + +| If you need to... 
| Use | +|-------------------|-----| +| Ask a question or give info | Default Node | +| Transfer to a human | Transfer Call Node | +| Get external data mid-conversation | API Call Node | +| End the conversation | End Call Node | +| Load data before call starts | Pre-Call API Node | +| Save data after call ends | Post-Call API Node | + +--- + +## Next Steps + + + + Create complex conversation paths + + + Voice, model, and more settings + + diff --git a/fern/products/atoms/pages/platform/building-agents/convo-flow/nodes.mdx b/fern/products/atoms/pages/platform/building-agents/convo-flow/nodes.mdx new file mode 100644 index 0000000..b79e91f --- /dev/null +++ b/fern/products/atoms/pages/platform/building-agents/convo-flow/nodes.mdx @@ -0,0 +1,360 @@ +--- +title: "Working with Nodes" +sidebarTitle: "Nodes" +description: "Master the building blocks of Convo Flow agents." +--- + +Nodes are the building blocks of every Convo Flow agent. Each node represents a step in your conversation — a greeting, a question, an API call, or a handoff to a human. + +Think of nodes like stations on a train line. Your conversation travels from one station to the next, with branches determining which route to take based on what the caller says. + +--- + +## Node Types + + + Node types + + +| Node | Purpose | When to Use | +|------|---------|-------------| +| **Default** | Speak and listen | Greetings, questions, information delivery | +| **API Call** | Connect to external services | Fetch data, book appointments, update CRMs | +| **Transfer Call** | Hand off to a human | Escalations, complex issues, sales closes | +| **End Call** | Conclude the conversation | Successful completion, dead ends | +| **Pre-Call API** | Actions before the call | Load customer data, set variables | +| **Post-Call API** | Actions after the call | Log outcomes, trigger follow-ups | + +--- + +## Default Node + +The workhorse of Convo Flow. 
Use it for any conversational turn where your agent speaks and expects a response. + + + Default node + + +### How to Use + +1. Drag **Default Node** from the left panel onto your canvas +2. Click the node to select it +3. In the right panel, write your prompt: + +``` +Hi {{customer_name}}! Thanks for calling Acme Support. +How can I help you today? +``` + +The prompt is what your agent says. Variables like `{{customer_name}}` get replaced with actual data. + +### Setting Up Branches + +Branches determine what happens next based on the caller's response. Click **+ Add Branch** and configure: + +| Field | What It Does | Example | +|-------|--------------|---------| +| **Branch Name** | Internal label for the branch | "Billing Question" | +| **Condition** | When to take this path | "User asks about billing" | +| **Next Node** | Where to go next | → Billing Help node | + +Write conditions in natural language. The AI interprets them: +- "User confirms" +- "User says no or declines" +- "User asks about pricing" +- "User wants to speak to a person" +- "Anything else" (fallback) + + +**Always add a fallback.** Create a branch with condition "Anything else" that handles unexpected responses. Route it back to the same node with a clarification prompt or to a help node. + +**Order matters.** Branches are evaluated top to bottom. Put specific conditions first, fallback last. + +**Keep conditions clear.** "User wants to reschedule" is better than "User mentions changing times or dates or rescheduling or..." + + +--- + +## API Call Node + +Connect your agent to external systems — fetch data, book appointments, update records mid-conversation. + +### How to Use + +1. Drag **API Call** node onto the canvas +2. 
Configure the request in the right panel: + +| Field | What to Enter | +|-------|---------------| +| **URL** | Your API endpoint (e.g., `https://api.example.com/appointments`) | +| **Method** | GET, POST, PUT, or DELETE | +| **Headers** | Authentication tokens, content-type | +| **Body** | Data to send (supports variables like `{{customer_phone}}`) | + +### Handling Responses + +Map API response fields to variables: + +```json +// API returns: +{ + "available_slots": ["9:00 AM", "2:00 PM", "4:30 PM"], + "next_available": "9:00 AM" +} + +// Map to variables: +{{available_times}} → available_slots +{{suggested_time}} → next_available +``` + +Your agent can now say: *"I have {{suggested_time}} available. Does that work?"* + +### Branching on Results + +Set up branches for different API outcomes: +- **Success** — Continue to confirmation +- **No availability** — Offer waitlist or callback +- **API error** — Apologize and offer to transfer + + +**Calendar Booking** +- Google Calendar API +- Calendly webhooks +- Microsoft Graph + +**CRM Updates** +- Salesforce REST API +- HubSpot API +- Custom CRM endpoints + +**Data Lookup** +- Customer databases +- Order management systems +- Inventory systems + + +--- + +## Transfer Call Node + +Hand the conversation to a human when needed — for escalations, complex issues, or high-value opportunities. + +### How to Use + +1. Drag **Transfer Call** node to your canvas +2. Configure the transfer: + +| Field | What It Does | +|-------|--------------| +| **Transfer To** | Phone number or SIP address of the destination | +| **Whisper Message** | What the human agent hears before connecting (caller doesn't hear this) | +| **Hold Message** | What the caller hears while waiting | + +### Whisper Messages + +Give your human agents context before they pick up: + +``` +Incoming transfer from AI agent. +Customer: {{customer_name}} +Issue: {{issue_type}} +Account: {{account_number}} +``` + +The human knows what's happening before saying hello. 
+ + +**Set expectations.** Before transferring, tell the caller what's happening and roughly how long they'll wait. + +**Pass context.** Use whisper messages so humans don't make callers repeat everything. + +**Have a backup.** Configure what happens if the transfer fails — voicemail, callback offer, or alternate number. + + +--- + +## End Call Node + +Gracefully conclude the conversation. + + + End Call node + + +### How to Use + +1. Drag **End Call** node to mark conversation endpoints +2. Write a closing message: + +``` +Great, you're all set! Your appointment is confirmed for +{{appointment_date}} at {{appointment_time}}. +Have a wonderful day! +``` + +### Dispositions + +Tag the outcome for analytics: + +| Disposition | When to Use | +|-------------|-------------| +| **Successful** | Goal achieved (booking made, issue resolved) | +| **Unsuccessful** | Goal not achieved (not interested, wrong number) | +| **Transferred** | Handed to human | +| **Callback Requested** | Customer asked for follow-up | + +Dispositions help you track how conversations end and measure agent performance. + +--- + +## Pre-Call API Node + +Execute actions *before* your agent says hello. Perfect for loading personalized data. + + + Pre-Call API node + + +### How to Use + +This special node runs immediately when the call connects, before any conversation happens. + +### Common Use Cases + +| Use Case | What to Fetch | +|----------|---------------| +| **Personalization** | Customer name, account status, preferences | +| **Context loading** | Open tickets, recent orders, appointment history | +| **Routing logic** | VIP status, language preference, time zone | + +### Example + +``` +GET https://api.example.com/customers/{{caller_phone}} + +Response → Variables: +{{customer_name}} +{{account_type}} +{{last_order_date}} +``` + +Now your greeting can be: *"Hi {{customer_name}}! I see you placed an order on {{last_order_date}}. 
How can I help?"* + +--- + +## Post-Call API Node + +Trigger actions *after* the call ends. Great for logging and follow-ups. + + + Post-Call API node + + +### How to Use + +This node runs automatically after every call, regardless of how it ended. + +### Common Use Cases + +| Use Case | What to Send | +|----------|--------------| +| **CRM logging** | Call summary, outcome, duration | +| **Ticket creation** | Create tickets for unresolved issues | +| **Follow-up triggers** | Send email confirmations, SMS receipts | +| **Analytics** | Custom metrics, conversion tracking | + +### Example + +``` +POST https://crm.example.com/activities + +Body: +{ + "contact_phone": "{{caller_phone}}", + "call_duration": "{{call_duration}}", + "outcome": "{{disposition}}", + "summary": "{{call_summary}}", + "agent_id": "voice-bot-001" +} +``` + +--- + +## Connecting Nodes + +Branches are the arrows between nodes — they define your conversation's flow. + +### Creating Connections + +1. Click on a node to select it +2. Scroll to **Branches** in the right panel +3. Click **+ Add Branch** +4. Set the condition and target node +5. Drag from the branch's output handle to the target node + +### Branch Types + +| Type | How It Works | +|------|--------------| +| **Intent-based** | AI interprets caller's meaning — "User wants to reschedule" | +| **Keyword-based** | Matches specific words — "User says 'yes'" | +| **Variable-based** | Checks variable values — `{{account_type}} == 'premium'` | +| **Default** | Catch-all fallback — "Anything else" | + + +Every branch should lead somewhere. Use the **Validation** feature (top right) to find: +- Nodes with no outgoing branches +- Branches pointing to deleted nodes +- Disconnected node clusters + +Fix all validation errors before deploying. 
+ + + +It's fine to loop back (e.g., fallback → same node for clarification), but: +- Set a maximum retry count +- After 2-3 failures, offer transfer or end gracefully +- Don't let callers get stuck in infinite loops + + +--- + +## Variables in Nodes + +Variables make your conversations dynamic. Use them anywhere in prompts. + +### Syntax + +``` +{{variable_name}} +``` + +### System Variables + +| Variable | Contains | +|----------|----------| +| `{{caller_phone}}` | Caller's phone number | +| `{{call_duration}}` | Time elapsed | +| `{{current_date}}` | Today's date | +| `{{current_time}}` | Current time | + +### Custom Variables + +Create your own from: +- **Pre-Call API** responses +- **Contact list** imports (for outbound campaigns) +- **API Call** responses during conversation +- **Collected** from caller responses + +--- + +## Next Steps + + + + Choose how your agent sounds + + + Validate before going live + + diff --git a/fern/products/atoms/pages/platform/building-agents/convo-flow/overview.mdx b/fern/products/atoms/pages/platform/building-agents/convo-flow/overview.mdx new file mode 100644 index 0000000..86576e4 --- /dev/null +++ b/fern/products/atoms/pages/platform/building-agents/convo-flow/overview.mdx @@ -0,0 +1,164 @@ +--- +title: "Conversational Flow Agents" +sidebarTitle: "Overview" +description: "Build structured, goal-oriented agents with visual workflows." +--- + +Conversational Flow agents give you complete control over conversation paths. Design visual workflows with nodes and branches, ensuring callers get exactly the experience you intend. + + + [IMAGE: Visual workflow showing nodes connected with branches, caller moving through] + + +--- + +## How It Works + + + + Create nodes for each conversation step. Connect them to define paths. + + + Define conditions that route callers down different paths based on their responses. + + + As callers speak, the AI guides them through your designed flow. 
+ + + Each path leads to a defined outcome — transfer, booking, qualification, etc. + + + + + [IMAGE: Clean screenshot of workflow builder with node palette, canvas, and example flow] + + +--- + +## When to Use Conversational Flow + + + + - Lead qualification + - Appointment scheduling + - Multi-step intake forms + - Compliance-required conversations + - Processes with clear stages + - When you need specific data collected + + + - Completely open-ended conversations + - Simple FAQ (overkill) + - When flexibility matters more than structure + - Very short interactions + + + +--- + +## Real-World Examples + + + + + [IMAGE: Flow diagram: Greeting → Company Info → Budget → Timeline → Qualify/Disqualify → Transfer] + + + **The flow:** + ``` + [Greeting] → [Company Size?] → [Current Solution?] + → [Budget Range?] → [Timeline?] + → Qualified? → [Transfer to Sales] + → Not Qualified? → [Send Resources] + ``` + + **Why Conversational Flow works:** You need specific information in a specific order. Every lead gets the same thorough qualification. + + + + + [IMAGE: Flow diagram: Verify → Service Type → Date/Time → Confirm → End] + + + **The flow:** + ``` + [Verify Caller] → [What Service?] → [Preferred Date?] + → Check Availability + → Available? → [Confirm Booking] + → Not Available? → [Offer Alternatives] + ``` + + **Why Conversational Flow works:** Booking requires checking availability and confirming multiple details. Branches handle the "what if not available" scenario gracefully. + + + + + [IMAGE: Flow diagram with multiple branches for different coverage types] + + + **The flow:** + ``` + [Verify Identity] → [Reason for Call?] + → New Policy? → [Coverage Type] → [Quote Details] + → Existing Policy? → [Policy Lookup] → [Handle Request] + → File Claim? → [Claim Intake] + ``` + + **Why Conversational Flow works:** Different intents require completely different paths. The visual workflow makes this complexity manageable. 
+ + + +--- + +## Key Concepts + + + [IMAGE: Visual showing nodes, branches, and paths annotated] + + +| Concept | What It Is | Example | +|---------|------------|---------| +| **Node** | A step in the conversation | "Ask about budget" | +| **Branch** | A conditional path | "If budget > $10k, go to Premium Path" | +| **Path** | A route through nodes | Start → Qualify → Transfer | +| **Prompt** | Text in each node | "What's your approximate budget?" | + +--- + +## Pros & Cons + +| Pros | Cons | +|------|------| +| Complete control over conversation | More setup time | +| Predictable, consistent outcomes | Can feel scripted if not done well | +| Easy to ensure data collection | Harder to handle unexpected topics | +| Visual — easy to understand | Requires more maintenance | +| Great for compliance | Branches can get complex | + +--- + +## Get Started + + + | Method | Description | + |--------|-------------| + | **Start from scratch** | Blank canvas. Full control over your workflow design. | + | **Start with Template** | Pre-built flows for common use cases. Customize from there. | + | **Create with AI** | Describe your conversation, and AI generates the flow for you. | + + + + + Build flows from scratch + + + Start with pre-built flows + + + Let AI design your flow + + + + + Master the visual workflow editor + diff --git a/fern/products/atoms/pages/platform/building-agents/convo-flow/quick-start.mdx b/fern/products/atoms/pages/platform/building-agents/convo-flow/quick-start.mdx new file mode 100644 index 0000000..3218678 --- /dev/null +++ b/fern/products/atoms/pages/platform/building-agents/convo-flow/quick-start.mdx @@ -0,0 +1,210 @@ +--- +title: "Quick Start" +sidebarTitle: "Quick Start" +description: "Create your first Convo Flow agent in under 15 minutes." +--- + +This guide walks you through creating a working Convo Flow agent. By the end, you'll have a multi-step conversation flow you can actually call and test. 
+ +--- + +## Step 1: Create Your Agent + +From your dashboard, click **Create Agent** in the top-right corner. + + + Create Agent button on dashboard + + +--- + +## Step 2: Fill In Basic Details + +A modal appears asking for basic information: + + + Create Agent modal + + +| Field | What to Enter | +|-------|---------------| +| **Agent Name** | Something descriptive — "Appointment Reminder", "Lead Qualifier", etc. | +| **Call Type** | **Outbound** (agent makes calls) or **Inbound** (customers call in) | +| **Voice** | Click to preview voices, pick one that matches your brand | +| **Language** | Primary language for the conversation | + +--- + +## Step 3: Select Conversational Flow + +Choose **Conversational flow agent** — it's labeled "For stronger guardrails". + + + Choosing Convo Flow + + +This gives you access to the visual workflow builder. + +--- + +## Step 4: Choose a Starting Point + +**Start from Scratch** +Opens a blank canvas with just a Start node. Best when you have a specific flow in mind. + +**Use a Template** +Pick a pre-built flow for your industry — Healthcare, E-commerce, Real Estate, Banking, Sales. Templates come with nodes and branches already configured that you can customize. + + +Even if your use case is unique, starting from a template is often faster. Delete what you don't need, keep what works. + + +--- + +## Step 5: Create the Agent + +Click **Create agent**. + +Atoms processes your configuration (about 30 seconds), then drops you into the **Workflow Canvas**. + +--- + +## Step 6: Explore the Canvas + + + Workflow canvas interface + + +Take a moment to orient yourself: + +**Left Panel** — Node palette. This is where you drag nodes from. + +**Center** — Your workflow canvas. You'll see a Start node (or a template flow if you picked one). + +**Right Panel** — Node configuration. Click any node to see its settings here. + +**Top Bar** — Test Agent button, Conversation Logs, Lock Agent toggle. 
+ +--- + +## Step 7: Add Your First Node + +Let's build a simple greeting: + +1. From the left panel, drag a **Default Node** onto the canvas +2. Click the node to select it +3. In the right panel, find the **Prompt** field +4. Write your greeting: + +``` +Hi, this is an automated call from Dr. Smith's office. +Am I speaking with {{customer_name}}? +``` + +The `{{customer_name}}` is a variable that gets replaced with actual data during the call. + +--- + +## Step 8: Connect the Nodes + +Draw a line from your Start node to your greeting node: + +1. Hover over the Start node's output handle (small circle on the right) +2. Click and drag to your greeting node's input handle (circle on the left) +3. Release to create the connection + +Your flow now has a path: Start → Greeting. + +--- + +## Step 9: Add Branches + +The caller will respond to your greeting. You need to handle different responses: + +1. With your greeting node selected, find **Branches** in the right panel +2. Click **+ Add Branch** +3. Configure the first branch: + - **Name:** "Confirmed" + - **Condition:** "User confirms their identity" + - **Next Node:** (we'll create this next) + +4. Add another branch: + - **Name:** "Wrong Person" + - **Condition:** "User says it's the wrong number" + +5. Add a fallback: + - **Name:** "Unclear" + - **Condition:** "Anything else" + +--- + +## Step 10: Build Out the Flow + +Now add nodes for each branch: + +**For "Confirmed"** — Add a Default Node with the appointment details: +``` +Great! I'm calling to remind you about your appointment on +{{appointment_date}} at {{appointment_time}}. +Would you like to confirm or reschedule? +``` + +**For "Wrong Person"** — Add an End Call Node with an apology: +``` +I apologize for the confusion. I must have the wrong number. +Have a great day! +``` + +**For "Unclear"** — Loop back to the greeting with a clarification: +``` +I'm sorry, I didn't quite catch that. +Am I speaking with {{customer_name}}? 
+``` + +Connect each branch to its corresponding node. + +--- + +## Step 11: Add an End Point + +Every path needs to end somewhere. Drag an **End Call** node and connect it to successful conversation completions: + +``` +Perfect! Your appointment is confirmed for {{appointment_date}} +at {{appointment_time}}. We'll see you then. Have a wonderful day! +``` + +Set the **Disposition** to "Successful" so you can track outcomes. + +--- + +## Step 12: Test Your Flow + +Click **Test Agent** in the top bar. + +1. Choose your testing method (web call or phone) +2. Walk through the conversation as a customer would +3. Try different paths — confirm identity, say wrong number, give unclear responses + +After testing, check **Conversation Logs** to see exactly how the AI interpreted responses and which paths it took. + +--- + +## You're Live! + +Your Convo Flow agent is ready. From here, you can: + + + + Deep dive into each node type + + + Connect to external systems + + + Make your agent live + + + Run outbound calls at scale + + diff --git a/fern/products/atoms/pages/platform/building-agents/convo-flow/when-to-use.mdx b/fern/products/atoms/pages/platform/building-agents/convo-flow/when-to-use.mdx new file mode 100644 index 0000000..18dac8e --- /dev/null +++ b/fern/products/atoms/pages/platform/building-agents/convo-flow/when-to-use.mdx @@ -0,0 +1,123 @@ +--- +title: "When to Use Conversational Flow" +sidebarTitle: "When to Use" +description: "Determine if Conversational Flow agents are right for your use case." +--- + +Conversational Flow agents give you precise control over every step of a conversation through a visual workflow builder. They're powerful but require more setup than Single Prompt agents. 
+ +--- + +## Ideal Use Cases + +Conversational Flow agents excel when you need **structure and control**: + +### Lead Qualification Workflows + +When you need to ask specific qualifying questions, score responses, and route leads based on criteria, Conversational Flow makes this logic visual and clear. + +### Appointment Booking + +Scheduling with multiple conditions — available times, service types, locations, staff preferences — benefits from defined decision paths. + +### Multi-Step Data Collection + +Insurance intake, loan applications, or any process collecting specific information in a specific order works well with structured flows. + +### Compliance-Required Conversations + +When regulations require specific disclosures, verifications, or sequences, Conversational Flow ensures nothing is skipped. + +### Support Ticket Routing + +Triage calls into specific categories and route to the right team based on a defined decision tree. + +### Surveys and Feedback Collection + +Structured questionnaires with branching logic based on responses. + +--- + +## Not Ideal For + +Conversational Flow is overkill when you need **flexibility over structure**: + +### Completely Open-Ended Conversations + +If callers might discuss anything and you want natural flow, Single Prompt is simpler. + +### Simple FAQ Bots + +For basic Q&A without specific paths, Single Prompt handles variety better. + +### When Flexibility Matters More + +If conversations should feel natural rather than guided, Single Prompt is more appropriate. + +### Quick Prototypes + +Single Prompt is faster to set up when testing ideas. + +--- + +## Decision Checklist + +| Question | If Yes → | +|----------|----------| +| Must steps happen in exact order? | Conversational Flow ✓ | +| Do you need branching based on answers? | Conversational Flow ✓ | +| Are there compliance requirements? | Conversational Flow ✓ | +| Do you need to score or qualify? | Conversational Flow ✓ | +| Is the conversation unpredictable? 
| Single Prompt | +| Do you want quick setup? | Single Prompt | + +--- + +## What You'll Build + +With Conversational Flow, you create a visual workflow: + +``` +[Start] → [Greeting] → [Ask Question 1] + ↓ + ┌───────────────┼───────────────┐ + ↓ ↓ ↓ + [Option A] [Option B] [Other] + ↓ ↓ ↓ + [Next Step] [Different Path] [Clarify] + ↓ ↓ ↓ + ... ... ... + └───────────────┼───────────────┘ + ↓ + [End Call] +``` + +Each box is a **node**. Lines between them are **branches** with conditions. + +--- + +## Example Scenarios + +**Good for Conversational Flow:** +- "Let me qualify you for our service" +- "I need to verify your identity" +- "Let's schedule your appointment" +- "I have a few questions for our survey" + +**Better for Single Prompt:** +- "I have a random question" +- "Tell me about your company" +- "I'm not sure what I need" + +--- + +## What's Next + + + + Start building a Conversational Flow agent + + + Compare with Single Prompt + + diff --git a/fern/products/atoms/pages/platform/building-agents/convo-flow/workflow-builder.mdx b/fern/products/atoms/pages/platform/building-agents/convo-flow/workflow-builder.mdx new file mode 100644 index 0000000..58cd714 --- /dev/null +++ b/fern/products/atoms/pages/platform/building-agents/convo-flow/workflow-builder.mdx @@ -0,0 +1,301 @@ +--- +title: "Workflow Builder" +sidebarTitle: "Workflow Builder" +description: "Master the visual canvas for designing conversation flows." +--- + +The workflow builder is your visual studio for creating conversation flows. Drag, drop, connect, and design exactly how your agent should guide callers. 
+ + + [IMAGE: Full workflow builder with an example flow, all areas visible] + + +--- + +## Interface Overview + + + + + [IMAGE: Left panel showing all node types and sections] + + + The left panel contains everything you can add to your flow: + + **Nodes Section** + | Node | Icon | Quick Description | + |------|------|-------------------| + | Default | 🔷 | Conversation step | + | Transfer Call | 📞 | Handoff to human | + | API Call | ⬆️ | External data request | + | End Call | ✂️ | Terminate conversation | + | Pre-Call API | 🔵🟠 | Load data before call | + | Post-Call API | 🟠🔵 | Send data after call | + + **Integrations Section** + | Integration | Status | + |-------------|--------| + | Salesforce | Available | + | Google Calendar | Coming Soon | + | HubSpot | Coming Soon | + | Google Sheets | Coming Soon | + + **Help Section** + - Need Help? (expandable) + - Keyboard Shortcuts (expandable) + + + + + [IMAGE: Center canvas area with nodes and connections] + + + The center area is your visual workspace: + + - **Nodes** appear as cards you can move around + - **Connections** are lines between nodes + - **Start node** (green pill) is where every call begins + - **Drag** to pan around the canvas + - **Scroll** to zoom in/out + + + | Action | How | + |--------|-----| + | Pan | Click and drag on empty space | + | Zoom | Scroll wheel or +/- buttons | + | Select node | Click on it | + | Multi-select | Shift + click | + | Delete | Select + Delete/Backspace | + + + + + + [IMAGE: Bottom control bar with all buttons] + + + The bottom bar provides quick actions: + + | Button | Icon | Function | + |--------|------|----------| + | Feedback | 👍 | Send feedback to Atoms | + | Auto-layout | ↕️ | Automatically organize nodes | + | Zoom in | + | Closer view | + | Zoom out | − | Wider view | + + + **Use Auto-layout often.** It keeps your flow readable as it grows. 
+ + + + + + [IMAGE: Right panel showing variables button and panel] + + + Click **{ } Variables** to manage flow-level variables: + + | Tab | Contents | + |-----|----------| + | User Defined | Variables you create | + | System | Platform-provided variables | + | API | Variables from API responses | + + → Learn more: [Variables](/atoms/atoms-platform/single-prompt-agents/configuration-panel/variables) + + + +--- + +## Working with Nodes + +### Adding Nodes + + + [IMAGE: Animation-style showing node being dragged from palette] + + + + + Locate the node type in the left palette. + + + Click, hold, and drag onto the canvas. + + + Let go where you want the node. + + + +### Connecting Nodes + + + [IMAGE: Line being drawn from one node to another] + + + + + Each node has connection handles (small circles on edges). + + + Click and drag from an output handle. + + + Release on another node's input handle. + + + + +Connections flow **top to bottom** and **left to right** by convention. Auto-layout helps maintain this. + + +### Configuring Nodes + + + [IMAGE: Selected node with configuration panel visible] + + +Click any node to open its settings. Each type has different options: + +| Node Type | Configuration Options | +|-----------|----------------------| +| **Default** | Prompt text, output branches, conditions | +| **Transfer** | Phone number, transfer type, messages | +| **API Call** | Endpoint, headers, body, response mapping | +| **End Call** | Closing message | + +### Deleting Nodes + + + [IMAGE: Node selected with delete indication] + + +1. **Select** the node (click on it) +2. Press **Delete** or **Backspace** +3. Confirm if prompted + + +Deleting a node also removes all its connections. Make sure to reconnect orphaned nodes. + + +### Moving Nodes + +Simply drag any node to reposition it on the canvas. + + + [IMAGE: Node being dragged to new position] + + + +After moving nodes manually, click **Auto-layout** to clean up the arrangement. 
+ + +--- + +## Working with Connections + +### Understanding Connections + + + [IMAGE: Close-up of a connection line with labels] + + +Connections define the conversation path: +- **Lines** show flow direction +- **Arrows** indicate where the conversation goes next +- **Labels** can show condition names + +### Editing Connections + + + [IMAGE: Connection line being redirected] + + +| Action | How | +|--------|-----| +| Remove | Click connection → Delete | +| Redirect | Delete and create new | +| Add label | Configure in source node's branch settings | + +### Branch Connections + +A single node can have multiple output connections for branching: + + + [IMAGE: One node with three outgoing connections to different targets] + + +→ Learn more: [Conditions & Branching](/atoms/atoms-platform/conversational-flow-agents/workflow-tab/conditions) + +--- + +## Best Practices + + + + [IMAGE: Side-by-side of organized flow vs tangled flow] + + + - Use **Auto-layout** regularly + - Flow top-to-bottom, left-to-right + - Group related nodes visually + - Don't cross lines if avoidable + + + + + [IMAGE: Nodes with clear names like "Ask Budget" vs generic "Step 3"] + + + | Good Names | Bad Names | + |------------|-----------| + | "Verify Identity" | "Step 1" | + | "Budget Question" | "Node 5" | + | "Transfer to Sales" | "Transfer" | + + + + Don't wait until the flow is complete to test. Validate each section: + + 1. Build a few nodes + 2. Test that section + 3. Continue building + 4. Test the new section + 5. Repeat + + + + Before building, sketch out: + - What questions lead to branches? + - What are all possible answers? + - Where does each path end? 
+ + +--- + +## Keyboard Shortcuts + + + [IMAGE: Shortcuts help panel expanded] + + +| Shortcut | Action | +|----------|--------| +| `Delete` / `Backspace` | Delete selected | +| `Cmd/Ctrl + Z` | Undo | +| `Cmd/Ctrl + Shift + Z` | Redo | +| `Cmd/Ctrl + A` | Select all | +| `Escape` | Deselect | + +--- + +## Next Steps + + + + Understand every node type + + + Create complex conversation paths + + diff --git a/fern/products/atoms/pages/platform/building-agents/create-with-ai.mdx b/fern/products/atoms/pages/platform/building-agents/create-with-ai.mdx new file mode 100644 index 0000000..f83bdf5 --- /dev/null +++ b/fern/products/atoms/pages/platform/building-agents/create-with-ai.mdx @@ -0,0 +1,454 @@ +--- +title: "Create with AI" +sidebarTitle: "Create with AI" +description: "Let AI generate your agent from natural language descriptions." +--- + +Don't want to start from scratch? Describe what you need in plain English, and our AI will generate a complete agent — whether Single Prompt or Conversational Flow. + + + [IMAGE: Full Create with AI page showing configuration panel on left and prompts on right] + + +--- + +## How It Works + + + + Select whether you want a Single Prompt or Conversational Flow agent. + + + + Set voice, model, call direction, and optional knowledge base. + + + + Describe your agent's role, conversation flow, behaviors, and end conditions. + + + + AI generates your complete agent. + + + [IMAGE: Loading modal showing "Analyzing requirements → Designing prompt → Creating your agent"] + + + + + The editor opens with your generated agent. Customize as needed. + + + + +**Pro tip from the platform:** "Use caller context (like past tickets) to greet by name. Even a small touch of personalization boosts trust." 
+ + +--- + +## Configuration Options + + + [IMAGE: Left panel showing all dropdowns and toggles] + + +The left panel controls your agent's foundation: + +### Agent Type + + + [IMAGE: Dropdown showing Single Prompt and Conversational Flow options] + + +| Type | What AI Generates | +|------|-------------------| +| **Single Prompt** | One comprehensive prompt | +| **Conversational Flow** | Complete workflow with nodes and branches | + +### Call Direction + + + [IMAGE: Dropdown showing Inbound and Outbound options] + + +| Direction | Use Case | +|-----------|----------| +| **Inbound** | Agent receives calls (support, inquiries) | +| **Outbound** | Agent makes calls (campaigns, reminders) | + +### Emotive Model + + + [IMAGE: Toggle switch for emotive model] + + + +**Beta Feature:** Emotive models use speech-to-speech technology for more natural, emotionally-aware responses. Experimental but impressive. + + +### Voice Selection + + + [IMAGE: Voice selection dropdown with preview button] + + +Choose the voice your agent will use. Click ▶️ to preview before selecting. + +### Knowledge Base + + + [IMAGE: KB dropdown showing None and available KBs] + + +Optionally attach an existing knowledge base for the agent to reference. + +--- + +## The Four Prompts + + + [IMAGE: Right side with four text areas stacked] + + +Each prompt shapes a different aspect of your agent. All require **minimum 50 characters**. + + + + + [IMAGE: First prompt field highlighted] + + + **What it defines:** Who the agent is and what it's trying to accomplish. + + **Label:** "Provide the role & objective of your agent" + + **Good example:** + ``` + You are a friendly and confident customer support agent for + a fintech platform. Your goal is to quickly understand the + user's issue, respond with empathy, and provide clear, + step-by-step solutions that make them feel heard and supported. 
+ + Maintain a calm and reassuring tone throughout the conversation, + ensuring that the user feels confident in your assistance. Only + mark the issue as resolved once the user has confirmed they are + fully satisfied with the outcome. + ``` + + **Tips:** + - Be specific about personality (friendly, professional, casual) + - State the primary goal clearly + - Mention the industry/context + + + + + [IMAGE: Second prompt field highlighted] + + + **What it defines:** The step-by-step conversation structure. + + **Label:** "Provide conversational flow that the agent should follow" + + **Good example:** + ``` + As a NorthLoop Support agent, greet customers warmly and + confirm they are NorthLoop customers. Ask one focused question + to identify if the issue is billing or connectivity. + + For connectivity, get the service address; for billing, collect + the email or phone number. Verify the account and, if needed, + try an alternate identifier. + + Diagnose the issue: check for outages first, then guide through + reboot or cabling steps. If the outage lasted over four hours, + issue a refund. If local fixes fail, schedule a technician. + + Confirm resolution and next steps, invite questions, and close + politely. + ``` + + **Tips:** + - Describe the conversation as a journey + - Include decision points (if X, then Y) + - Cover the main paths callers will take + + + + + [IMAGE: Third prompt field highlighted] + + + **What it defines:** Behavioral guardrails and edge case handling. + + **Label:** "Set your agent's dos, don'ts, and fallback behaviors — especially for edge cases or sensitive situations" + + **Good example:** + ``` + Your agent should always remain calm, polite, and solution-oriented. 
+ + DO: + - Listen actively and show empathy + - Provide clear next steps at every stage + - Confirm understanding before proceeding + - Escalate politely when needed + + DON'T: + - Make assumptions about the issue + - Overpromise on timelines or resolutions + - Share unverified information + - Rush the customer + + FALLBACKS: + - Account lookup failures → Offer alternate verification methods + - Incomplete verification → Politely re-request information + - Unclear issues → Ask clarifying questions, don't guess + - System limitations → Acknowledge honestly and offer alternatives + + Always prioritize accuracy, empathy, and clarity over speed. + ``` + + **Tips:** + - List specific behaviors, not vague goals + - Cover sensitive scenarios (angry callers, edge cases) + - Define what to do when things go wrong + + + + + [IMAGE: Fourth prompt field highlighted] + + + **What it defines:** When and how the conversation should end. + + **Label:** "Specify the conditions under which the call should end" + + **Good example:** + ``` + End the call when: + + 1. Issue fully resolved and customer confirms satisfaction + - Thank them for calling + - Offer additional help before closing + + 2. Customer requests to end the call + - Acknowledge and close gracefully + - Offer callback if issue unresolved + + 3. Successful transfer to human agent + - Confirm transfer is happening + - Brief goodbye + + 4. Customer becomes unresponsive + - After 3 attempts to re-engage + - Leave callback information + + 5. Issue outside scope + - Acknowledge limitation + - Provide alternative resources + - End with helpful next steps + ``` + + **Tips:** + - Cover all the ways a call might end + - Include what to say in each scenario + - Don't forget edge cases (no response, wrong department) + + + +--- + +## Using Templates + + + [IMAGE: Row of template tabs: Real Estate, Credit Card, Customer Support, etc.] + + +Templates pre-fill all four prompts with tested content for common use cases. 
+ +### Available Templates + +| Template | Best For | +|----------|----------| +| **Real Estate** | Property inquiries, viewing scheduling | +| **Credit Card** | Card services, payment questions | +| **Customer Support Electronics** | Tech support, troubleshooting | +| **General Sales** | Lead qualification, product info | +| **Onboarding Assist** | New user guidance, setup help | + +### How to Use + + + + + [IMAGE: Template tab being selected] + + + + + All four prompt fields populate with template content. + + + [IMAGE: All four fields now containing text] + + + + + Update company names, policies, and specifics for your business. + + + + Click **Create agent** to generate. + + + +### Clear All + + + [IMAGE: Clear All button location] + + +Click **Clear All** to reset all prompts and start fresh. + +--- + +## Tips for Single Prompt Agents + +When generating Single Prompt agents: + + + Since there's no visual flow, the prompt needs to cover everything the agent should know and how it should behave. + + Include: + - Detailed role description + - All topics it should handle + - Communication style + - Edge case handling + + + + Even without visual nodes, describe the typical conversation arc: + + ``` + Start with a warm greeting. Ask how you can help. + Listen to the issue. Acknowledge and confirm understanding. + Provide the solution or collect needed information. + Confirm resolution. Offer additional help. Close warmly. + ``` + + + + Single Prompt agents should handle topic jumps gracefully: + + ``` + If the customer changes topics, acknowledge the shift and + adapt. Don't force them back to previous subjects. + ``` + + +--- + +## Tips for Conversational Flow Agents + +When generating Conversational Flow agents: + + + Tell the AI exactly when paths should split: + + ``` + After greeting, ask if calling about billing or technical. 
+ + BILLING PATH: + - Verify account + - Ask about specific charge + - Explain or process refund + + TECHNICAL PATH: + - Identify product + - Troubleshoot steps + - Schedule technician if needed + ``` + + + + List what information must be gathered and when: + + ``` + Must collect before qualifying: + 1. Company name + 2. Company size (employees) + 3. Current solution + 4. Budget range + 5. Decision timeline + ``` + + + + Every path needs a conclusion: + + ``` + End points: + - Qualified lead → Transfer to sales with context + - Not qualified → Send resources, end call + - Requested callback → Schedule and confirm + - Angry customer → Escalate to manager + ``` + + +--- + +## After Generation + +Once AI creates your agent: + + + [IMAGE: Editor with AI-generated content loaded] + + + + + Read through the generated prompt (Single Prompt) or walk through the flow (Conversational Flow). + + + + Click **Test Agent** to try a conversation before making changes. + + + + Adjust anything that doesn't match your vision: + - Edit prompts + - Rearrange nodes + - Add missing branches + - Update conditions + + + + Add features from the Configuration Panel: + - End Call triggers + - Transfer numbers + - Knowledge Base + - Variables + - API Calls + + + +--- + +## Next Steps + + + + Refine your Single Prompt agent + + + Deprecated + + Master workflow building + + + Voice, model, and features + + + Validate before deploying + + diff --git a/fern/products/atoms/pages/platform/building-agents/create-with-ai/configuration.mdx b/fern/products/atoms/pages/platform/building-agents/create-with-ai/configuration.mdx new file mode 100644 index 0000000..8166c35 --- /dev/null +++ b/fern/products/atoms/pages/platform/building-agents/create-with-ai/configuration.mdx @@ -0,0 +1,135 @@ +--- +title: "Configuration Options" +sidebarTitle: "Configuration Options" +description: "Settings available when using Create with AI." 
+--- + +When using Create with AI, the left panel contains configuration options that shape your generated agent. This page explains each setting. + + + [IMAGE: Left panel of Create with AI with all fields visible] + + +--- + +## Configuration Fields + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| Select agent type | Dropdown | Yes | Single prompt or Conversational flow | +| Select agent call direction | Dropdown | Yes | Inbound or Outbound | +| Use Emotive Model | Toggle | No | Enable speech-to-speech with emotion | +| Select a voice | Dropdown | Yes | Choose from voice library | +| Select knowledge base | Dropdown | No | Attach existing KB | + +--- + +## Agent Type + +Choose what type of agent the AI will generate: + +| Option | Description | +|--------|-------------| +| **Single prompt** | AI generates one comprehensive prompt for flexible conversations | +| **Conversational flow** | AI creates a visual workflow with nodes and branches | + +**How to choose:** +- Single prompt → Open-ended, flexible conversations +- Conversational flow → Structured, goal-oriented conversations + +--- + +## Call Direction + +Define whether this agent receives or makes calls: + +| Direction | Description | Use Cases | +|-----------|-------------|-----------| +| **Inbound** | Agent receives calls from customers | Support, FAQ, orders | +| **Outbound** | Agent makes calls to customers | Reminders, campaigns, follow-ups | + +This affects how the AI generates the opening of conversations: +- Inbound: Agent answers and greets caller +- Outbound: Agent initiates and introduces purpose + +--- + +## Emotive Model (Beta) + +Toggle to enable speech-to-speech AI with emotional expression. 
+ +| State | Behavior | +|-------|----------| +| **OFF** | Standard text-to-speech, consistent tone | +| **ON** | Emotive model with natural emotional variation | + +**When to use Emotive Model:** +- Conversations requiring warmth and empathy +- Sales calls needing enthusiasm +- Support calls needing patience + +**Considerations:** +- Beta feature — may have quirks +- More natural sounding +- Slightly higher latency possible + +--- + +## Voice Selection + +Choose the voice your agent will use. + +Clicking the dropdown opens the voice picker: +- Search by language, accent +- Filter by gender, age, model +- Preview voices before selecting + +**Tip:** Choose a voice that matches your brand personality: +- Professional service → Clear, neutral voice +- Friendly support → Warm, approachable voice +- Sales → Energetic, confident voice + +--- + +## Knowledge Base + +Optionally attach a knowledge base for the agent to reference. + +| Option | Description | +|--------|-------------| +| **None** | Agent relies only on prompt | +| **[Your KB]** | Agent can reference KB content | + +**When to attach:** +- Agent needs to answer detailed questions +- You have FAQs, docs, or product info +- Accuracy from specific sources matters + +**Note:** You must create a Knowledge Base first before it appears here. See [Knowledge Base](/atoms/atoms-platform/features/knowledge-base) for setup. + +--- + +## Field Requirements + +| Field | Validation | +|-------|------------| +| Agent type | Must select one | +| Call direction | Must select one | +| Voice | Must select one | +| Emotive Model | Optional toggle | +| Knowledge base | Optional selection | + +You cannot proceed without completing required fields. 
+ +--- + +## What's Next + + + + Master each required prompt + + + Start from pre-built examples + + diff --git a/fern/products/atoms/pages/platform/building-agents/create-with-ai/four-prompts.mdx b/fern/products/atoms/pages/platform/building-agents/create-with-ai/four-prompts.mdx new file mode 100644 index 0000000..7c32689 --- /dev/null +++ b/fern/products/atoms/pages/platform/building-agents/create-with-ai/four-prompts.mdx @@ -0,0 +1,189 @@ +--- +title: "Writing the Four Prompts" +sidebarTitle: "Writing the Four Prompts" +description: "Master each required prompt section in Create with AI." +--- + +Create with AI requires four prompts that define your agent's behavior. Each prompt focuses on a specific aspect, and together they create a complete picture for the AI to work from. + + + [IMAGE: Right panel showing all four prompt input fields] + + + +All prompts require a minimum of 50 characters. + + +--- + +## Prompt 1: Role & Objective + +**Label:** "Provide the role & objective of your agent*" + +**Purpose:** Define who the agent is and what it's trying to achieve. + +### What to Include + +- Agent's identity/persona +- Company they represent +- Primary goal of conversations +- Tone and personality +- Key responsibilities + +### Example + +``` +You are a friendly and confident customer support agent for a +fintech platform. Your goal is to quickly understand the user's +issue, respond with empathy, and provide clear, step-by-step +solutions that make them feel heard and supported. Maintain a +calm and reassuring tone throughout the conversation, ensuring +that the user feels confident in your assistance. Only mark the +issue as resolved once the user has confirmed they are fully +satisfied with the outcome. 
+``` + +### Tips + +- Be specific about personality ("friendly and confident" not just "helpful") +- State the goal clearly +- Include tone guidance + +--- + +## Prompt 2: Conversational Flow + +**Label:** "Provide conversational flow that the agent should follow*" + +**Purpose:** Define the step-by-step structure of conversations. + +### What to Include + +- How to open conversations +- Key questions to ask +- Order of information gathering +- How to handle different scenarios +- When to provide what information + +### Example + +``` +As a NorthLoop Support agent, greet customers warmly and confirm +they are NorthLoop customers. Ask one focused question to identify +if the issue is billing or connectivity. For connectivity, get the +service address; for billing, collect the email or phone number. +Verify the account and, if needed, try an alternate identifier. +Diagnose the issue: check for outages first, then guide through +reboot or cabling steps. If the outage lasted over four hours, +issue a refund. If local fixes fail, schedule a technician. +Confirm resolution and next steps, invite questions, and close +politely. +``` + +### Tips + +- Think step-by-step +- Cover different paths (billing vs. technical) +- Include resolution confirmation + +--- + +## Prompt 3: Dos, Don'ts & Fallbacks + +**Label:** "Set your agent's dos, don'ts, and fallback behaviors — especially for edge cases or sensitive situations*" + +**Purpose:** Define behavioral guardrails and edge case handling. + +### What to Include + +- Things the agent should always do +- Things the agent should never do +- How to handle difficult situations +- Escalation triggers +- Fallback responses + +### Example + +``` +Your agent should always remain calm, polite, and solution-oriented. +Do listen actively, show empathy, and provide clear next steps at +every stage. Don't make assumptions, overpromise, or share +unverified information.
In edge cases like account lookup failures, +incomplete verification, or unclear issues, fall back to offering +general guidance, escalate politely when needed, and reassure the +customer that their concern will be handled promptly. Always +prioritize accuracy, empathy, and clarity over speed. +``` + +### Tips + +- Be explicit about what NOT to do +- Include fallback behaviors +- Cover sensitive situations (angry customers, privacy) + +--- + +## Prompt 4: End Conditions + +**Label:** "Specify the conditions under which the call should end*" + +**Purpose:** Define when and how conversations should conclude. + +### What to Include + +- Successful resolution triggers +- Customer-initiated endings +- Transfer completions +- Timeout/unresponsive handling +- Failed conversation handling + +### Examples + +``` +Issue fully resolved and customer confirms satisfaction +``` + +``` +Customer requests to end the call +``` + +``` +Successful transfer to human agent completed +``` + +``` +Customer becomes unresponsive after 3 attempts to re-engage +``` + +### Tips + +- Cover positive endings (resolved) +- Cover neutral endings (customer done) +- Cover negative endings (can't help) +- Include unresponsive scenarios + +--- + +## Complete Example + +Here's how all four prompts work together: + +| Prompt | Summary | +|--------|---------| +| **1. Role & Objective** | "You are Alex, a support agent for TechCorp. Help with orders and returns. Be friendly and patient." | +| **2. Conversational Flow** | "Greet warmly. Ask if calling about existing order or new question. For orders, get order number. Verify. Help resolve. Confirm satisfaction." | +| **3. Dos, Don'ts & Fallbacks** | "Do: verify before changes. Don't: promise refunds over $100. Fallback: transfer to supervisor for complex issues." | +| **4. End Conditions** | "End when: issue resolved and confirmed, customer says goodbye, successful transfer, or no response after 3 tries." 
| + +--- + +## What's Next + + + + Start from pre-built examples + + + Review the full process + + diff --git a/fern/products/atoms/pages/platform/building-agents/create-with-ai/how-it-works.mdx b/fern/products/atoms/pages/platform/building-agents/create-with-ai/how-it-works.mdx new file mode 100644 index 0000000..de3b624 --- /dev/null +++ b/fern/products/atoms/pages/platform/building-agents/create-with-ai/how-it-works.mdx @@ -0,0 +1,149 @@ +--- +title: "How Create with AI Works" +sidebarTitle: "How It Works" +description: "AI-assisted agent creation for faster setup." +--- + +Create with AI accelerates agent building by generating prompts from your descriptions. Instead of writing everything from scratch, you describe what you want, and AI helps create it. + + + [IMAGE: Create with AI full page showing left config panel and right prompts panel] + + +--- + +## Overview + +Create with AI is a guided process that: + +1. Takes your configuration choices (agent type, voice, etc.) +2. Accepts your descriptions of what the agent should do +3. Generates a complete agent based on your input +4. Delivers a fully customizable agent in the standard editor + +Think of it as a smart starting point. You provide the requirements, AI does the heavy lifting, and you refine the result. + +--- + +## The Process + +### Step 1: Select Create with AI + +From the "Create Agent" modal, click **"Create with AI"**. + +### Step 2: Configure Left Panel + +Set basic agent settings: +- Agent type (Single Prompt or Conversational Flow) +- Call direction (Inbound or Outbound) +- Voice selection +- Knowledge Base (optional) +- Emotive Model toggle (optional) + +### Step 3: Fill Four Prompts (Right Panel) + +Describe your agent across four sections: +1. **Role & Objective** — Who is the agent and what's the goal? +2. **Conversational Flow** — How should conversations progress? +3. **Dos, Don'ts & Fallbacks** — Behavioral guidelines +4. **End Conditions** — When should calls end? 
+ +Each prompt requires at least 50 characters. + +### Step 4: Click Create + +Click **"Create agent"** button. + +### Step 5: Wait for Generation + +A loading modal appears: + + + [IMAGE: Loading modal with progress steps] + + +``` +Hold up, your agent is getting ready + +✅ Analyzing requirements +✅ Designing prompt +⏳ Creating your agent +``` + +Generation takes about 30-60 seconds. + +### Step 6: Review and Customize + +After generation, you land in the standard agent editor. Everything is fully editable: +- Modify the generated prompt +- Adjust voice settings +- Add knowledge base +- Configure all options + +--- + +## What Gets Generated + +Depending on your agent type: + +**Single Prompt Agent:** +- Complete prompt covering all four sections +- Voice and model configuration +- Ready for testing + +**Conversational Flow Agent:** +- Visual workflow with nodes +- Prompts for each node +- Basic branching structure +- Ready for refinement + +--- + +## Pro Tips + + +**Use caller context (like past tickets) to greet by name. Even a small touch of personalization boosts trust.** + + +### Be Specific in Descriptions + +The more detail you provide, the better the output: + +**Vague:** "Help customers with orders" + +**Specific:** "Help customers track existing orders, process returns within 30 days, and answer questions about shipping times. Always verify the order number before providing details." + +### Use Templates as Starting Points + +If your use case is similar to a template, start there. Templates provide well-structured examples that AI can build on. + +### Plan to Refine + +AI generation is a starting point, not a final product. Expect to: +- Review and adjust prompts +- Test thoroughly +- Iterate based on conversations + +--- + +## After Generation + +Your agent is ready but not deployed. Next steps: + +1. **Test** — Use Test Agent to verify behavior +2. **Refine** — Adjust prompts based on testing +3. **Configure** — Set up end call, transfers, webhooks +4. 
**Deploy** — Assign phone number or embed widget + +--- + +## What's Next + + + + Understand all settings + + + Master each prompt section + + diff --git a/fern/products/atoms/pages/platform/building-agents/create-with-ai/templates.mdx b/fern/products/atoms/pages/platform/building-agents/create-with-ai/templates.mdx new file mode 100644 index 0000000..252606d --- /dev/null +++ b/fern/products/atoms/pages/platform/building-agents/create-with-ai/templates.mdx @@ -0,0 +1,165 @@ +--- +title: "Using Templates" +sidebarTitle: "Using Templates" +description: "Start from pre-built examples to accelerate agent creation." +--- + +Templates provide pre-written prompts for common use cases. Instead of starting from scratch, you can use a template as a foundation and customize it for your needs. + + + [IMAGE: Template tabs showing Real Estate, Credit Card, Customer Support, General Sales, Onboarding Assist] + + +--- + +## Available Templates + +| Template | Use Case | +|----------|----------| +| **Real Estate** | Property inquiries, scheduling viewings, buyer/seller support | +| **Credit Card** | Card services, payment inquiries, account management | +| **Customer Support Electronics** | Tech support, troubleshooting, warranty questions | +| **General Sales** | Lead qualification, product information, demo scheduling | +| **Onboarding Assist** | New user guidance, setup help, feature introduction | + +--- + +## How to Use Templates + +### Step 1: Open Create with AI + +From the "Create Agent" modal, select **"Create with AI"**. + +### Step 2: Click a Template Tab + +At the top of the right panel, you'll see template tabs. Click the one closest to your use case. + +### Step 3: Review Pre-Filled Content + +The template fills all four prompt sections with example content tailored to that use case. 
+ +### Step 4: Customize + +Edit each prompt to match your specific needs: +- Change company names +- Adjust products/services mentioned +- Modify procedures to match your process +- Update escalation paths + +### Step 5: Create + +Click **"Create agent"** to generate. + +--- + +## Template Details + +### Real Estate + +Pre-filled for property-focused conversations: +- Greeting potential buyers/sellers +- Qualifying interest level +- Scheduling property viewings +- Answering property questions +- Capturing contact information + +### Credit Card + +Pre-filled for card service support: +- Account verification +- Balance and payment inquiries +- Card activation +- Dispute handling +- Security concerns + +### Customer Support Electronics + +Pre-filled for tech support: +- Troubleshooting common issues +- Warranty verification +- Return/exchange process +- Technical guidance +- Escalation to specialists + +### General Sales + +Pre-filled for lead engagement: +- Initial qualification +- Understanding needs +- Product/service matching +- Demo scheduling +- Follow-up coordination + +### Onboarding Assist + +Pre-filled for new user support: +- Welcome and orientation +- Feature walkthroughs +- Setup assistance +- Best practices +- Resource direction + +--- + +## Customizing Templates + +Templates are starting points. Always customize: + +### Replace Placeholder Content + +Templates use generic names like "[Company Name]" or "[Product]". Replace these with your actual information. + +### Adjust Procedures + +Your processes may differ from the template. Update steps to match how your team actually works. + +### Add Specifics + +Templates are generic by design. Add: +- Your specific products/services +- Your pricing (if applicable) +- Your escalation procedures +- Your verification requirements + +### Remove Irrelevant Sections + +If a template includes something you don't need, remove it. Simpler is often better. 
+ +--- + +## Clear All Button + +If you've selected a template but want to start fresh: + +1. Find the **"Clear All"** button +2. Click to reset all four prompts to blank +3. Start writing from scratch or choose a different template + +--- + +## Best Practices + +### Choose the Closest Match + +If no template is perfect, choose the closest one. It's easier to modify existing content than start from nothing. + +### Test Before Deploying + +Template-based agents still need testing. Run through conversations to verify the generated agent works for your use case. + +### Iterate + +Your first version won't be perfect. Use conversation logs to identify issues and refine the prompts. + +--- + +## What's Next + + + + Master writing effective prompts + + + Explore full configuration options + + diff --git a/fern/products/atoms/pages/platform/building-agents/images/README.md b/fern/products/atoms/pages/platform/building-agents/images/README.md new file mode 100644 index 0000000..bac4bf9 --- /dev/null +++ b/fern/products/atoms/pages/platform/building-agents/images/README.md @@ -0,0 +1,178 @@ +# Building Agents — Image Guide + +Drop screenshots and diagrams here. Name them exactly as listed below. + +--- + +## Overview Page + +| Filename | What to capture | +|----------|-----------------| +| `create-agent-modal.png` | The "Create a new agent" modal showing all 3 options (Start from scratch, Start with Template, Create with AI) | + +--- + +## Single Prompt Section + +| Filename | What to capture | +|----------|-----------------| +| `sp-editor-full.png` | Full Single Prompt editor with a sample prompt filled in. Show top bar, prompt area, right sidebar. 
| +| `sp-editor-blank.png` | Blank Single Prompt editor (fresh agent) | +| `sp-test-modal.png` | "Test Agent" modal showing the 3 test modes | +| `sp-voice-picker.png` | Voice picker panel with filters visible | +| `sp-config-panel.png` | Right sidebar configuration panel with toggles visible | + +--- + +## Conversational Flow Section + +| Filename | What to capture | +|----------|-----------------| +| `cf-builder-full.png` | Workflow builder with a sample flow (5-7 nodes connected) | +| `cf-builder-blank.png` | Empty workflow builder canvas | +| `cf-node-palette.png` | Left panel node palette showing all node types | +| `cf-node-config.png` | A node selected with its configuration panel open | +| `cf-branch-example.png` | A node with multiple branches/conditions visible | + +--- + +## Create with AI + +| Filename | What to capture | +|----------|-----------------| +| `ai-create-full.png` | Full Create with AI page (left config panel + right prompts) | +| `ai-create-loading.png` | The loading modal ("Hold up, your agent is getting ready") | +| `ai-templates.png` | Template tabs visible (Real Estate, Credit Card, etc.) 
| + +--- + +## Configuring Section + +| Filename | What to capture | +|----------|-----------------| +| `config-editor-overview.png` | Full editor with all areas labeled/annotated | +| `config-model-dropdown.png` | Model selection dropdown expanded | +| `config-voice-picker.png` | Voice picker panel open | +| `end-call-modal.png` | Add End Call modal (Name + Description fields) | +| `transfer-call-modal.png` | Transfer Call configuration modal | +| `transfer-call-warm.png` | Warm transfer options (expanded modal) | +| `variables-user.png` | User Defined variables tab | +| `variables-system.png` | System variables tab | +| `variables-api.png` | API variables tab | +| `api-call-basic.png` | API Call modal - top section (Name, Description, LLM Params, URL, Method, Timeout) | +| `api-call-advanced.png` | API Call modal - bottom section (Headers, Query Params, Response Variable Extraction) | +| `config-webhook-modal.png` | Webhook configuration modal | + +### Voice Settings + +| Filename | What to capture | +|----------|-----------------| +| `voice-settings.png` | Full Voice tab with all sections visible | +| `add-pronunciation.png` | Add Pronunciation modal (Word + Pronunciation fields) | + +### Model Settings + +| Filename | What to capture | +|----------|-----------------| +| `model-settings-sp.png` | Model tab for Single Prompt agent | +| `model-settings-cf.png` | Model tab for Conversational Flow agent (shows Global Prompt + KB) | + +### Phone Number + +| Filename | What to capture | +|----------|-----------------| +| `phone-number-settings.png` | Phone Number tab showing dropdown | + +### General Settings + +| Filename | What to capture | +|----------|-----------------| +| `general-settings.png` | General tab showing 3 timeout fields | + +### Widget + +| Filename | What to capture | +|----------|-----------------| +| `widget-embed.png` | Embed code section | +| `widget-mode.png` | Mode selection (Chat/Voice) | +| `widget-allowlist.png` | Allowlist configuration | 
+| `widget-tiny.png` | Widget in Tiny variant | +| `widget-compact.png` | Widget in Compact variant | +| `widget-full.png` | Widget in Full variant | + +### Post-Call Metrics + +| Filename | What to capture | +|----------|-----------------| +| `post-call-metrics-list.png` | Landing page showing list of configured metrics | +| `post-call-disposition.png` | Disposition Metrics form (creating from scratch) | +| `post-call-templates.png` | Templates selection panel | + +--- + +## Testing & Launch + +| Filename | What to capture | +|----------|-----------------| +| `test-webcall.png` | Web Call test interface | +| `test-telephony.png` | Telephony test interface | +| `test-chat.png` | Chat test interface | +| `lock-agent.png` | Lock Agent toggle | +| `convo-logs-list.png` | Call Logs landing page with list of calls | +| `convo-logs-overview.png` | Call details - Overview tab | +| `convo-logs-transcript.png` | Call details - Transcript tab | +| `convo-logs-events.png` | Call details - Events tab | +| `convo-logs-metrics.png` | Call details - Metrics tab | + +--- + +## Deployment — Phone Numbers + +| Filename | What to capture | +|----------|-----------------| +| `phone-numbers.png` | Main Phone Numbers page with list | +| `rent-number.png` | Rent Number modal | +| `import-sip.png` | Import SIP Number modal | + +--- + +## Deployment — Audiences + +| Filename | What to capture | +|----------|-----------------| +| `audiences.png` | Main Audiences list page | +| `upload-csv.png` | Step 1: Upload CSV with consent warning | +| `map-phone.png` | Step 2: Map Phone Number with dropdown + CSV preview | +| `add-contacts.png` | Step 3: Audience Name and Description fields | +| `audience-members.png` | Individual audience view with members table | +| `add-manually.png` | Add members modal — "Add Manually" tab | +| `import-csv.png` | Add members modal — "Import CSV" tab | + +--- + +## Deployment — Campaigns + +| Filename | What to capture | +|----------|-----------------| +| 
`campaigns.png` | Main Campaigns list page | +| `create-campaign.png` | Create Campaign form with all fields | +| `campaign-call-logs.png` | Campaign detail — Call Logs tab | +| `campaign-events.png` | Campaign detail — Campaign Events tab | +| `campaign-executions.png` | Campaign detail — Executions tab | + +--- + +## Analytics + +| Filename | What to capture | +|----------|-----------------| +| `analytics.png` | Full Analytics dashboard with filters, summary cards, charts, and Most Called Agents table | + +--- + +## Notes + +- **Format:** PNG preferred, keep under 500KB each +- **Size:** Capture at 2x resolution if possible for retina displays +- **Annotations:** If you want callouts/arrows, add them before saving +- **Sensitive data:** Blur or use fake data for any real customer info diff --git "a/fern/products/atoms/pages/platform/building-agents/images/Screenshot 2026-01-28 at 9.04.27\342\200\257AM.png" "b/fern/products/atoms/pages/platform/building-agents/images/Screenshot 2026-01-28 at 9.04.27\342\200\257AM.png" new file mode 100644 index 0000000..229cbdd Binary files /dev/null and "b/fern/products/atoms/pages/platform/building-agents/images/Screenshot 2026-01-28 at 9.04.27\342\200\257AM.png" differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/add-contacts.png b/fern/products/atoms/pages/platform/building-agents/images/add-contacts.png new file mode 100644 index 0000000..3bfe0d1 Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/add-contacts.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/add-manually.png b/fern/products/atoms/pages/platform/building-agents/images/add-manually.png new file mode 100644 index 0000000..b65b0c7 Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/add-manually.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/add-pronunciation.png 
b/fern/products/atoms/pages/platform/building-agents/images/add-pronunciation.png new file mode 100644 index 0000000..8e89a3c Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/add-pronunciation.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/analytics.png b/fern/products/atoms/pages/platform/building-agents/images/analytics.png new file mode 100644 index 0000000..f1916c0 Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/analytics.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/api-call-advanced.png b/fern/products/atoms/pages/platform/building-agents/images/api-call-advanced.png new file mode 100644 index 0000000..23fa11c Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/api-call-advanced.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/api-call-basic.png b/fern/products/atoms/pages/platform/building-agents/images/api-call-basic.png new file mode 100644 index 0000000..9c08aa0 Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/api-call-basic.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/audience-members.png b/fern/products/atoms/pages/platform/building-agents/images/audience-members.png new file mode 100644 index 0000000..2c0b26b Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/audience-members.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/audiences.png b/fern/products/atoms/pages/platform/building-agents/images/audiences.png new file mode 100644 index 0000000..5239ddc Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/audiences.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/browse-select-template.png 
b/fern/products/atoms/pages/platform/building-agents/images/browse-select-template.png new file mode 100644 index 0000000..573d28d Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/browse-select-template.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/campaign-call-logs.png b/fern/products/atoms/pages/platform/building-agents/images/campaign-call-logs.png new file mode 100644 index 0000000..5fdfd8b Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/campaign-call-logs.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/campaign-events.png b/fern/products/atoms/pages/platform/building-agents/images/campaign-events.png new file mode 100644 index 0000000..6cf0688 Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/campaign-events.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/campaign-executions.png b/fern/products/atoms/pages/platform/building-agents/images/campaign-executions.png new file mode 100644 index 0000000..b9ea357 Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/campaign-executions.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/campaigns.png b/fern/products/atoms/pages/platform/building-agents/images/campaigns.png new file mode 100644 index 0000000..81f65c4 Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/campaigns.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/cf-add-pronunciation.png b/fern/products/atoms/pages/platform/building-agents/images/cf-add-pronunciation.png new file mode 100644 index 0000000..8e89a3c Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/cf-add-pronunciation.png differ diff --git 
a/fern/products/atoms/pages/platform/building-agents/images/cf-branching-example.png b/fern/products/atoms/pages/platform/building-agents/images/cf-branching-example.png new file mode 100644 index 0000000..d7f0cf3 Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/cf-branching-example.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/cf-editor-settings-tab.png b/fern/products/atoms/pages/platform/building-agents/images/cf-editor-settings-tab.png new file mode 100644 index 0000000..7744c07 Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/cf-editor-settings-tab.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/cf-editor-workflow-tab.png b/fern/products/atoms/pages/platform/building-agents/images/cf-editor-workflow-tab.png new file mode 100644 index 0000000..98417e1 Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/cf-editor-workflow-tab.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/cf-general-settings.png b/fern/products/atoms/pages/platform/building-agents/images/cf-general-settings.png new file mode 100644 index 0000000..76f653e Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/cf-general-settings.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/cf-languages-tab.png b/fern/products/atoms/pages/platform/building-agents/images/cf-languages-tab.png new file mode 100644 index 0000000..045fd96 Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/cf-languages-tab.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/cf-model-settings.png b/fern/products/atoms/pages/platform/building-agents/images/cf-model-settings.png new file mode 100644 index 0000000..0d7f85d Binary files /dev/null and 
b/fern/products/atoms/pages/platform/building-agents/images/cf-model-settings.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/cf-node-api-call.png b/fern/products/atoms/pages/platform/building-agents/images/cf-node-api-call.png new file mode 100644 index 0000000..26f6a36 Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/cf-node-api-call.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/cf-node-default.png b/fern/products/atoms/pages/platform/building-agents/images/cf-node-default.png new file mode 100644 index 0000000..969e042 Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/cf-node-default.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/cf-node-end-call.png b/fern/products/atoms/pages/platform/building-agents/images/cf-node-end-call.png new file mode 100644 index 0000000..faafda4 Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/cf-node-end-call.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/cf-node-post-call-api.png b/fern/products/atoms/pages/platform/building-agents/images/cf-node-post-call-api.png new file mode 100644 index 0000000..7694840 Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/cf-node-post-call-api.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/cf-node-pre-call-api.png b/fern/products/atoms/pages/platform/building-agents/images/cf-node-pre-call-api.png new file mode 100644 index 0000000..a438661 Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/cf-node-pre-call-api.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/cf-node-transfer-call.png b/fern/products/atoms/pages/platform/building-agents/images/cf-node-transfer-call.png new file mode 100644 index 0000000..c5d1310 
Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/cf-node-transfer-call.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/cf-phone-number.png b/fern/products/atoms/pages/platform/building-agents/images/cf-phone-number.png new file mode 100644 index 0000000..0042961 Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/cf-phone-number.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/cf-variables-panel.png b/fern/products/atoms/pages/platform/building-agents/images/cf-variables-panel.png new file mode 100644 index 0000000..969e042 Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/cf-variables-panel.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/cf-voice-settings.png b/fern/products/atoms/pages/platform/building-agents/images/cf-voice-settings.png new file mode 100644 index 0000000..3b5d8ac Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/cf-voice-settings.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/cf-workflow-builder.png b/fern/products/atoms/pages/platform/building-agents/images/cf-workflow-builder.png new file mode 100644 index 0000000..98417e1 Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/cf-workflow-builder.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/choose-start-with-template.png b/fern/products/atoms/pages/platform/building-agents/images/choose-start-with-template.png new file mode 100644 index 0000000..b1d6fff Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/choose-start-with-template.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/configure-agent-editor.png 
b/fern/products/atoms/pages/platform/building-agents/images/configure-agent-editor.png new file mode 100644 index 0000000..209d2f5 Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/configure-agent-editor.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/convo-logs-events.png b/fern/products/atoms/pages/platform/building-agents/images/convo-logs-events.png new file mode 100644 index 0000000..d3ab6d7 Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/convo-logs-events.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/convo-logs-list.png b/fern/products/atoms/pages/platform/building-agents/images/convo-logs-list.png new file mode 100644 index 0000000..6adfe14 Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/convo-logs-list.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/convo-logs-metrics.png b/fern/products/atoms/pages/platform/building-agents/images/convo-logs-metrics.png new file mode 100644 index 0000000..483f774 Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/convo-logs-metrics.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/convo-logs-overview.png b/fern/products/atoms/pages/platform/building-agents/images/convo-logs-overview.png new file mode 100644 index 0000000..63e7247 Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/convo-logs-overview.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/convo-logs-transcript.png b/fern/products/atoms/pages/platform/building-agents/images/convo-logs-transcript.png new file mode 100644 index 0000000..27848a5 Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/convo-logs-transcript.png differ diff --git 
a/fern/products/atoms/pages/platform/building-agents/images/create-agent-modal.png b/fern/products/atoms/pages/platform/building-agents/images/create-agent-modal.png new file mode 100644 index 0000000..00e52d2 Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/create-agent-modal.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/create-campaign.png b/fern/products/atoms/pages/platform/building-agents/images/create-campaign.png new file mode 100644 index 0000000..c9b2158 Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/create-campaign.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/create-with-ai-modal.png b/fern/products/atoms/pages/platform/building-agents/images/create-with-ai-modal.png new file mode 100644 index 0000000..10c9aac Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/create-with-ai-modal.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/create-with-ai.png b/fern/products/atoms/pages/platform/building-agents/images/create-with-ai.png new file mode 100644 index 0000000..efe2dcc Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/create-with-ai.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/customize-template-editor.png b/fern/products/atoms/pages/platform/building-agents/images/customize-template-editor.png new file mode 100644 index 0000000..8fcd36e Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/customize-template-editor.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/dashboard.png b/fern/products/atoms/pages/platform/building-agents/images/dashboard.png new file mode 100644 index 0000000..18bf541 Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/dashboard.png differ 
diff --git a/fern/products/atoms/pages/platform/building-agents/images/end-call-modal.png b/fern/products/atoms/pages/platform/building-agents/images/end-call-modal.png new file mode 100644 index 0000000..f22e0f7 Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/end-call-modal.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/general-settings.png b/fern/products/atoms/pages/platform/building-agents/images/general-settings.png new file mode 100644 index 0000000..93b4afc Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/general-settings.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/import-csv.png b/fern/products/atoms/pages/platform/building-agents/images/import-csv.png new file mode 100644 index 0000000..c9c6250 Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/import-csv.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/import-sip.png b/fern/products/atoms/pages/platform/building-agents/images/import-sip.png new file mode 100644 index 0000000..4f7593c Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/import-sip.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/integrations.png b/fern/products/atoms/pages/platform/building-agents/images/integrations.png new file mode 100644 index 0000000..23c3ee0 Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/integrations.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/kb-add-document.png b/fern/products/atoms/pages/platform/building-agents/images/kb-add-document.png new file mode 100644 index 0000000..cc9245a Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/kb-add-document.png differ diff --git 
a/fern/products/atoms/pages/platform/building-agents/images/kb-create-modal.png b/fern/products/atoms/pages/platform/building-agents/images/kb-create-modal.png new file mode 100644 index 0000000..c490b73 Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/kb-create-modal.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/kb-with-documents.png b/fern/products/atoms/pages/platform/building-agents/images/kb-with-documents.png new file mode 100644 index 0000000..a45c1f5 Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/kb-with-documents.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/language-settings.png b/fern/products/atoms/pages/platform/building-agents/images/language-settings.png new file mode 100644 index 0000000..5b7109b Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/language-settings.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/lock-agent.png b/fern/products/atoms/pages/platform/building-agents/images/lock-agent.png new file mode 100644 index 0000000..4553f60 Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/lock-agent.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/map-phone.png b/fern/products/atoms/pages/platform/building-agents/images/map-phone.png new file mode 100644 index 0000000..44a82ca Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/map-phone.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/model-dropdown.png b/fern/products/atoms/pages/platform/building-agents/images/model-dropdown.png new file mode 100644 index 0000000..915ccbf Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/model-dropdown.png differ diff --git 
a/fern/products/atoms/pages/platform/building-agents/images/model-settings-cf.png b/fern/products/atoms/pages/platform/building-agents/images/model-settings-cf.png new file mode 100644 index 0000000..f17300b Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/model-settings-cf.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/model-settings-sp.png b/fern/products/atoms/pages/platform/building-agents/images/model-settings-sp.png new file mode 100644 index 0000000..2f7f993 Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/model-settings-sp.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/phone-number-settings.png b/fern/products/atoms/pages/platform/building-agents/images/phone-number-settings.png new file mode 100644 index 0000000..447ae5a Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/phone-number-settings.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/phone-numbers.png b/fern/products/atoms/pages/platform/building-agents/images/phone-numbers.png new file mode 100644 index 0000000..3d17608 Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/phone-numbers.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/post-call-disposition.png b/fern/products/atoms/pages/platform/building-agents/images/post-call-disposition.png new file mode 100644 index 0000000..57c1ee0 Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/post-call-disposition.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/post-call-metrics-list.png b/fern/products/atoms/pages/platform/building-agents/images/post-call-metrics-list.png new file mode 100644 index 0000000..192b8cb Binary files /dev/null and 
b/fern/products/atoms/pages/platform/building-agents/images/post-call-metrics-list.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/post-call-templates.png b/fern/products/atoms/pages/platform/building-agents/images/post-call-templates.png new file mode 100644 index 0000000..404477e Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/post-call-templates.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/prompt-editor.png b/fern/products/atoms/pages/platform/building-agents/images/prompt-editor.png new file mode 100644 index 0000000..9aa3bfa Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/prompt-editor.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/rent-number.png b/fern/products/atoms/pages/platform/building-agents/images/rent-number.png new file mode 100644 index 0000000..7d08718 Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/rent-number.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/salesforce-connect.png b/fern/products/atoms/pages/platform/building-agents/images/salesforce-connect.png new file mode 100644 index 0000000..b7e7046 Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/salesforce-connect.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/single-prompt-select.png b/fern/products/atoms/pages/platform/building-agents/images/single-prompt-select.png new file mode 100644 index 0000000..bfb42e5 Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/single-prompt-select.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/sp-editor.png b/fern/products/atoms/pages/platform/building-agents/images/sp-editor.png new file mode 100644 index 0000000..9aa3bfa Binary files /dev/null and 
b/fern/products/atoms/pages/platform/building-agents/images/sp-editor.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/start-from-scratch.png b/fern/products/atoms/pages/platform/building-agents/images/start-from-scratch.png new file mode 100644 index 0000000..8ed16de Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/start-from-scratch.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/template-gallery.png b/fern/products/atoms/pages/platform/building-agents/images/template-gallery.png new file mode 100644 index 0000000..a0496be Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/template-gallery.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/test-agent-from-template.png b/fern/products/atoms/pages/platform/building-agents/images/test-agent-from-template.png new file mode 100644 index 0000000..c2712c6 Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/test-agent-from-template.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/test-chat.png b/fern/products/atoms/pages/platform/building-agents/images/test-chat.png new file mode 100644 index 0000000..e61207d Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/test-chat.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/test-telephony.png b/fern/products/atoms/pages/platform/building-agents/images/test-telephony.png new file mode 100644 index 0000000..81d904d Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/test-telephony.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/test-webcall.png b/fern/products/atoms/pages/platform/building-agents/images/test-webcall.png new file mode 100644 index 0000000..166166f Binary files /dev/null and 
b/fern/products/atoms/pages/platform/building-agents/images/test-webcall.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/transfer-call-modal.png b/fern/products/atoms/pages/platform/building-agents/images/transfer-call-modal.png new file mode 100644 index 0000000..835c0a5 Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/transfer-call-modal.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/transfer-call-warm.png b/fern/products/atoms/pages/platform/building-agents/images/transfer-call-warm.png new file mode 100644 index 0000000..ffdabbe Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/transfer-call-warm.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/upload-csv.png b/fern/products/atoms/pages/platform/building-agents/images/upload-csv.png new file mode 100644 index 0000000..919f358 Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/upload-csv.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/variables-api.png b/fern/products/atoms/pages/platform/building-agents/images/variables-api.png new file mode 100644 index 0000000..0cea222 Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/variables-api.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/variables-system.png b/fern/products/atoms/pages/platform/building-agents/images/variables-system.png new file mode 100644 index 0000000..9336235 Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/variables-system.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/variables-user.png b/fern/products/atoms/pages/platform/building-agents/images/variables-user.png new file mode 100644 index 0000000..1883754 Binary files /dev/null and 
b/fern/products/atoms/pages/platform/building-agents/images/variables-user.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/voice-picker.png b/fern/products/atoms/pages/platform/building-agents/images/voice-picker.png new file mode 100644 index 0000000..c7fe423 Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/voice-picker.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/voice-settings.png b/fern/products/atoms/pages/platform/building-agents/images/voice-settings.png new file mode 100644 index 0000000..4c6b20f Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/voice-settings.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/webhook-agent.png b/fern/products/atoms/pages/platform/building-agents/images/webhook-agent.png new file mode 100644 index 0000000..5cf932b Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/webhook-agent.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/webhook-create.png b/fern/products/atoms/pages/platform/building-agents/images/webhook-create.png new file mode 100644 index 0000000..f227cf1 Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/webhook-create.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/webhook-detail.png b/fern/products/atoms/pages/platform/building-agents/images/webhook-detail.png new file mode 100644 index 0000000..683d4b9 Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/webhook-detail.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/webhook-modal.png b/fern/products/atoms/pages/platform/building-agents/images/webhook-modal.png new file mode 100644 index 0000000..03eaca7 Binary files /dev/null and 
b/fern/products/atoms/pages/platform/building-agents/images/webhook-modal.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/webhooks-dashboard.png b/fern/products/atoms/pages/platform/building-agents/images/webhooks-dashboard.png new file mode 100644 index 0000000..747d8dd Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/webhooks-dashboard.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/widget-compact.png b/fern/products/atoms/pages/platform/building-agents/images/widget-compact.png new file mode 100644 index 0000000..5cbcc0d Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/widget-compact.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/widget-full-variant.png b/fern/products/atoms/pages/platform/building-agents/images/widget-full-variant.png new file mode 100644 index 0000000..c2398b4 Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/widget-full-variant.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/widget-full.png b/fern/products/atoms/pages/platform/building-agents/images/widget-full.png new file mode 100644 index 0000000..e68035a Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/widget-full.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/images/widget-tiny.png b/fern/products/atoms/pages/platform/building-agents/images/widget-tiny.png new file mode 100644 index 0000000..b5d95ed Binary files /dev/null and b/fern/products/atoms/pages/platform/building-agents/images/widget-tiny.png differ diff --git a/fern/products/atoms/pages/platform/building-agents/overview.mdx b/fern/products/atoms/pages/platform/building-agents/overview.mdx new file mode 100644 index 0000000..fff5936 --- /dev/null +++ b/fern/products/atoms/pages/platform/building-agents/overview.mdx @@ 
-0,0 +1,116 @@ +--- +title: "Building Agents" +sidebarTitle: "Overview" +description: "Understand voice AI agents and choose the right type for your use case." +--- + +## What is an Agent? + +An agent is an AI-powered voice assistant that handles phone conversations in real time. It listens, understands, and responds — all within milliseconds. Agents can answer questions, look up information, take actions through connected tools, and guide callers through processes, whether that's checking an order, booking an appointment, or resolving a support issue. + +Unlike basic IVR systems or scripted bots, agents reason through conversations. They understand context, remember what was said earlier in the call, and adapt their responses accordingly. To the caller, it feels like talking to a knowledgeable human. + +--- + +## Two Ways to Build + +Atoms gives you two approaches. They solve different problems. + + + + **One prompt. Infinite conversations.** + + You write a single set of instructions — who the agent is, what it knows, how it behaves. The AI handles whatever comes next. + + Single Prompt agents can use **tools** dynamically. Need to check inventory? Look up an order? Book an appointment? The agent decides what to do based on the conversation. + + ``` + You're Alex from Rivian support. Help owners with + charging, features, and service questions. + + You can: + - Look up vehicle details by VIN + - Check service center availability + - Schedule appointments + + Be helpful and genuine. If something needs hands-on + service, offer to book it. + ``` + + The caller asks about charging, then mentions a weird noise, then wants to schedule service — the agent adapts, using the right tools as needed. + + + + **A visual workflow. Predictable outcomes.** + + You design the conversation as a flowchart — steps for each part, branches for different responses. The AI guides callers through your defined paths. + + ``` + [Greeting] → [Verify Identity] → [What's the issue?] 
+ ↓ + ┌─────────────────────┼─────────────────────┐ + ↓ ↓ ↓ + [Billing] [Technical] [Other] + ↓ ↓ ↓ + [Lookup Account] [Troubleshoot] [Transfer] + ``` + + Every caller goes through the same steps. You control exactly what gets asked, when tools get called, and where each path leads. Nothing is left to chance. + + + +--- + +## Which Should You Choose? + +| | Single Prompt | Conversational Flow | +|---|---|---| +| **How you build** | Write one prompt | Design a visual flowchart | +| **Conversation style** | Flexible, adapts on the fly | Structured, follows defined paths | +| **Tool usage** | Agent decides when to use tools | You define exactly when tools run | +| **Best for** | Support, FAQ, info lines | Lead qualification, booking, intake | +| **Setup time** | Minutes | Longer, but more control | + +**Decision guide:** + +| If you need... | Choose | +|----------------|--------| +| Quick setup with minimal configuration | Single Prompt | +| Agent to handle unpredictable questions | Single Prompt | +| Specific data collected in a specific order | Conversational Flow | +| Different paths based on caller responses | Conversational Flow | +| Compliance with a defined script | Conversational Flow | +| Flexibility to handle topic changes | Single Prompt | + + +**Not sure?** Start with Single Prompt. It's faster to set up, and you'll quickly learn whether you need more structure. + + +--- + +## Get Started + +To create an agent, head to **Agents** in the sidebar, then click the **Create Agent** button in the top right. + + + ![Create agent modal](images/create-agent-modal.png) + + +| Method | What it is | +|--------|------------| +| **Start from scratch** | Blank canvas. Full control over every setting. | +| **Start with Template** | Pre-built starting points for common use cases like support, sales, or booking. | +| **Create with AI** | Describe what you want in plain language, and AI generates the agent for you. 
| + +All three methods work for both Single Prompt and Conversational Flow agents. + + + + Flexible conversations powered by one prompt + + + Deprecated + + Structured workflows with visual builder + + diff --git a/fern/products/atoms/pages/platform/building-agents/single-prompt/ai-assisted.mdx b/fern/products/atoms/pages/platform/building-agents/single-prompt/ai-assisted.mdx new file mode 100644 index 0000000..35d7f7b --- /dev/null +++ b/fern/products/atoms/pages/platform/building-agents/single-prompt/ai-assisted.mdx @@ -0,0 +1,91 @@ +--- +title: "AI-Assisted Creation" +sidebarTitle: "AI-Assisted" +description: "Let AI help you build your Single Prompt agent." +--- + +Don't want to start from scratch? Describe what you need, and our AI will generate a complete Single Prompt agent for you. + + + [IMAGE: Create with AI page showing configuration panel and prompt inputs] + + +--- + +## How It Works + + + + Fill in four prompts describing what you want your agent to do. + + + Choose voice, model, and optional knowledge base. + + + AI generates your complete agent. + + + Your agent opens in the editor — tweak as needed. + + + + + [IMAGE: Loading modal showing "Analyzing requirements → Designing prompt → Creating your agent"] + + +--- + +## Single Prompt AI Tips + +When using Create with AI for Single Prompt agents: + + + The more context you provide about the agent's purpose, the better the generated prompt. + + **Instead of:** "Customer support agent" + + **Try:** "A friendly customer support agent for a B2B software company that helps users with account issues, billing questions, and basic troubleshooting" + + + + Include personality traits in your description — the AI will weave them into the prompt. + + **Example:** "Warm and approachable, uses simple language, occasionally adds light humor" + + + + List the main subjects your agent should handle. The AI will structure the prompt around these. 
+ + **Example:** "Handles questions about pricing tiers, feature comparisons, trial extensions, and upgrade paths" + + + + Tell the AI what the agent should NOT do — this creates important guardrails. + + **Example:** "Should never quote specific prices, always directs pricing questions to sales team" + + +--- + +## Learn More + +The full Create with AI guide covers everything — from the four prompts to templates: + + + Complete guide to AI-assisted agent creation + + +--- + +## After Generation + +Once AI generates your agent, it opens in the standard editor. From there: + + + + Polish the AI-generated content + + + Adjust voice, model, and features + + diff --git a/fern/products/atoms/pages/platform/building-agents/single-prompt/creating.mdx b/fern/products/atoms/pages/platform/building-agents/single-prompt/creating.mdx new file mode 100644 index 0000000..c0f0f8a --- /dev/null +++ b/fern/products/atoms/pages/platform/building-agents/single-prompt/creating.mdx @@ -0,0 +1,154 @@ +--- +title: "Creating a Single Prompt Agent" +sidebarTitle: "Creating Your Agent" +description: "Step-by-step guide to building a Single Prompt agent." +--- + +This guide walks you through creating a Single Prompt agent from scratch. By the end, you'll have a working agent ready for testing. + + + [IMAGE: Blank single prompt editor with Prompt Section, Configuration Panel, and Left Sidebar labeled] + + +--- + +## Steps to Create + +### Step 1: Start Creation + +1. Click the green **"Create Agent"** button (top right of dashboard) +2. Select **"Start from scratch"** +3. Choose **"Single Prompt"** + +The agent editor opens. 
+ +--- + +### Step 2: Understand the Layout + +The editor has three main areas: + +| Area | Location | Purpose | +|------|----------|---------| +| **Prompt Section** | Top bar | Model, Voice, Language dropdowns | +| **Prompt Editor** | Center | Write your agent's instructions | +| **Configuration Panel** | Right sidebar | End Call, Transfer, KB, Variables, API Calls | + +**Left Sidebar Navigation:** + +| Item | Icon | Description | +|------|------|-------------| +| Prompt | 📄 | Main prompt editor (you are here) | +| Agent Settings | ⚙️ | Voice, Model, Phone, Webhook, General tabs | +| Widget | `` | Embed widget configuration | +| Integrations | 🔗 | Third-party connections | +| Post Call Metrics | 📊 | Analytics configuration | + +**Top Right Actions:** + +| Button | Function | +|--------|----------| +| Convo Logs | View conversation transcripts | +| Lock Agent | Prevent accidental edits | +| Test Agent | Test via Telephony/Web/Chat | + +**Header Info:** +- Back arrow (←) — return to agents list +- Agent name — click to rename +- Agent ID — click to copy + +--- + +### Step 3: Configure Basic Settings + +**In the Prompt Section (top bar):** + +1. **Model** — Select your LLM (GPT-4o recommended) +2. **Voice** — Choose from the voice library +3. **Language** — Set primary language + +Click each dropdown to see options. For voices, use the play button to preview. + +--- + +### Step 4: Write Your Prompt + +In the center Prompt Editor, write instructions for your agent. A good prompt includes: + +1. **Role** — Who is the agent? +2. **Objective** — What should it accomplish? +3. **Key Information** — What should it know? +4. **Guidelines** — How should it behave? +5. **End Conditions** — When should it end calls? + +**Example Prompt:** + +``` +You are an agent which tells the users about Smallest.ai. + +Smallest.ai is a unified AI platform that specializes in real-time +applications using small language models (SLMs). 
The company focuses +on providing fast, efficient, and hyper-personalized AI solutions. + +Key advantages to mention: +1. Small Models: Easier integrations and faster fine-tuning +2. Hyper Personalization: SLMs that actively learn from interactions +3. Minimal Latency: 100ms streaming responses +4. On Edge Deployment: Works from mobile to enterprise clouds +5. Low Cost: 10x cheaper than traditional large models + +Be friendly, concise, and helpful. Keep responses under 30 words +when possible. If you don't know something, say so honestly. +``` + +--- + +### Step 5: Configure Right Panel (Optional) + +The Configuration Panel offers additional features: + +| Feature | Purpose | +|---------|---------| +| End Call | Define when AI terminates calls | +| Transfer Call | Set up human handoffs | +| Knowledge Base | Attach reference documents | +| Variables | Add dynamic values | +| API Calls | Connect external services | + +Toggle each feature ON and click the settings icon (⚙️) to configure. + +--- + +### Step 6: Test Your Agent + +1. Click **"Test Agent"** (top right) +2. Choose test mode: + - **Telephony Call** — Real phone call + - **Web Call** — Browser voice call + - **Chat** — Text-only +3. Have a conversation +4. Check Convo Logs to review + +--- + +## Your Agent is Ready + +You now have a working Single Prompt agent. 
From here, you can: + +- Refine your prompt based on test conversations +- Add a Knowledge Base for more information +- Configure voice settings in Agent Settings +- Deploy to a phone number or widget + +--- + +## What's Next + + + + Master prompt engineering + + + Explore all configuration options + + diff --git a/fern/products/atoms/pages/platform/building-agents/single-prompt/from-template.mdx b/fern/products/atoms/pages/platform/building-agents/single-prompt/from-template.mdx new file mode 100644 index 0000000..0795392 --- /dev/null +++ b/fern/products/atoms/pages/platform/building-agents/single-prompt/from-template.mdx @@ -0,0 +1,236 @@ +--- +title: "From Template" +sidebarTitle: "From Template" +description: "Jumpstart your Single Prompt agent with pre-built templates." +--- + +Templates give you a proven starting point. Pick one that matches your use case, customize it, and you're ready to go. + + + [IMAGE: Grid of template cards showing different use cases] + + +--- + +## Available Templates + + + + Handle support inquiries, troubleshooting, and general help. Pre-configured with common support patterns. + + + Qualify leads, answer product questions, and guide potential customers. + + + Handle property inquiries, schedule viewings, and collect buyer/renter information. + + + Handle card services, payment inquiries, and account questions. + + + Technical troubleshooting and product support for electronics. + + + Guide new users through setup and initial questions. + + + +--- + +## How to Use a Template + + + + From your dashboard, click the green **Create Agent** button. + + + [IMAGE: Dashboard with Create Agent button highlighted] + + + + + Choose the second option in the modal. + + + [IMAGE: Modal with "Start with Template" highlighted] + + + + + Select the Single Prompt agent type. + + + [IMAGE: Single Prompt option selected] + + + + + Explore available templates. Click any to preview. 
+ + + [IMAGE: Template cards with preview option] + + + + + Click **Use Template** to load it into the editor. + + + [IMAGE: Editor with template prompt pre-filled] + + + + +--- + +## What Templates Include + +Each template comes pre-configured with: + +| Component | What's Included | +|-----------|-----------------| +| **Prompt** | Complete prompt with role, objectives, guidelines | +| **Suggested Voice** | Voice that fits the use case | +| **Example Scenarios** | Common conversations the agent should handle | +| **Best Practices** | Tips specific to that use case | + + + [IMAGE: Editor showing template with annotations pointing to included elements] + + +--- + +## Customizing Your Template + +Templates are starting points, not final products. Always customize for your business. + + + + Replace placeholder text with your specifics: + + ```diff + - You are a support agent for [Company]. + + You are a support agent for Acme Corporation. + + - Our support hours are [hours]. + + Our support hours are 9am-6pm EST. + ``` + + + [IMAGE: Prompt with [Company] and [hours] placeholders visible] + + + + + Update with your actual business rules: + + ```diff + - Returns accepted within [X] days. + + Returns accepted within 30 days with receipt. + + - Shipping takes [X-X] business days. + + Shipping takes 3-5 business days. + ``` + + + [IMAGE: Prompt section with policy text highlighted] + + + + + Adjust the personality to match your brand: + + ```diff + - Be professional and courteous. + + Be warm, friendly, and occasionally use casual language. + + Feel free to use emojis when appropriate! 😊 + ``` + + + [IMAGE: Behavioral guidelines section of prompt] + + + + + +**Always test after customizing.** Template defaults work well together — your changes might need fine-tuning. 
+ + +--- + +## Template Example: Customer Support + +Here's what the Customer Support template looks like: + + + [IMAGE: Full editor view with Customer Support template loaded] + + +``` +You are a friendly customer support agent for [Company Name]. Your +mission is to help customers quickly and efficiently while maintaining +a positive, helpful attitude. + +WHAT YOU HANDLE: +- Account questions and updates +- Billing and payment inquiries +- Product troubleshooting +- Returns and exchanges +- General information + +KEY POLICIES: +- Returns: [X] days with receipt +- Support hours: [hours] +- Response time: [timeframe] + +HOW TO BEHAVE: +- Be friendly and patient +- Listen before solving +- Confirm understanding +- Keep responses concise (under 30 words) +- Offer alternatives when possible +- Know when to escalate + +IF YOU CAN'T HELP: +- Acknowledge the limitation honestly +- Offer to transfer to a specialist +- Provide alternative contact methods + +END THE CALL WHEN: +- Issue is resolved and confirmed +- Customer thanks you or says goodbye +- You've successfully transferred them +``` + +--- + +## Tips for Template Success + + + Even if no template is perfect, pick the closest one. It's easier to modify an existing structure than start from nothing. + + + + Templates are organized intentionally. Keep the section headers and overall structure — just replace the content. + + + + After customizing, test with actual questions your customers ask. Use Convo Logs to refine. + + + + Review conversation logs after launch. Update your prompt based on what works and what doesn't. 
+ + +--- + +## Next Steps + + + + Master prompt engineering + + + Validate before deploying + + diff --git a/fern/products/atoms/pages/platform/building-agents/single-prompt/manual-setup.mdx b/fern/products/atoms/pages/platform/building-agents/single-prompt/manual-setup.mdx new file mode 100644 index 0000000..00ce4be --- /dev/null +++ b/fern/products/atoms/pages/platform/building-agents/single-prompt/manual-setup.mdx @@ -0,0 +1,292 @@ +--- +title: "Manual Setup" +sidebarTitle: "Manual Setup" +description: "Build a Single Prompt agent from scratch with complete control." +--- + +Starting from scratch gives you a blank canvas and full control. This guide walks you through every step. + + + [IMAGE: Dashboard with green Create Agent button highlighted] + + +--- + +## Step 1: Start Creation + + + + Find the green **Create Agent** button in the top right of your dashboard. + + + [IMAGE: Close-up of Create Agent button] + + + + + In the modal that appears, choose the first option. + + + [IMAGE: Modal with "Start from scratch" option highlighted] + + + + + Select the Single Prompt agent type. + + + [IMAGE: Modal showing Single Prompt option with icon] + + + + +The editor opens with a blank canvas ready for your prompt. + +--- + +## Step 2: Meet the Editor + + + [IMAGE: Full editor screenshot with numbered callouts for each area] + + +### Editor Layout + +| Area | Location | What It Does | +|------|----------|--------------| +| **Prompt Section** | Top bar | Model, voice, and language selection | +| **Prompt Editor** | Center | Where you write your agent's instructions | +| **Config Panel** | Right sidebar | End call, transfer, KB, variables, APIs | +| **Navigation** | Left sidebar | Switch between Prompt, Settings, Widget, etc. 
| + + + | Item | Purpose | + |------|---------| + | **Prompt** | Main prompt editor (current view) | + | **Agent Settings** | Voice, model, phone, webhook, general tabs | + | **Widget** | Embed configuration for web | + | **Integrations** | Third-party connections | + | **Post Call Metrics** | Analytics setup | + + +--- + +## Step 3: Configure Basics + +Before writing your prompt, set up the fundamentals in the **Prompt Section** (top bar). + + + [IMAGE: Top bar showing Model, Voice, Language dropdowns] + + +### Select Your Model + +Click the **Model** dropdown to choose your LLM. + + + [IMAGE: Dropdown showing available models like GPT-4o] + + + +**GPT-4o** is recommended for most use cases — great balance of capability and speed. + + +→ Learn more: [Model Selection](/atoms/atoms-platform/single-prompt-agents/prompt-section/model-selection) + +### Select Your Voice + +Click the **Voice** dropdown to open the voice picker. + + + [IMAGE: Voice picker showing search, filters, and voice list with play buttons] + + +- Use **filters** to narrow by language, accent, gender, age +- Click **▶️** to preview any voice +- Click a voice name to select it + +→ Learn more: [Voice Selection](/atoms/atoms-platform/single-prompt-agents/prompt-section/voice-selection) + +### Select Language + +Choose the primary language for your agent's responses. + + + [IMAGE: Language dropdown showing available languages] + + + +Available languages depend on your selected voice. Choose voice first. + + +→ Learn more: [Language Selection](/atoms/atoms-platform/single-prompt-agents/prompt-section/language-selection) + +--- + +## Step 4: Write Your Prompt + +Now the important part — telling your agent who to be and how to behave. + + + [IMAGE: Clean prompt editor with blinking cursor] + + +### Prompt Structure + +A great prompt covers these elements: + + + + Who is this agent? What company do they represent? + + ``` + You are a friendly customer service agent for Acme Inc. 
+ ``` + + + + What's the agent trying to accomplish? + + ``` + Your goal is to help customers with questions about + their orders, returns, and account issues. + ``` + + + + What does the agent need to know? + + ``` + Key policies: + - Returns accepted within 30 days + - Free shipping on orders over $50 + - Support hours: 9am-6pm EST + ``` + + + + How should the agent communicate? + + ``` + Be friendly and patient. Keep responses concise + (under 30 words when possible). If you don't know + something, say so honestly. + ``` + + + + When should the call end? + + ``` + End the call when: + - The customer's issue is resolved + - They explicitly say goodbye + - You've transferred them to a human + ``` + + + +### Complete Example + + + [IMAGE: Editor showing complete prompt below] + + +``` +You are a friendly customer service agent for Acme Inc. Your goal +is to help customers with questions about orders, returns, and +account issues. + +Key policies: +- Returns accepted within 30 days with receipt +- Free shipping on orders over $50 +- Support hours: 9am-6pm EST Monday-Friday + +Behavior guidelines: +- Be friendly, patient, and professional +- Keep responses under 30 words when possible +- If you don't know something, say so honestly +- Offer to transfer to a human for complex issues + +End the call when: +- The customer's issue is resolved +- They say goodbye or thank you +- You've transferred them to a specialist +``` + +→ Deep dive: [Writing Effective Prompts](/atoms/atoms-platform/single-prompt-agents/prompt-section/writing-prompts) + +--- + +## Step 5: Configure Features (Optional) + +The **Configuration Panel** (right sidebar) offers additional capabilities. 
+ + + [IMAGE: Right sidebar showing End Call, Transfer Call, KB, Variables, API toggles] + + +| Feature | What It Does | Learn More | +|---------|--------------|------------| +| **End Call** | Define when/how AI ends calls | [End Call →](/atoms/atoms-platform/single-prompt-agents/configuration-panel/end-call) | +| **Transfer Call** | Set up human handoffs | [Transfer Call →](/atoms/atoms-platform/single-prompt-agents/configuration-panel/transfer-call) | +| **Knowledge Base** | Attach reference documents | [Knowledge Base →](/atoms/atoms-platform/single-prompt-agents/configuration-panel/knowledge-base) | +| **Variables** | Use dynamic values in prompts | [Variables →](/atoms/atoms-platform/single-prompt-agents/configuration-panel/variables) | +| **API Calls** | Connect external services | [API Calls →](/atoms/atoms-platform/single-prompt-agents/configuration-panel/api-calls) | + +Toggle any feature **ON**, then click the **⚙️** icon to configure. + +--- + +## Step 6: Test Your Agent + +Time to hear your agent in action. + + + [IMAGE: Top right showing Test Agent button, with modal showing three test modes] + + + + + Find it in the top right corner of the editor. + + + + | Mode | Best For | + |------|----------| + | **Web Call** | Quick voice testing in browser | + | **Telephony** | Real phone call experience | + | **Chat** | Text-only logic testing | + + + + Test different scenarios. Try edge cases. + + + + Click **Convo Logs** to see the transcript and identify improvements. + + + +→ Learn more: [Testing Your Agent](/atoms/atoms-platform/analytics-logs/testing) + +--- + +## You're Live! + +Your Single Prompt agent is ready. 
From here you can: + + + + Master prompt engineering + + + Voice, model, webhooks & more + + + Give your agent reference docs + + + Assign a real phone number + + diff --git a/fern/products/atoms/pages/platform/building-agents/single-prompt/overview.mdx b/fern/products/atoms/pages/platform/building-agents/single-prompt/overview.mdx new file mode 100644 index 0000000..6d3d349 --- /dev/null +++ b/fern/products/atoms/pages/platform/building-agents/single-prompt/overview.mdx @@ -0,0 +1,104 @@ +--- +title: "Single Prompt Agents" +sidebarTitle: "Overview" +description: "Build flexible, conversational agents powered by a single prompt." +--- + +A Single Prompt agent runs on one set of instructions. You write a prompt that defines who the agent is, what it knows, and how it should behave — and that prompt governs the entire conversation. The AI interprets your instructions and applies them dynamically, adapting to whatever direction the caller takes. + +--- + +## How It Works + +Think of your prompt as a briefing. You're telling the agent: here's your role, here's what you know, here's how to handle situations. The AI internalizes this once and then uses that understanding for every exchange. + +When a caller speaks, the agent doesn't follow a script. It reasons through the conversation based on your instructions. This is why Single Prompt agents feel natural — they're not jumping between pre-written responses, they're thinking through each moment. + +--- + +## Capabilities + +**Dynamic tool usage.** You can connect your agent to APIs, databases, and external services. The agent decides when to use them based on the conversation. If a caller asks about their order, the agent can look it up. If they want to book something, it can check availability. + +**Conversation memory.** Everything said in the call stays in context. The agent remembers details from earlier in the conversation and can reference them naturally. 
+ +**Handling the unexpected.** Without a rigid flow, the agent adapts to topic changes, follow-up questions, and tangents. Real conversations rarely follow a straight line — Single Prompt agents are designed for that reality. + +--- + +## Building a Single Prompt Agent + +You'll create three things: + +**1. The Prompt** + +This is the core. Your prompt should cover: +- **Identity** — Who is this agent? What's their name, role, personality? +- **Knowledge** — What do they know? Products, policies, FAQs, context. +- **Behavior** — How should they sound? What's off-limits? How do they handle edge cases? +- **Endings** — When should the call wrap up? When should they transfer? + +**2. Tools** (optional) + +If you want the agent to take actions — look up records, check calendars, create tickets — you'll configure the tools it can access and describe when to use them. + +**3. Voice and Model** + +Pick the voice your agent speaks with and the AI model that powers its reasoning. + +--- + +## The Editor + +Once you create a Single Prompt agent, you land in the editor — your workspace for everything. + + + ![Single Prompt editor](../images/sp-editor-full.png) + + +| Area | Location | What It Does | +|------|----------|--------------| +| **Prompt Section** | Top bar | Model, voice, and language selection | +| **Prompt Editor** | Center | Where you write your agent's instructions | +| **Config Panel** | Right sidebar | End call, transfer, knowledge base, variables, APIs | +| **Navigation** | Left sidebar | Switch between Prompt, Settings, Widget, Integrations | + +→ [Full editor guide](/atoms/atoms-platform/single-prompt-agents/overview) + +--- + +## After You Launch + +Once your agent is live, refinement happens in a few places: + +**Prompt updates.** You'll review call logs, find where the agent struggled, and tighten your instructions. Most improvements come from prompt iteration. 
+ +**Voice tuning.** Adjust speech speed, add pronunciation rules for tricky words, tweak turn-taking behavior. + +**Tool adjustments.** Add new capabilities, modify API connections, or change when tools get triggered. + +**Configuration.** Fine-tune end call conditions, transfer settings, timeout behavior, and more. + +--- + +## Get Started + + + | Method | Description | + |--------|-------------| + | **Start from scratch** | Blank canvas. Full control over every setting. | + | **Start with Template** | Pre-built prompts for common use cases. Customize from there. | + | **Create with AI** | Describe what you want, and AI generates the prompt for you. | + + + + + Set up your agent + + + Voice, model, tools & more + + + Validate before going live + + diff --git a/fern/products/atoms/pages/platform/building-agents/single-prompt/quick-start.mdx b/fern/products/atoms/pages/platform/building-agents/single-prompt/quick-start.mdx new file mode 100644 index 0000000..174ec6e --- /dev/null +++ b/fern/products/atoms/pages/platform/building-agents/single-prompt/quick-start.mdx @@ -0,0 +1,164 @@ +--- +title: "Quick Start" +sidebarTitle: "Quick Start" +description: "Create your first Single Prompt agent in under 10 minutes." +--- + +This guide walks you through creating a working Single Prompt agent. By the end, you'll have an agent you can actually call and talk to. + +--- + +## Step 1: Start Creating + +From your dashboard, click **Create Agent** in the top-right corner. + + + Create Agent button on dashboard + + +--- + +## Step 2: Fill In Basic Details + +A modal appears asking for basic information: + + + Create Agent modal + + +| Field | What to Enter | +|-------|---------------| +| **Agent Name** | Something descriptive — "Customer Support Bot", "Sales Assistant", etc. 
| +| **Call Type** | **Inbound** if customers call in, **Outbound** if the agent makes calls | +| **Voice** | Click to preview voices, pick one that matches your brand | +| **Language** | Primary language for the conversation | + +--- + +## Step 3: Select Agent Type + +Choose **Single prompt agent** — it's marked as Recommended. + + + Choosing Single Prompt + + +The other option, Conversational Flow, is for complex step-by-step workflows. Stick with Single Prompt for natural conversations. + +--- + +## Step 4: Choose a Starting Point + +You can start from scratch or use a template: + +**Start from Scratch** +Select "Start with custom prompt" to write everything yourself. Best when you have specific requirements. + +**Use a Template** +Pick a pre-built template for your industry — Healthcare, E-commerce, Banking, Sales, Real Estate. Templates give you a solid foundation that you can customize. + + +Templates save time. Even if your use case is unique, starting from a related template is often faster than writing from scratch. + + +--- + +## Step 5: Add Knowledge Base (Optional) + +If you want your agent to reference specific information (product docs, policies, FAQs), connect a Knowledge Base now. + +Click **Select knowledge base** and either choose an existing one or create new. The agent will search this content when answering questions. + +You can skip this step and add it later. + +--- + +## Step 6: Create the Agent + +Click **Create agent** in the top-right. + +Atoms will process your configuration — this takes about 30 seconds. When complete, you'll see a success message. + +Click **Go to Agent** to open the prompt editor. + +--- + +## Step 7: Write Your Prompt + +You're now in the main editor. The center area shows four prompt sections: + + + Single Prompt editor interface + + +Fill in each section: + +**Role & Objective** +Who your agent is and what it should accomplish. + +``` +You are Alex, a friendly customer support agent for TechStore. 
+Your goal is to help customers with orders, returns, and product questions. +``` + +**Conversation Flow** +How the agent should guide conversations. + +``` +1. Greet warmly and ask how you can help +2. Listen and understand their need +3. Provide helpful information or take action +4. Confirm they're satisfied +5. Ask if there's anything else +``` + +**Dos and Don'ts** +Behavioral guardrails. + +``` +DO: Be patient, confirm before making changes, offer to transfer if stuck +DON'T: Share other customers' info, make promises you can't keep +``` + +**End Call Conditions** +When to conclude the call. + +``` +End when: Issue resolved, customer requests it, or transferred successfully +``` + +For detailed guidance on writing great prompts, see [Writing Prompts](/atoms/atoms-platform/single-prompt-agents/prompt-section/writing-prompts). + +--- + +## Step 8: Test Your Agent + +Click **Test Agent** in the top bar. Make a test call and try different scenarios: + +- Ask a normal question +- Ask something unexpected +- Interrupt mid-response +- Try to get it to break a rule + +Listen for how natural it sounds. Check that it follows your guidelines. + +--- + +## You're Live! + +Your agent is now ready. From here, you can: + + + + Ground responses in your documentation + + + Get a number customers can call + + + Personalize with customer data + + + Let your agent take actions + + diff --git a/fern/products/atoms/pages/platform/building-agents/single-prompt/when-to-use.mdx b/fern/products/atoms/pages/platform/building-agents/single-prompt/when-to-use.mdx new file mode 100644 index 0000000..4bdd6f6 --- /dev/null +++ b/fern/products/atoms/pages/platform/building-agents/single-prompt/when-to-use.mdx @@ -0,0 +1,105 @@ +--- +title: "When to Use Single Prompt" +sidebarTitle: "When to Use" +description: "Determine if Single Prompt agents are right for your use case." +--- + +Single Prompt agents are the simpler of the two agent types. 
They use one comprehensive prompt to guide all conversations, letting the AI handle the flow naturally. But they're not right for every situation. + +--- + +## Ideal Use Cases + +Single Prompt agents shine when conversations are **flexible and open-ended**: + +### Customer FAQ Bots + +Customers ask all kinds of questions. A Single Prompt agent can handle diverse topics naturally, moving between subjects as the conversation flows. + +### General Product Information + +When callers want to learn about your offerings, a Single Prompt agent can discuss features, pricing, and comparisons without rigid scripts. + +### Basic Support Triage + +For first-line support that identifies issues and provides general guidance before escalating, Single Prompt handles the variety well. + +### Creative or Advisory Interactions + +Conversations involving recommendations, advice, or creative problem-solving benefit from the flexible nature of Single Prompt. + +### Simple Information Lookup + +When callers need quick answers from a knowledge base — store hours, order status, basic policies — Single Prompt delivers naturally. + +--- + +## Not Ideal For + +Single Prompt agents struggle when you need **strict control over conversation flow**: + +### Strict Multi-Step Workflows + +If callers must complete steps in a specific order (collect info A, then B, then C), Conversational Flow gives you that control. + +### Lead Qualification with Specific Criteria + +When you need to score leads based on exact questions and route based on answers, the visual workflow of Conversational Flow is clearer. + +### Appointment Booking with Complex Branching + +Scheduling with multiple conditions (time slots, service types, locations) often needs the precision of defined paths. + +### Compliance-Required Conversations + +When regulations require specific disclosures in specific order, Conversational Flow ensures nothing is missed. 
+ +### Processes Requiring Exact Sequences + +Any process where the order of steps matters more than natural conversation should use Conversational Flow. + +--- + +## Decision Checklist + +Answer these questions about your use case: + +| Question | If Yes → | +|----------|----------| +| Can the conversation go in many directions? | Single Prompt ✓ | +| Do callers ask unpredictable questions? | Single Prompt ✓ | +| Is flexibility more important than structure? | Single Prompt ✓ | +| Must steps happen in exact order? | Conversational Flow | +| Do you need branching logic based on answers? | Conversational Flow | +| Are there compliance requirements? | Conversational Flow | + +--- + +## Example Scenarios + +**Good for Single Prompt:** +- "Tell me about your return policy" +- "What products do you offer?" +- "I have a question about my order" +- "Can you help me choose between X and Y?" + +**Better for Conversational Flow:** +- Lead qualification surveys +- Insurance intake forms +- Appointment scheduling systems +- Payment collection flows + +--- + +## What's Next + + + + Start building a Single Prompt agent + + + Compare with Conversational Flow + + diff --git a/fern/products/atoms/pages/platform/building-agents/single-prompt/writing-prompts.mdx b/fern/products/atoms/pages/platform/building-agents/single-prompt/writing-prompts.mdx new file mode 100644 index 0000000..03c17b5 --- /dev/null +++ b/fern/products/atoms/pages/platform/building-agents/single-prompt/writing-prompts.mdx @@ -0,0 +1,270 @@ +--- +title: "Writing Effective Prompts" +sidebarTitle: "Writing Prompts" +description: "Craft prompts that make your agent intelligent, helpful, and consistent." +--- + +Your prompt is everything. It defines who your agent is, what it knows, how it speaks, and when it takes action. A well-written prompt creates an agent that feels genuinely helpful. A sloppy one creates confusion and frustration. 
+ +This page covers how to structure prompts, what to include, and how to refine them over time. + +--- + +## The Prompt Editor + +The prompt editor is a markdown editor. You can use headings, bullet points, numbered lists, bold, and other formatting to organize your instructions clearly. + + + ![Prompt editor](../images/prompt-editor-sections.png) + + +### Prompt Section Navigation + +Click the **Prompt Section** dropdown above the editor to jump between sections of your prompt. This is especially useful for long, detailed prompts. + +The dropdown shows all your markdown headings, letting you navigate directly to: +- Role & Objective +- Personality & Tone +- User Details +- Conversational Flow +- Common Questions & Responses +- And any other sections you create + + +**Templates use this structure.** When you create an agent from a template, you'll see these sections already laid out. Even when starting from scratch, following this structure keeps your prompt organized and maintainable. + + +--- + +## Prompt Structure + +A great prompt covers these areas. You don't need to use these exact headings, but you should address each concept. + +### Role & Objective + +Who is this agent? What's it trying to accomplish? + +```markdown +## Role & Objective + +You are Sarah, a customer support specialist for Acme Insurance. +Your goal is to help policyholders with claim status inquiries, +provide clear updates, and guide them through next steps. +``` + +### Personality & Tone + +How should the agent communicate? What's the vibe? + +```markdown +## Personality & Tone + +- Warm and reassuring — insurance claims are stressful +- Clear and direct — avoid jargon, explain terms simply +- Patient — never rush the caller, let them process +- Professional but human — not robotic +``` + +### User Details + +What do you know about callers? What context should the agent use? + +```markdown +## User Details + +Callers are existing policyholders. 
They've already filed a claim +and are calling for updates. You have access to their claim details +via the lookup API. +``` + +### Conversational Flow + +What's the typical structure of a call? + +```markdown +## Conversational Flow + +1. Greet warmly and confirm you're speaking with the policyholder +2. Verify identity (last 4 of SSN or date of birth) +3. Ask about the purpose of the call +4. Look up their claim and provide status +5. Answer follow-up questions +6. Confirm next steps +7. End warmly +``` + +### Common Questions & Responses + +What questions come up repeatedly? How should the agent handle them? + +```markdown +## Common Questions & Responses + +**"How long will my claim take?"** +Explain that most claims are processed within 5-7 business days, +but complex claims may take longer. Offer to check the specific +timeline for their claim. + +**"Can I speak to an adjuster?"** +Explain that adjusters work on multiple claims and aren't available +for direct calls, but you can schedule a callback. +``` + +### Knowledge Base Topics + +What subjects might require document lookup? + +```markdown +## Knowledge Base Topics + +For questions about coverage limits, deductibles, or policy terms, +search the knowledge base. Don't guess at policy details. +``` + +### Objection & Concern Handling + +How should the agent handle frustration or pushback? + +```markdown +## Objection & Concern Handling + +If the caller is frustrated about delays: +- Acknowledge their frustration genuinely +- Explain what's causing the delay if known +- Offer concrete next steps +- Never be defensive +``` + +### End Conditions + +When and how should calls end? 
+ +```markdown +## End Conditions + +End the call when: +- The caller's questions are answered and they confirm satisfaction +- They explicitly say goodbye or thank you +- You've successfully transferred them +- They become unresponsive after 3 attempts +``` + +--- + +## Best Practices + + +| Vague | Specific | +|-------|----------| +| "Be helpful" | "Answer questions directly, offer next steps, never leave the caller wondering what happens next" | +| "Be professional" | "Use complete sentences, avoid slang, but don't sound robotic" | +| "Handle complaints" | "If the caller is frustrated, acknowledge their feelings first, then address the issue" | + + + +Show the agent what you mean: + +```markdown +When confirming identity, say something like: +"Before I can access your account, I just need to verify a few details. +Can you confirm your date of birth?" +``` + + + +What happens when things go wrong? + +- What if the API is down? +- What if the caller doesn't pass verification? +- What if they ask something completely off-topic? +- What if they're angry? + + + +Your first prompt won't be perfect. Review conversation logs, find where the agent stumbled, and update your instructions. The best prompts evolve over time. + + +--- + +## Using Variables + +Make prompts dynamic with variables: + +```markdown +Hello {{customer_name}}! I see you're calling about claim #{{claim_id}}. +``` + +At runtime, these get replaced with actual values from your variables or API responses. + +→ [Variables](/atoms/atoms-platform/single-prompt-agents/configuration-panel/variables) + +--- + +## Example: Complete Prompt + +Here's a full example putting it all together: + +```markdown +## Role & Objective + +You are Alex, a claims support specialist for Reliable Insurance. +Help policyholders check claim status, understand next steps, and +feel supported through the process. 
+ +## Personality & Tone + +- Warm and empathetic — claims are stressful +- Clear and jargon-free +- Patient and never rushed +- Confident but not dismissive + +## Conversational Flow + +1. Greet and confirm you're speaking with the policyholder +2. Verify identity (date of birth) +3. Ask how you can help +4. Look up their claim +5. Provide clear status update +6. Answer questions +7. Confirm next steps +8. Close warmly + +## Common Questions + +**"When will I hear back?"** +Most claims are reviewed within 5-7 business days. I can check +your specific timeline if you'd like. + +**"Why is it taking so long?"** +Sometimes we need additional documentation or verification. +Let me check what's happening with yours specifically. + +## Sensitive Situations + +If the caller is upset or frustrated: +- Listen fully before responding +- Acknowledge their feelings: "I completely understand this is frustrating" +- Focus on what you CAN do, not what you can't +- Offer concrete next steps + +## End Conditions + +End the call when: +- Caller confirms they have what they need +- They say goodbye or thank you +- Transfer is complete +- No response after 3 attempts (end politely) +``` + +--- + +## What's Next + + + + See how your prompt performs + + + Find where to improve + + diff --git a/fern/products/atoms/pages/platform/building-agents/testing-launch/conversation-logs.mdx b/fern/products/atoms/pages/platform/building-agents/testing-launch/conversation-logs.mdx new file mode 100644 index 0000000..553c190 --- /dev/null +++ b/fern/products/atoms/pages/platform/building-agents/testing-launch/conversation-logs.mdx @@ -0,0 +1,142 @@ +--- +title: "Conversation Logs" +sidebarTitle: "Conversation Logs" +description: "Review transcripts and debug agent behavior." +--- + +Conversation Logs are your window into every call. See exactly what was said, when events occurred, and how metrics were captured — everything you need to understand and improve your agent. 
+ +**Location:** Top right → **Convo Logs** button + +--- + +## Call Logs List + + + ![Call logs list](../images/convo-logs-list.png) + + +The main view shows all calls with key information at a glance: + +| Column | Description | +|--------|-------------| +| **Call Date** | When the call occurred | +| **Call ID** | Unique identifier (click to copy) | +| **From / To** | Phone numbers or sources | +| **Duration** | How long the call lasted | +| **Hangup Cause** | Why the call ended | +| **Status** | Current state of the call | +| **Retries** | Number of retry attempts | +| **Details** | Click to view full conversation | + +--- + +## Filtering + +Click **Filter By** to narrow down your logs: + +| Filter | Options | +|--------|---------| +| **Conversation Type** | Inbound Calls, Outbound Calls, Web Calls, Chat | +| **Status** | Pending, In Queue, In Progress, Active, Completed, Failed, Cancelled, No Answer, Processing | +| **End Reason** | Dial No Answer, User Hangup, Agent Hangup, Busy, Timeout, Error, Voicemail | +| **Call Type** | All Attempts, Retry Attempts, Initial Attempts | +| **Call Duration** | 0-30 seconds, 30-60 seconds, 1-5 minutes, 5+ minutes | + +Use **Search Logs** to find specific calls by ID or content. + +--- + +## Call Details + +Click **Details** on any call to open the full conversation view. You'll see tabs for different types of information: + + + + + ![Overview tab](../images/convo-logs-overview.png) + + + High-level summary of the call: + + | Field | Description | + |-------|-------------| + | **Call Summary** | AI-generated summary of what happened | + | **Agent** | Which agent handled the call | + | **Call ID** | Unique identifier | + | **Model** | AI model used | + | **Voice** | Voice used | + | **Date & Time** | When the call occurred | + | **Cost (Credits)** | Credit usage | + | **Disconnection Reason** | Why the call ended | + + Play the audio recording using the waveform player at the top. 
+ + + + + ![Transcript tab](../images/convo-logs-transcript.png) + + + The full conversation, word for word. Each message shows: + - Who spoke (Agent or User) + - What was said + - Timestamp + + Use this to understand exactly how the conversation flowed. + + + + + ![Events tab](../images/convo-logs-events.png) + + + Timeline of everything that happened: + - Agent responses + - Call End triggers + - Transfers + - API calls + - Other system events + + Each event shows the timestamp and what occurred. + + + + + ![Metrics tab](../images/convo-logs-metrics.png) + + + Post-call metrics you've configured, with their values: + + Each metric shows: + - Name + - Data type (Integer, String, Boolean, etc.) + - Value extracted from this call + + Click the dropdown on any metric to see details. + + + +--- + +## Exporting + +Click the **Export** button to download your logs: + +| Format | Best for | +|--------|----------| +| **JSON** | Developers, programmatic analysis | +| **CSV** | Spreadsheets, reporting | + +--- + +## Related + + + + Define what to track from each call + + + Test your agent + + diff --git a/fern/products/atoms/pages/platform/building-agents/testing-launch/locking.mdx b/fern/products/atoms/pages/platform/building-agents/testing-launch/locking.mdx new file mode 100644 index 0000000..2018520 --- /dev/null +++ b/fern/products/atoms/pages/platform/building-agents/testing-launch/locking.mdx @@ -0,0 +1,45 @@ +--- +title: "Locking Your Agent" +sidebarTitle: "Locking" +description: "Protect production agents from accidental changes." +--- + +Once your agent is working well, lock it. Locking prevents accidental edits that could break a production agent. + +**Location:** Top right → **Lock Agent** toggle + + + ![Lock agent toggle](../images/lock-agent.png) + + +--- + +## How It Works + +Toggle **Lock Agent** to ON — all editing becomes disabled. 
+ +**What's blocked:** +- Prompt editing +- Configuration changes +- Settings modifications +- Node changes (Convo Flow) + +**What still works:** +- Test Agent +- View Conversation Logs +- Live calls continue normally + +To make changes, toggle it back to OFF, edit, test, then re-lock. + +--- + +## Related + + + + Test before locking + + + Deploy to production + + diff --git a/fern/products/atoms/pages/platform/building-agents/testing-launch/testing.mdx b/fern/products/atoms/pages/platform/building-agents/testing-launch/testing.mdx new file mode 100644 index 0000000..3450908 --- /dev/null +++ b/fern/products/atoms/pages/platform/building-agents/testing-launch/testing.mdx @@ -0,0 +1,58 @@ +--- +title: "Testing Your Agent" +sidebarTitle: "Testing" +description: "Validate your agent before deploying to production." +--- + +Testing is the difference between a great agent and a frustrating one. Atoms gives you three ways to test, so you can catch issues before real callers do. + +**Location:** Top right → **Test Agent** button + +--- + +## Three Test Modes + + + + + ![Web call testing](../images/test-webcall.png) + + + **Voice call in your browser.** Quick and convenient — no phone needed. + + Best for rapid testing during development. + + + + + ![Telephony testing](../images/test-telephony.png) + + + **Real phone call.** The authentic experience — exactly what your callers will hear. + + Best for final validation before launch. + + + + + ![Chat testing](../images/test-chat.png) + + + **Text-only conversation.** Test conversation logic without voice. + + Best for testing branching and prompts quickly. 
+ + + +--- + +## Related + + + + Review call transcripts + + + Protect production agents + + diff --git a/fern/products/atoms/pages/platform/building-agents/testing-management/conversation-logs.mdx b/fern/products/atoms/pages/platform/building-agents/testing-management/conversation-logs.mdx new file mode 100644 index 0000000..79f380b --- /dev/null +++ b/fern/products/atoms/pages/platform/building-agents/testing-management/conversation-logs.mdx @@ -0,0 +1,175 @@ +--- +title: "Conversation Logs" +sidebarTitle: "Conversation Logs" +description: "Review and analyze every conversation your agent has." +--- + +Conversation Logs record every interaction your agent has — test calls, live calls, all of them. They're your window into what's actually happening in conversations, essential for debugging, optimization, and quality assurance. + +--- + +## Accessing Conversation Logs + +Find Conversation Logs in the top bar of your agent editor, or through the main dashboard. + + + Conversation Logs button + + +Each entry shows a complete conversation with all the details you need to understand what happened. + +--- + +## What's in a Log + +Each conversation log contains: + +### Call Metadata + +| Field | Description | +|-------|-------------| +| **Call ID** | Unique identifier for this conversation | +| **Timestamp** | When the call happened | +| **Duration** | How long it lasted | +| **Direction** | Inbound or outbound | +| **Phone Number** | Caller's number (if available) | +| **Disposition** | How the call ended (successful, unsuccessful, transferred, etc.) | + +### Transcript + +A full text record of everything said — both by the caller and your agent. Each turn is timestamped. + +``` +[00:02] Agent: Hi, this is Jordan from TechStore. How can I help you today? +[00:05] Caller: Yeah, I need to return something I bought last week. +[00:08] Agent: I'd be happy to help with that. Could you tell me what item you'd like to return? 
+[00:12] Caller: The wireless headphones, they don't fit right. +... +``` + +### Audio Recording + +The actual audio of the call (if recording is enabled). Listen to hear tone, pacing, and things the transcript might miss. + +### Variables + +All variables used in the conversation — what values they had and when they changed. + +### API Activity + +For Convo Flow agents: which API calls were made, what data was sent/received, and whether they succeeded. + +### Branch Decisions + +For Convo Flow agents: which branches were taken and why. Understand the path the conversation followed. + +--- + +## Using Logs for Debugging + +When something goes wrong, logs tell you exactly what happened: + +### Common Issues to Look For + +**Speech Recognition Errors** +The transcript shows what the system *thought* it heard. If it's wrong, that explains downstream problems. + +``` +Caller actually said: "I want to cancel my order" +System heard: "I want to cancel my daughter" +``` + +**Intent Misunderstanding** +The caller's words were transcribed correctly, but the agent misinterpreted the intent. + +**Wrong Branch Taken** +In Convo Flow, check which branches fired. The AI's interpretation might differ from your expectation. + +**API Failures** +Check the API activity section. Failed calls explain why data wasn't available or actions weren't taken. 
+ +--- + +## Filtering and Searching + +With many calls, you need to find specific conversations: + +### Filter Options + +| Filter | What It Finds | +|--------|---------------| +| **Date Range** | Calls within a specific period | +| **Disposition** | Only successful, unsuccessful, or transferred calls | +| **Duration** | Calls shorter or longer than a threshold | +| **Phone Number** | Calls from/to a specific number | +| **Contains Text** | Calls where specific words appear in transcript | + +### Useful Searches + +- **Short calls** — Often indicate quick hang-ups or problems +- **Long calls** — May reveal conversations that got stuck +- **Unsuccessful dispositions** — Learn why calls fail +- **Calls with transfers** — Understand what AI couldn't handle + +--- + +## Analyzing Patterns + +Look beyond individual calls to spot trends: + +**Frequent Fallbacks** +If many calls trigger fallback branches, your prompts may need more specific handling. + +**Common Questions** +What do callers ask most? Make sure these are handled excellently. + +**Transfer Reasons** +Why are calls being transferred? Can AI handle more of these? + +**Drop-off Points** +Where do unsuccessful calls fail? Early hang-ups vs. mid-conversation issues tell different stories. + +--- + +## Using Logs for Improvement + +Turn log insights into agent improvements: + +1. **Review regularly** — Set aside time weekly to review a sample of calls +2. **Categorize issues** — Group problems by type (prompt, understanding, integration) +3. **Prioritize by impact** — Fix issues that affect the most callers first +4. **Make targeted changes** — Adjust prompts, add branches, fix integrations +5. **Track results** — See if the same issues recur after your changes + +--- + +## Privacy and Compliance + + +Conversation logs may contain personally identifiable information (PII). Ensure your logging practices comply with relevant regulations (GDPR, HIPAA, etc.) and your company's data policies. 
+ + +### Data Retention + +Configure how long logs are kept. Shorter retention reduces storage and compliance risk. Longer retention enables more analysis. + +### Recording Consent + +For calls with audio recording, ensure appropriate consent disclosures are made at the start of calls, as required by law. + +### Access Control + +Limit who can view conversation logs. Not everyone needs access to customer call recordings. + +--- + +## Next Steps + + + + Generate logs through testing + + + Freeze your agent configuration + + diff --git a/fern/products/atoms/pages/platform/building-agents/testing-management/locking/how-locking-works.mdx b/fern/products/atoms/pages/platform/building-agents/testing-management/locking/how-locking-works.mdx new file mode 100644 index 0000000..40df631 --- /dev/null +++ b/fern/products/atoms/pages/platform/building-agents/testing-management/locking/how-locking-works.mdx @@ -0,0 +1,175 @@ +--- +title: "How Locking Works" +sidebarTitle: "How It Works" +description: "Technical details of the agent locking mechanism." +--- + +Understanding how locking works helps you use it effectively and avoid common pitfalls. + +--- + +## The Lock Toggle + +Find the lock toggle in the top bar of your agent editor. + + + Lock toggle + + +**Unlocked (default):** Full editing enabled. You can change anything. + +**Locked:** Editing disabled. Configuration is frozen. + +Click the toggle to switch states. Locking requires confirmation. + +--- + +## What Gets Locked + +When you lock an agent, the following are frozen: + +### For All Agents + +| Component | Locked? | +|-----------|---------| +| Agent name | ✓ | +| Voice selection | ✓ | +| LLM model | ✓ | +| LLM parameters (temperature, etc.) | ✓ | +| Language settings | ✓ | + +### For Single Prompt Agents + +| Component | Locked? 
| +|-----------|---------| +| Role & Objective prompt | ✓ | +| Conversation Flow prompt | ✓ | +| Dos and Don'ts prompt | ✓ | +| End Call Conditions prompt | ✓ | + +### For Convo Flow Agents + +| Component | Locked? | +|-----------|---------| +| All nodes and their prompts | ✓ | +| All branch conditions | ✓ | +| Branch connections | ✓ | +| API configurations | ✓ | +| Variable definitions | ✓ | + +### Not Locked + +| Component | Why Not Locked? | +|-----------|-----------------| +| Phone number assignment | You need to connect/disconnect numbers | +| Conversation logs | These accumulate regardless | +| Knowledge Base content | Content can be updated independently | +| External API endpoints | These are managed externally | + +--- + +## Attempting to Edit While Locked + +If you try to edit a locked agent: + +1. The edit UI is disabled (fields are read-only) +2. If you somehow trigger an edit API, it returns an error +3. A message indicates the agent is locked + +To make changes, you must either: +- Unlock the agent (if you have permission) +- Create a new version based on this one + +--- + +## Locking and Phone Numbers + +Locked agents can be connected to live phone numbers. In fact, this is the recommended workflow: + +``` +Lock agent → Connect to phone number → Go live +``` + +The lock protects the configuration while it's handling real calls. + +You can disconnect a phone number from a locked agent without unlocking it. + +--- + +## Unlocking + +To unlock an agent: + +1. Click the lock toggle +2. Confirm you want to unlock +3. Editing is now enabled + + +Unlocking a live agent is risky. Any changes you make immediately affect live calls. Consider creating a new version instead. + + +### When Unlocking is Appropriate + +- The agent is not connected to any live phone numbers +- You've disconnected it from live traffic +- You're making an emergency fix and accept the risk + +### Better Alternative: Version + +Instead of unlocking, create a new version: +1. 
Keep the current version locked (and live) +2. Create a new unlocked version to edit +3. Test the new version +4. Lock the new version +5. Switch traffic to the new version +6. Archive or delete the old version + +This prevents any disruption to live calls. + +--- + +## Lock State in Team Environments + +In team settings: + +- **Who can lock:** Anyone with edit permissions +- **Who can unlock:** Anyone with edit permissions (consider restricting this) +- **Visibility:** Lock state is visible to all team members +- **Audit:** Lock/unlock actions are logged with timestamp and user + + +For production agents, consider restricting unlock permissions to senior team members to prevent accidental changes. + + +--- + +## Lock State via API + +If you're managing agents programmatically: + +```bash +# Check lock state +GET /agents/{agent_id} +# Returns: { ..., "is_locked": true, ... } + +# Lock an agent +POST /agents/{agent_id}/lock + +# Unlock an agent +POST /agents/{agent_id}/unlock +``` + +API requests to modify a locked agent return `403 Forbidden`. + +--- + +## Next Steps + + + + Managing multiple versions safely + + + Deciding when to lock + + diff --git a/fern/products/atoms/pages/platform/building-agents/testing-management/locking/version-management.mdx b/fern/products/atoms/pages/platform/building-agents/testing-management/locking/version-management.mdx new file mode 100644 index 0000000..2e20f1a --- /dev/null +++ b/fern/products/atoms/pages/platform/building-agents/testing-management/locking/version-management.mdx @@ -0,0 +1,188 @@ +--- +title: "Version Management" +sidebarTitle: "Versions" +description: "Manage multiple versions of your agent safely." +--- + +Version management lets you maintain multiple configurations of your agent — enabling safe iteration without disrupting live traffic. + +--- + +## Why Versioning Matters + +Without versioning, every change to your agent affects live calls immediately. 
That's risky: + +- A typo in your prompt could confuse callers +- A broken API configuration could crash conversations +- Testing in production means real customers experience bugs + +Versioning gives you a safety net. Work on changes separately, test thoroughly, then promote when ready. + +--- + +## How Versioning Works + +Each agent can have multiple versions: + +``` +My Support Agent +├── v1 (locked, live on +1-555-0100) +├── v2 (locked, archived) +└── v3 (unlocked, in development) +``` + +Only one version handles live traffic at a time. Others exist for development, testing, or historical reference. + +--- + +## Creating a New Version + +To create a new version: + +1. Go to your agent's settings or version panel +2. Click **Create New Version** +3. Choose to copy from: + - Current version (start with existing config) + - Blank slate (start fresh) +4. The new version is created **unlocked** for editing + +The new version gets a version number automatically (v2, v3, etc.). + +--- + +## Version Lifecycle + +A typical version goes through these stages: + +``` +Created (unlocked) + ↓ editing and testing +Locked + ↓ final testing +Deployed + ↓ live traffic +Superseded + ↓ replaced by newer version +Archived +``` + +### Stage Details + +| Stage | What's Happening | +|-------|------------------| +| **Created** | New version, actively being edited | +| **Locked** | Configuration frozen, ready for deployment | +| **Deployed** | Handling live traffic | +| **Superseded** | Was live, now replaced by newer version | +| **Archived** | Kept for reference but no longer active | + +--- + +## Switching Live Versions + +To switch which version handles live calls: + +1. Ensure the new version is locked +2. Go to the phone number configuration +3. Change the assigned agent version +4. Confirm the switch + + +The switch is nearly instant. In-progress calls continue on the old version; new calls go to the new version. 
+ + +--- + +## Comparing Versions + +When deciding whether to deploy a new version, compare it to the current live version: + +**Configuration Diff** +See exactly what changed between versions — prompt text, settings, workflow structure. + +**Test Results** +Compare test call outcomes. Does the new version handle the same scenarios correctly? + +**Metrics Preview** +If possible, A/B test the new version with a portion of traffic before full deployment. + +--- + +## Rolling Back + +If a new version has problems, roll back quickly: + +1. Go to phone number configuration +2. Switch back to the previous version +3. The old version resumes handling calls + +Because locked versions are frozen, the old version is exactly as you left it. + + +Don't delete old versions immediately. Keep at least one previous version available for quick rollback. + + +--- + +## Best Practices + + + + Never unlock a live version to make changes. Create a new version instead. This protects live traffic from in-progress changes. + + + Only deploy locked versions. This ensures the configuration is frozen and stable. + + + Don't test on v3 then deploy v4 because "they're basically the same." Test the exact version going live. + + + When creating a new version, add notes about what changes it contains. Future you will thank present you. + + + Archive versions you no longer need. Keep the current live version and one previous version for rollback. + + + +--- + +## Version Strategy for Teams + +For teams with multiple people working on agents: + +**Development Branch** +Unlocked versions where individual contributors make changes. Each person might have their own development version. + +**Staging Version** +A locked version deployed to a test phone number. Team reviews changes here before production. + +**Production Version** +The locked version handling real customer calls. Changes only come through the staging process. 
+ +``` +Development → Staging → Production + (unlocked) (locked) (locked) +``` + +--- + +## Version Limits + +Each agent can have a limited number of versions (check your plan). To free up version slots: + +- Archive versions you no longer need +- Delete versions that were never deployed +- Consolidate abandoned development versions + +--- + +## Next Steps + + + + Know when your version is ready + + + Test versions before deployment + + diff --git a/fern/products/atoms/pages/platform/building-agents/testing-management/locking/when-to-lock.mdx b/fern/products/atoms/pages/platform/building-agents/testing-management/locking/when-to-lock.mdx new file mode 100644 index 0000000..c38b983 --- /dev/null +++ b/fern/products/atoms/pages/platform/building-agents/testing-management/locking/when-to-lock.mdx @@ -0,0 +1,133 @@ +--- +title: "When to Lock Your Agent" +sidebarTitle: "When to Lock" +description: "Understand when and why to lock your agent configuration." +--- + +Locking freezes your agent's configuration — preventing accidental changes while it handles live calls. It's the safeguard between testing and production. + +--- + +## What Locking Does + +When you lock an agent: + +- **Prompt cannot be edited** — Your carefully tested prompt stays exactly as is +- **Settings are frozen** — Voice, LLM, language settings stay fixed +- **Workflow is immutable** — For Convo Flow agents, nodes and branches can't change +- **API configurations are protected** — Integration settings remain stable + +You can still: +- View the agent configuration +- View conversation logs +- Make test calls +- Create a new unlocked version + +--- + +## When to Lock + +### Before Going Live + +Lock immediately before connecting your agent to a live phone number or widget. 
This ensures: + +- The tested version is exactly what goes live +- No one accidentally changes something +- You have a known-good baseline to return to + +### After Major Changes + +After significant prompt or workflow updates that you've thoroughly tested: + +1. Test the changes completely +2. Confirm everything works +3. Lock the agent +4. Then deploy to production + +### For Compliance + +Some industries require change management for production systems. Locking provides: + +- A clear record of what configuration was live when +- Protection against unauthorized changes +- Audit trail of version history + +--- + +## Signs You're Ready to Lock + +Ask yourself these questions: + +| Question | If "Yes" → Ready to Lock | +|----------|--------------------------| +| Have I tested all common scenarios? | ✓ | +| Have I tested edge cases? | ✓ | +| Have I had others test it? | ✓ | +| Am I confident in the current behavior? | ✓ | +| Are all integrations working correctly? | ✓ | +| Does the voice and tone match my brand? | ✓ | + +If you answered "no" to any of these, keep testing before locking. + +--- + +## When NOT to Lock + +**During Active Development** +If you're still iterating frequently, locking just creates extra steps. Wait until you've reached a stable point. + +**If You're Unsure** +Locking isn't urgent. If you're not confident the agent is ready, keep testing. + +**For Quick Experiments** +Testing new prompt ideas? Don't lock between every experiment. Lock only when you've found what works. + +--- + +## The Lock-Test-Deploy Workflow + +A typical safe workflow: + +``` +1. Develop agent (unlocked) +2. Test thoroughly +3. Lock the agent ← You are confident +4. Deploy to live phone number +5. Monitor conversation logs +6. 
If changes needed: + - Create new version (unlocked) + - Make changes + - Test again + - Lock new version + - Deploy new version +``` + +--- + +## Multiple Environments + +For larger teams, consider: + +**Development Agent (Unlocked)** +Where you make and test changes freely. + +**Staging Agent (Locked)** +A locked version connected to a test phone number for final validation. + +**Production Agent (Locked)** +The version actually handling customer calls, always locked. + +Changes flow from Development → Staging → Production, with locking at each promotion. + +--- + +## Next Steps + + + + Technical details of the locking mechanism + + + Managing multiple versions of your agent + + diff --git a/fern/products/atoms/pages/platform/building-agents/testing-management/testing.mdx b/fern/products/atoms/pages/platform/building-agents/testing-management/testing.mdx new file mode 100644 index 0000000..fc8c6b1 --- /dev/null +++ b/fern/products/atoms/pages/platform/building-agents/testing-management/testing.mdx @@ -0,0 +1,171 @@ +--- +title: "Testing Your Agent" +sidebarTitle: "Testing" +description: "Validate your agent before going live with real callers." +--- + +Testing is the most important step between building and deploying. A well-tested agent handles real conversations gracefully. A poorly tested one frustrates callers and damages your brand. + +Atoms provides multiple testing methods so you can validate thoroughly before going live. + +--- + +## Testing Methods + +### Web Call Testing + +The fastest way to test. Click **Test Agent** in the top bar to start a browser-based call. + + + Test Agent button + + +**How it works:** +1. Click **Test Agent** +2. Allow microphone access when prompted +3. Talk to your agent like a real caller would +4. The call appears in Conversation Logs when done + +**Best for:** Quick iterations, checking prompt changes, initial validation. + +### Phone Call Testing + +Test with actual phone audio — more realistic than browser testing. 
+ +**How it works:** +1. Go to **Settings** → **Test Phone Number** +2. Call the provided number from your phone +3. Have a real conversation with your agent +4. Review the call in Conversation Logs + +**Best for:** Final validation, voice quality checks, realistic testing before launch. + +--- + +## What to Test + +Don't just test the happy path. Systematically validate every aspect of your agent: + +### Conversation Flow + +| Test | What You're Checking | +|------|---------------------| +| **Happy path** | Does the ideal conversation work perfectly? | +| **Edge cases** | What happens when callers say unexpected things? | +| **Fallbacks** | Does the agent handle confusion gracefully? | +| **Interruptions** | What happens if you interrupt mid-sentence? | +| **Silence** | What does the agent do when you don't respond? | + +### Response Quality + +| Test | What You're Checking | +|------|---------------------| +| **Accuracy** | Are answers factually correct? | +| **Relevance** | Does the agent actually answer the question asked? | +| **Tone** | Does it sound like your brand? | +| **Length** | Are responses appropriately concise for voice? | + +### Guardrails + +| Test | What You're Checking | +|------|---------------------| +| **Off-topic requests** | Does the agent stay on task? | +| **Manipulation attempts** | Can you trick it into breaking rules? | +| **Sensitive topics** | Does it handle them appropriately? | +| **Competitor mentions** | Does it follow your guidelines? 
| + +--- + +## Testing Checklist + +Before going live, verify each of these: + +**Basic Functionality** +- [ ] Agent greets correctly +- [ ] Agent understands common requests +- [ ] Agent provides accurate information +- [ ] Agent ends calls gracefully + +**Edge Cases** +- [ ] Agent handles unclear speech +- [ ] Agent manages interruptions +- [ ] Agent recovers from silence +- [ ] Agent deals with unexpected topics + +**Integrations** (if applicable) +- [ ] API calls work correctly +- [ ] Data is fetched/sent properly +- [ ] Transfer calls connect +- [ ] Variables populate correctly + +**Quality** +- [ ] Voice sounds natural +- [ ] Response timing feels right +- [ ] Personality matches brand +- [ ] No awkward pauses + +--- + +## Testing Tips + + + + Don't use perfect English. Mumble. Use filler words. Ask things in weird ways. Real callers don't speak like prompts. + + + Ask off-topic questions. Try to get it to reveal information it shouldn't. Interrupt constantly. Your callers will find edge cases — find them first. + + + Background noise, poor phone connection, speaking quickly. Test the conditions your real callers will have. + + + You know your agent too well. Fresh testers find problems you miss because they don't know the "right" way to talk to it. + + + After testing, check Conversation Logs. Read the transcript. Listen to the audio. Look for anywhere the agent could have done better. + + + +--- + +## Interpreting Test Results + +After each test call, check: + +**Transcript Accuracy** +Was what you said transcribed correctly? Speech recognition errors can cause downstream problems. + +**Intent Understanding** +Did the agent understand what you meant, even if you said it imperfectly? + +**Response Appropriateness** +Was the response helpful, on-topic, and correctly toned? + +**Flow Progression** +Did the conversation move forward naturally, or did it get stuck or repeat? + +--- + +## Iterating on Problems + +When tests reveal issues: + +1. 
**Identify the pattern** — Is it a one-off or does it happen consistently? +2. **Check the logs** — What exactly did the agent see and decide? +3. **Trace to the cause** — Is it a prompt issue, LLM issue, or integration issue? +4. **Make targeted fixes** — Change one thing at a time so you know what worked +5. **Re-test the specific scenario** — Confirm the fix works +6. **Re-test adjacent scenarios** — Make sure you didn't break something else + +--- + +## Next Steps + + + + Analyze what happened in test calls + + + Freeze your agent when it's ready + + diff --git a/fern/products/atoms/pages/platform/configuring-agents/agent-settings/general-tab.mdx b/fern/products/atoms/pages/platform/configuring-agents/agent-settings/general-tab.mdx new file mode 100644 index 0000000..2d1a3cc --- /dev/null +++ b/fern/products/atoms/pages/platform/configuring-agents/agent-settings/general-tab.mdx @@ -0,0 +1,85 @@ +--- +title: "General Tab" +sidebarTitle: "General Tab" +description: "Configure timeout and general agent behavior." +--- + +The General Tab contains timeout settings that control what happens when callers become unresponsive. + +--- + +## Location + +**Left Sidebar → Agent Settings → General tab** + +--- + +## LLM Idle Timeout Settings + +> Configure how long the agent waits for a user response before sending an inactivity message. After 3 attempts, the conversation automatically ends. + +| Setting | Input | Default | Description | +|---------|-------|---------|-------------| +| **Chat Timeout** | Seconds | 60 | Timeout for chat/text conversations | +| **Webcall Timeout** | Seconds | 20 | Timeout for browser voice calls | +| **Telephony Timeout** | Seconds | (configurable) | Timeout for phone calls | + +--- + +## How Timeout Works + +1. **User stops responding** — Silence detected +2. **Timeout triggers** — After configured seconds +3. **Agent sends inactivity message** — "Are you still there?" +4. **Repeats up to 3 times** — Giving user chances to respond +5. 
**Call ends automatically** — After 3 failed attempts + +--- + +## Setting Recommendations + +| Channel | Recommended Timeout | Reason | +|---------|---------------------|--------| +| Chat | 60s | Users may be multitasking | +| Web Call | 20s | Real-time, expect engagement | +| Telephony | 15-30s | Phone calls expect quick responses | + +--- + +## Inactivity Messages + +When timeout triggers, the agent sends a re-engagement message. Configure what the agent says in your prompt: + +``` +If the user hasn't responded for a while, say something like: +"I'm still here if you need anything. Is there something I can help with?" +``` + +--- + +## Use Cases + +### Short Timeouts (10-20s) + +- Sales calls where engagement matters +- Time-sensitive conversations +- Active support calls + +### Longer Timeouts (30-60s) + +- Complex issues where user might be looking something up +- Multi-tasking expected +- User might be finding information + +--- + +## What's Next + + + + Configure web widget + + + Test timeout behavior + + diff --git a/fern/products/atoms/pages/platform/configuring-agents/agent-settings/model-tab.mdx b/fern/products/atoms/pages/platform/configuring-agents/agent-settings/model-tab.mdx new file mode 100644 index 0000000..196392a --- /dev/null +++ b/fern/products/atoms/pages/platform/configuring-agents/agent-settings/model-tab.mdx @@ -0,0 +1,137 @@ +--- +title: "Model Tab" +sidebarTitle: "Model Tab" +description: "Configure LLM and language behavior settings." +--- + +The Model Tab controls the AI model powering your agent and how it handles language. This includes model selection, speech formatting, and advanced language switching. 
+ + + [IMAGE: Full Model tab with Language Switching section expanded] + + +--- + +## Location + +**Left Sidebar → Agent Settings → Model tab** + +--- + +## AI Model + +| Setting | Control | Example | +|---------|---------|---------| +| **LLM Model** | Dropdown | GPT-4o | +| **Language** | Dropdown | English | + +### Model Selection + +Choose the LLM that powers your agent's understanding and responses. This is the same as the Prompt Section model dropdown. + +### Language + +Set the primary language. This should match your voice selection's language capability. + +--- + +## Speech Formatting + +| Setting | Toggle | Default | +|---------|--------|---------| +| **Speech Formatting** | ON/OFF | ON | + +When enabled, the system formats responses for natural speech: +- Expands abbreviations +- Handles numbers naturally +- Formats dates and times for speaking + +**Example:** +- Written: "Your appt is on 1/15/24 at 3:00pm" +- Spoken: "Your appointment is on January fifteenth, twenty twenty-four at three p m" + +--- + +## Language Switching + +| Setting | Toggle | Default | +|---------|--------|---------| +| **Language Switching** | ON/OFF | ON | + +When enabled, the agent can detect when a caller switches languages and respond in kind. + +### Advanced Language Switching Settings + +When language switching is enabled, additional controls appear: + +> Controls how accurately the agent detects language switches. Higher thresholds reduce false switches; lower thresholds allow quicker changes. 
+ +| Setting | Range | Default | Description | +|---------|-------|---------|-------------| +| **Minimum Words for Detection** | 1-10 | 2 | Words needed before switching | +| **Strong Signal Threshold** | Low-High | 0.7 | Confidence for immediate switch | +| **Weak Signal Threshold** | Low-High | 0.3 | Confidence for tentative detection | +| **Consecutive Weak Signals** | 1-8 | 2 | Weak signals needed to switch | + +### Understanding the Settings + +**Minimum Words for Detection** +How many words in the new language before considering a switch. Higher = more conservative. + +**Strong Signal Threshold** +If confidence is above this level, switch immediately. Higher = requires more certainty. + +**Weak Signal Threshold** +Confidence level that counts as "maybe" a language switch. Below this is ignored. + +**Consecutive Weak Signals** +How many weak signals in a row before switching. Higher = more conservative. + +--- + +## Use Cases + +### Single Language (Simple) + +- Set primary language +- Disable Language Switching +- Agent only speaks one language + +### Multi-Language (Flexible) + +- Select multi-language voice +- Set primary language +- Enable Language Switching +- Agent adapts to caller's language + +### Conservative Multi-Language + +- Enable Language Switching +- Increase Minimum Words (5+) +- Increase Strong Signal Threshold (0.8+) +- Agent only switches when very confident + +--- + +## Saving Changes + +After making changes, you'll see: +``` +"You have unsaved changes" +[Discard] [Save] +``` + +Click **Save** to apply changes. 
+ +--- + +## What's Next + + + + Assign a phone number + + + Configure voice settings + + diff --git a/fern/products/atoms/pages/platform/configuring-agents/agent-settings/phone-number-tab.mdx b/fern/products/atoms/pages/platform/configuring-agents/agent-settings/phone-number-tab.mdx new file mode 100644 index 0000000..87aeefa --- /dev/null +++ b/fern/products/atoms/pages/platform/configuring-agents/agent-settings/phone-number-tab.mdx @@ -0,0 +1,94 @@ +--- +title: "Phone Number Tab" +sidebarTitle: "Phone Number Tab" +description: "Assign a phone number to your agent." +--- + +The Phone Number Tab lets you connect your agent to a phone number. Once assigned, inbound calls to that number are handled by this agent. + +--- + +## Location + +**Left Sidebar → Agent Settings → Phone Number tab** + +--- + +## Assigning a Number + +1. Navigate to Agent Settings → Phone Number tab +2. Click the phone number dropdown +3. Select from available numbers +4. Save changes + +--- + +## Available Numbers + +The dropdown shows phone numbers you've acquired through Atoms: + +| Status | Description | +|--------|-------------| +| **Available** | Not assigned to any agent | +| **Assigned** | Currently used by another agent (shown with agent name) | + +You can only assign numbers that aren't already in use. + +--- + +## Getting Phone Numbers + +Before you can assign a number, you need to acquire one. + +→ See: [Getting a Number](/atoms/atoms-platform/deployment/phone-numbers) + +--- + +## One Number Per Agent + +Each phone number can only be assigned to one agent at a time. + +| Scenario | Result | +|----------|--------| +| Number is free | Can assign to this agent | +| Number assigned elsewhere | Must unassign first, or choose different number | + +To reassign a number: +1. Go to the agent currently using it +2. Unassign or assign a different number +3. 
Return to your agent and assign + +--- + +## Inbound vs Outbound + +**Inbound Calls:** +When someone calls your assigned number, this agent handles the conversation. + +**Outbound Calls:** +For outbound, you use Campaigns which specify which agent makes calls. The Phone Number Tab is primarily for inbound. + +→ See: [Campaigns](/atoms/atoms-platform/deployment/campaigns) + +--- + +## Saving Changes + +After selecting a number, save to apply: +``` +"You have unsaved changes" +[Discard] [Save] +``` + +--- + +## What's Next + + + + Configure event notifications + + + Get phone numbers + + diff --git a/fern/products/atoms/pages/platform/configuring-agents/agent-settings/voice-tab.mdx b/fern/products/atoms/pages/platform/configuring-agents/agent-settings/voice-tab.mdx new file mode 100644 index 0000000..6dea80b --- /dev/null +++ b/fern/products/atoms/pages/platform/configuring-agents/agent-settings/voice-tab.mdx @@ -0,0 +1,166 @@ +--- +title: "Voice Tab" +sidebarTitle: "Voice Tab" +description: "Fine-tune voice and speech behavior for your agent." +--- + +The Voice Tab provides detailed control over how your agent sounds and handles speech. Beyond basic voice selection, you can adjust speed, pronunciation, background sounds, and advanced speech detection. + + + [IMAGE: Full Voice tab with all sections visible] + + +--- + +## Location + +**Left Sidebar → Agent Settings → Voice tab** + +--- + +## Voice Selection + +At the top, you'll find the same voice dropdown as the Prompt Section. Change voices here or in the top bar — they sync. 
+ +--- + +## Speech Settings + +| Setting | Control | Range | Default | +|---------|---------|-------|---------| +| **Speech Speed** | Slider | Slow ↔ Fast | 1 (normal) | + +Adjust how quickly your agent speaks: +- Slower for complex information +- Faster for energetic conversations +- Default (1) works for most cases + +--- + +## Pronunciation & Background + +### Pronunciation Dictionaries + +Custom pronunciations for words the AI might mispronounce. + +| Element | Description | +|---------|-------------| +| **Pronunciations list** | Current custom pronunciations | +| **Add Pronunciation** | Add new pronunciation rule | + +**When to use:** +- Brand names with unusual spelling +- Technical terms +- Names of people or places +- Acronyms that should be spelled out + +**Example:** +- Word: "Atoms" → Pronunciation: "AH-toms" +- Word: "API" → Pronunciation: "A P I" + +### Background Sound + +Add ambient audio to make conversations feel more natural: + +| Option | Description | +|--------|-------------| +| **None** | No background (default) | +| **Office** | Subtle office ambiance | +| **Cafe** | Coffee shop sounds | +| **Call Center** | Busy call center background | + +--- + +## Advanced Voice Settings + +| Setting | Toggle | Default | Description | +|---------|--------|---------|-------------| +| **Mute User Until First Bot Response** | ON/OFF | OFF | Prevents user audio until bot speaks first | +| **Voicemail Detection** | ON/OFF | OFF | Detect if call goes to voicemail | +| **Personal Info Redaction (PII)** | ON/OFF | OFF | Automatically redact sensitive information | +| **Denoising** | ON/OFF | OFF | Remove background noise from caller audio | + +### Setting Details + +**Mute User Until First Bot Response** +Useful for outbound calls where you want the agent to speak first without the caller interrupting the greeting. + +**Voicemail Detection** +For outbound campaigns — detect answering machines and handle appropriately. 
+ +**Personal Info Redaction (PII)** +Automatically removes sensitive data (SSN, credit cards, etc.) from transcripts and logs. + +**Denoising** +Improves speech recognition when callers have noisy backgrounds. + +--- + +## Smart Turn Detection + +Intelligent handling of when the agent should start speaking. + +| Setting | Control | Description | +|---------|---------|-------------| +| **Smart Turn Detection** | Toggle | Enable intelligent turn-taking | +| **Wait Time** | Slider | How long to wait before responding | + +When enabled, the system intelligently determines when the caller has finished speaking, reducing interruptions. + +**Wait Time:** When enabled, adjust how long the agent waits after detected silence before responding. + +--- + +## Interruption Backoff Timer + +Controls how the agent handles when users interrupt. + +| Control | Description | +|---------|-------------| +| **Slider** | Time before agent resumes after interruption | + +Shorter = agent resumes quickly after interruption +Longer = more patient, waits for caller to finish + +--- + +## Voice Detection Tuning + +Fine-tune voice activity detection for challenging audio conditions. + +| Setting | Control | Range | Description | +|---------|---------|-------|-------------| +| **Confidence** | Slider | Low - High | How confident system must be that speech is occurring | +| **Min Volume** | Slider | Low - High | Minimum volume to count as speech | +| **Trigger Time** | Slider | Seconds | How long speech must occur to trigger detection | +| **Release Time** | Slider | Seconds | How long silence before speech is considered ended | + +**When to adjust:** +- Callers with very quiet voices → lower Min Volume +- Noisy environments → higher Confidence +- Callers who pause often → longer Release Time + +--- + +## Saving Changes + +After making changes, you'll see: +``` +"You have unsaved changes" +[Discard] [Save] +``` + +Click **Save** to apply your changes. 
+ +--- + +## What's Next + + + + Configure LLM and language settings + + + Test voice settings + + diff --git a/fern/products/atoms/pages/platform/configuring-agents/agent-settings/webhook-tab.mdx b/fern/products/atoms/pages/platform/configuring-agents/agent-settings/webhook-tab.mdx new file mode 100644 index 0000000..05ae2b4 --- /dev/null +++ b/fern/products/atoms/pages/platform/configuring-agents/agent-settings/webhook-tab.mdx @@ -0,0 +1,123 @@ +--- +title: "Webhook Tab" +sidebarTitle: "Webhook Tab" +description: "Send event data to your external systems." +--- + +The Webhook Tab lets you configure automatic notifications when call events occur. When something happens (call starts, call ends, analytics ready), Atoms sends data to your specified URL. + + + [IMAGE: Configure Webhook modal showing endpoint dropdown and event checkboxes] + + +--- + +## Location + +**Left Sidebar → Agent Settings → Webhook tab** + +--- + +## How Webhooks Work + +1. An event occurs (e.g., call ends) +2. Atoms sends an HTTP POST to your endpoint +3. Your system receives call data +4. You process it as needed (log, update CRM, trigger workflows) + +--- + +## Configuration + +### Webhook Endpoint + +| Field | Control | Description | +|-------|---------|-------------| +| **Webhook Endpoint** | Dropdown | Select from existing webhooks | + +You must create webhook endpoints first before they appear here. + +→ See: [Creating Endpoints](/atoms/atoms-platform/features/webhooks) + +### Events to Subscribe + +| Event | When It Fires | Data Included | +|-------|---------------|---------------| +| **Start** | When call begins, before conversation | Caller info, timestamp | +| **End** | When call ends | Transcript, duration, outcome | +| **Analytics Completed** | When post-call analytics finish | Metrics, analysis | + +Check the events you want to receive. + +--- + +## Setup Process + +1. **Create webhook endpoint** (Features → Webhooks) +2. **Select endpoint** in dropdown +3. 
**Check events** to subscribe +4. **Save** configuration + + +"Select a webhook endpoint to enable event selection" + + +You must select an endpoint before you can choose events. + +--- + +## Event Details + +### Start Event + +Fires when a call connects, before any conversation. + +**Use cases:** +- Log call initiation +- Start tracking in external system +- Trigger real-time dashboards + +### End Event + +Fires when call concludes, regardless of how it ended. + +**Use cases:** +- Log full transcript +- Update CRM with call outcome +- Create follow-up tasks +- Trigger post-call workflows + +### Analytics Completed + +Fires when post-call analysis is ready. + +**Use cases:** +- Capture sentiment analysis +- Log metrics and scores +- Update customer records with insights + +--- + +## Testing Webhooks + +After configuration: + +1. Make a test call +2. Check your endpoint for received data +3. Verify payload structure +4. Confirm your system processes correctly + +→ See: [Webhook Use Cases](/atoms/atoms-platform/features/webhooks) for common patterns + +--- + +## What's Next + + + + Timeout settings + + + Full webhook documentation + + diff --git a/fern/products/atoms/pages/platform/configuring-agents/configuration-panel/api-calls.mdx b/fern/products/atoms/pages/platform/configuring-agents/configuration-panel/api-calls.mdx new file mode 100644 index 0000000..453ac33 --- /dev/null +++ b/fern/products/atoms/pages/platform/configuring-agents/configuration-panel/api-calls.mdx @@ -0,0 +1,171 @@ +--- +title: "API Calls Configuration" +sidebarTitle: "API Calls" +description: "Connect to external services during conversations." +--- + +API Calls let your agent interact with external systems — look up customer data, check order status, book appointments, or update records mid-conversation. 
+ +--- + +## Location + +**Configuration Panel (right sidebar) → API Calls** + +--- + +## What API Calls Enable + +| Use Case | Example | +|----------|---------| +| Customer lookup | Fetch account details by phone number | +| Order status | Get real-time order information | +| Appointment booking | Check availability and create bookings | +| CRM updates | Log call information to Salesforce | +| Data verification | Verify customer information | +| Ticket creation | Create support tickets | + +--- + +## How It Works + +1. **Define the API endpoint** — URL, method, headers, body +2. **Set trigger conditions** — When should the call happen? +3. **Map responses to variables** — Store returned data +4. **Use variables in conversation** — Reference the data + +--- + +## Configuration + +→ **NEEDS PLATFORM INFO:** Detailed API configuration interface + +Basic configuration includes: + +| Field | Description | +|-------|-------------| +| **Endpoint URL** | The API URL to call | +| **Method** | GET, POST, PUT, DELETE | +| **Headers** | Authentication tokens, content-type | +| **Body** | Request payload (for POST/PUT) | +| **Response Mapping** | Map response fields to variables | + +--- + +## Example: Customer Lookup + +### API Configuration + +``` +URL: https://api.yourcrm.com/customers +Method: GET +Headers: + Authorization: Bearer {{api_key}} + Content-Type: application/json +Query Parameters: + phone: {{caller_phone}} +``` + +### Response + +```json +{ + "id": "cust_12345", + "name": "Jane Smith", + "status": "active", + "tier": "premium" +} +``` + +### Response Mapping + +| API Field | Variable | +|-----------|----------| +| name | `{{api.customer_name}}` | +| status | `{{api.account_status}}` | +| tier | `{{api.tier}}` | + +### Using in Conversation + +``` +Hi {{api.customer_name}}! I see you're a {{api.tier}} member. +How can I help you today? 
+``` + +--- + +## In Conversational Flow + +Conversational Flow agents have dedicated **API Call Nodes**: + +- **Pre-Call API** — Runs before conversation starts +- **API Call Node** — Runs mid-conversation +- **Post-Call API** — Runs after conversation ends + +→ See: [Node Types Reference](/atoms/atoms-platform/conversational-flow-agents/workflow-tab/node-types) + +--- + +## Error Handling + +What happens when API calls fail? + +| Scenario | Recommendation | +|----------|----------------| +| Timeout | Have fallback responses ready | +| 404 Not Found | "I couldn't find that information" | +| 500 Server Error | "I'm having trouble accessing that right now" | +| Invalid Data | Validate before using | + +Build graceful handling into your prompts: +``` +If you cannot retrieve customer information, apologize and +offer to help in other ways or transfer to a human agent. +``` + +--- + +## Security Considerations + +- **Never expose API keys in prompts** — Use secure configuration +- **Validate inputs** — Don't pass unvalidated user input to APIs +- **Use HTTPS** — Encrypt all API traffic +- **Limit permissions** — API keys should have minimal required access + +--- + +## Best Practices + +### Test API Calls Thoroughly + +Use the test modes to verify: +- Calls execute at the right time +- Responses map correctly to variables +- Errors are handled gracefully + +### Keep Latency in Mind + +API calls add latency to conversations. 
Optimize for speed: +- Use fast endpoints +- Cache when possible +- Only call when necessary + +### Log for Debugging + +Check conversation logs to see: +- Request sent +- Response received +- Variables set + +--- + +## What's Next + + + + Complete API documentation + + + Configure voice settings + + diff --git a/fern/products/atoms/pages/platform/configuring-agents/configuration-panel/end-call.mdx b/fern/products/atoms/pages/platform/configuring-agents/configuration-panel/end-call.mdx new file mode 100644 index 0000000..ce75413 --- /dev/null +++ b/fern/products/atoms/pages/platform/configuring-agents/configuration-panel/end-call.mdx @@ -0,0 +1,147 @@ +--- +title: "End Call Configuration" +sidebarTitle: "End Call" +description: "Configure when and how your agent terminates calls." +--- + +End Call configuration defines when the AI should end conversations. Without proper end call setup, agents may hang on awkwardly or end calls too abruptly. + + + [IMAGE: End Call Settings page showing list of configured functions] + + +--- + +## Location + +**Configuration Panel (right sidebar) → End Call** + +--- + +## Enabling End Call + +1. Find "End Call" in the Configuration Panel +2. Toggle it **ON** +3. Click the settings icon (⚙️) to configure + +--- + +## End Call Settings Page + +| Element | Description | +|---------|-------------| +| **Header** | "← End Call Settings" | +| **Add button** | "+ Add End Call" | +| **Functions list** | Configured end call functions with Edit/Delete | + +--- + +## Adding an End Call Function + +Click **"+ Add End Call"** to open the configuration modal. 
+ + + [IMAGE: Configure End Call Function modal with Name and Description fields] + + +### Modal Fields + +| Field | Type | Required | Example | +|-------|------|----------|---------| +| **Name** | Text input | Yes | `end_call` | +| **Description** | Text area | Yes | `Terminate the current call` | + +--- + +## Writing Descriptions + +The **Description** tells the AI when to trigger this function. Be clear and specific. + +### Good Examples + +``` +End the call when the customer confirms their issue is resolved +and has no more questions. +``` + +``` +Terminate the conversation when the user says goodbye, thanks you, +or indicates they're done. +``` + +``` +End the call after successfully transferring to a human agent. +``` + +### Bad Examples + +``` +End call +``` +(Too vague — AI doesn't know when) + +``` +When done +``` +(What counts as "done"?) + +--- + +## Common End Call Triggers + +| Trigger | Description Example | +|---------|---------------------| +| **Issue Resolved** | "Customer confirms their issue is resolved" | +| **User Says Goodbye** | "User says goodbye, thanks, or indicates they're done" | +| **Task Complete** | "Appointment is successfully booked and confirmed" | +| **Transfer Success** | "Call has been transferred to human agent" | +| **No Response** | "User is unresponsive after 3 attempts to re-engage" | +| **Cannot Help** | "Issue is outside scope and no transfer is available" | + +--- + +## Multiple End Call Functions + +Create separate functions for different scenarios: + +| Function Name | Trigger | +|---------------|---------| +| `successful_resolution` | Issue resolved, customer satisfied | +| `customer_goodbye` | Customer ends conversation | +| `transfer_complete` | Handoff to human successful | +| `no_response_timeout` | Caller unresponsive | + +This provides better analytics — you can see how calls ended. + +--- + +## Best Practices + +### Be Specific + +The AI uses your description to decide when to end. 
Specific descriptions = accurate behavior. + +### Cover All Scenarios + +Think about every way a call might end: +- Happy path (resolved) +- Customer-initiated (goodbye) +- Handoff (transfer) +- Edge cases (unresponsive, can't help) + +### Test Each Trigger + +In testing, verify each end condition triggers properly. Check conversation logs. + +--- + +## What's Next + + + + Configure call transfers + + + Test end call behavior + + diff --git a/fern/products/atoms/pages/platform/configuring-agents/configuration-panel/knowledge-base.mdx b/fern/products/atoms/pages/platform/configuring-agents/configuration-panel/knowledge-base.mdx new file mode 100644 index 0000000..65ec859 --- /dev/null +++ b/fern/products/atoms/pages/platform/configuring-agents/configuration-panel/knowledge-base.mdx @@ -0,0 +1,119 @@ +--- +title: "Knowledge Base (Config)" +sidebarTitle: "Knowledge Base" +description: "Attach a knowledge base to give your agent reference information." +--- + +Knowledge Base configuration lets you connect a repository of information that your agent can reference during conversations. This is essential for agents that need to answer detailed questions accurately. + +--- + +## Location + +**Configuration Panel (right sidebar) → Knowledge Base** + +--- + +## Enabling Knowledge Base + +1. Find "Knowledge Base" in the Configuration Panel +2. Toggle it **ON** +3. Select a KB from the dropdown + + + [IMAGE: KB toggle and dropdown selection] + + +--- + +## Selecting a Knowledge Base + +The dropdown shows available knowledge bases: + +| Option | Description | +|--------|-------------| +| **None** | Agent relies only on prompt | +| **[Your KB names]** | Available knowledge bases | + +Select the KB you want this agent to use. 
+ +--- + +## Local vs Global KB + +| Type | Scope | Best For | +|------|-------|----------| +| **Local KB** | Only this agent | Agent-specific information | +| **Global KB** | Shared across agents | Company-wide FAQs, policies | + +→ See: [Knowledge Base Features](/atoms/atoms-platform/features/knowledge-base) for creating and managing KBs + +--- + +## How It Works + +When a caller asks a question: + +1. Agent checks if the answer might be in the KB +2. Searches the KB for relevant content +3. Uses that content to form an accurate response +4. Cites information from your documents + +This helps agents answer questions they couldn't know from the prompt alone. + +--- + +## When to Use + +| Use Case | KB Value | +|----------|----------| +| Product FAQ | High — answer product questions accurately | +| Pricing information | High — give correct prices | +| Policy details | High — cite official policies | +| General support | Medium — supplements prompt | +| Simple tasks | Low — may not need external info | + +--- + +## Requirements + +Before you can attach a KB: + +1. **Create the Knowledge Base** — In the Build section +2. **Upload content** — Documents, URLs, text +3. **Wait for processing** — Content needs indexing + +→ See: [Creating a Knowledge Base](/atoms/atoms-platform/features/knowledge-base) + +--- + +## Best Practices + +### Keep KB Content Current + +Outdated information leads to wrong answers. Update your KB when information changes. + +### Structure Content Well + +Well-organized documents are easier to search. Use clear headings and sections. + +### Test Retrieval + +Ask questions that should use KB content. Verify the agent finds and uses the right information. + +### Don't Overload + +Focus on information the agent actually needs. Too much content can reduce retrieval accuracy. 
+ +--- + +## What's Next + + + + Add dynamic values to prompts + + + Full KB documentation + + diff --git a/fern/products/atoms/pages/platform/configuring-agents/configuration-panel/transfer-call.mdx b/fern/products/atoms/pages/platform/configuring-agents/configuration-panel/transfer-call.mdx new file mode 100644 index 0000000..496c299 --- /dev/null +++ b/fern/products/atoms/pages/platform/configuring-agents/configuration-panel/transfer-call.mdx @@ -0,0 +1,191 @@ +--- +title: "Transfer Call Configuration" +sidebarTitle: "Transfer Call" +description: "Configure call transfers to human agents or other numbers." +--- + +Transfer Call configuration lets your agent hand off conversations to human agents. You can set up cold transfers (immediate) or warm transfers (with context). + + + [IMAGE: Transfer Call Settings page] + + +--- + +## Location + +**Configuration Panel (right sidebar) → Transfer Call** + +--- + +## Enabling Transfer Call + +1. Find "Transfer Call" in the Configuration Panel +2. Toggle it **ON** +3. Click the settings icon (⚙️) to configure + +--- + +## Transfer Call Settings Page + +| Element | Description | +|---------|-------------| +| **Header** | "← Transfer Call Settings" | +| **Add button** | "+ Add Transfer Call" | +| **Empty state** | "No transfer call configured yet" | +| **Functions list** | Configured transfers | + +--- + +## Adding a Transfer + +Click **"+ Add Transfer Call"** to open the configuration modal. 
+ + + [IMAGE: Configure Transfer Call modal showing Cold/Warm selection] + + +### Modal Fields + +| Field | Type | Required | Example | +|-------|------|----------|---------| +| **Name** | Text input | Yes | `transfer_call` | +| **Description** | Text area | Yes | `Transfer to support team` | +| **Transfer Number** | Phone input | Yes | +1 (555) 123-4567 | +| **Type** | Selection | Yes | Cold Transfer / Warm Transfer | + +--- + +## Transfer Types + +| Type | Icon | Description | +|------|------|-------------| +| **Cold Transfer** | 📞 | Immediately connects caller to the number | +| **Warm Transfer** | 👥 | AI briefs receiving agent before connecting | + +### When to Use Cold Transfer + +- Simple handoffs +- When context isn't critical +- High-volume call centers +- When receiving agents access caller info independently + +### When to Use Warm Transfer + +- Complex issues requiring context +- VIP customers +- When receiving agents need background +- Improved customer experience + +--- + +## Warm Transfer Options + +When you select Warm Transfer, additional options appear: + + + [IMAGE: Warm transfer options - During Transfer, During Connection, After Connects] + + +### 1. During Transfer Call + +| Setting | Control | Description | +|---------|---------|-------------| +| **On-hold Music** | Dropdown | Music caller hears while transferring | + +Options: Ringtone, Classical, Jazz, None, etc. + +### 2. During Agent Connection + +| Setting | Control | Description | +|---------|---------|-------------| +| **Transfer only if human answers** | Checkbox | Coming Soon — Skip voicemails | +| **Whisper Message** | Toggle | Message ONLY the receiving agent hears | +| **Handoff Message** | Text/Prompt | Content of the whisper message | + +**Whisper Message:** This is spoken to the receiving agent before connecting. The caller doesn't hear it. + +Example: "Incoming transfer. Customer Jane is calling about a billing dispute. Account ends in 4521." + +### 3. 
After Transfer Connects + +| Setting | Control | Description | +|---------|---------|-------------| +| **Three-way Message** | Toggle | Message BOTH parties hear | +| **Message Content** | Text/Prompt | Introduction message | + +Example: "I've connected you with Sarah from our billing team. Sarah, Jane has been asking about a charge from last month." + +--- + +## Writing Descriptions + +The description tells the AI when to trigger the transfer. + +### Good Examples + +``` +Transfer to the sales team when the caller is interested in +purchasing and ready to speak with a sales representative. +``` + +``` +Transfer to a supervisor when the caller requests to speak with +a manager or when the issue requires authorization beyond my scope. +``` + +### Specify Conditions Clearly + +- What keywords trigger it? +- What situations require it? +- What should happen first? + +--- + +## Multiple Transfer Destinations + +Create separate transfers for different scenarios: + +| Transfer Name | Destination | Trigger | +|---------------|-------------|---------| +| `transfer_billing` | Billing team | Billing questions | +| `transfer_technical` | Tech support | Technical issues | +| `transfer_sales` | Sales team | Purchase interest | +| `transfer_supervisor` | Management | Escalation requests | + +--- + +## Best Practices + +### Use Warm Transfer for Complex Issues + +The brief context saves time and improves customer experience. 
+ +### Write Helpful Whisper Messages + +Include: +- Customer name (if known) +- Issue summary +- Account info (if relevant) +- What you've already tried + +### Test Transfer Flows + +Verify: +- Number connects correctly +- Whisper plays only to agent +- Hold music works +- Three-way message is clear + +--- + +## What's Next + + + + Attach reference documents + + + Configure call termination + + diff --git a/fern/products/atoms/pages/platform/configuring-agents/configuration-panel/variables.mdx b/fern/products/atoms/pages/platform/configuring-agents/configuration-panel/variables.mdx new file mode 100644 index 0000000..78a5f4e --- /dev/null +++ b/fern/products/atoms/pages/platform/configuring-agents/configuration-panel/variables.mdx @@ -0,0 +1,183 @@ +--- +title: "Variables Configuration" +sidebarTitle: "Variables" +description: "Use dynamic values in prompts and conversations." +--- + +Variables let you insert dynamic values into your prompts. Instead of hardcoding information, you use placeholders that get replaced with real data at runtime. + + + [IMAGE: Variable Management panel showing three tabs] + + +--- + +## Location + +**Configuration Panel (right sidebar) → Variables Management** + +--- + +## Variable Syntax + +Use double curly braces: + +``` +{{variable_name}} +``` + +### Example in Prompt + +``` +Hello {{customer_name}}, I see you're calling about order {{order_id}}. +Your order status is currently {{order_status}}. +``` + +At runtime, this becomes: +``` +Hello John, I see you're calling about order #12345. +Your order status is currently shipped. 
+``` + +--- + +## Variable Types + +The Variable Management Panel has three tabs: + +| Tab | Source | Description | +|-----|--------|-------------| +| **User Defined** | You create them | Custom variables for your needs | +| **System** | Platform provides | Built-in variables from Atoms | +| **API** | API responses | Variables from API call results | + +--- + +## User Defined Variables + +Variables you create and manage. + +### Empty State + +``` +{ } +No user variables found +Add variables to your prompt using +{{variable_name}} syntax. +``` + +### Adding Variables + +Variables appear when you: +1. Type `{{variable_name}}` in your prompt +2. The system detects and lists it +3. You can set default values if needed + +### Common User Variables + +| Variable | Example Use | +|----------|-------------| +| `{{company_name}}` | Your company name | +| `{{product_name}}` | Product being discussed | +| `{{promo_code}}` | Current promotion code | +| `{{team_name}}` | Department name | + +--- + +## System Variables + +Built-in variables provided by Atoms. + +| Variable | Contains | +|----------|----------| +| `{{caller_phone}}` | Caller's phone number | +| `{{call_time}}` | When the call started | +| `{{call_duration}}` | How long the call has lasted | +| `{{current_date}}` | Today's date | +| `{{current_time}}` | Current time | + +→ See: [Variables Reference](/atoms/atoms-platform/features/variables-reference) for complete list + +--- + +## API Variables + +Variables populated from API call responses. + +When an API Call returns data, you can map response fields to variables: + +```json +// API Response +{ + "customer_name": "Jane Smith", + "account_status": "active", + "balance": "$1,234.56" +} + +// Mapped Variables +{{api.customer_name}} → "Jane Smith" +{{api.account_status}} → "active" +{{api.balance}} → "$1,234.56" +``` + +Use these in subsequent prompts or nodes. + +--- + +## Using Variables Effectively + +### Personalization + +``` +Hi {{customer_name}}! 
Thanks for being a {{membership_level}} member. +``` + +### Dynamic Information + +``` +Your order {{order_id}} is scheduled for delivery on {{delivery_date}}. +``` + +### Conditional Context + +Variables can inform behavior: +``` +If {{account_type}} is "premium", offer priority support. +``` + +--- + +## Best Practices + +### Use Descriptive Names + +- Good: `{{customer_first_name}}` +- Bad: `{{n1}}` + +### Handle Missing Variables + +What if a variable isn't set? Have graceful fallbacks in your prompt: +``` +Hi {{customer_name|there}}! (Uses "there" if name unknown) +``` + +### Test Variable Substitution + +Verify variables are replaced correctly in test conversations. + +### Keep Track + +Document your variables so team members understand what each one does. + +--- + +## What's Next + + + + Fetch dynamic data + + + Complete variable list + + diff --git a/fern/products/atoms/pages/platform/configuring-agents/editor-overview.mdx b/fern/products/atoms/pages/platform/configuring-agents/editor-overview.mdx new file mode 100644 index 0000000..7f7962a --- /dev/null +++ b/fern/products/atoms/pages/platform/configuring-agents/editor-overview.mdx @@ -0,0 +1,131 @@ +--- +title: "Editor Overview" +sidebarTitle: "Editor Overview" +description: "Navigate the agent editor and understand all configuration areas." +--- + +After creating an agent, you work in the agent editor. This page explains every part of the interface so you can configure agents efficiently. 
+ + + [IMAGE: Full agent editor with all three areas (Prompt Section, Config Panel, Left Sidebar) clearly annotated] + + +--- + +## Editor Layout + +The editor has three main configuration areas: + +| Area | Location | Contains | +|------|----------|----------| +| **Prompt Section** | Top bar | Model, Voice, Language dropdowns | +| **Configuration Panel** | Right sidebar | End Call, Transfer, KB, Variables, API Calls | +| **Agent Settings** | Left sidebar → "Agent Settings" | Voice, Model, Phone, Webhook, General tabs | + +--- + +## Left Sidebar Navigation + +| Item | Icon | Description | +|------|------|-------------| +| **Prompt** | 📄 | Main prompt editor (Single Prompt only) | +| **Agent Settings** | ⚙️ | Detailed configuration tabs | +| **Widget** | `` | Embed widget configuration | +| **Integrations** | 🔗 | Third-party connections | +| **Post Call Metrics** | 📊 | Analytics configuration | + +--- + +## Top Right Actions + +| Button | Function | +|--------|----------| +| **Convo Logs** | View conversation transcripts | +| **Lock Agent** | Prevent accidental edits | +| **Test Agent** | Test via Telephony/Web/Chat | + +--- + +## Header + +The header contains: + +- **Back arrow (←)** — Return to agents list +- **Agent name** — Click to rename +- **Agent ID** — Click to copy + +--- + +## Single Prompt vs Conversational Flow + +The editor layout differs slightly between agent types: + +| Element | Single Prompt | Conversational Flow | +|---------|---------------|---------------------| +| Top area | Prompt Section (Model, Voice, Language) | Toggle tabs (Workflow / Settings) | +| Center | Prompt editor | Visual canvas | +| Right sidebar | Configuration Panel | Configuration Panel (when in Settings) | +| Left sidebar | Same | Same | + +In Conversational Flow, click **"Settings"** tab to access the same configuration options available in Single Prompt. 
+ +--- + +## Configuration Areas in Detail + +### Prompt Section + +Located in the top bar, provides quick access to: +- **Model** — Select LLM (GPT-4o, etc.) +- **Voice** — Choose from voice library +- **Language** — Set primary language + +→ See: [Model Selection](/atoms/atoms-platform/single-prompt-agents/prompt-section/model-selection), [Voice Selection](/atoms/atoms-platform/single-prompt-agents/prompt-section/voice-selection), [Language Selection](/atoms/atoms-platform/single-prompt-agents/prompt-section/language-selection) + +### Configuration Panel + +Located in the right sidebar, provides toggles for: +- **End Call** — When/how to terminate calls +- **Transfer Call** — Handoff to humans +- **Knowledge Base** — Attach reference docs +- **Variables** — Dynamic values in prompts +- **API Calls** — External service connections + +→ See: [End Call](/atoms/atoms-platform/single-prompt-agents/configuration-panel/end-call), [Transfer Call](/atoms/atoms-platform/single-prompt-agents/configuration-panel/transfer-call), [Knowledge Base](/atoms/atoms-platform/single-prompt-agents/configuration-panel/knowledge-base), [Variables](/atoms/atoms-platform/single-prompt-agents/configuration-panel/variables), [API Calls](/atoms/atoms-platform/single-prompt-agents/configuration-panel/api-calls) + +### Agent Settings + +Accessed via left sidebar, contains detailed tabs: +- **Voice Tab** — Speech settings, pronunciation, advanced voice +- **Model Tab** — LLM settings, language switching +- **Phone Number Tab** — Assign phone number +- **Webhook Tab** — Event notifications +- **General Tab** — Timeout settings + +→ See: [Voice Tab](/atoms/atoms-platform/single-prompt-agents/agent-settings/voice-settings), [Model Tab](/atoms/atoms-platform/single-prompt-agents/agent-settings/model-settings), [Phone Number Tab](/atoms/atoms-platform/single-prompt-agents/agent-settings/phone-number), [Webhook Tab](/atoms/atoms-platform/single-prompt-agents/agent-settings/webhooks), [General 
Tab](/atoms/atoms-platform/single-prompt-agents/agent-settings/general-settings) + +--- + +## Saving Changes + +Most changes save automatically. However, some sections show: + +``` +"You have unsaved changes" +[Discard] [Save] +``` + +Always click **Save** before leaving if you see this message. + +--- + +## What's Next + + + + Choose your LLM + + + Select and configure voice + + diff --git a/fern/products/atoms/pages/platform/configuring-agents/integrations.mdx b/fern/products/atoms/pages/platform/configuring-agents/integrations.mdx new file mode 100644 index 0000000..caa943f --- /dev/null +++ b/fern/products/atoms/pages/platform/configuring-agents/integrations.mdx @@ -0,0 +1,99 @@ +--- +title: "Integrations" +sidebarTitle: "Integrations" +description: "Connect third-party services to your agent." +--- + +Integrations let you connect your agent to external services like CRMs, calendars, and business tools. This enables automated data flow between your voice agent and the systems your business already uses. + +--- + +## Location + +**Left Sidebar → Integrations** + +--- + +## Available Integrations + +| Integration | Status | Description | +|-------------|--------|-------------| +| **Salesforce** | Available | CRM integration | +| **HubSpot** | Coming Soon | Marketing and CRM | +| **Google Calendar** | Coming Soon | Calendar booking | +| **Google Sheets** | Coming Soon | Spreadsheet data | + +--- + +## Salesforce Integration + +The Salesforce integration allows your agent to: + +- Look up contact records +- Create and update leads +- Log call activities +- Trigger workflows + +### Setup + +1. Click **"Configure"** next to Salesforce +2. Connect your Salesforce account +3. Configure field mappings +4. Test the connection + +→ **NEEDS PLATFORM INFO:** Detailed Salesforce configuration steps + +--- + +## Using Integrations in Conversational Flow + +In Conversational Flow agents, integrations appear in the Node Palette: + +1. Open your workflow +2. 
Find integrations in the left panel +3. Drag integration node onto canvas +4. Configure the specific action + +This lets you trigger integration actions at specific points in the conversation. + +--- + +## Enterprise Integrations + +Some advanced integrations require: +- Enterprise plan +- Custom configuration +- Sales team engagement + +Contact sales for: +- Custom CRM integrations +- ERP connections +- Proprietary system APIs +- High-volume data sync + +--- + +## Coming Soon + +We're actively building more integrations: + +| Integration | Expected Capability | +|-------------|---------------------| +| **HubSpot** | CRM data, contact management | +| **Google Calendar** | Availability checking, booking | +| **Google Sheets** | Data lookup, logging | +| **Zapier** | Connect to thousands of apps | +| **Microsoft 365** | Calendar, contacts | + +--- + +## What's Next + + + + Configure analytics extraction + + + Custom API integrations + + diff --git a/fern/products/atoms/pages/platform/configuring-agents/post-call-metrics.mdx b/fern/products/atoms/pages/platform/configuring-agents/post-call-metrics.mdx new file mode 100644 index 0000000..38026e4 --- /dev/null +++ b/fern/products/atoms/pages/platform/configuring-agents/post-call-metrics.mdx @@ -0,0 +1,180 @@ +--- +title: "Post-Call Metrics" +sidebarTitle: "Post-Call Metrics" +description: "Define metrics to extract from completed calls." +--- + +Post-Call Metrics let you automatically extract and analyze data from every call. Define what you want to measure, and Atoms analyzes conversations to provide structured insights. + + + [IMAGE: Post-call metrics setup modal showing form] + + +--- + +## Location + +**Left Sidebar → Post Call Metrics** + +--- + +## What Are Post-Call Metrics? + +After each call, Atoms can analyze the conversation and extract specific information: + +- Did the issue get resolved? +- What product was discussed? +- How satisfied was the customer? +- Does this need follow-up? 
+ +This data helps you understand performance and improve over time. + +--- + +## Set Up Post Call Metrics Modal + +### Left Panel - Metric Types + +| Type | Icon | Description | +|------|------|-------------| +| **Disposition Metrics** | 📊 | Evaluate call outcomes by category | +| **Templates** | 📋 | Select from pre-filled metrics | + +### Right Panel - Configuration + +Header shows "Disposition Metrics" with **"Add Another +"** button. + +--- + +## Creating a Metric + +### Metric Form Fields + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| **Identifier** | Text | Yes | Unique name for this metric | +| **Data Type** | Dropdown | Yes | String, Number, Boolean | +| **Prompt** | Text area | Yes | Question for AI to analyze | + +### Identifier Rules + +- Lowercase letters only +- Numbers allowed +- Underscores allowed +- No spaces or special characters + +**Examples:** +- `call_outcome` ✓ +- `satisfaction_score` ✓ +- `follow_up_needed` ✓ +- `Call Outcome` ✗ (has spaces, uppercase) + +--- + +## Data Types + +| Type | When to Use | Example Values | +|------|-------------|----------------| +| **String** | Categories, text answers | "resolved", "billing", "product_x" | +| **Number** | Scores, counts | 1-5 rating, count of issues | +| **Boolean** | Yes/no questions | true, false | + +--- + +## Example Metrics + +### Call Outcome (String) + +``` +Identifier: call_outcome +Data Type: String +Prompt: What was the outcome of this call? + Options: resolved, unresolved, transferred, callback_scheduled +``` + +### Satisfaction Score (Number) + +``` +Identifier: satisfaction_score +Data Type: Number +Prompt: Based on the conversation tone and customer responses, + rate the customer's satisfaction from 1 (very dissatisfied) + to 5 (very satisfied). +``` + +### Follow-up Needed (Boolean) + +``` +Identifier: follow_up_needed +Data Type: Boolean +Prompt: Does this call require any follow-up action from + the support team? Answer true or false. 
+``` + +### Product Discussed (String) + +``` +Identifier: product_discussed +Data Type: String +Prompt: What product or service was primarily discussed + in this conversation? +``` + +--- + +## Using Templates + +Click **Templates** in the left panel to see pre-built metric sets: + +- Common support metrics +- Sales qualification metrics +- Satisfaction metrics + +Templates pre-fill identifier, type, and prompt. Customize as needed. + +--- + +## Viewing Results + +After calls complete, metrics appear in: + +- **Conversation Logs** — Individual call details +- **Analytics Dashboard** — Aggregate views +- **Webhook Payloads** — Sent to your systems (Analytics Completed event) + +--- + +## Best Practices + +### Write Clear Prompts + +The AI uses your prompt to analyze. Be specific: + +**Vague:** "Was the customer happy?" + +**Better:** "Based on the customer's language, tone, and explicit statements, rate their satisfaction from 1-5." + +### Use Consistent Identifiers + +Stick to a naming convention across agents for easier analysis. + +### Start Simple + +Begin with 3-5 key metrics. You can always add more later. + +### Test with Real Calls + +Make test calls and verify metrics are extracted correctly. + +--- + +## What's Next + + + + Test metric extraction + + + View aggregated metrics + + diff --git a/fern/products/atoms/pages/platform/configuring-agents/prompt-section/language-selection.mdx b/fern/products/atoms/pages/platform/configuring-agents/prompt-section/language-selection.mdx new file mode 100644 index 0000000..819a5ca --- /dev/null +++ b/fern/products/atoms/pages/platform/configuring-agents/prompt-section/language-selection.mdx @@ -0,0 +1,91 @@ +--- +title: "Language Selection" +sidebarTitle: "Language Selection" +description: "Set the primary language for your agent's conversations." +--- + +Language selection determines what language your agent speaks and understands. This affects both speech recognition and text-to-speech output. 
+ +--- + +## Location + +**Prompt Section (top bar) → Language dropdown** + +--- + +## How to Select + +1. Click the language dropdown +2. Choose from available languages +3. Selection applies immediately + +--- + +## Available Languages + +The available languages depend on your selected voice. Choose your voice first, then select a language it supports. + +Common options include: +- English +- Spanish +- French +- German +- Portuguese +- Hindi +- Arabic +- Chinese +- And more + +--- + +## Voice and Language Relationship + +| If Voice Supports | You Can Select | +|-------------------|----------------| +| English only | English | +| English, Spanish | English OR Spanish | +| 18+ languages | Any of those languages | + +**Tip:** If you need specific languages, check voice capabilities first. Voices like Rachel support 18+ languages. + +--- + +## Multi-Language Support + +For agents that need to switch languages during conversations: + +1. Select a multi-language voice +2. Set primary language here +3. Enable **Language Switching** in the Model Tab + +→ See: [Model Tab](/atoms/atoms-platform/single-prompt-agents/agent-settings/model-settings) for language switching configuration + +--- + +## Best Practices + +### Choose Primary Language First + +Set the language most callers will use as the primary language. + +### Match Voice to Language + +Some voices sound more natural in certain languages. Preview the voice speaking your target language. + +### Test with Native Speakers + +Speech recognition and synthesis quality varies by language. Test with native speakers when possible. 
+ +--- + +## What's Next + + + + Configure call termination + + + Language switching settings + + diff --git a/fern/products/atoms/pages/platform/configuring-agents/prompt-section/model-selection.mdx b/fern/products/atoms/pages/platform/configuring-agents/prompt-section/model-selection.mdx new file mode 100644 index 0000000..18ab0ca --- /dev/null +++ b/fern/products/atoms/pages/platform/configuring-agents/prompt-section/model-selection.mdx @@ -0,0 +1,117 @@ +--- +title: "Model Selection" +sidebarTitle: "Model Selection" +description: "Choose the LLM that powers your agent's understanding and responses." +--- + +The model selection determines which Large Language Model (LLM) processes conversations. Different models offer different capabilities, speeds, and costs. + + + [IMAGE: Model dropdown expanded showing options] + + +--- + +## Location + +**Prompt Section (top bar) → First dropdown** + +--- + +## Available Model Types + +| Type | Description | +|------|-------------| +| **Emotive Models** | Speech-to-speech with emotional tone and expression (Beta) | +| **Traditional Models** | Standard LLM models for text processing | + +--- + +## Available Models + +| Model | Provider | Best For | +|-------|----------|----------| +| **GPT-4o** | OpenAI | Complex conversations, nuanced understanding | +| **Electron** | Atoms | Speed and efficiency | + +→ **NEEDS PLATFORM INFO:** Complete list of available models + +--- + +## How to Select + +1. Click the model dropdown in the Prompt Section +2. Browse available options +3. Click to select your choice + +The change applies immediately. 
+ +--- + +## Choosing the Right Model + +### GPT-4o (Recommended for most) + +- Best understanding and reasoning +- Handles complex queries well +- Slightly higher latency than smaller models +- Recommended for support, sales, complex workflows + +### Emotive Models (Beta) + +- Natural emotional expression +- More human-like responses +- Good for conversations requiring empathy +- Beta — may have quirks + +### Electron Models + +- Optimized for speed +- Lower latency +- Good for simple, high-volume use cases +- Cost-effective + +--- + +## Considerations + +### Latency + +Larger models take longer to respond. For voice AI, every millisecond matters: +- Simple queries → smaller models may be sufficient +- Complex reasoning → larger models worth the tradeoff + +### Cost + +Different models have different per-minute costs: +- Check your plan details for pricing +- Higher-capability models typically cost more + +### Accuracy + +More capable models generally: +- Follow instructions better +- Handle edge cases more gracefully +- Provide more nuanced responses + +--- + +## Best Practices + +- **Start with GPT-4o** for most use cases +- **Test thoroughly** when changing models +- **Compare latency** in real calls +- **Review conversation logs** to verify quality + +--- + +## What's Next + + + + Choose your agent's voice + + + Set primary language + + diff --git a/fern/products/atoms/pages/platform/configuring-agents/prompt-section/voice-selection.mdx b/fern/products/atoms/pages/platform/configuring-agents/prompt-section/voice-selection.mdx new file mode 100644 index 0000000..eec1877 --- /dev/null +++ b/fern/products/atoms/pages/platform/configuring-agents/prompt-section/voice-selection.mdx @@ -0,0 +1,144 @@ +--- +title: "Voice Selection" +sidebarTitle: "Voice Selection" +description: "Choose and preview the voice your agent uses." +--- + +Voice selection determines how your agent sounds. 
Atoms offers a library of high-quality voices with different characteristics to match your brand. + + + [IMAGE: Voice picker panel with filter options and voice list visible] + + +--- + +## Location + +**Prompt Section (top bar) → Voice dropdown** + +--- + +## Voice Picker Panel + +Clicking the voice dropdown opens a panel with: + +| Element | Description | +|---------|-------------| +| **Search bar** | "Search for languages, accent" with filter icon | +| **Filter tabs** | Language, Accent, Age, Gender, Model | +| **Currently used** | Shows your selected voice | +| **All Voices** | Full voice library | + +--- + +## Filter Options + +Click filter tabs to narrow your search: + +### Language + +- ✓ All Language +- African +- Arabic +- Bengali +- Chinese +- Croatian +- English +- Hindi +- Spanish +- (and more) + +### Other Filters + +- **Accent** — Regional variations +- **Age** — Young, Middle, Mature +- **Gender** — Male, Female +- **Model** — Voice engine type + +--- + +## Available Voices + +| Voice | Languages | Notes | +|-------|-----------|-------| +| **Tiffany** ⭐ | English, Hindi, Spanish +3 | Multi-language, warm | +| **Daniel** ⭐ | English | Professional | +| **Jessica** ⭐ | English | Friendly | +| **Joseph** ⭐ | English | Confident | +| **Brandon** ⭐ | English, Spanish, Hindi +4 | Multi-language | +| **Rachel** ⭐ | English, Hindi, Portuguese +18 | Most languages | + +**Legend:** +- ⭐ = Featured/premium voice +- ▶️ = Preview button (click to hear sample) +- "Others" = Shows regional variants + +--- + +## How to Select + +1. Click the voice dropdown +2. Use filters to narrow options (optional) +3. Click ▶️ to preview a voice +4. Click the voice name to select it + +The selected voice appears in the dropdown and is used for all conversations. + +--- + +## Previewing Voices + +Before selecting, preview how voices sound: + +1. Find a voice you're interested in +2. Click the ▶️ play button next to it +3. Listen to the sample +4. Compare multiple voices +5. 
Select your preference + +**Tip:** Preview with text similar to what your agent will say. Different content sounds different. + +--- + +## Choosing the Right Voice + +### Match Your Brand + +| Brand Personality | Voice Characteristics | +|-------------------|----------------------| +| Professional/Corporate | Clear, neutral, confident | +| Friendly/Casual | Warm, approachable, upbeat | +| Authoritative | Deep, measured, assured | +| Youthful | Energetic, modern | + +### Consider Your Audience + +- Who are your callers? +- What do they expect? +- What builds trust with them? + +### Multi-Language Needs + +If your agent handles multiple languages, choose a voice that supports all needed languages (like Rachel with 18+ languages). + +--- + +## Best Practices + +- **Preview before selecting** — Hearing is believing +- **Test in real calls** — Browser preview differs from phone audio +- **Match to use case** — Support vs. sales may need different voices +- **Consider consistency** — Use the same voice across similar agents + +--- + +## What's Next + + + + Set primary language + + + Advanced voice configuration + + diff --git a/fern/products/atoms/pages/platform/configuring-agents/widget-configuration.mdx b/fern/products/atoms/pages/platform/configuring-agents/widget-configuration.mdx new file mode 100644 index 0000000..b7b9a28 --- /dev/null +++ b/fern/products/atoms/pages/platform/configuring-agents/widget-configuration.mdx @@ -0,0 +1,90 @@ +--- +title: "Widget Configuration" +sidebarTitle: "Widget Configuration" +description: "Configure the embeddable web widget for your agent." +--- + +The Widget section lets you configure an embeddable voice widget that visitors can use on your website. This is an alternative to phone-based deployment. + + + [IMAGE: Widget configuration page] + + +--- + +## Location + +**Left Sidebar → Widget** + +--- + +## What is the Widget? + +A small button or panel that appears on your website. 
When visitors click it, they can have a voice conversation with your agent directly in their browser. + +**Benefits:** +- No phone number needed +- Works on any website +- Visitors don't leave your site +- Lower barrier to conversation + +--- + +## Configuration Options + +→ **NEEDS PLATFORM INFO:** Specific widget configuration options and interface + +### Appearance + +- Widget button style +- Colors and branding +- Position on page (corner placement) +- Size and visibility + +### Behavior + +- Auto-open settings +- Greeting message +- Privacy controls +- Recording disclosure + +### Branding + +- Logo/icon customization +- Brand colors +- Custom CSS (if available) + +--- + +## Getting the Embed Code + +After configuring your widget: + +1. Complete widget settings +2. Generate embed code +3. Copy the code snippet +4. Add to your website's HTML + +→ See: [Widget Deployment](/atoms/atoms-platform/features/widget) for embedding instructions + +--- + +## Testing the Widget + +1. Configure widget settings +2. Save changes +3. Use **Test Agent → Web Call** to test widget behavior +4. Verify appearance and functionality + +--- + +## What's Next + + + + Get embed code for your site + + + Connect third-party services + + diff --git a/fern/products/atoms/pages/platform/convo-flow/conditions.mdx b/fern/products/atoms/pages/platform/convo-flow/conditions.mdx new file mode 100644 index 0000000..f3549b0 --- /dev/null +++ b/fern/products/atoms/pages/platform/convo-flow/conditions.mdx @@ -0,0 +1,361 @@ +--- +title: "Conditions & Branching" +sidebarTitle: "Conditions & Branching" +description: "Create dynamic conversation paths based on caller responses." +--- + +Branching is what makes Conversational Flow powerful. Instead of a linear script, your agent can take different paths based on what callers say, how they respond, or data from your systems. 
+ + + ![Flow with branches](../building-agents/images/cf-branching-example.png) + + +--- + +## How Branching Works + + + + The current node poses a question or presents options. + + + The caller says something (or doesn't respond). + + + The AI checks each branch condition against the response. + + + The matching condition determines which node comes next. + + + +--- + +## Configuring Branches + +Click any Default node, then click **Branching** to configure output paths. + + + ![Branch configuration](../building-agents/images/cf-branching-example.png) + + +### Adding a Branch + +1. Click the Default node you want to branch from +2. Click **Branching** in the configuration panel +3. Click **+ Add Branch** +4. Configure the condition +5. Connect to the target node + +### Branch Fields + +| Field | Description | +|-------|-------------| +| **Label** | Name shown on the canvas (e.g., "Wants Billing Help") | +| **Condition** | What triggers this path (natural language or variable-based) | +| **Target Node** | Where the conversation goes when this condition matches | + +--- + +## Condition Types + + + + Write conditions in plain English. The AI interprets caller intent. + + **Examples:** + + | Condition | Matches When Caller Says | + |-----------|--------------------------| + | "User confirms" | "Yes", "Yeah", "That's right", "Correct" | + | "User declines" | "No", "Nope", "Not really", "I don't think so" | + | "User asks about billing" | "I have a billing question", "About my invoice" | + | "User wants to speak to a person" | "Can I talk to someone?", "Get me a human" | + | "User is frustrated" | Angry tone, complaints, escalation requests | + | "Anything else" | Fallback — catches everything not matched above | + + + Write conditions as if describing the caller's intent, not their exact words. The AI handles variations automatically. + + + + + Route based on variable values from APIs or collected data. 
+ + **Syntax:** + ``` + {{variable_name}} operator value + ``` + + **Operators:** + + | Operator | Meaning | + |----------|---------| + | `==` | Equals | + | `!=` | Not equals | + | `>` | Greater than | + | `<` | Less than | + | `>=` | Greater or equal | + | `<=` | Less or equal | + + **Examples:** + + | Condition | Logic | + |-----------|-------| + | `{{budget}} > 10000` | High-value lead path | + | `{{account_tier}} == "premium"` | VIP treatment | + | `{{attempts}} >= 3` | Escalation after 3 tries | + | `{{api.balance}} < 0` | Negative balance handling | + + + + Combine multiple conditions for complex routing. + + **AND Conditions:** + ``` + {{account_tier}} == "premium" && {{budget}} > 10000 + ``` + Both must be true. + + **OR Conditions:** + ``` + {{issue_type}} == "billing" || {{issue_type}} == "payment" + ``` + Either can be true. + + + + Catch-all for when no other condition matches. + + Use **"Anything else"** or leave condition empty for fallback behavior. + + **Always include a default branch.** It handles: + - Unexpected responses + - Unclear answers + - Edge cases you didn't anticipate + - Silence or no response + + **Good fallback prompt:** + ``` + I didn't quite catch that. Could you tell me again — + are you calling about billing or technical support? + ``` + + + +--- + +## Branch Order Matters + +Conditions are evaluated top to bottom. The first matching condition wins. + + +Put specific conditions first, fallback last. If "Anything else" is first, it will always match and other conditions will never trigger. + + +**Correct Order:** +1. `{{budget}} >= 50000` → Enterprise Path +2. `{{budget}} >= 10000` → Professional Path +3. `{{budget}} >= 1000` → Starter Path +4. Anything else → Self-Serve Resources + +**Incorrect Order:** +1. `{{budget}} >= 1000` ← This catches everything above $1k +2. `{{budget}} >= 10000` ← Never reached +3. 
`{{budget}} >= 50000` ← Never reached + +--- + +## Real-World Examples + +### Support Routing + +``` +[Ask Issue Type] +"I'm here to help! Are you calling about billing, +technical support, or something else?" + +Branches: +├── "User asks about billing" → [Billing Flow] +├── "User has technical issue" → [Technical Flow] +├── "User wants to speak to someone" → [Transfer Call] +└── "Anything else" → [Clarify and Re-ask] +``` + +### Lead Qualification + +``` +[Ask Budget] +"What's your approximate budget for this project?" + +→ Store response as {{budget}} + +Branches: +├── {{budget}} >= 50000 → [Enterprise Path] +├── {{budget}} >= 10000 → [Professional Path] +├── {{budget}} >= 1000 → [Starter Path] +└── "Anything else" → [Self-Serve Resources] +``` + +### Confirmation Flow + +``` +[Confirm Appointment] +"Just to confirm — you'd like to book for +Tuesday at 3pm. Is that correct?" + +Branches: +├── "User confirms" → [Complete Booking] +├── "User wants to change" → [Modify Appointment] +└── "Anything else" → [Re-confirm] +``` + +### API Response Routing + +``` +[API Call: Check Account Status] + +Branches: +├── {{account_status}} == "active" → [Standard Service] +├── {{account_status}} == "suspended" → [Reactivation Flow] +├── {{account_status}} == "premium" → [VIP Service] +└── API Error → [Fallback / Transfer] +``` + +--- + +## Best Practices + + + No matter how many conditions you define, something unexpected will happen. + + Your fallback should: + - Acknowledge the response + - Re-ask the question differently + - Offer options to clarify + + ``` + "I want to make sure I understand. Are you calling about + billing, technical issues, or something else entirely?" + ``` + + + + Avoid overlapping conditions that could both match. 
+ + | Overlapping (Bad) | Exclusive (Good) | + |-------------------|------------------| + | "support" AND "technical support" | "billing" OR "technical" OR "other" | + | `>10` AND `>50` | `10-50` AND `>50` | + + When conditions overlap, the first one wins — which may not be what you want. + + + + It's easy to forget a branch while testing. Be systematic: + + 1. List all possible paths + 2. Test each one deliberately + 3. Try edge cases (silence, gibberish, topic changes) + 4. Review in Conversation Logs + + + + Too many nested branches become impossible to manage. + + If your flow is getting too deep: + - Can you combine some branches? + - Should some paths be separate flows? + - Is Single Prompt better for this use case? + + + + Labels appear on the canvas — make them meaningful. + + | Bad Labels | Good Labels | + |------------|-------------| + | "Option 1" | "Wants Billing Help" | + | "Path A" | "Budget > $10k" | + | "Yes" | "Confirmed Appointment" | + | "Branch 2" | "Frustrated Customer" | + + +--- + +## Complex Patterns + +### Loop Back (Retry) + +For scenarios where you need to re-ask: + +``` +[Ask Budget] + ↓ +[Validate Response] + ├── Valid → Continue to next step + └── Invalid → Loop back to [Ask Budget] +``` + + +**Avoid infinite loops.** Add a counter variable and exit after N attempts. After 2-3 retries, offer to transfer or end gracefully. + + +### Parallel Paths That Merge + +Different paths can lead to the same destination: + +``` +[Qualify Lead] +├── High Budget → [Fast Track] ─────────┐ +└── Standard Budget → [Standard Process] ├→ [Schedule Demo] + │ +``` + +### Guard Rails + +Add safety branches for edge cases: + +``` +[Any Node] +├── Normal flow conditions... 
+├── "User seems upset or frustrated" → [Empathy Response] +├── "User mentions legal or lawsuit" → [Transfer to Manager] +└── "Anything else" → [Standard Fallback] +``` + +--- + +## Debugging Branches + +When branches don't work as expected: + + + + See exactly which conditions were evaluated and which matched. + + + + Typos in variable names or operators fail silently. + + + + If fallback triggers too often, your conditions may be too specific. + + + + Conditions are checked top to bottom — earlier matches win. + + + +--- + +## Next + + + + Dynamic data for conditions and prompts + + + Validate your branches work correctly + + diff --git a/fern/products/atoms/pages/platform/convo-flow/config/api-calls.mdx b/fern/products/atoms/pages/platform/convo-flow/config/api-calls.mdx new file mode 100644 index 0000000..c3da98d --- /dev/null +++ b/fern/products/atoms/pages/platform/convo-flow/config/api-calls.mdx @@ -0,0 +1,287 @@ +--- +title: "API Calls" +sidebarTitle: "API Calls" +description: "Connect your agent to external services and data." +--- + +API Calls let your agent fetch and send data during conversations. Look up customer information, check order status, book appointments, create tickets — all in real-time while the conversation is happening. + +In Conversational Flow, API calls are configured through dedicated nodes: **API Call**, **Pre-Call API**, and **Post-Call API**. + +--- + +## API Nodes Overview + +| Node | When It Runs | Use Case | +|------|--------------|----------| +| **Pre-Call API** | Before conversation starts | Load customer data, set context | +| **API Call** | During conversation | Fetch/send data mid-flow | +| **Post-Call API** | After conversation ends | Log outcomes, trigger follow-ups | + +→ See: [Node Types](/atoms/atoms-platform/conversational-flow-agents/workflow-tab/node-types) for full details on each node. + +--- + +## Configuration + +All three API node types share the same configuration structure. 
Click the node to open its settings panel. + + + + Select the HTTP method and enter the full API endpoint URL. + + | Field | Description | + |-------|-------------| + | **Method** | HTTP method (GET, POST, PUT, PATCH, DELETE) | + | **URL** | Full endpoint URL | + + Use variables in the URL for dynamic requests: + + ``` + https://api.example.com/customers/{{caller_phone}} + ``` + + + + Add custom headers to your API request. These are key-value pairs, often used for Content-Type or API keys. + + | Common Headers | Example Value | + |----------------|---------------| + | `Content-Type` | `application/json` | + | `Authorization` | `Bearer {{api_key}}` | + | `X-API-Key` | `your-api-key-here` | + + Click **+ Add Header** to add each header. + + + + Construct the data payload for your request. Typically required for POST, PUT, or PATCH methods. + + ```json + { + "phone": "{{caller_phone}}", + "name": "{{customer_name}}", + "action": "lookup" + } + ``` + + Variables are replaced with actual values at runtime. + + + + Specify a variable name and JSONPath expression to extract values from the API response. + + | Field | Description | + |-------|-------------| + | **Variable Name** | Name to store the extracted value | + | **JSONPath** | Path to the value in the response | + + **Example:** + + API returns: + ```json + { + "data": { + "name": "John Smith", + "tier": "premium" + } + } + ``` + + Extract mappings: + | Variable | JSONPath | + |----------|----------| + | `customer_name` | `$.data.name` | + | `account_tier` | `$.data.tier` | + + Use in later nodes: `"Hi {{customer_name}}, I see you're a {{account_tier}} member."` + + + +--- + +## Requirements + + +**API Call nodes have connection requirements:** +- Must have at least one incoming connection +- Must have at least one outgoing connection +- Must have an endpoint URL configured + + +Pre-Call API and Post-Call API nodes have different connection requirements since they run at specific points in the call lifecycle. 
+ +--- + +## Example: Customer Lookup (Pre-Call) + +Load customer data before the conversation starts. + + + + | Field | Value | + |-------|-------| + | **Method** | GET | + | **URL** | `https://crm.example.com/api/customers?phone={{caller_phone}}` | + + + + | JSONPath | Variable | + |----------|----------| + | `$.customer.name` | `customer_name` | + | `$.customer.tier` | `account_tier` | + | `$.customer.last_order` | `last_order` | + + + + ``` + Hello {{customer_name|there}}! I see you're a {{account_tier}} + member. Your last order was {{last_order}}. + + How can I help you today? + ``` + + + +--- + +## Example: Mid-Conversation Booking + +Book an appointment based on information collected during the call. + + + + ``` + [Collect Preferred Time] + ↓ + [API Call: Book Appointment] + ↓ + [Confirm or Handle Error] + ``` + + + + | Field | Value | + |-------|-------| + | **Method** | POST | + | **URL** | `https://calendar.example.com/api/appointments` | + | **Headers** | `Content-Type: application/json` | + | **Body** | See below | + + ```json + { + "customer_phone": "{{caller_phone}}", + "customer_name": "{{customer_name}}", + "preferred_time": "{{collected_time}}", + "service_type": "consultation" + } + ``` + + + + ``` + [API Call: Book Appointment] + ├── {{booking_status}} == "confirmed" → [Confirmation Node] + ├── {{booking_status}} == "unavailable" → [Offer Alternative] + └── Error → [Fallback / Transfer] + ``` + + + +--- + +## Example: Post-Call CRM Update + +Log the call outcome after the conversation ends. 
+ + + + | Field | Value | + |-------|-------| + | **Method** | POST | + | **URL** | `https://crm.example.com/api/call-logs` | + + + + ```json + { + "phone": "{{caller_phone}}", + "agent_id": "{{agent_id}}", + "call_id": "{{call_id}}", + "duration": "{{call_duration}}", + "outcome": "{{disposition}}", + "notes": "{{call_summary}}", + "collected_data": { + "budget": "{{collected_budget}}", + "timeline": "{{collected_timeline}}" + } + } + ``` + + + +--- + +## Using Variables in API Calls + +Variables work throughout your API configuration: + +**In URL:** +``` +https://api.example.com/orders/{{order_id}} +``` + +**In Headers:** +``` +Authorization: Bearer {{api_key}} +``` + +**In Body:** +```json +{ + "phone": "{{caller_phone}}", + "query": "{{customer_question}}" +} +``` + +→ See: [Variables](/atoms/atoms-platform/conversational-flow-agents/workflow-tab/variables) + +--- + +## Tips + + + APIs can fail. Add branches to handle errors: + + ``` + [API Call] + ├── Success → Continue + └── Error → [Fallback Response or Transfer] + ``` + + In your fallback node: "I'm having trouble accessing that information right now. Let me connect you with someone who can help." + + + + For slow APIs, ensure your timeout is adequate — but remember the caller is waiting. Keep API calls as fast as possible. + + + + Test your API calls with actual responses to ensure: + - JSONPath expressions extract correctly + - Variables populate as expected + - Error handling works + + +--- + +## Related + + + + Full reference for API nodes + + + Use API response data in prompts + + diff --git a/fern/products/atoms/pages/platform/convo-flow/config/end-call.mdx b/fern/products/atoms/pages/platform/convo-flow/config/end-call.mdx new file mode 100644 index 0000000..1e084c7 --- /dev/null +++ b/fern/products/atoms/pages/platform/convo-flow/config/end-call.mdx @@ -0,0 +1,135 @@ +--- +title: "End Call" +sidebarTitle: "End Call" +description: "Configure when and how your agent ends calls." 
+--- + +In Conversational Flow, calls end when the conversation reaches an **End Call node**. This node terminates the call gracefully with a closing message. + +--- + +## How End Calls Work + +Unlike Single Prompt agents where end conditions trigger automatically, Conversational Flow agents end calls explicitly when the flow reaches an End Call node. + +``` +[Conversation Flow] + ↓ +[Final Question] +├── "User confirms" → [End Call: Resolved] +├── "User has more questions" → [Loop back] +└── "User wants transfer" → [Transfer Call] +``` + +--- + +## End Call Node Configuration + +Click an End Call node to configure it. + +| Field | Description | +|-------|-------------| +| **Name** | Identifier shown on canvas (e.g., "End - Resolved") | +| **Closing Message** | What the agent says before hanging up | + +### Example Closings + + + + ``` + Great! I'm glad I could help. Is there anything else + before I let you go? ... Perfect! Thank you for calling + Acme Support. Have a wonderful day! + ``` + + + + ``` + I'm connecting you now. Thanks for calling Acme, and + have a great day! + ``` + + + + ``` + Thanks so much for your interest. I'll have someone + send over some resources that might be helpful. Take care! + ``` + + + + ``` + It seems like you might have stepped away. Feel free to + call back anytime if you need assistance. Goodbye! + ``` + + + +--- + +## Multiple End Points + +Your flow will likely have multiple End Call nodes for different outcomes: + +| End Call Node | Use Case | +|---------------|----------| +| `End - Resolved` | Issue successfully handled | +| `End - Goodbye` | Customer said goodbye | +| `End - Transfer Complete` | After successful transfer | +| `End - No Response` | Caller became unresponsive | +| `End - Out of Scope` | Couldn't help, directed elsewhere | +| `End - Callback Scheduled` | Follow-up arranged | + + +**Every path must end.** Make sure all branches in your flow eventually reach an End Call or Transfer Call node. 
Use the validation feature to find orphaned paths. + + +--- + +## Best Practices + + + Don't assume the caller is done. Ask for confirmation: + + ``` + Is there anything else I can help you with today? + ``` + + Create branches: + - "User confirms they're done" → End Call + - "User has more questions" → Loop back + + + + Different endings deserve different messages: + + | Outcome | Closing Tone | + |---------|--------------| + | Resolved | Warm, celebratory | + | Not qualified | Helpful, redirecting | + | No response | Polite, inviting return | + | After transfer | Brief, professional | + + + + For complex interactions, summarize before closing: + + ``` + Just to recap — I've scheduled your appointment for Tuesday + at 3pm, and sent a confirmation to {{customer_email}}. + Is there anything else before we wrap up? + ``` + + +--- + +## Related + + + + Complete node reference + + + Hand off to human agents + + diff --git a/fern/products/atoms/pages/platform/convo-flow/config/general-settings.mdx b/fern/products/atoms/pages/platform/convo-flow/config/general-settings.mdx new file mode 100644 index 0000000..d63b74c --- /dev/null +++ b/fern/products/atoms/pages/platform/convo-flow/config/general-settings.mdx @@ -0,0 +1,40 @@ +--- +title: "General Settings" +sidebarTitle: "General Settings" +description: "Configure timeout behavior for idle conversations." +--- + +General Settings control how long your agent waits for a response before prompting the caller — and when to give up. + +**Location:** Agent Settings → General tab + + + ![General settings](../../building-agents/images/cf-general-settings.png) + + +--- + +## LLM Idle Timeout Settings + +Configure how long the agent waits for user response before sending an inactivity message. After 3 attempts with no response, the conversation automatically ends. 
+ +| Setting | Default | Description | +|---------|---------|-------------| +| **Chat Timeout** | 60 sec | For text chat conversations | +| **Webcall Timeout** | 20 sec | For browser-based voice calls | +| **Telephony Timeout** | 20 sec | For phone calls | + +Each timeout triggers an inactivity prompt. If the user still doesn't respond after 3 prompts, the agent ends the conversation gracefully. + +--- + +## Next + + + + Try your agent before going live + + + Review and analyze calls + + diff --git a/fern/products/atoms/pages/platform/convo-flow/config/knowledge-base.mdx b/fern/products/atoms/pages/platform/convo-flow/config/knowledge-base.mdx new file mode 100644 index 0000000..3f4ba3d --- /dev/null +++ b/fern/products/atoms/pages/platform/convo-flow/config/knowledge-base.mdx @@ -0,0 +1,122 @@ +--- +title: "Knowledge Base" +sidebarTitle: "Knowledge Base" +description: "Give your agent access to reference documents." +--- + +A Knowledge Base is a repository of information your agent can search during conversations. Instead of cramming everything into the prompt, upload documents — product specs, policies, FAQs — and let the agent retrieve what it needs. + +**Location:** Settings tab → Model → Knowledge Base dropdown + + + ![Knowledge base](../../building-agents/images/kb-selection.png) + + + +In Conversational Flow agents, Knowledge Base is configured in the **Model tab** alongside the Global Prompt, not in a separate configuration panel. + + +--- + +## Setup + +1. Go to **Settings** tab +2. Open **Model** section +3. Find the **Knowledge Base** dropdown +4. Select a KB from the list (or "No Knowledge Base") + + +You need to create a Knowledge Base first before you can attach it here. See [Creating Knowledge Bases](/atoms/atoms-platform/features/knowledge-base). 
+ + +--- + +## Local vs Global + +| Type | Scope | Best For | +|------|-------|----------| +| **Local** | This agent only | Agent-specific information | +| **Global** | Shared across agents | Company-wide policies, general FAQs | + +--- + +## How It Works + +1. **Caller asks a question** → "What's your return policy?" +2. **Agent searches KB** → Finds relevant documents automatically +3. **Agent responds** → Uses retrieved information in the answer + +The agent decides when to search based on the question. You don't need to configure triggers — it happens automatically when relevant. + +--- + +## KB + Global Prompt + +In Conversational Flow, both the Knowledge Base and Global Prompt are in the Model tab. They work together: + +| Component | Purpose | +|-----------|---------| +| **Global Prompt** | Personality, behavior guidelines, style instructions | +| **Knowledge Base** | Factual reference material, policies, product info | + +**Example Global Prompt instruction:** + +```markdown +When answering questions about products, pricing, or policies, +always check the knowledge base first. Don't guess at specific +details like prices or return windows. +``` + +--- + +## Best Practices + + + | Put in KB | Put in Prompt | + |-----------|---------------| + | Product details | Personality and tone | + | Policies | Conversation patterns | + | FAQ answers | End conditions | + | Pricing info | Behavioral guidelines | + + + + Smaller, topic-specific documents retrieve better than massive catch-all files. + + | Better | Worse | + |--------|-------| + | `returns-policy.pdf` | `everything.pdf` | + | `pricing-2024.txt` | `company-info.docx` (500 pages) | + | `product-specs-widget.md` | `all-products.pdf` | + + + + Ask your agent questions that should hit the KB. Verify it finds the right information: + + 1. Test with expected questions ("What's the return window?") + 2. Test with variations ("Can I return something after 30 days?") + 3. 
Check Conversation Logs to see what was retrieved + + + + Tell your agent when to consult the knowledge base: + + ``` + For any question about specific policies, pricing, or product + specifications, search the knowledge base. Never make up numbers + or policy details. + ``` + + +--- + +## Related + + + + Global Prompt and model configuration + + + Creating and managing knowledge bases + + diff --git a/fern/products/atoms/pages/platform/convo-flow/config/languages.mdx b/fern/products/atoms/pages/platform/convo-flow/config/languages.mdx new file mode 100644 index 0000000..20daaba --- /dev/null +++ b/fern/products/atoms/pages/platform/convo-flow/config/languages.mdx @@ -0,0 +1,53 @@ +--- +title: "Languages" +sidebarTitle: "Languages" +description: "Configure supported languages for your agent." +--- + +The Languages tab lets you define which languages your Conversational Flow agent can communicate in. + +**Location:** Settings tab → Languages + + + ![Languages settings](../../building-agents/images/cf-languages-tab.png) + + +--- + +## Configuration + +| Setting | Description | +|---------|-------------| +| **Supported Languages** | Languages your agent can speak and understand | +| **Primary Language** | Default language for starting conversations | + +--- + +## Adding Languages + +1. Open **Settings** tab +2. Go to **Languages** +3. Click **+ Add Language** +4. Select from available languages +5. Set one as primary + +Your agent will start conversations in the primary language but can switch to any supported language if the caller speaks differently. + +--- + +## Language Switching + +When combined with **Language Switching** in the Model tab, your agent can automatically detect when a caller speaks a different language and switch accordingly. 
+ +--- + +## Next + + + + Configure speech speed and pronunciation + + + Set up Global Prompt and Knowledge Base + + diff --git a/fern/products/atoms/pages/platform/convo-flow/config/model-settings.mdx b/fern/products/atoms/pages/platform/convo-flow/config/model-settings.mdx new file mode 100644 index 0000000..af76185 --- /dev/null +++ b/fern/products/atoms/pages/platform/convo-flow/config/model-settings.mdx @@ -0,0 +1,88 @@ +--- +title: "Model Settings" +sidebarTitle: "Model Settings" +description: "Configure AI model, global prompt, and knowledge base." +--- + +Model Settings control how the AI behaves across your flow — the model powering responses, global instructions, and knowledge base connection. + +**Location:** Settings tab → Model + + + ![Model settings](../../building-agents/images/cf-model-settings.png) + + + +Conversational Flow agents have **Global Prompt** and **Knowledge Base** in this tab since individual prompts are defined per-node in the workflow. + + +--- + +## AI Model + +| Setting | Description | +|---------|-------------| +| **LLM Model** | The AI model (Electron, GPT-4o, etc.) | +| **Language** | Primary language for responses | + +--- + +## Global Prompt + +Set global instructions for your agent's personality and behavior. This applies across all nodes (limit: 4,000 characters). + +Use this for: +- Personality traits +- Behavioral guidelines +- Consistent phrasing +- Company-specific instructions + +Node-specific prompts add to the Global Prompt. Use Global Prompt for consistency, node prompts for specific questions. + +--- + +## Knowledge Base + +Connect a knowledge base for reference material. + +| Option | Description | +|--------|-------------| +| **No Knowledge Base** | Agent uses only prompts | +| **[Your KB]** | Agent can search for answers | + + +--- + +## Speech Formatting + +When enabled (default: ON), transcripts are automatically formatted for readability — punctuation, paragraphs, and proper formatting for dates and numbers. 
+ +--- + +## Language Switching + +Enable your agent to switch languages mid-conversation (default: ON). + +### Advanced Settings + +| Setting | Default | Description | +|---------|---------|-------------| +| **Minimum Words** | 2 | Words before considering a switch | +| **Strong Signal** | 0.7 | Confidence for immediate switch | +| **Weak Signal** | 0.3 | Confidence for tentative detection | +| **Consecutive Weak** | 2 | Weak signals needed to switch | + +Defaults work for most cases. Adjust only if seeing unwanted switching. + +--- + +## Next + + + + Create and manage knowledge bases + + + Configure speech and pronunciation + + diff --git a/fern/products/atoms/pages/platform/convo-flow/config/phone-number.mdx b/fern/products/atoms/pages/platform/convo-flow/config/phone-number.mdx new file mode 100644 index 0000000..f6ae21f --- /dev/null +++ b/fern/products/atoms/pages/platform/convo-flow/config/phone-number.mdx @@ -0,0 +1,38 @@ +--- +title: "Phone Number" +sidebarTitle: "Phone Number" +description: "Assign a phone number to your agent." +--- + +The Phone Number tab lets you connect your agent to a phone number for inbound and outbound calls. Once assigned, callers to that number will reach this agent. + +**Location:** Settings tab → Phone Number + + + ![Phone number settings](../../building-agents/images/cf-phone-number.png) + + +--- + +## Select Phone Numbers + +Click the dropdown to choose from your available phone numbers. If you haven't set up any numbers yet, you'll see "No phone numbers selected." + +You can assign multiple numbers to the same agent if needed. + + +You need to purchase or configure phone numbers before they appear here. See [Phone Numbers](/atoms/atoms-platform/deployment/phone-numbers) to get started. 
+ + +--- + +## Next + + + + Get and manage phone numbers + + + Set up outbound calling + + diff --git a/fern/products/atoms/pages/platform/convo-flow/config/transfer-call.mdx b/fern/products/atoms/pages/platform/convo-flow/config/transfer-call.mdx new file mode 100644 index 0000000..22a9c84 --- /dev/null +++ b/fern/products/atoms/pages/platform/convo-flow/config/transfer-call.mdx @@ -0,0 +1,215 @@ +--- +title: "Transfer Call" +sidebarTitle: "Transfer Call" +description: "Configure call transfers to human agents." +--- + +In Conversational Flow, transfers happen when the conversation reaches a **Transfer Call node**. This hands the call to a human agent via cold or warm transfer. + +--- + +## How Transfers Work + +Transfer Call nodes route callers to human agents when the AI can't (or shouldn't) handle the situation. + +``` +[Conversation Flow] + ↓ +[Handle Request] +├── AI can resolve → Continue flow +├── Needs human → [Transfer Call: Support] +└── VIP customer → [Transfer Call: Premium Line] +``` + +--- + +## Transfer Call Node Configuration + +Click a Transfer Call node to configure it. + +| Field | Required | Description | +|-------|----------|-------------| +| **Name** | Yes | Identifier (e.g., `transfer_to_sales`) | +| **Description** | Yes | Internal note about when this transfer triggers | +| **Phone Number** | Yes | Transfer destination with country code | +| **Transfer Type** | Yes | Cold or Warm | + +--- + +## Transfer Types + + + + **Immediate handoff.** The caller is connected directly to the destination without any briefing to the receiving agent. + + | Pros | Cons | + |------|------| + | Fast | No context for receiving agent | + | Simple | Caller may repeat themselves | + + **Best for:** + - Simple escalations + - When context isn't needed + - Time-sensitive transfers + - High call volume scenarios + + + + **AI briefs the agent first.** The receiving agent gets context before the caller joins. 
+ + | Pros | Cons | + |------|------| + | Human has context | Slightly longer | + | Better experience | More configuration | + + **Best for:** + - Complex issues needing context + - VIP callers + - When continuity matters + + ### Warm Transfer Options + + | Setting | Description | + |---------|-------------| + | **On-hold Music** | What the caller hears while waiting for connection | + | **Transfer if Human** | Skip transfer if voicemail detected (coming soon) | + | **Whisper Message** | Private message only the agent hears before connecting | + | **Handoff Message** | What the AI says to brief the receiving agent | + | **Three-way Message** | Message both parties hear when connected | + + + +--- + +## Warm Transfer Messages + +### Whisper Message + +Only the agent hears this — give them everything they need: + +``` +Incoming transfer: Customer {{customer_name}} calling about +a billing dispute. They've been charged twice for order #{{order_id}}. +Identity already verified. +``` + +### Handoff Message + +What the AI says to brief the agent (can include variables): + +``` +Hi, I'm transferring a customer who needs help with a technical +issue. They've tried restarting their device without success. +``` + +### Three-way Message + +Both parties hear this — use it for smooth introductions: + +``` +I've connected you with Sarah from our billing team. +Sarah, {{customer_name}} is calling about a duplicate charge. 
+``` + +--- + +## Multiple Transfer Destinations + +Create multiple Transfer Call nodes for different scenarios: + +| Transfer Node | Destination | When | +|---------------|-------------|------| +| `Transfer - Sales` | Sales team | Purchase interest, demo requests | +| `Transfer - Support` | Support team | Technical issues | +| `Transfer - Billing` | Billing team | Payment questions | +| `Transfer - Manager` | Manager line | Escalations, complaints | + +--- + +## Examples + + + + | Field | Value | + |-------|-------| + | **Name** | `transfer_sales` | + | **Description** | "Transfer when caller expresses strong purchase intent" | + | **Phone Number** | +1-555-SALES | + | **Type** | Cold | + + + + | Field | Value | + |-------|-------| + | **Name** | `transfer_support` | + | **Description** | "Transfer when issue requires manual intervention" | + | **Phone Number** | +1-555-SUPPORT | + | **Type** | Warm | + | **Whisper** | "Technical issue: {{issue_type}}. Customer: {{customer_name}}" | + + + + | Field | Value | + |-------|-------| + | **Name** | `transfer_manager` | + | **Description** | "Transfer when caller is upset and requests a manager" | + | **Phone Number** | +1-555-MANAGER | + | **Type** | Warm | + | **Whisper** | "Escalation: Customer is frustrated about {{issue_summary}}" | + + + +--- + +## Best Practices + + + Tell the caller what's happening: + + ``` + I'm going to connect you with one of our billing specialists + who can help resolve this. It may take just a moment to connect. + Is that okay? 
+ ``` + + + + When context matters, always use warm transfer: + + - The caller has explained a complex problem + - You've already verified their identity + - The issue requires sensitive handling + - The caller is frustrated + + + + Make whisper messages actionable: + + | Good | Bad | + |------|-----| + | "Billing dispute: double-charged $150, order #12345" | "Billing issue" | + | "Technical: Can't log in, tried password reset 3x" | "Login problem" | + | "VIP customer, account value $50k/year" | "Important customer" | + + + + What happens if the transfer line is busy or doesn't answer? + + Consider adding a branch that handles transfer failure: + - Offer to schedule a callback + - Provide an alternative contact method + - Return to AI handling with apology + + +--- + +## Related + + + + Complete node reference + + + Configure call termination + + diff --git a/fern/products/atoms/pages/platform/convo-flow/config/variables.mdx b/fern/products/atoms/pages/platform/convo-flow/config/variables.mdx new file mode 100644 index 0000000..baa145d --- /dev/null +++ b/fern/products/atoms/pages/platform/convo-flow/config/variables.mdx @@ -0,0 +1,76 @@ +--- +title: "Variables" +sidebarTitle: "Variables" +description: "Use dynamic values throughout your conversation flow." +--- + +Variables let you personalize conversations with dynamic data — caller information, API responses, or values you define. 
+ +**Location:** Workflow tab → **{ } Variables** button (top right) + + + ![Variables panel](../../building-agents/images/cf-variables-panel.png) + + +--- + +## Variable Types + +| Type | Source | Example | +|------|--------|---------| +| **User Defined** | Variables you create | `{{company_name}}`, `{{promo_code}}` | +| **System** | Platform-provided (read-only) | `{{caller_phone}}`, `{{call_duration}}` | +| **API** | Extracted from API responses | `{{customer_name}}`, `{{account_tier}}` | + +--- + +## Syntax + +Use double curly braces anywhere in prompts or conditions: + +``` +Hello {{customer_name}}! Thanks for calling {{company_name}}. +``` + +### Default Values + +Handle missing variables with the pipe syntax: + +``` +Hello {{customer_name|there}}! +``` + +If `customer_name` is empty → "Hello there!" + +--- + +## System Variables + +| Variable | Description | +|----------|-------------| +| `{{caller_phone}}` | Caller's phone number | +| `{{call_time}}` | When call started | +| `{{call_duration}}` | Elapsed seconds | +| `{{call_direction}}` | "inbound" or "outbound" | +| `{{agent_id}}` | This agent's ID | +| `{{call_id}}` | Unique call identifier | + +--- + +## Creating Variables + +1. Click **{ } Variables** in the workflow tab +2. Go to **User Defined** tab +3. Click **+ Add Variable** +4. 
Enter name and default value + +--- + +## Extracting from APIs + +In API nodes, use **Extract Response Data** to create variables from responses: + +| JSONPath | Variable | +|----------|----------| +| `$.data.name` | `customer_name` | +| `$.data.tier` | `account_tier` | diff --git a/fern/products/atoms/pages/platform/convo-flow/config/voice-settings.mdx b/fern/products/atoms/pages/platform/convo-flow/config/voice-settings.mdx new file mode 100644 index 0000000..a740f5e --- /dev/null +++ b/fern/products/atoms/pages/platform/convo-flow/config/voice-settings.mdx @@ -0,0 +1,170 @@ +--- +title: "Voice Settings" +sidebarTitle: "Voice Settings" +description: "Fine-tune speech behavior, pronunciation, and voice detection." +--- + +Voice Settings give you precise control over how your agent sounds and listens. From speech speed to background ambiance, pronunciation rules to turn-taking — this is where you shape the audio experience. + +**Location:** Settings tab → Voice + + + ![Voice settings](../../building-agents/images/cf-voice-settings.png) + + +--- + +## Voice + +Select the voice for your agent. Click the dropdown to browse available voices — you can preview each one before selecting. + +--- + +## Speech Settings + +### Speech Speed + +Control how fast your agent speaks. + +| Control | Range | Default | +|---------|-------|---------| +| Slider | Slow ↔ Fast | 1 | + +Slide left for a more measured, deliberate pace. Slide right for quicker delivery. Find the sweet spot that matches your use case — slower often works better for complex information, faster for simple confirmations. + +--- + +## Pronunciation & Background + +### Pronunciation Dictionaries + +Add custom pronunciations for words that aren't pronounced correctly by the default voice. + +This is especially useful for: +- Brand names +- Technical terms +- Proper nouns +- Industry-specific jargon + +**To add a pronunciation:** Click **Add Pronunciation** to open the modal. 
+ + + ![Add pronunciation](../../building-agents/images/cf-add-pronunciation.png) + + +| Field | Description | +|-------|-------------| +| **Word** | The word as written | +| **Pronunciation** | How it should sound | + +### Background Sound + +Add ambient audio behind your agent's voice for a more natural feel. + +| Option | Description | +|--------|-------------| +| **None** | Silent background (default) | +| **Office** | Subtle office ambiance | +| **Call Center** | Busy call center sounds | +| **Static** | Light static noise | +| **Cafe** | Coffee shop atmosphere | + +--- + +## Advanced Voice Settings + +### Mute User Until First Bot Response + +When enabled, the user's audio is muted until the agent's first response is complete. Useful for preventing early interruptions during the greeting. + +### Voicemail Detection + +Detects when a call goes to voicemail instead of reaching a live person. + + +Voicemail detection may not work as expected if **Release Time** is less than 0.6 seconds. + + +### Personal Info Redaction (PII) + +Automatically redacts sensitive personal information from transcripts and logs. + +### Denoising + +Filters out background noise and improves voice clarity before processing. This helps reduce false detections caused by environmental sounds — useful when callers are in noisy environments. + +--- + +## Voice Detection + +Fine-tune how your agent recognizes when someone is speaking. + +### Confidence + +Defines how strict the system is when deciding if detected sound is speech. + +- **Higher values** → Less likely to trigger on background noise +- **Lower values** → More sensitive to quiet speech + +| Default | Range | +|---------|-------| +| 0.70 | 0 – 1 | + +### Min Volume + +The minimum volume level required to register as speech. 
+ +| Default | Range | +|---------|-------| +| 0.60 | 0 – 1 | + +### Trigger Time (Seconds) + +How long the system waits after detecting the start of user speech (and after the bot has finished speaking) before processing. This helps avoid overlapping speech and false triggers. + +| Default | Range | +|---------|-------| +| 0.10 | 0 – 1 | + +### Release Time (Seconds) + +How long the system waits after the user stops speaking before the bot begins its response. This ensures the user has completely finished their thought. + +| Default | Range | +|---------|-------| +| 0.30 | 0 – 1+ | + + +**Start with defaults.** Only adjust these if you're experiencing specific issues like missed words or premature responses. + + +--- + +## Smart Turn Detection + +Intelligent detection of when the caller is done speaking. When enabled, the agent uses context and speech patterns — not just silence — to determine when it's time to respond. + +--- + +## Interruption Backoff Timer + +Time in seconds to prevent interruptions after the bot starts speaking (default: 0, disabled). + +This helps prevent conversation loops when the user and bot interrupt each other — the agent will wait this duration before allowing itself to be interrupted again. + + +For per-node interruption control, use the **Uninterruptible** toggle on Default nodes. + + +--- + +## Next + + + + Configure AI model, Global Prompt, and Knowledge Base + + + Set timeout behavior + + diff --git a/fern/products/atoms/pages/platform/convo-flow/config/webhooks.mdx b/fern/products/atoms/pages/platform/convo-flow/config/webhooks.mdx new file mode 100644 index 0000000..635e526 --- /dev/null +++ b/fern/products/atoms/pages/platform/convo-flow/config/webhooks.mdx @@ -0,0 +1,29 @@ +--- +title: "Webhooks" +sidebarTitle: "Webhooks" +description: "Connect your agent to webhook endpoints." +--- + +Webhooks push real-time data to your systems when call events happen — starts, ends, analytics ready. 
Use them to update CRMs, create tickets, trigger workflows, or feed analytics pipelines. + +**Location:** Settings tab → Webhooks + +--- + +## Adding to Your Agent + +Once a webhook endpoint exists, connect it to your agent here. + + + ![Webhook in agent](../../building-agents/images/webhook-agent.png) + + +Select your webhook from the dropdown. The agent will now send events to that endpoint. + +--- + +## Next + + + Create endpoints, manage subscriptions, and view payload details + diff --git a/fern/products/atoms/pages/platform/convo-flow/from-template.mdx b/fern/products/atoms/pages/platform/convo-flow/from-template.mdx new file mode 100644 index 0000000..4053bdf --- /dev/null +++ b/fern/products/atoms/pages/platform/convo-flow/from-template.mdx @@ -0,0 +1,78 @@ +--- +title: "From Template" +sidebarTitle: "Start with Template" +description: "Jumpstart your Conversational Flow agent with pre-built templates." +--- + +Templates give you a proven starting point. Pick one that matches your use case, customize it, and you're ready to go. + +--- + +## Getting There + + + + From your dashboard, click the green **Create Agent** button in the top right. + + + + Select the second option in the modal. + + + Create agent modal + + + + + Use **Filter By** to narrow by industry, direction (inbound/outbound), or agent type. Click any template to select it, then hit **Create**. + + + Template selection screen + + + + +The editor opens with a complete workflow — nodes, branches, and settings ready to customize. 
+ +--- + +## What Templates Include + +Each template comes with a complete starting point: + +| Component | What You Get | +|-----------|--------------| +| **Complete Workflow** | Nodes and branches for the entire conversation flow | +| **Node Prompts** | Pre-written prompts for each step | +| **Branch Conditions** | Logic already configured for common responses | +| **Voice Selection** | A voice that fits the use case | + +Templates follow proven conversation patterns for their use case. The structure is designed — you just need to customize the details. + +--- + +## Customizing Your Template + +Templates are starting points. Always replace the placeholders with your specifics: + +- **Company name and details** — Replace placeholder names with your actual business +- **Policies and rules** — Update return windows, hours, pricing, etc. +- **Node prompts** — Adjust the tone to match your brand +- **Branch conditions** — Add or modify paths for your specific needs + + +**Keep the structure.** Templates are organized intentionally. Replace the content, but keep the overall flow — it's designed for that use case. + + +--- + +## What's Next + + + + Understand the canvas and make changes + + + Validate before deploying + + diff --git a/fern/products/atoms/pages/platform/convo-flow/manual-setup.mdx b/fern/products/atoms/pages/platform/convo-flow/manual-setup.mdx new file mode 100644 index 0000000..ff6eaa6 --- /dev/null +++ b/fern/products/atoms/pages/platform/convo-flow/manual-setup.mdx @@ -0,0 +1,56 @@ +--- +title: "Manual Setup" +sidebarTitle: "Start from Scratch" +description: "Build a Conversational Flow agent from a blank canvas." +--- + +Starting from scratch gives you complete control. You'll land in an empty workflow builder ready for your design. + +--- + +## Getting There + + + + From your dashboard, click the green **Create Agent** button in the top right. + + + + Select the first option in the modal. 
+ + + Create agent modal + + + + + Choose **Conversational Flow** as your agent type. + + + +The editor opens with an empty canvas — ready for you to build your workflow. + +--- + +## What's Next + +You're now in the editor. Here's where to go from here: + + + + Learn the canvas and start adding nodes + + + Every node type and how to use them + + + Create paths based on caller responses + + + Voice, model, and agent settings + + + + +**First time?** Start by dragging a Default node onto the canvas, writing a greeting, and hitting **Test Agent** (top right). You can build out the full flow as you go. + diff --git a/fern/products/atoms/pages/platform/convo-flow/node-types.mdx b/fern/products/atoms/pages/platform/convo-flow/node-types.mdx new file mode 100644 index 0000000..6e54832 --- /dev/null +++ b/fern/products/atoms/pages/platform/convo-flow/node-types.mdx @@ -0,0 +1,446 @@ +--- +title: "Node Types" +sidebarTitle: "Node Types" +description: "Complete reference for every node type in Conversational Flow." +--- + +Nodes are the building blocks of your conversation flow. Each type serves a specific purpose in guiding the conversation from start to finish. + +--- + +## Node Types at a Glance + +| Node | Purpose | When to Use | +|------|---------|-------------| +| [Default](#default-node) | Conversation step | Each conversation point where the agent speaks and listens | +| [API Call](#api-call-node) | External data | Fetch or send data mid-conversation | +| [Transfer Call](#transfer-call-node) | Handoff to human | Connect caller to a live agent | +| [End Call](#end-call-node) | Terminate call | Natural conversation endings | +| [Pre-Call API](#pre-call-api-node) | Load context | Get data before the call starts | +| [Post-Call API](#post-call-api-node) | Save data | Send data after the call ends | + +--- + +## Default Node + +The workhorse of your flow. Each Default node represents one step in the conversation where your agent speaks and waits for a response. 
+ + + ![Default node](../building-agents/images/cf-node-default.png) + + +### Configuration + +Click a Default node to open its settings panel. + +| Field | Description | +|-------|-------------| +| **Name** | Identifier shown on the canvas | +| **Prompt** | What the agent says at this step | +| **Branches** | Output paths based on caller response | + +### Uninterruptible Mode + +When enabled, the user cannot interrupt the bot while it is speaking on this node. This ensures the bot completes its message before any user interaction is allowed. + +| Setting | Behavior | +|---------|----------| +| **OFF** (default) | Caller can interrupt at any time | +| **ON** | Bot must finish speaking before caller input is processed | + + +Use Uninterruptible mode for critical messages like legal disclaimers, confirmation summaries, or important instructions that shouldn't be cut off. + + +### Example Prompts + + + + ``` + Hi! Thanks for calling Acme Support. My name is Alex. + How can I help you today? + ``` + + + + ``` + I'd be happy to help with that. First, could you tell + me your account number or the email on file? + ``` + + + + ``` + Just to confirm — you'd like to schedule a demo for + next Tuesday at 2pm. Is that correct? + ``` + + + +--- + +## API Call Node + +Connect your agent to external systems mid-conversation. Fetch data, book appointments, update records — anything your APIs can do. + + + ![API Call node](../building-agents/images/cf-node-api-call.png) + + +### Requirements + + +**API Call nodes have connection requirements:** +- Must have at least one incoming connection +- Must have at least one outgoing connection +- Must have an endpoint URL configured + + +### Configuration + +Click an API Call node to configure the request. + + + + Select the HTTP method and enter the full API endpoint URL you want to call. 
+ + | Field | Description | + |-------|-------------| + | **Method** | HTTP method (GET, POST, PUT, PATCH, DELETE) | + | **URL** | Full endpoint URL (e.g., `https://api.example.com/customers`) | + + Use variables in the URL for dynamic requests: + + ``` + https://api.example.com/customers/{{caller_phone}} + ``` + + + + Add custom headers to your API request. These are key-value pairs, often used for things like Content-Type or API keys. + + | Common Headers | Example Value | + |----------------|---------------| + | `Content-Type` | `application/json` | + | `Authorization` | `Bearer {{api_key}}` | + | `X-API-Key` | `your-api-key-here` | + + Click **+ Add Header** to add each header. + + + + Construct the data payload for your request. This is typically required for POST, PUT, or PATCH methods. + + ```json + { + "phone": "{{caller_phone}}", + "name": "{{customer_name}}", + "action": "lookup" + } + ``` + + Variables are replaced with actual values at runtime. + + + + Specify a variable name and a JSONPath expression to extract a specific value from the API response. This value is then stored in the variables for use in subsequent steps. 
+ + | Field | Description | + |-------|-------------| + | **Variable Name** | Name to store the extracted value (e.g., `customer_name`) | + | **JSONPath** | Path to the value in the response (e.g., `$.data.name`) | + + **Example:** + + If your API returns: + ```json + { + "data": { + "name": "John Smith", + "tier": "premium", + "balance": 1500 + } + } + ``` + + Extract mappings: + | Variable | JSONPath | + |----------|----------| + | `customer_name` | `$.data.name` | + | `account_tier` | `$.data.tier` | + | `balance` | `$.data.balance` | + + Use in later nodes: `"Hi {{customer_name}}, I see you're a {{account_tier}} member."` + + + +### Branching on Results + +After extracting response data, you can branch based on the results: + +``` +[API Call: Check Account] +├── Success ({{account_tier}} == "premium") → VIP Flow +├── Success ({{account_tier}} == "basic") → Standard Flow +├── Error → Fallback / Transfer +``` + +--- + +## Transfer Call Node + +Hand the conversation to a human when needed — for escalations, complex issues, or high-value opportunities. + + + ![Transfer Call node](../building-agents/images/cf-node-transfer-call.png) + + +### Configuration + +| Field | Required | Description | +|-------|----------|-------------| +| **Name** | Yes | Identifier (e.g., `transfer_to_sales`) | +| **Description** | Yes | When this transfer should trigger | +| **Phone Number** | Yes | Transfer destination with country code | +| **Transfer Type** | Yes | Cold or Warm | + +### Transfer Types + + + + **Immediate handoff.** The caller is connected directly to the destination without any briefing to the receiving agent. + + | Pros | Cons | + |------|------| + | Fast | No context for receiving agent | + | Simple | Caller may repeat themselves | + + **Best for:** + - Simple escalations + - When context isn't needed + - Time-sensitive transfers + - High call volume scenarios + + + + **AI briefs the agent first.** The receiving agent gets context before the caller joins. 
+ + | Pros | Cons | + |------|------| + | Human has context | Slightly longer | + | Better experience | More configuration | + + **Best for:** + - Complex issues needing context + - VIP callers + - When continuity matters + + ### Warm Transfer Options + + | Setting | Description | + |---------|-------------| + | **On-hold Music** | What the caller hears while waiting for connection | + | **Transfer if Human** | Skip transfer if voicemail detected (coming soon) | + | **Whisper Message** | Private message only the agent hears before connecting | + | **Handoff Message** | What the AI says to brief the receiving agent | + | **Three-way Message** | Message both parties hear when connected | + + **Example Whisper Message:** + ``` + Incoming transfer: Customer calling about a billing dispute. + They've been charged twice for order #12345. + Identity already verified. + ``` + + **Example Three-way Message:** + ``` + I've connected you with Sarah from our billing team. + Sarah, this customer is calling about a duplicate charge. + ``` + + + +--- + +## End Call Node + +Gracefully conclude the conversation when the interaction is complete. + + + ![End Call node](../building-agents/images/cf-node-end-call.png) + + +### Configuration + +| Field | Description | +|-------|-------------| +| **Name** | Identifier for this ending | +| **Closing Message** | Final words before hanging up | + +### Example Closings + + + + ``` + Great! You're all set. Is there anything else I can + help you with today? ... Perfect, thank you for calling + Acme. Have a wonderful day! + ``` + + + + ``` + I'm connecting you now. Thanks for calling Acme, and + have a great day! + ``` + + + + ``` + Thanks so much for your interest. I'll have someone + send over some resources that might be helpful. Take care! + ``` + + + + +**Every path must end.** Make sure all branches in your flow eventually reach an End Call or Transfer Call node. 
+ + +--- + +## Pre-Call API Node + +Execute API calls *before* your agent says hello. Perfect for loading personalized data that shapes the entire conversation. + + + ![Pre-Call API node](../building-agents/images/cf-node-pre-call-api.png) + + +### When It Runs + +The Pre-Call API executes immediately when the call connects, before any conversation: + +1. Phone rings / Call connects +2. **Pre-Call API executes** ← Here +3. Data available in variables +4. Conversation starts (first Default node) + +### Configuration + +Pre-Call API uses the same configuration as the [API Call node](#api-call-node): + +- **Request:** Method + URL +- **Headers:** Key-value pairs +- **Body:** JSON payload (for POST requests) +- **Extract Response Data:** Variable name + JSONPath + +### Use Cases + +| Scenario | What to Fetch | +|----------|---------------| +| **CRM Lookup** | Customer history before greeting | +| **Account Status** | Check for open issues or alerts | +| **Personalization** | Load name, preferences, language | +| **Routing Logic** | VIP status, time zone, special handling | + +### Example + +``` +GET https://crm.example.com/lookup?phone={{caller_phone}} + +Response Mapping: + $.customer_name → customer_name + $.last_ticket → last_issue + $.tier → account_tier +``` + +Your greeting node can now say: + +``` +"Hi {{customer_name}}! Thanks for calling back. +Are you still having trouble with {{last_issue}}?" +``` + +--- + +## Post-Call API Node + +Trigger actions *after* the call ends. Perfect for logging outcomes, updating CRMs, and triggering follow-ups. + + + ![Post-Call API node](../building-agents/images/cf-node-post-call-api.png) + + +### When It Runs + +The Post-Call API executes after the conversation ends: + +1. Conversation ends (End Call node reached) +2. Call terminates +3. **Post-Call API executes** ← Here +4. 
Data saved externally + +### Configuration + +Post-Call API uses the same configuration as the [API Call node](#api-call-node): + +- **Request:** Method + URL +- **Headers:** Key-value pairs +- **Body:** JSON payload (typically POST) +- **Extract Response Data:** Variable name + JSONPath + +### Use Cases + +| Scenario | What to Send | +|----------|--------------| +| **CRM Logging** | Call summary, outcome, duration | +| **Ticket Creation** | Issue details, priority level | +| **Follow-up Triggers** | Send email confirmations, SMS receipts | +| **Analytics** | Custom metrics, conversion tracking | + +### Example + +``` +POST https://crm.example.com/calls + +Body: +{ + "phone": "{{caller_phone}}", + "duration": "{{call_duration}}", + "outcome": "{{disposition}}", + "notes": "{{call_summary}}", + "agent_id": "{{agent_id}}", + "collected": { + "name": "{{collected_name}}", + "budget": "{{collected_budget}}" + } +} +``` + +--- + +## Choosing the Right Node + +| If you need to... | Use | +|-------------------|-----| +| Ask a question or give information | Default Node | +| Make the bot uninterruptible for a message | Default Node with Uninterruptible ON | +| Get external data mid-conversation | API Call Node | +| Transfer to a human | Transfer Call Node | +| End the conversation | End Call Node | +| Load data before the call starts | Pre-Call API Node | +| Save data after the call ends | Post-Call API Node | + +--- + +## Next + + + + Create dynamic conversation paths + + + Use dynamic data in your nodes + + diff --git a/fern/products/atoms/pages/platform/convo-flow/overview.mdx b/fern/products/atoms/pages/platform/convo-flow/overview.mdx new file mode 100644 index 0000000..2ae0840 --- /dev/null +++ b/fern/products/atoms/pages/platform/convo-flow/overview.mdx @@ -0,0 +1,124 @@ +--- +title: "Conversational Flow Agents" +sidebarTitle: "Overview" +description: "Build structured, goal-oriented agents with visual workflows." 
+tag: "Legacy" +--- + + +Conversational Flow is the original agent type. For most use cases, we now recommend [Single Prompt agents](/atoms/atoms-platform/single-prompt-agents/overview) — they're faster to set up and more flexible. Conversational Flow remains ideal for structured, multi-step processes like lead qualification, booking, and intake forms. + + +A Conversational Flow agent guides callers through a designed path. You create a visual workflow of nodes — each representing a step in the conversation — and connect them with branches that determine where the conversation goes based on what the caller says. + +--- + +## When to Use + +Conversational Flow is ideal for **structured, goal-oriented conversations** — lead qualification, appointment booking, surveys, intake forms. Choose it when you need specific data collected in a specific order, or when different responses should lead to fundamentally different paths. + +For **open-ended, flexible conversations** like general support or FAQs, consider [Single Prompt](/atoms/atoms-platform/single-prompt-agents/overview) instead. + +--- + +## How It Works + +Think of your workflow as a roadmap. Each node represents a step where the agent takes action — asking a question, making an API call, or transferring the caller. Branches connect these steps, and the caller's responses determine which path to take. + +Unlike Single Prompt agents that interpret instructions dynamically, Conversational Flow agents follow your designed structure. This gives you predictable, consistent conversations — every caller gets the same thorough experience. + +--- + +## Capabilities + +**Visual workflow design.** Drag nodes onto a canvas, connect them with branches, and see your entire conversation flow at a glance. Complex logic becomes manageable when you can see it. + +**Precise data collection.** Each node can collect specific information. You control exactly what gets asked, in what order, and what happens based on the answers. 
+ +**Mid-conversation API calls.** Nodes can fetch external data, check availability, update CRMs, or trigger any API — and branch based on the results. + +**Multiple paths to multiple outcomes.** Different caller responses lead to different experiences. Qualified leads go to sales, support issues go to technicians, everyone gets the right path. + +--- + +## Building a Conversational Flow Agent + +You'll create three things: + +**1. The Workflow** + +This is the core. Your workflow includes: +- **Nodes** — Each step: greetings, questions, API calls, transfers, endings +- **Branches** — Conditions that route callers based on their responses +- **Variables** — Dynamic data used throughout the conversation + +**2. Global Prompt** (optional) + +Set personality and behavior guidelines that apply across all nodes. This keeps your agent consistent without repeating instructions in every node. + +**3. Voice and Model** + +Pick the voice your agent speaks with and the AI model that powers its understanding. + +--- + +## The Editor + +Once you create a Conversational Flow agent, you land in the editor with two main tabs. 
+ + + + + ![Workflow tab](../building-agents/images/cf-editor-workflow-tab.png) + + + | Area | Location | What It Does | + |------|----------|--------------| + | **Node Palette** | Left panel | Drag nodes onto your workflow | + | **Canvas** | Center | Where you build and visualize your flow | + | **Variables** | Top right button | Manage flow-wide variables | + | **Node Config** | Right panel | Configure selected node | + + + + + ![Settings tab](../building-agents/images/cf-editor-settings-tab.png) + + + | Section | What It Configures | + |---------|-------------------| + | **Languages** | Supported languages for your agent | + | **Voice** | Speech speed, pronunciation, turn detection | + | **Model** | AI model, Global Prompt, Knowledge Base | + | **Phone Number** | Assigned phone number | + | **Webhooks** | Event notifications | + | **General** | Timeout settings | + + + +--- + +## After You Launch + +Once your agent is live, refinement happens in a few places: + +**Flow adjustments.** Review call logs, find where callers drop off or get stuck, and refine your nodes and branches. + +**Prompt updates.** Tweak individual node prompts or the global prompt to improve how the agent sounds and responds. + +**Voice tuning.** Adjust speech speed, add pronunciation rules, tweak turn-taking behavior. + +**Branch refinement.** Add new conditions, adjust thresholds, handle edge cases you discover. + +--- + +## Get Started + + + + Blank canvas with full control over your workflow + + + Pre-built flows for common use cases + + \ No newline at end of file diff --git a/fern/products/atoms/pages/platform/convo-flow/workflow-builder.mdx b/fern/products/atoms/pages/platform/convo-flow/workflow-builder.mdx new file mode 100644 index 0000000..94ed2eb --- /dev/null +++ b/fern/products/atoms/pages/platform/convo-flow/workflow-builder.mdx @@ -0,0 +1,108 @@ +--- +title: "Workflow Builder" +sidebarTitle: "Workflow Builder" +description: "The visual canvas for designing conversation flows." 
+--- + +The workflow builder is where you design your conversation. Drag nodes onto the canvas, connect them with branches, and see your entire flow at a glance. + +--- + +## The Interface + + + ![Workflow builder](../building-agents/images/cf-workflow-builder.png) + + +| Area | Location | Purpose | +|------|----------|---------| +| **Node Palette** | Left panel | All available node types to drag onto canvas | +| **Canvas** | Center | Your visual workspace | +| **Node Config** | Right panel | Settings for the selected node | +| **Variables** | Top right button | Manage flow-wide variables | +| **Controls** | Bottom | Auto-layout, zoom, feedback | + +--- + +## Adding Nodes + +1. Find the node type in the left palette +2. Drag it onto the canvas +3. Release where you want it placed + +Every flow starts with a **Start** node (the green pill). Connect your first node to Start to begin the conversation. + +--- + +## Connecting Nodes + +1. Hover over a node to see connection handles (small circles) +2. Drag from an output handle +3. Drop onto another node's input handle + +Connections show conversation flow. When a node finishes, the conversation moves to the connected node. + +--- + +## Configuring Nodes + +Click any node to open its settings in the right panel. Each node type has different options: + +| Node | Key Settings | +|------|--------------| +| **Default** | Name, Prompt, Branches, Uninterruptible toggle | +| **API Call** | Method, URL, Headers, Body, Response extraction | +| **Transfer Call** | Phone number, Transfer type, Warm transfer messages | +| **End Call** | Closing message | + +--- + +## Variables Panel + +Click **{ } Variables** (top right) to manage variables: + +| Tab | Contents | +|-----|----------| +| **User Defined** | Variables you create | +| **System** | Platform-provided (caller_phone, call_duration, etc.) | +| **API** | Values extracted from API responses | + +Use variables in any prompt with `{{variable_name}}` syntax. 
+ +--- + +## Canvas Controls + +| Control | Function | +|---------|----------| +| **Auto-layout** | Automatically organize nodes | +| **Zoom +/-** | Adjust view | +| **Pan** | Click and drag empty space | + + +Use **Auto-layout** often. It keeps your flow readable as it grows. + + +--- + +## Keyboard Shortcuts + +| Shortcut | Action | +|----------|--------| +| `Delete` | Delete selected | +| `Cmd/Ctrl + Z` | Undo | +| `Cmd/Ctrl + Shift + Z` | Redo | +| `Escape` | Deselect | + +--- + +## Next + + + + Every node and how to configure it + + + Create paths based on responses + + diff --git a/fern/products/atoms/pages/platform/cookbooks/using-cookbooks.mdx b/fern/products/atoms/pages/platform/cookbooks/using-cookbooks.mdx new file mode 100644 index 0000000..f643a5a --- /dev/null +++ b/fern/products/atoms/pages/platform/cookbooks/using-cookbooks.mdx @@ -0,0 +1,45 @@ +--- +title: "Cookbooks" +sidebarTitle: "Cookbooks" +description: "Code examples, templates, and demos for common voice AI use cases." +--- + +Explore ready-to-use examples and see what's possible with Atoms. + +--- + +## Code Examples + +The [Atoms Cookbook](https://github.com/smallest-inc/cookbook) on GitHub has runnable examples for common use cases — clone, configure, and deploy. + + + Text-to-speech demos, agent templates, and integration examples. + + +--- + +## Showcase + +See real projects built with Atoms by the community and our team. + + + Browse live demos and project galleries. + + +--- + +## SDK Examples + +For developer-focused examples with full source code and deployment guides: + + + End-to-end agent examples — from hello world to multi-agent orchestration. + + +--- + +## Community + + + Share your projects, get feedback, and see what others are building. 
+ \ No newline at end of file diff --git a/fern/products/atoms/pages/platform/deployment/audiences.mdx b/fern/products/atoms/pages/platform/deployment/audiences.mdx new file mode 100644 index 0000000..0902b57 --- /dev/null +++ b/fern/products/atoms/pages/platform/deployment/audiences.mdx @@ -0,0 +1,161 @@ +--- +title: "Audiences" +description: "Create and manage contact lists for outbound campaigns" +--- + +Audiences are your contact lists — the people your agents will call during campaigns. Each audience contains phone numbers and any additional information you want your agent to reference during conversations. + +--- + +## Your Audiences + + + Audiences list + + +The Audiences page shows all your contact lists at a glance. + +| Column | Description | +|--------|-------------| +| Name | Audience name and ID | +| Campaigns | Campaigns using this audience | +| Members | Number of contacts | +| Created on | When the audience was created | +| Last modified | Last update time | +| Actions | Three-dot menu for options | + +**Sorting Options:** +- Created on +- Total members +- Last modified + +**Filter Options:** +- All Audiences +- With Campaigns +- Without Campaigns + +--- + +## Creating an Audience + +Click **Create Audience** (green button, top right) to start a three-step process. + + + + + Upload CSV step + + + + All recipients must have explicit consent to be contacted. Atoms prohibits purchased, rented, or third-party phone lists. + + + Upload a CSV with your contacts (max 10 MB). Your CSV needs a phone number column—everything else is flexible. + + ```csv + phoneNumber,Name,Email,CustomerID + 919999900000,John,john@example.com,CUST001 + 919999900001,Alice,alice@example.com,CUST002 + ``` + + Phone numbers should be in international format (`+919999900000` or `919999900000`). + + + + Tell us which column contains the phone numbers. + + + Map phone number column + + + You'll see a preview of your CSV data. 
Select the column that contains valid phone numbers from the dropdown. + + + Only the phone number column needs to be mapped—everything else is up to you. Include any additional information you want available during calls (names, order IDs, account numbers, etc.). + + + + + Give your audience a name and optional description. + + + Name your audience + + + | Field | Required | Example | + |-------|----------|---------| + | Audience Name | Yes | VIP Customers List | + | Description | No | High-value customers for premium promotions | + + Click **Create Audience** to finish. + + + +--- + +## Managing Audience Members + +Click any audience to view and manage its contacts. + + + Audience members view + + +Here you can: +- **Search** for specific members +- **Select contacts** using checkboxes +- **Delete** selected contacts with the Delete button +- **Add new** contacts with the Add New button + +--- + +## Adding Members to Existing Audiences + +Click **Add New** to add contacts to an existing audience. You have two options: + + + + + Add member manually + + + Fill in the contact details one at a time: + - **phoneNumber** (required) — in international format + - Plus any other fields defined in your audience + + Click **Add Member** to save. + + + + + Import CSV to audience + + + Upload another CSV file to add contacts in bulk. + + - CSV headers should match your audience fields + - Maximum file size: 10 MB + - Click to upload or drag and drop + + + +--- + +## Using Audience Data in Calls + +The columns you upload become available as variables during campaigns. If your CSV has a "Name" column, your agent can greet callers by name. + +See [Variables](/atoms/atoms-platform/single-prompt-agents/configuration-panel/variables) for more. 
+ +--- + +## Related + + + + Create outbound calling programs + + + Manage your phone numbers + + diff --git a/fern/products/atoms/pages/platform/deployment/audiences/creating-audiences.mdx b/fern/products/atoms/pages/platform/deployment/audiences/creating-audiences.mdx new file mode 100644 index 0000000..0f428d0 --- /dev/null +++ b/fern/products/atoms/pages/platform/deployment/audiences/creating-audiences.mdx @@ -0,0 +1,140 @@ +--- +title: "Creating Audiences" +sidebarTitle: "Creating Audiences" +description: "Build contact lists for your outbound campaigns." +--- + +Audiences are contact lists used for campaigns. Create an Audience with phone numbers and optional data, then use it across multiple campaigns. + +--- + +## Location + +**Left Sidebar → Deploy → Audiences** + +--- + +## Creating an Audience + +→ **NEEDS PLATFORM INFO:** Audience creation interface + +### Step 1: Start Creation + +Click **"Create Audience"** button. + +### Step 2: Basic Details + +| Field | Description | Example | +|-------|-------------|---------| +| **Name** | Audience identifier | "Q4 Leads" | +| **Description** | Optional notes | "Leads from October event" | + +### Step 3: Add Contacts + +Two methods: + +**Upload File:** +- CSV or Excel +- Must include phone column +- Can include any custom fields + +**Manual Entry:** +- Add contacts one by one +- Good for small lists + +### Step 4: Map Fields + +After upload, map columns to fields: + +| Your Column | Maps To | +|-------------|---------| +| "Phone Number" | phone (required) | +| "First Name" | name | +| "Account ID" | account_id | +| "Amount" | amount | + +Custom fields become variables: `{{account_id}}`, `{{amount}}` + +### Step 5: Save + +Click **Save** to create the Audience. 
+ +--- + +## File Format Requirements + +### Required Column + +- **Phone number** — In any standard format + +### Optional Columns + +Any additional data you want to use as variables: +- Name +- Email +- Appointment date +- Account number +- Custom fields + +### Example CSV + +```csv +phone,name,appointment_date,amount ++15551234567,John Smith,2024-07-20,150.00 ++15559876543,Jane Doe,2024-07-21,225.00 ++15555555555,Bob Johnson,2024-07-22,175.00 +``` + +--- + +## Using Variables from Audiences + +Audience data populates agent variables: + +**In your agent prompt:** +``` +Hi {{name}}, this is a reminder about your appointment on {{appointment_date}}. +Your balance is ${{amount}}. +``` + +**During the call:** +``` +Hi John Smith, this is a reminder about your appointment on July 20th. +Your balance is $150.00. +``` + +--- + +## Best Practices + +### Data Quality + +| Do | Don't | +|----|-------| +| Verify phone numbers | Use invalid numbers | +| Keep data current | Use stale lists | +| Standardize formatting | Mix formats | + +### Phone Number Formatting + +Include country code when possible: +- Good: +15551234567 +- Okay: 5551234567 +- Bad: 555-1234 (incomplete) + +### Consent + +Ensure you have appropriate consent to contact everyone in your Audience. + +--- + +## What's Next + + + + Connect to campaigns + + + Start calling + + diff --git a/fern/products/atoms/pages/platform/deployment/audiences/using-with-campaigns.mdx b/fern/products/atoms/pages/platform/deployment/audiences/using-with-campaigns.mdx new file mode 100644 index 0000000..7c2471d --- /dev/null +++ b/fern/products/atoms/pages/platform/deployment/audiences/using-with-campaigns.mdx @@ -0,0 +1,113 @@ +--- +title: "Using Audiences with Campaigns" +sidebarTitle: "Using with Campaigns" +description: "Connect your contact lists to outbound campaigns." +--- + +Once you have an Audience, you can use it with multiple campaigns. This page covers how Audiences and Campaigns work together. 
+
+---
+
+## Connecting to Campaigns
+
+When creating or editing a campaign:
+
+1. Navigate to the Audience selection step
+2. Choose from available Audiences
+3. All contacts in that Audience become campaign targets
+
+---
+
+## One Audience, Multiple Campaigns
+
+An Audience can be used in multiple campaigns:
+
+| Audience: "October Leads" | Purpose |
+|---------------------------|---------|
+| Campaign 1 | Follow-up calls |
+| Campaign 2 | Event invitation |
+| Campaign 3 | Survey collection |
+
+Each campaign tracks its own progress independently.
+
+---
+
+## Filtering Audiences
+
+When using an Audience in a campaign, you may be able to filter:
+
+| Filter | Example |
+|--------|---------|
+| **By field value** | Only contacts where `status = "active"` |
+| **By date** | Contacts added after specific date |
+| **By previous campaigns** | Exclude contacts from other campaigns |
+
+→ **NEEDS PLATFORM INFO:** Filtering capabilities
+
+---
+
+## Data Flow
+
+Audience data flows to your agent during calls:
+
+```
+Audience: "October Leads"
+├── Contact: John Smith, +15551234567, amount: $150
+│   └── Agent receives: {{name}}, {{phone}}, {{amount}}
+│
+├── Contact: Jane Doe, +15559876543, amount: $225
+│   └── Agent receives: {{name}}, {{phone}}, {{amount}}
+│
+└── ...
+```
+
+Each call gets that contact's specific data.
+ +--- + +## Updating Audiences + +What happens when you update an Audience that's in use: + +| Change | Active Campaign | Future Campaigns | +|--------|-----------------|------------------| +| Add contacts | Added to queue | Included | +| Remove contacts | Removed from queue (if pending) | Not included | +| Update data | Uses new data | Uses new data | + +--- + +## Best Practices + +### Organize by Purpose + +Create Audiences for specific purposes: +- "Appointment Reminders - December" +- "Past Customers - Reactivation" +- "Event Attendees - 2024" + +### Keep Current + +Regularly update Audiences: +- Remove invalid numbers +- Update contact information +- Remove unsubscribed contacts + +### Track Usage + +Know which campaigns used which Audiences: +- Avoid over-contacting +- Analyze performance by Audience + +--- + +## What's Next + + + + Start a new campaign + + + Track performance + + diff --git a/fern/products/atoms/pages/platform/deployment/campaigns.mdx b/fern/products/atoms/pages/platform/deployment/campaigns.mdx new file mode 100644 index 0000000..7a8f928 --- /dev/null +++ b/fern/products/atoms/pages/platform/deployment/campaigns.mdx @@ -0,0 +1,134 @@ +--- +title: "Campaigns" +description: "Create and manage outbound calling campaigns" +--- + +Campaigns let you reach out to your audiences automatically. Set up an agent, pick an audience, and let Atoms call through your contact list. + +--- + +## Your Campaigns + + + Campaigns list + + +The Campaigns page shows all your outbound calling programs. 
+ +| Column | Description | +|--------|-------------| +| Campaign name | Name and ID | +| Created on | When the campaign was created | +| Last modified | Last update time | +| Contact list | Linked audience | +| Connected agent | Agent making the calls | +| Status | Draft, Scheduled, Active, Paused, Completed, or Failed | + +**Sorting:** Created on, Last modified + +**Filters:** All, Draft, Scheduled, Active, Paused, Completed + +--- + +## Creating a Campaign + +Click **Create Campaign** (green button, top right). + + + Create Campaign form + + +| Field | Required | Description | +|-------|----------|-------------| +| **Campaign name** | Yes | Max 40 characters | +| **Description** | No | Notes about this campaign's purpose | +| **Select Audience** | Yes | Which contact list to call | +| **Select Agent** | Yes | Which agent makes the calls | +| **Max Retries** | No | Times to retry unanswered calls (0-10). Set to 0 to disable. | +| **Retry Delay** | No | Wait time before retrying (1-1440 minutes) | +| **Schedule Campaign** | No | Set timezone + date/time to start automatically. Leave empty to save as draft. | + +Click **Create Campaign** when done. + + +Leave scheduling empty to save as a draft. You can start it manually later. + + +--- + +## Campaign Analytics + +Click any campaign to view its performance. You'll see summary cards at the top: + +| Metric | Description | +|--------|-------------| +| Total Participants | Contacts in the audience | +| Contacts Called | How many were attempted (% of total) | +| Contacts Connected | Successful connections (% success rate) | + +Below that, three tabs show detailed analytics: + + + + + Call Logs tab + + + Individual call records for this campaign. Same interface as [Conversation Logs](/atoms/atoms-platform/analytics-logs/conversation-logs)—click any call to see the full transcript, events, and metrics. + + + + + Campaign Events tab + + + Timeline of campaign lifecycle events. 
+ + | Column | Description | + |--------|-------------| + | Date | When the event occurred | + | Trigger Source | What triggered the event (system, manual) | + | Event Action | What happened (started, paused, completed, etc.) | + + + + + Executions tab + + + Execution runs and their results. + + **Summary:** Total Executions, Completed, Processing, Failed + + **Execution History** shows each run with: + - Start and completion time + - Progress bar + - Total Members, Processed, Failed, Duration + + + +--- + +## Campaign Statuses + +| Status | Meaning | +|--------|---------| +| Draft | Saved but not scheduled or started | +| Scheduled | Set to start at a future time | +| Active | Currently making calls | +| Paused | Temporarily stopped | +| Completed | All contacts processed | +| Failed | Encountered an error | + +--- + +## Related + + + + Create contact lists for campaigns + + + Get numbers for outbound calls + + diff --git a/fern/products/atoms/pages/dev/build/campaigns/campaign-analytics.mdx b/fern/products/atoms/pages/platform/deployment/campaigns/analytics.mdx similarity index 58% rename from fern/products/atoms/pages/dev/build/campaigns/campaign-analytics.mdx rename to fern/products/atoms/pages/platform/deployment/campaigns/analytics.mdx index d71ec5c..fe22c99 100644 --- a/fern/products/atoms/pages/dev/build/campaigns/campaign-analytics.mdx +++ b/fern/products/atoms/pages/platform/deployment/campaigns/analytics.mdx @@ -1,6 +1,7 @@ --- title: "Campaign Analytics" -description: "Track campaign performance" --- # Campaign Analytics + +*Content coming soon.* diff --git a/fern/products/atoms/pages/platform/deployment/campaigns/campaign-analytics.mdx b/fern/products/atoms/pages/platform/deployment/campaigns/campaign-analytics.mdx new file mode 100644 index 0000000..ca1c1c0 --- /dev/null +++ b/fern/products/atoms/pages/platform/deployment/campaigns/campaign-analytics.mdx @@ -0,0 +1,110 @@ +--- +title: "Campaign Analytics" +sidebarTitle: "Campaign Analytics" 
+description: "Track and analyze campaign performance." +--- + +Campaign analytics help you understand how your outbound calling campaigns are performing. Monitor in real-time and analyze after completion. + +--- + +## Location + +**Deploy → Campaigns → [Your Campaign] → Analytics** + +--- + +## Key Metrics + +→ **NEEDS PLATFORM INFO:** Campaign analytics interface + +### Volume Metrics + +| Metric | Description | +|--------|-------------| +| **Total Contacts** | Number of contacts in campaign | +| **Calls Made** | Number of call attempts | +| **Contacts Reached** | Successful connections | +| **Pending** | Remaining to call | + +### Performance Metrics + +| Metric | Description | +|--------|-------------| +| **Connection Rate** | % of calls that connected | +| **Completion Rate** | % of conversations completed | +| **Avg Call Duration** | Average length of calls | +| **Success Rate** | % achieving desired outcome | + +### Outcome Breakdown + +| Outcome | Count | +|---------|-------| +| Successful | Calls achieving goal | +| Unsuccessful | Calls not achieving goal | +| Voicemail | Messages left | +| No Answer | Couldn't reach | +| Failed | Technical failures | + +--- + +## Real-Time Monitoring + +During active campaigns: + +- Current calls in progress +- Queue remaining +- Today's progress +- Estimated completion time + +--- + +## Post-Campaign Analysis + +After campaign completes: + +### Overall Summary + +- Total reach rate +- Goal completion rate +- Time to complete +- Cost per contact + +### Trend Analysis + +- Performance over time +- Best times to call +- Day-of-week patterns + +### Export Data + +Download detailed data for deeper analysis: +- Contact-level results +- Call transcripts +- Custom metrics extracted + +--- + +## Improving Performance + +Use analytics to improve future campaigns: + +| Finding | Action | +|---------|--------| +| Low connection rate | Adjust calling times | +| Short calls | Improve opening | +| High hang-ups | Refine value 
proposition | +| Low conversion | Optimize script/flow | + +--- + +## What's Next + + + + Build better contact lists + + + Platform-wide analytics + + diff --git a/fern/products/atoms/pages/platform/deployment/campaigns/creating-campaign.mdx b/fern/products/atoms/pages/platform/deployment/campaigns/creating-campaign.mdx new file mode 100644 index 0000000..a0a6204 --- /dev/null +++ b/fern/products/atoms/pages/platform/deployment/campaigns/creating-campaign.mdx @@ -0,0 +1,102 @@ +--- +title: "Creating a Campaign" +sidebarTitle: "Creating a Campaign" +description: "Set up an outbound calling campaign." +--- + +This guide walks you through creating your first outbound calling campaign. + +--- + +## Location + +**Left Sidebar → Deploy → Campaigns** + +--- + +## Steps to Create + +→ **NEEDS PLATFORM INFO:** Campaign creation interface + +### Step 1: Start Creation + +Click **"Create Campaign"** button. + +### Step 2: Basic Details + +| Field | Description | Example | +|-------|-------------|---------| +| **Name** | Campaign identifier | "Q4 Appointment Reminders" | +| **Description** | Optional notes | "Reminder calls for scheduled appointments" | + +### Step 3: Select Agent + +Choose which agent will make the calls. This should be an agent designed for outbound conversations. 
+ +### Step 4: Select Audience + +Choose the contact list to call: +- Select from existing Audiences +- Or create a new Audience + +→ See: [Creating Audiences](/atoms/atoms-platform/deployment/audiences) + +### Step 5: Configure Schedule + +| Setting | Description | +|---------|-------------| +| **Start Date** | When to begin calling | +| **End Date** | When to stop | +| **Calling Hours** | Time window for calls (respects time zones) | +| **Days** | Which days to call | + +### Step 6: Configure Settings + +| Setting | Description | +|---------|-------------| +| **Caller ID** | Number shown to recipients | +| **Retry Rules** | How many times to retry unanswered | +| **Call Spacing** | Minimum time between calls | +| **Voicemail** | Leave message or skip | + +### Step 7: Review and Launch + +Review all settings, then: +- **Save as Draft** — Save for later +- **Launch** — Start the campaign + +--- + +## Campaign States + +| State | Description | +|-------|-------------| +| **Draft** | Created but not started | +| **Active** | Currently making calls | +| **Paused** | Temporarily stopped | +| **Completed** | All contacts called | +| **Cancelled** | Manually stopped | + +--- + +## Before Launching + +Verify: +- [ ] Agent is tested and locked +- [ ] Audience is complete and accurate +- [ ] Schedule is appropriate for your contacts +- [ ] Caller ID is set correctly +- [ ] Retry rules make sense for your use case + +--- + +## What's Next + + + + Work with campaign contacts + + + Track performance + + diff --git a/fern/products/atoms/pages/platform/deployment/campaigns/creating.mdx b/fern/products/atoms/pages/platform/deployment/campaigns/creating.mdx new file mode 100644 index 0000000..07c749d --- /dev/null +++ b/fern/products/atoms/pages/platform/deployment/campaigns/creating.mdx @@ -0,0 +1,7 @@ +--- +title: "Creating Campaign" +--- + +# Creating Campaign + +*Content coming soon.* diff --git a/fern/products/atoms/pages/platform/deployment/campaigns/managing-contacts.mdx 
b/fern/products/atoms/pages/platform/deployment/campaigns/managing-contacts.mdx new file mode 100644 index 0000000..fa1674b --- /dev/null +++ b/fern/products/atoms/pages/platform/deployment/campaigns/managing-contacts.mdx @@ -0,0 +1,107 @@ +--- +title: "Managing Campaign Contacts" +sidebarTitle: "Managing Contacts" +description: "Work with contacts in your campaigns." +--- + +After launching a campaign, you can manage contacts — view their status, add new contacts, or remove contacts from the queue. + +--- + +## Location + +**Deploy → Campaigns → [Your Campaign] → Contacts** + +--- + +## Contact Statuses + +Each contact in a campaign has a status: + +| Status | Meaning | +|--------|---------| +| **Pending** | Not yet called | +| **In Progress** | Currently being called | +| **Completed** | Successfully reached | +| **Failed** | Could not connect | +| **Voicemail** | Left voicemail | +| **No Answer** | No answer after retries | +| **Skipped** | Manually skipped | + +--- + +## Viewing Contacts + +→ **NEEDS PLATFORM INFO:** Contact list interface + +The contact list shows: + +| Column | Description | +|--------|-------------| +| **Phone** | Contact phone number | +| **Name** | Contact name (if provided) | +| **Status** | Current call status | +| **Attempts** | Number of call attempts | +| **Last Attempt** | When last called | +| **Outcome** | Final disposition | + +--- + +## Adding Contacts + +During a campaign, you can add more contacts: + +1. Navigate to campaign contacts +2. Click **"Add Contacts"** +3. Upload file or select from Audience +4. New contacts enter the queue + +--- + +## Removing Contacts + +To remove contacts from the queue: + +1. Find the contact(s) +2. Select them +3. Click **"Remove"** or **"Skip"** + +Removed contacts won't be called. + +--- + +## Retrying Failed Contacts + +For contacts that failed: + +1. Filter by "Failed" status +2. Review failure reasons +3. Select contacts to retry +4. Click **"Retry"** + +They re-enter the queue. 
+ +--- + +## Exporting Results + +After campaign completion: + +1. Go to campaign contacts +2. Click **"Export"** +3. Download CSV with all contact data and outcomes + +Use for follow-up or analysis. + +--- + +## What's Next + + + + Analyze performance + + + Build contact lists + + diff --git a/fern/products/atoms/pages/platform/deployment/campaigns/what-are-campaigns.mdx b/fern/products/atoms/pages/platform/deployment/campaigns/what-are-campaigns.mdx new file mode 100644 index 0000000..c1c096d --- /dev/null +++ b/fern/products/atoms/pages/platform/deployment/campaigns/what-are-campaigns.mdx @@ -0,0 +1,115 @@ +--- +title: "What Are Campaigns" +sidebarTitle: "What Are Campaigns" +description: "Run outbound calling programs at scale with your voice AI agent." +--- + +Campaigns let your agent make outbound calls — proactively reaching out to customers rather than waiting for them to call. Upload a contact list, configure your agent, and let Atoms handle the calling at scale. + +--- + +## How Campaigns Work + +1. **Create a campaign** — Name, agent, and settings +2. **Add contacts** — Upload or create an Audience +3. **Configure** — Timing, retry rules, variables +4. **Launch** — Atoms starts calling +5. **Monitor** — Track progress and results + +--- + +## Use Cases + +### Appointment Reminders + +``` +"Hi {{name}}, this is a reminder about your appointment +tomorrow at {{time}}. Would you like to confirm or reschedule?" +``` + +### Payment Reminders + +``` +"Hello {{name}}, this is a courtesy call about your payment +of {{amount}} due on {{date}}." +``` + +### Survey Collection + +``` +"Hi, we're reaching out to get your feedback on your +recent experience with {{company}}." +``` + +### Sales Follow-Up + +``` +"Hi {{name}}, this is {{agent_name}} following up on your +interest in {{product}}. Do you have a few minutes?" +``` + +### Lead Nurturing + +``` +"Hi {{name}}, I'm calling to share some information about +{{topic}} that I thought you'd find valuable." 
+``` + +--- + +## Campaign Components + +| Component | Purpose | +|-----------|---------| +| **Agent** | The AI that makes calls | +| **Audience** | Contact list to call | +| **Schedule** | When to make calls | +| **Settings** | Retry rules, caller ID, etc. | + +--- + +## Outbound vs Inbound + +| Aspect | Inbound | Outbound (Campaigns) | +|--------|---------|----------------------| +| Who initiates | Customer calls you | You call customer | +| Timing | Customer's choice | Your schedule | +| Context | Unknown until call | Known (from contact list) | +| Personalization | Limited | High (use contact data) | + +--- + +## Contact Data + +Campaigns use contact lists (Audiences) with: + +| Field | Usage | +|-------|-------| +| **Phone number** | Required — who to call | +| **Name** | Personalization `{{name}}` | +| **Custom fields** | Any data for variables | + +This data populates variables in your prompts. + +--- + +## Getting Started + +1. **Create an agent** for outbound calls +2. **Create an Audience** with your contacts +3. **Create a Campaign** connecting them +4. **Configure** timing and rules +5. **Launch** and monitor + +--- + +## What's Next + + + + Set up your first campaign + + + Build your contact lists + + diff --git a/fern/products/atoms/pages/platform/deployment/images/README.md b/fern/products/atoms/pages/platform/deployment/images/README.md new file mode 100644 index 0000000..dfd02a9 --- /dev/null +++ b/fern/products/atoms/pages/platform/deployment/images/README.md @@ -0,0 +1,43 @@ +# Deployment — Image Guide + +Drop screenshots here. Name them exactly as listed below. 
+ +--- + +## Phone Numbers + +| Filename | What to capture | +|----------|-----------------| +| `phone-numbers.png` | Main Phone Numbers page with list of numbers and details panel | +| `rent-number.png` | Rent Number modal | +| `import-sip.png` | Import SIP Number modal | + +--- + +## Campaigns + +| Filename | What to capture | +|----------|-----------------| +| (to be added) | | + +--- + +## Audiences + +| Filename | What to capture | +|----------|-----------------| +| `audiences.png` | Main Audiences list page with table showing Name, Campaigns, Members, Created on, Last modified | +| `upload-csv.png` | Step 1: Upload CSV page with drag-drop area and consent warning | +| `map-phone.png` | Step 2: Map Phone Number page with dropdown and CSV preview table | +| `add-contacts.png` | Step 3: Add Contacts page with Audience Name and Description fields | +| `audience-members.png` | Individual audience view showing members table with search, delete, and Add New buttons | +| `add-manually.png` | Add new members modal — "Add Manually" tab with form fields | +| `import-csv.png` | Add new members modal — "Import CSV" tab with upload area | + +--- + +## Notes + +- **Format:** PNG preferred, keep under 500KB each +- **Size:** Capture at 2x resolution if possible for retina displays +- **Sensitive data:** Blur or use fake data for any real phone numbers diff --git a/fern/products/atoms/pages/platform/deployment/overview.mdx b/fern/products/atoms/pages/platform/deployment/overview.mdx new file mode 100644 index 0000000..a163d92 --- /dev/null +++ b/fern/products/atoms/pages/platform/deployment/overview.mdx @@ -0,0 +1,106 @@ +--- +title: "Deployment Overview" +sidebarTitle: "Overview" +description: "Make your voice AI agents live and accessible." +--- + +After building and testing your agent, it's time to deploy. Atoms offers multiple deployment options depending on how you want users to interact with your agent. 
+ +--- + +## Deployment Options + +| Method | Description | Best For | +|--------|-------------|----------| +| **Phone Numbers** | Assign a phone number for inbound calls | Customer support, hotlines | +| **Widget** | Embed on your website | Web visitors, integrated support | +| **Campaigns** | Outbound calling at scale | Reminders, sales outreach | + +--- + +## Phone Numbers + +Get a phone number and assign it to your agent. When customers call that number, your agent answers. + +### Use Cases + +- Customer support hotline +- Sales inquiry line +- Information line +- After-hours support + +→ See: [Phone Numbers](/atoms/atoms-platform/deployment/phone-numbers) + +--- + +## Widget Deployment + +Embed a voice widget on your website. Visitors can click to talk with your agent directly in their browser. + +### Use Cases + +- Website support +- Sales assistance +- Product demos +- Lead capture + +→ See: [Widget Deployment](/atoms/atoms-platform/features/widget) + +--- + +## Campaigns + +Have your agent proactively call customers. Create contact lists, schedule calls, and run outreach at scale. + +### Use Cases + +- Appointment reminders +- Payment reminders +- Survey collection +- Sales follow-up +- Lead nurturing + +→ See: [Campaigns](/atoms/atoms-platform/deployment/campaigns) + +--- + +## Choosing a Deployment Method + +| If You Want To... | Use | +|-------------------|-----| +| Receive inbound calls | Phone Numbers | +| Support website visitors | Widget | +| Make outbound calls | Campaigns | +| Multiple channels | Combine methods | + +Many organizations use multiple methods — a phone number for inbound support and campaigns for outbound reminders. 
+ +--- + +## Pre-Deployment Checklist + +Before making an agent live: + +- [ ] Tested all conversation paths +- [ ] Tested edge cases +- [ ] Reviewed conversation logs +- [ ] Locked the agent (if in production) +- [ ] Configured all necessary settings +- [ ] Set up webhooks (if using) +- [ ] Team is prepared for any escalations + +--- + +## What's Next + + + + Set up inbound calling + + + Embed on your website + + + Run outbound calls + + diff --git a/fern/products/atoms/pages/platform/deployment/phone-numbers.mdx b/fern/products/atoms/pages/platform/deployment/phone-numbers.mdx new file mode 100644 index 0000000..4894ce6 --- /dev/null +++ b/fern/products/atoms/pages/platform/deployment/phone-numbers.mdx @@ -0,0 +1,107 @@ +--- +title: "Phone Numbers" +sidebarTitle: "Phone Numbers" +description: "Get and manage phone numbers for your agents." +--- + +Phone Numbers let your agents make and receive real phone calls. Rent numbers directly through Atoms, or import your own via SIP. + +--- + +## Your Numbers + + + Phone Numbers page + + +The main view shows all your phone numbers with key details: + +| Field | Description | +|-------|-------------| +| **Number** | The phone number | +| **Provider** | Telephony provider (e.g., Plivo) | +| **Connected Agent** | Which agent is assigned | +| **Agent Name** | Name of the connected agent | +| **Rent Date** | When you acquired the number | +| **Monthly Cost** | Recurring cost | +| **Status** | Active or inactive | + +Click any number in the list to see its details and manage it. + +--- + +## Adding a Number + +Click **Add Number** in the top right. You have two options: + + + + + ![Rent number](../building-agents/images/rent-number.png) + + + Rent a new phone number directly through Atoms. + + 1. Click **Add Number** → **Rent Number** + 2. Select your country and preferences + 3. Choose from available numbers + 4. Complete the rental + + The number appears in your list immediately, ready to assign to an agent. 
+ + + + + ![Import SIP number](../building-agents/images/import-sip.png) + + + Bring your own number via SIP trunking. + + | Field | Required | Description | + |-------|----------|-------------| + | **Phone Number** | Yes | Your existing number | + | **SIP Termination URL** | Yes | Where to send calls | + | **Display Name** | No | Friendly name for the number | + | **Username** | No | For SIP authentication | + | **Password** | No | For SIP authentication | + | **SIP Origination URL** | — | Provided by Atoms (copy this to your provider) | + + Click **Add Custom Number** when done. + + + +--- + +## Assigning to an Agent + +Once you have a number, assign it to an agent: + +1. Open your agent +2. Go to **Agent Settings** → **Phone Number** tab +3. Select the number from the dropdown +4. Save + +Now calls to that number will be handled by your agent. + +--- + +## Releasing a Number + +To stop using a number, click it in the list and click **Release**. + + +Releasing a number expires it at the end of the month and **cannot be undone**. The number cannot be reused. 
+ + +--- + +## Related + + + + Manage contact lists for campaigns + + + Use numbers for outbound calling + + diff --git a/fern/products/atoms/pages/platform/deployment/phone-numbers/buying.mdx b/fern/products/atoms/pages/platform/deployment/phone-numbers/buying.mdx new file mode 100644 index 0000000..7b956f1 --- /dev/null +++ b/fern/products/atoms/pages/platform/deployment/phone-numbers/buying.mdx @@ -0,0 +1,7 @@ +--- +title: "Buying Phone Numbers" +--- + +# Buying Phone Numbers + +*Content coming soon.* diff --git a/fern/products/atoms/pages/platform/deployment/phone-numbers/configuring-inbound.mdx b/fern/products/atoms/pages/platform/deployment/phone-numbers/configuring-inbound.mdx new file mode 100644 index 0000000..2287c4b --- /dev/null +++ b/fern/products/atoms/pages/platform/deployment/phone-numbers/configuring-inbound.mdx @@ -0,0 +1,92 @@ +--- +title: "Configuring Inbound Calls" +sidebarTitle: "Configuring Inbound" +description: "Set up how your phone number handles incoming calls." +--- + +After getting a phone number, you need to configure how it handles incoming calls. This includes assigning an agent and setting up call handling options. + +--- + +## Assigning an Agent + +The most important configuration: which agent answers calls to this number. + +### Method 1: From the Agent + +1. Open your agent +2. Go to Agent Settings → Phone Number Tab +3. Select your number from the dropdown +4. Save + +### Method 2: From Phone Numbers + +1. Go to Deploy → Phone Numbers +2. Find your number +3. Click to configure +4. Select the agent +5. Save + +Each number can only be assigned to one agent at a time. 
+ +--- + +## Call Handling Options + +→ **NEEDS PLATFORM INFO:** Specific call handling configuration options + +### Greeting Behavior + +Configure what happens when a call connects: +- Immediate greeting +- Brief pause before speaking +- Wait for caller to speak first + +### Recording Settings + +- Call recording enabled/disabled +- Recording disclosure message +- Storage and retention + +### Hours of Operation + +- Always active +- Scheduled hours +- After-hours behavior + +--- + +## After-Hours Handling + +What happens when calls come outside business hours: + +| Option | Description | +|--------|-------------| +| **Voicemail** | Take a message | +| **Different Agent** | Route to after-hours agent | +| **Message Only** | Play message and disconnect | +| **Same Agent** | 24/7 operation | + +--- + +## Testing Your Configuration + +After configuring: + +1. Call your number from a different phone +2. Verify the correct agent answers +3. Have a test conversation +4. Check call logs for any issues + +--- + +## What's Next + + + + View and manage numbers + + + Run outbound calls + + diff --git a/fern/products/atoms/pages/platform/deployment/phone-numbers/getting-number.mdx b/fern/products/atoms/pages/platform/deployment/phone-numbers/getting-number.mdx new file mode 100644 index 0000000..add61a1 --- /dev/null +++ b/fern/products/atoms/pages/platform/deployment/phone-numbers/getting-number.mdx @@ -0,0 +1,100 @@ +--- +title: "Getting a Phone Number" +sidebarTitle: "Getting a Number" +description: "Acquire a phone number for your voice AI agent." +--- + +To receive inbound calls, your agent needs a phone number. Atoms makes it easy to acquire and configure phone numbers. + +--- + +## Location + +**Left Sidebar → Deploy → Phone Numbers** + +--- + +## Getting a Number + +→ **NEEDS PLATFORM INFO:** Phone number acquisition interface and process + +### Step 1: Navigate to Phone Numbers + +In the left sidebar, under "Deploy," click **Phone Numbers**. 
+ +### Step 2: Click to Get Number + +Click **"Get Number"** or **"Add Phone Number"**. + +### Step 3: Select Options + +| Option | Description | +|--------|-------------| +| **Country** | Where you want the number | +| **Type** | Local, toll-free, mobile | +| **Area Code** | Specific region (if available) | + +### Step 4: Purchase + +Select a number and complete the purchase. + +### Step 5: Number is Ready + +Your number appears in the list, ready to assign to an agent. + +--- + +## Number Types + +| Type | Description | Best For | +|------|-------------|----------| +| **Local** | Geographic area code | Regional businesses | +| **Toll-Free** | 800/888/etc numbers | National businesses, no caller cost | +| **Mobile** | Mobile format | SMS-enabled (if supported) | + +--- + +## Pricing + +→ **NEEDS PLATFORM INFO:** Phone number pricing + +Phone numbers typically have: +- **Monthly fee** — Cost to maintain the number +- **Per-minute charges** — Cost per minute of calls + +Check your plan details for specific pricing. + +--- + +## Country Availability + +Atoms supports phone numbers in multiple countries: +- United States +- Canada +- United Kingdom +- And more... + +→ **NEEDS PLATFORM INFO:** Complete country list + +--- + +## After Getting a Number + +Once you have a number: + +1. **Assign to agent** — In the agent's Phone Number Tab +2. **Configure settings** — Call handling options +3. 
**Test** — Call your number to verify + +--- + +## What's Next + + + + Set up call handling + + + View and manage your numbers + + diff --git a/fern/products/atoms/pages/platform/deployment/phone-numbers/management.mdx b/fern/products/atoms/pages/platform/deployment/phone-numbers/management.mdx new file mode 100644 index 0000000..caaa774 --- /dev/null +++ b/fern/products/atoms/pages/platform/deployment/phone-numbers/management.mdx @@ -0,0 +1,7 @@ +--- +title: "Number Management" +--- + +# Number Management + +*Content coming soon.* diff --git a/fern/products/atoms/pages/platform/deployment/phone-numbers/managing-numbers.mdx b/fern/products/atoms/pages/platform/deployment/phone-numbers/managing-numbers.mdx new file mode 100644 index 0000000..c863f88 --- /dev/null +++ b/fern/products/atoms/pages/platform/deployment/phone-numbers/managing-numbers.mdx @@ -0,0 +1,103 @@ +--- +title: "Managing Phone Numbers" +sidebarTitle: "Managing Numbers" +description: "View, configure, and manage your phone numbers." +--- + +Once you have phone numbers, you'll need to manage them over time — reassigning agents, checking usage, and maintaining your phone infrastructure. + +--- + +## Location + +**Left Sidebar → Deploy → Phone Numbers** + +--- + +## Phone Numbers List + +→ **NEEDS PLATFORM INFO:** Phone numbers management interface + +The list shows all your numbers with: + +| Column | Description | +|--------|-------------| +| **Number** | The phone number | +| **Status** | Active, Inactive, Pending | +| **Assigned Agent** | Which agent handles calls | +| **Calls** | Call volume statistics | +| **Actions** | Configure, release | + +--- + +## Common Actions + +### Reassigning an Agent + +To change which agent answers: + +1. Find the number in the list +2. Click to configure +3. Select a different agent +4. 
Save changes + +### Viewing Call Statistics + +For each number, you can see: +- Total calls received +- Average call duration +- Peak calling times +- Success/failure rates + +### Releasing a Number + +If you no longer need a number: + +1. Find the number in the list +2. Click to manage +3. Select "Release" or "Delete" +4. Confirm + + +Releasing a number is permanent. You may not be able to get the same number back. + + +--- + +## Multiple Numbers + +Many organizations use multiple numbers: + +| Number | Purpose | +|--------|---------| +| Sales line | Sales inquiries | +| Support line | Customer support | +| After-hours | Emergency support | +| Campaign number | Outbound caller ID | + +Each number can have a different agent assigned. + +--- + +## Number Health + +Monitor your numbers for issues: + +| Issue | Indication | +|-------|------------| +| High failure rate | Connectivity problems | +| Low answer rate | Consider timing or call frequency | +| Complaints | May affect number reputation | + +--- + +## What's Next + + + + Add voice to your website + + + Run outbound calls + + diff --git a/fern/products/atoms/pages/platform/deployment/widget/adding-to-website.mdx b/fern/products/atoms/pages/platform/deployment/widget/adding-to-website.mdx new file mode 100644 index 0000000..1fe54ce --- /dev/null +++ b/fern/products/atoms/pages/platform/deployment/widget/adding-to-website.mdx @@ -0,0 +1,173 @@ +--- +title: "Adding Widget to Your Website" +sidebarTitle: "Adding to Website" +description: "Install the voice widget on your website." +--- + +Once you have your embed code, adding it to your website is straightforward. This page covers common installation methods. + +--- + +## Basic Installation + +Add the embed code just before the closing `` tag: + +```html + + + + Your Website + + + + + + + + +``` + +--- + +## Platform-Specific Instructions + +### WordPress + +1. Go to Appearance → Theme Editor +2. Open `footer.php` +3. Paste embed code before `` +4. 
Save + +Or use a plugin like "Insert Headers and Footers": +1. Install the plugin +2. Go to Settings → Insert Headers and Footers +3. Paste code in the "Footer" section +4. Save + +### Shopify + +1. Go to Online Store → Themes +2. Click Actions → Edit code +3. Find `theme.liquid` +4. Paste before `` +5. Save + +### Squarespace + +1. Go to Settings → Advanced → Code Injection +2. Paste in the "Footer" section +3. Save + +### Wix + +1. Go to Settings → Custom Code +2. Add new code +3. Paste embed code +4. Set placement to "Body - end" +5. Apply to all pages + +### React + +```jsx +import { useEffect } from 'react'; + +function App() { + useEffect(() => { + const script = document.createElement('script'); + script.src = 'https://widget.atoms.ai/v1/widget.js'; + script.setAttribute('data-agent-id', 'YOUR_AGENT_ID'); + document.body.appendChild(script); + + return () => { + document.body.removeChild(script); + }; + }, []); + + return ( +
+      <div>
+        {/* Your app content */}
+      </div>
+ ); +} +``` + +### Next.js + +```jsx +// pages/_app.js or app/layout.js +import Script from 'next/script'; + +export default function App({ Component, pageProps }) { + return ( + <> + + +``` + +--- + +## What's Next + + + + Run outbound calls + + + Customize appearance + + diff --git a/fern/products/atoms/pages/platform/deployment/widget/embedding.mdx b/fern/products/atoms/pages/platform/deployment/widget/embedding.mdx new file mode 100644 index 0000000..07123af --- /dev/null +++ b/fern/products/atoms/pages/platform/deployment/widget/embedding.mdx @@ -0,0 +1,7 @@ +--- +title: "Embedding Widget" +--- + +# Embedding Widget + +*Content coming soon.* diff --git a/fern/products/atoms/pages/platform/deployment/widget/getting-embed-code.mdx b/fern/products/atoms/pages/platform/deployment/widget/getting-embed-code.mdx new file mode 100644 index 0000000..db2dc76 --- /dev/null +++ b/fern/products/atoms/pages/platform/deployment/widget/getting-embed-code.mdx @@ -0,0 +1,97 @@ +--- +title: "Getting Widget Embed Code" +sidebarTitle: "Getting Embed Code" +description: "Generate the code snippet to add voice AI to your website." +--- + +The widget embed code is a snippet of JavaScript that adds a voice button to your website. Visitors can click to talk with your agent directly in their browser. + +--- + +## Location + +**Agent Editor → Left Sidebar → Widget** + +--- + +## Generating the Code + +### Step 1: Configure Widget + +Before generating code, configure your widget appearance and behavior: + +→ See: [Widget Configuration](/atoms/atoms-platform/features/widget) + +### Step 2: Get Embed Code + +After configuration: + +1. Navigate to the Widget section +2. Find the **"Get Embed Code"** or **"Copy Code"** button +3. 
Click to copy the snippet + +### Step 3: Code Format + +The embed code looks something like: + +```html + +``` + +→ **NEEDS PLATFORM INFO:** Exact embed code format + +--- + +## Customization Options + +The embed code may include configuration options: + +| Option | Description | +|--------|-------------| +| `data-agent-id` | Your agent identifier | +| `data-position` | Button position (bottom-right, bottom-left) | +| `data-theme` | Color theme | +| `data-greeting` | Initial greeting message | + +--- + +## What the Widget Does + +When embedded: + +1. **Button appears** — Usually bottom corner of page +2. **User clicks** — Widget opens +3. **Microphone access** — Browser prompts for permission +4. **Conversation starts** — User talks with your agent +5. **Widget closes** — When conversation ends + +--- + +## Browser Requirements + +Visitors need: +- Modern browser (Chrome, Firefox, Safari, Edge) +- Microphone access permission +- Speakers or headphones + +Mobile browsers are supported. + +--- + +## What's Next + + + + Install the widget + + + Customize appearance + + diff --git a/fern/products/atoms/pages/platform/deployment/widget/setting-up.mdx b/fern/products/atoms/pages/platform/deployment/widget/setting-up.mdx new file mode 100644 index 0000000..43a7e0e --- /dev/null +++ b/fern/products/atoms/pages/platform/deployment/widget/setting-up.mdx @@ -0,0 +1,7 @@ +--- +title: "Setting Up Widget" +--- + +# Setting Up Widget + +*Content coming soon.* diff --git a/fern/products/atoms/pages/platform/deployment/widget/what-is-widget.mdx b/fern/products/atoms/pages/platform/deployment/widget/what-is-widget.mdx new file mode 100644 index 0000000..f83c80e --- /dev/null +++ b/fern/products/atoms/pages/platform/deployment/widget/what-is-widget.mdx @@ -0,0 +1,7 @@ +--- +title: "What is Widget?" +--- + +# What is Widget? 
+ +*Content coming soon.* diff --git a/fern/products/atoms/pages/platform/features/api-calls-reference.mdx b/fern/products/atoms/pages/platform/features/api-calls-reference.mdx new file mode 100644 index 0000000..face0d8 --- /dev/null +++ b/fern/products/atoms/pages/platform/features/api-calls-reference.mdx @@ -0,0 +1,68 @@ +--- +title: "API Calls" +sidebarTitle: "API Calls" +description: "Connect agents to external systems." +--- + +API Calls let your agent interact with external systems during conversations — fetch customer data, book appointments, update CRMs, process payments. + +--- + +## API Call Types + +| Type | When It Runs | Use Case | +|------|--------------|----------| +| **Pre-Call** | Before conversation starts | Load customer context | +| **Mid-Call** | During conversation | Check status, take action | +| **Post-Call** | After conversation ends | Log to CRM, create tickets | + +--- + +## Configuration + +| Field | Description | +|-------|-------------| +| **URL** | API endpoint | +| **Method** | GET, POST, PUT, DELETE | +| **Headers** | Authorization, Content-Type | +| **Body** | Request payload (JSON) | +| **Response Mapping** | Extract data to variables | + +--- + +## Example + +**Request:** +``` +GET https://api.example.com/customers/{{caller_phone}} +``` + +**Response:** +```json +{ + "name": "Jane Smith", + "tier": "premium" +} +``` + +**Use in prompt:** +``` +Hello {{customer_name}}! I see you're a {{tier}} member. +``` + +--- + +## Implementation + +API calls are configured differently in each agent type. 
+ + + + API Calls in Configuration Panel + + + Deprecated + + API Call Nodes in Workflow + + diff --git a/fern/products/atoms/pages/platform/features/api-calls/common-integrations.mdx b/fern/products/atoms/pages/platform/features/api-calls/common-integrations.mdx new file mode 100644 index 0000000..6e650c2 --- /dev/null +++ b/fern/products/atoms/pages/platform/features/api-calls/common-integrations.mdx @@ -0,0 +1,384 @@ +--- +title: "Common Integrations" +sidebarTitle: "Common Integrations" +description: "Ready-to-use API patterns for popular services and use cases." +--- + +Here are pre-built API configurations for common integrations. Copy and customize for your needs. + +--- + +## CRM Integrations + +### Salesforce + + + + ``` + GET https://yourinstance.salesforce.com/services/data/v58.0/query + + Query: SELECT Id, Name, Email, Phone, Account.Name + FROM Contact + WHERE Phone = '{{caller_phone}}' + + Headers: + Authorization: Bearer {{salesforce_token}} + Content-Type: application/json + + Response Mapping: + $.records[0].Name → customer_name + $.records[0].Email → customer_email + $.records[0].Account.Name → company_name + ``` + + + ``` + POST https://yourinstance.salesforce.com/services/data/v58.0/sobjects/Task + + Headers: + Authorization: Bearer {{salesforce_token}} + Content-Type: application/json + + Body: + { + "Subject": "Voice Call - {{disposition}}", + "WhoId": "{{contact_id}}", + "Description": "{{call_summary}}", + "Status": "Completed", + "CallDurationInSeconds": {{call_duration}}, + "CallType": "{{call_direction}}" + } + ``` + + + +### HubSpot + + + + ``` + GET https://api.hubapi.com/crm/v3/objects/contacts/search + + Headers: + Authorization: Bearer {{hubspot_api_key}} + Content-Type: application/json + + Body: + { + "filterGroups": [{ + "filters": [{ + "propertyName": "phone", + "operator": "EQ", + "value": "{{caller_phone}}" + }] + }], + "properties": ["firstname", "lastname", "email", "company"] + } + + Response Mapping: + 
$.results[0].properties.firstname → first_name + $.results[0].properties.lastname → last_name + $.results[0].properties.company → company_name + ``` + + + ``` + POST https://api.hubapi.com/crm/v3/objects/notes + + Headers: + Authorization: Bearer {{hubspot_api_key}} + Content-Type: application/json + + Body: + { + "properties": { + "hs_timestamp": "{{call_start_time}}", + "hs_note_body": "Call Summary: {{call_summary}}\nDuration: {{call_duration}}s\nOutcome: {{disposition}}" + }, + "associations": [{ + "to": {"id": "{{contact_id}}"}, + "types": [{"associationCategory": "HUBSPOT_DEFINED", "associationTypeId": 202}] + }] + } + ``` + + + +--- + +## Calendar / Booking + +### Google Calendar + + + + ``` + GET https://www.googleapis.com/calendar/v3/freeBusy + + Headers: + Authorization: Bearer {{google_oauth_token}} + Content-Type: application/json + + Body: + { + "timeMin": "{{requested_date}}T09:00:00Z", + "timeMax": "{{requested_date}}T17:00:00Z", + "items": [{"id": "{{calendar_id}}"}] + } + + Response Mapping: + $.calendars.{{calendar_id}}.busy → busy_times + ``` + + + ``` + POST https://www.googleapis.com/calendar/v3/calendars/{{calendar_id}}/events + + Headers: + Authorization: Bearer {{google_oauth_token}} + Content-Type: application/json + + Body: + { + "summary": "Appointment - {{customer_name}}", + "start": { + "dateTime": "{{appointment_datetime}}", + "timeZone": "America/New_York" + }, + "end": { + "dateTime": "{{appointment_end_datetime}}", + "timeZone": "America/New_York" + }, + "attendees": [{"email": "{{customer_email}}"}], + "description": "Booked via voice agent. 
Phone: {{caller_phone}}" + } + ``` + + + +### Calendly + +``` +GET https://api.calendly.com/scheduled_events + +Headers: + Authorization: Bearer {{calendly_api_key}} + Content-Type: application/json + +Query Parameters: + user: {{calendly_user_uri}} + min_start_time: {{start_date}} + max_start_time: {{end_date}} + +Response Mapping: + $.collection[0].start_time → next_appointment + $.collection[0].name → event_name +``` + +--- + +## Payment / Billing + +### Stripe + + + + ``` + GET https://api.stripe.com/v1/customers/{{stripe_customer_id}} + + Headers: + Authorization: Bearer {{stripe_secret_key}} + + Response Mapping: + $.balance → account_balance + $.email → customer_email + $.name → customer_name + ``` + + + ``` + POST https://api.stripe.com/v1/payment_links + + Headers: + Authorization: Bearer {{stripe_secret_key}} + Content-Type: application/x-www-form-urlencoded + + Body: + line_items[0][price]: {{price_id}} + line_items[0][quantity]: 1 + metadata[call_id]: {{call_id}} + metadata[customer_phone]: {{caller_phone}} + + Response Mapping: + $.url → payment_link + ``` + + + +--- + +## Support / Ticketing + +### Zendesk + + + + ``` + GET https://{{subdomain}}.zendesk.com/api/v2/users/search + + Headers: + Authorization: Basic {{zendesk_credentials}} + + Query: query=phone:{{caller_phone}} + + Response Mapping: + $.users[0].name → customer_name + $.users[0].id → zendesk_user_id + ``` + + + ``` + POST https://{{subdomain}}.zendesk.com/api/v2/tickets + + Headers: + Authorization: Basic {{zendesk_credentials}} + Content-Type: application/json + + Body: + { + "ticket": { + "subject": "Voice Call - {{issue_type}}", + "description": "{{call_summary}}", + "requester_id": {{zendesk_user_id}}, + "priority": "normal", + "tags": ["voice_agent", "{{disposition}}"], + "custom_fields": [ + {"id": 123, "value": "{{call_id}}"}, + {"id": 456, "value": "{{call_duration}}"} + ] + } + } + + Response Mapping: + $.ticket.id → ticket_number + ``` + + + +### Intercom + +``` +POST 
https://api.intercom.io/conversations + +Headers: + Authorization: Bearer {{intercom_token}} + Content-Type: application/json + +Body: +{ + "from": { + "type": "user", + "id": "{{intercom_user_id}}" + }, + "body": "Voice call summary:\n\n{{call_summary}}\n\nDuration: {{call_duration}}s" +} +``` + +--- + +## Notifications + +### Slack + +``` +POST https://hooks.slack.com/services/{{webhook_path}} + +Headers: + Content-Type: application/json + +Body: +{ + "text": "📞 New call completed", + "blocks": [ + { + "type": "section", + "text": { + "type": "mrkdwn", + "text": "*Caller:* {{caller_phone}}\n*Duration:* {{call_duration}}s\n*Outcome:* {{disposition}}" + } + }, + { + "type": "section", + "text": { + "type": "mrkdwn", + "text": "*Summary:*\n{{call_summary}}" + } + } + ] +} +``` + +### Email (via SendGrid) + +``` +POST https://api.sendgrid.com/v3/mail/send + +Headers: + Authorization: Bearer {{sendgrid_api_key}} + Content-Type: application/json + +Body: +{ + "personalizations": [{ + "to": [{"email": "{{customer_email}}"}], + "dynamic_template_data": { + "customer_name": "{{customer_name}}", + "appointment_date": "{{appointment_date}}", + "appointment_time": "{{appointment_time}}" + } + }], + "from": {"email": "noreply@yourcompany.com"}, + "template_id": "{{sendgrid_template_id}}" +} +``` + +--- + +## Custom Webhooks + +For any system with an API: + +``` +POST https://your-system.com/api/webhook + +Headers: + Authorization: Bearer {{your_api_key}} + Content-Type: application/json + X-Webhook-Source: atoms-voice-agent + +Body: +{ + "event": "call_completed", + "call_id": "{{call_id}}", + "caller_phone": "{{caller_phone}}", + "customer_name": "{{customer_name}}", + "duration": {{call_duration}}, + "outcome": "{{disposition}}", + "summary": "{{call_summary}}", + "timestamp": "{{call_start_time}}", + "agent": "{{agent_name}}" +} +``` + +--- + +## Next Steps + + + + Set up pre-built integrations + + + Fix common API issues + + diff --git 
a/fern/products/atoms/pages/platform/features/api-calls/convo-flow.mdx b/fern/products/atoms/pages/platform/features/api-calls/convo-flow.mdx new file mode 100644 index 0000000..de5ee3f --- /dev/null +++ b/fern/products/atoms/pages/platform/features/api-calls/convo-flow.mdx @@ -0,0 +1,290 @@ +--- +title: "API Calls in Convo Flow" +sidebarTitle: "Convo Flow" +description: "Use API Call nodes to integrate external systems into your workflow." +--- + +Convo Flow agents use **API Call Nodes** to make requests at specific points in your workflow. This gives you precise control over when and how your agent interacts with external systems. + +--- + +## API Call Node Types + +| Node Type | When It Runs | Use Case | +|-----------|--------------|----------| +| **Pre-Call API** | Before call connects | Load customer context | +| **API Call Node** | When flow reaches it | Real-time data, booking | +| **Post-Call API** | After call ends | Logging, follow-ups | + +--- + +## Adding an API Call Node + + + ### Drag the Node + + From the left panel, drag **API Call** onto your canvas. 
+ + ### Position in Flow + + Connect it where you need the API call to happen: + + ``` + Greeting → Collect Info → [API Call] → Confirm Result + ``` + + ### Configure the Request + + Click the node and configure in the right panel: + + | Field | Description | + |-------|-------------| + | **Name** | Descriptive label (e.g., "Check Availability") | + | **URL** | API endpoint | + | **Method** | GET, POST, PUT, DELETE | + | **Headers** | Authentication, content-type | + | **Body** | Request data (for POST/PUT) | + + ### Map Response Variables + + Define which response fields become variables: + + | Response Path | Variable | + |---------------|----------| + | `$.available_slots` | `available_times` | + | `$.next_available` | `suggested_time` | + + ### Configure Branches + + Create branches for different outcomes: + + | Condition | Next Node | + |-----------|-----------| + | API success + slots available | → Offer Times | + | API success + no slots | → Offer Waitlist | + | API failure | → Apologize & Transfer | + + +--- + +## Pre-Call API Node + +Runs before your first greeting — perfect for loading context. + +### Setup + +1. Find **Pre-Call API** in the nodes panel +2. It automatically connects to the start of your flow +3. Configure like a regular API call + +### Example: Customer Lookup + +``` +GET https://crm.example.com/contacts?phone={{caller_phone}} + +Response: +{ + "name": "Sarah Johnson", + "vip": true, + "last_interaction": "2024-01-15" +} + +Variables: +{{customer_name}} = "Sarah Johnson" +{{is_vip}} = true +{{last_contact}} = "2024-01-15" +``` + +### Using Pre-Call Data + +In your **Greeting Node**: + +``` +"Hi {{customer_name}}! Thanks for calling. How can I help you today?" +``` + +If VIP: +``` +"Hi {{customer_name}}! As one of our valued customers, I'll make sure +we take great care of you today. What can I help with?" 
+``` + +--- + +## Mid-Flow API Calls + +### Example: Checking Appointment Availability + +**Flow:** +``` +[Greeting] → [Collect Date] → [API: Check Availability] → [Offer Times] +``` + +**API Call Node Configuration:** + +``` +Name: Check Availability +URL: https://api.calendar.com/availability +Method: GET +Query Parameters: + - date: {{requested_date}} + - provider: {{provider_id}} +``` + +**Response Mapping:** + +```json +{ + "available": true, + "slots": ["9:00 AM", "2:00 PM", "4:30 PM"], + "provider": "Dr. Johnson" +} +``` + +| Path | Variable | +|------|----------| +| `$.available` | `has_availability` | +| `$.slots` | `available_times` | +| `$.provider` | `provider_name` | + +**Branching:** + +| Condition | Route | +|-----------|-------| +| `has_availability == true` | → Offer Times Node | +| `has_availability == false` | → Suggest Alternatives Node | + +--- + +## Post-Call API Node + +Runs after the call ends — for logging and triggering follow-ups. + +### Setup + +1. Add **Post-Call API** node +2. It automatically runs after any End Call node + +### Example: CRM Logging + +``` +POST https://crm.example.com/activities + +Body: +{ + "contact_phone": "{{caller_phone}}", + "contact_name": "{{customer_name}}", + "call_type": "{{call_direction}}", + "duration": {{call_duration}}, + "outcome": "{{disposition}}", + "notes": "{{call_summary}}", + "agent_id": "{{agent_id}}", + "timestamp": "{{call_start_time}}" +} +``` + +--- + +## Handling API Failures + +Always plan for API failures: + +### Timeout Handling + +``` +[API Call: Check Availability] + ├── Success → [Offer Times] + ├── Timeout → [Apologize Node] + └── Error → [Transfer to Human] +``` + +### Apologize Node Script + +``` +"I'm having trouble checking our system right now. +Let me connect you with someone who can help directly." 
+``` + +### Retry Logic + +For critical APIs, you can configure: +- **Retry count:** 1-3 attempts +- **Retry delay:** 1-5 seconds +- **Fallback:** What to do after all retries fail + +--- + +## Best Practices + + + + API calls can cause brief pauses. Minimize impact: + + - Use Pre-Call API for initial context (caller doesn't notice) + - Keep mid-call APIs fast (< 5 seconds) + - Fill wait time: "Let me check that for you..." + + + + Before making an API call, ensure you have required data: + + ``` + Before API Call Node: + - Collect all required inputs in previous nodes + - Validate format (dates, phone numbers, etc.) + - Handle missing data gracefully + ``` + + + + Every API call should have branches for: + - **Success** — Expected result + - **Empty result** — Valid but no data (e.g., no appointments) + - **Error** — API failed + + Never assume success. + + + +--- + +## Example: Complete Booking Flow + +``` +[Pre-Call API: Get Customer] + ↓ + [Greeting Node] + "Hi {{customer_name}}!" + ↓ + [Collect Date Node] + "What date works for you?" + ↓ +[API: Check Availability] + ↓ ↓ + Success No Slots + ↓ ↓ + [Offer Times] [Suggest Alternatives] + ↓ +[API: Book Appointment] + ↓ ↓ + Success Failed + ↓ ↓ +[Confirm Booking] [Apologize & Transfer] + ↓ + [End Call] + ↓ +[Post-Call API: Log to CRM] +``` + +--- + +## Next Steps + + + + Pre-built patterns for popular services + + + Deep dive into all node types + + diff --git a/fern/products/atoms/pages/platform/features/api-calls/creating.mdx b/fern/products/atoms/pages/platform/features/api-calls/creating.mdx new file mode 100644 index 0000000..1fa5be5 --- /dev/null +++ b/fern/products/atoms/pages/platform/features/api-calls/creating.mdx @@ -0,0 +1,301 @@ +--- +title: "Creating API Calls" +sidebarTitle: "Creating" +description: "Step-by-step guide to setting up API calls for your agent." +--- + +This guide walks you through creating and configuring API calls for your Atoms agent. 
+ +--- + +## Before You Start + +You'll need: +- **API endpoint URL** — Where to send requests +- **Authentication** — API key, OAuth token, or other credentials +- **Request format** — What data to send +- **Response format** — What data you'll receive + + +If you're not sure about your API details, check your service's API documentation or ask your development team. + + +--- + +## Creating a Pre-Call API + +Pre-Call APIs run **before** the conversation starts — perfect for loading customer context. + + + ### Open Agent Settings + + Navigate to your agent and find **API Calls** in the settings panel. + + ### Click "Add Pre-Call API" + + This creates a new API call that runs when the call connects. + + ### Configure the Request + + **Basic Settings:** + + | Field | Description | Example | + |-------|-------------|---------| + | **Name** | Descriptive label | "Fetch Customer Info" | + | **URL** | API endpoint | `https://api.example.com/customers` | + | **Method** | HTTP method | GET, POST, PUT, DELETE | + + ### Add Query Parameters (GET requests) + + For GET requests, add parameters: + + | Parameter | Value | + |-----------|-------| + | `phone` | `{{caller_phone}}` | + + Results in: `https://api.example.com/customers?phone=+14155551234` + + ### Add Headers + + Most APIs require authentication: + + | Header | Value | + |--------|-------| + | `Authorization` | `Bearer your-api-key` | + | `Content-Type` | `application/json` | + + ### Add Request Body (POST/PUT) + + For POST requests, add a JSON body: + + ```json + { + "phone": "{{caller_phone}}", + "source": "voice_agent" + } + ``` + + ### Map Response Variables + + Tell Atoms how to extract data from the response: + + | JSON Path | Variable Name | + |-----------|---------------| + | `$.name` | `customer_name` | + | `$.account.balance` | `account_balance` | + | `$.status` | `account_status` | + + ### Save and Test + + Click **Save**, then test with a call to verify it works. 
+ + +--- + +## Creating a Post-Call API + +Post-Call APIs run **after** the call ends — perfect for logging and follow-ups. + + + ### Add Post-Call API + + In your agent settings, click **"Add Post-Call API"**. + + ### Configure the Request + + Typically POST requests to log data: + + ```json + { + "call_id": "{{call_id}}", + "caller": "{{caller_phone}}", + "customer_name": "{{customer_name}}", + "duration": "{{call_duration}}", + "outcome": "{{disposition}}", + "summary": "{{call_summary}}" + } + ``` + + ### Set Error Handling + + Choose what happens if the API fails: + - **Ignore** — Continue normally + - **Retry** — Try again (1-3 times) + - **Alert** — Notify your team + + +--- + +## Authentication Methods + + + + Most common for simple integrations. + + **In Header:** + ``` + Authorization: Bearer sk_live_your_api_key + ``` + + **Or as Query Parameter:** + ``` + ?api_key=sk_live_your_api_key + ``` + + + Username and password encoded. + + ``` + Authorization: Basic base64(username:password) + ``` + + Atoms can encode this for you — just enter username and password. + + + For services like Salesforce, Google, etc. + + 1. Set up OAuth connection in Integrations + 2. Reference the integration in your API call + 3. 
Atoms handles token refresh automatically + + + +--- + +## Response Mapping + +### Simple Mapping + +For flat JSON responses: + +```json +{ + "name": "Sarah Johnson", + "email": "sarah@example.com", + "balance": 150.00 +} +``` + +| JSON Path | Variable | +|-----------|----------| +| `$.name` | `customer_name` | +| `$.email` | `customer_email` | +| `$.balance` | `account_balance` | + +### Nested Mapping + +For nested responses: + +```json +{ + "customer": { + "name": "Sarah Johnson", + "account": { + "balance": 150.00, + "status": "active" + } + } +} +``` + +| JSON Path | Variable | +|-----------|----------| +| `$.customer.name` | `customer_name` | +| `$.customer.account.balance` | `account_balance` | +| `$.customer.account.status` | `account_status` | + +### Array Mapping + +For array responses: + +```json +{ + "appointments": [ + {"date": "2024-01-21", "time": "2:00 PM"}, + {"date": "2024-01-22", "time": "10:00 AM"} + ] +} +``` + +| JSON Path | Variable | +|-----------|----------| +| `$.appointments[0].date` | `next_appointment_date` | +| `$.appointments[0].time` | `next_appointment_time` | + +--- + +## Error Handling + + + + **Default timeout:** 10 seconds + + **If timeout occurs:** + - Pre-Call: Agent starts without the data + - In-Call: Agent acknowledges delay or skips + - Post-Call: Queued for retry + + **Recommendation:** Design prompts to handle missing data gracefully. + + + + **Causes:** + - Expired API key + - Invalid credentials + - IP not whitelisted + + **Solution:** Check and update credentials in agent settings. + + + + **Causes:** + - Wrong endpoint URL + - Customer doesn't exist in system + + **Solution:** Verify URL; handle "not found" as valid response. + + + + **Causes:** + - External API is down + - Bug in external system + + **Solution:** Implement retry logic; have fallback behavior. + + + +--- + +## Testing Your API Call + +Before going live: + + + ### Use the Test Panel + + In API settings, click **Test** to send a sample request. 
+ + ### Check the Response + + Verify you receive expected data and variable mapping works. + + ### Test with Real Call + + Make a test call and check: + - Variables populated correctly + - Agent uses data in conversation + - Call logs show API activity + + +--- + +## Next Steps + + + + Configure APIs for Single Prompt agents + + + Use API Call nodes in workflows + + diff --git a/fern/products/atoms/pages/platform/features/api-calls/single-prompt.mdx b/fern/products/atoms/pages/platform/features/api-calls/single-prompt.mdx new file mode 100644 index 0000000..266c192 --- /dev/null +++ b/fern/products/atoms/pages/platform/features/api-calls/single-prompt.mdx @@ -0,0 +1,253 @@ +--- +title: "API Calls in Single Prompt" +sidebarTitle: "Single Prompt" +description: "Set up API integrations for Single Prompt agents." +--- + +Single Prompt agents can use API calls to fetch data and perform actions. Here's how to set them up effectively. + +--- + +## Available API Types + +| Type | When It Runs | Use Case | +|------|--------------|----------| +| **Pre-Call API** | Before greeting | Load customer context | +| **Tool Calls** | Agent decides during call | Check availability, book appointments | +| **Post-Call API** | After call ends | Log to CRM, trigger follow-ups | + +--- + +## Pre-Call API Setup + +Pre-Call APIs load data before your agent says hello. + + + ### Access API Settings + + 1. Open your Single Prompt agent + 2. In the right panel, find **API Calls** + 3. 
Click **Pre-Call API** + + ### Configure the Lookup + + **Example: Customer lookup by phone** + + ``` + URL: https://api.yourcrm.com/customers + Method: GET + Query: phone={{caller_phone}} + ``` + + ### Map Response to Variables + + ```json + Response: { + "name": "Sarah Johnson", + "tier": "gold", + "balance": 150.00 + } + ``` + + | Response Path | Variable | + |---------------|----------| + | `$.name` | `customer_name` | + | `$.tier` | `customer_tier` | + | `$.balance` | `account_balance` | + + ### Use in Your Prompt + + ``` + CUSTOMER CONTEXT: + - Name: {{customer_name}} + - Tier: {{customer_tier}} + - Balance: ${{account_balance}} + + Greet them by name. If they're a gold tier customer, + thank them for their loyalty. + ``` + + +--- + +## Tool Calls (Mid-Conversation APIs) + +Tool calls let your agent make API requests during the conversation when it decides they're needed. + +### Defining Tools + +In your agent settings, define what tools are available: + +```json +{ + "name": "check_availability", + "description": "Check appointment availability for a given date", + "parameters": { + "date": { + "type": "string", + "description": "Date to check (YYYY-MM-DD)" + } + }, + "endpoint": "https://api.calendar.com/availability", + "method": "GET" +} +``` + +### Instructing the Agent + +In your prompt, tell the agent when to use tools: + +``` +AVAILABLE TOOLS: +- check_availability: Use when customer asks about appointment times +- book_appointment: Use when customer confirms they want to book +- cancel_appointment: Use when customer wants to cancel + +When the customer asks about availability: +1. Ask what date they're interested in +2. Use check_availability to get open slots +3. Present 2-3 options to the customer +``` + +### How It Works + +1. Customer asks: "Do you have anything available next Tuesday?" +2. Agent extracts the date and calls `check_availability` +3. API returns available slots +4. Agent responds: "I have 9 AM, 2 PM, and 4:30 PM available. 
Which works for you?" + +--- + +## Post-Call API Setup + +Post-Call APIs log data after the conversation ends. + + + ### Add Post-Call API + + In API settings, click **Add Post-Call API**. + + ### Configure the Request + + ``` + URL: https://api.yourcrm.com/activities + Method: POST + Headers: + Content-Type: application/json + Authorization: Bearer {{api_key}} + ``` + + ### Define the Payload + + ```json + { + "contact_phone": "{{caller_phone}}", + "contact_name": "{{customer_name}}", + "call_duration": {{call_duration}}, + "call_id": "{{call_id}}", + "outcome": "{{disposition}}", + "notes": "{{call_summary}}", + "agent": "{{agent_name}}" + } + ``` + + +--- + +## Example: Full Integration + +Here's a complete example for a support agent with CRM integration: + +### Pre-Call API (Customer Lookup) + +``` +GET https://crm.example.com/api/contacts?phone={{caller_phone}} + +Response → Variables: +- customer_name +- customer_email +- account_status +- open_tickets +``` + +### Prompt with Context + +``` +You are a support agent for TechCorp. + +CUSTOMER INFO: +- Name: {{customer_name}} +- Email: {{customer_email}} +- Account: {{account_status}} +- Open Tickets: {{open_tickets}} + +TOOLS AVAILABLE: +- create_ticket: Create a new support ticket +- update_ticket: Update an existing ticket +- check_order_status: Look up order information + +INSTRUCTIONS: +1. Greet {{customer_name}} by name +2. If they have open tickets ({{open_tickets}} > 0), ask if they're + calling about an existing issue +3. For new issues, gather details and create_ticket +4. Always confirm actions before taking them +``` + +### Post-Call API (Activity Log) + +``` +POST https://crm.example.com/api/activities + +{ + "contact_phone": "{{caller_phone}}", + "type": "support_call", + "duration": {{call_duration}}, + "summary": "{{call_summary}}", + "outcome": "{{disposition}}" +} +``` + +--- + +## Best Practices + + + + Not every caller will be in your system. 
Prepare for it: + + ``` + If customer_name is empty (caller not found in CRM): + - Use "there" as a greeting ("Hi there!") + - Ask for their name early + - Offer to create an account + ``` + + + + - **Pre-Call:** 5-10 seconds max (caller is waiting) + - **Tool Calls:** 10-15 seconds (agent can fill time) + - **Post-Call:** 30+ seconds OK (caller is gone) + + + + Use Post-Call API to log: + - Call outcome + - Actions taken + - Customer feedback + - Any issues for review + + + +--- + +## Next Steps + + + + Pre-built patterns for popular services + + + Fix common API issues + + diff --git a/fern/products/atoms/pages/platform/features/api-calls/troubleshooting.mdx b/fern/products/atoms/pages/platform/features/api-calls/troubleshooting.mdx new file mode 100644 index 0000000..652e016 --- /dev/null +++ b/fern/products/atoms/pages/platform/features/api-calls/troubleshooting.mdx @@ -0,0 +1,268 @@ +--- +title: "Troubleshooting API Calls" +sidebarTitle: "Troubleshooting" +description: "Fix common API integration issues." +--- + +API calls not working? Here's how to diagnose and fix the most common issues. + +--- + +## Quick Diagnostic + + + ### Check the Basics + + - [ ] Is the URL correct? + - [ ] Is authentication configured? + - [ ] Are required headers set? + - [ ] Is the request body valid JSON? + + ### Test Outside Atoms + + Use a tool like Postman or curl to test your API: + ```bash + curl -X GET "https://api.example.com/endpoint" \ + -H "Authorization: Bearer your_key" + ``` + + If it works there but not in Atoms, it's likely a configuration issue. + + ### Check Call Logs + + In your agent's **Conversation Logs**, look for API activity: + - Request sent + - Response received + - Errors encountered + + +--- + +## Common Errors + + + + **What it means:** Authentication failed. + + **Common causes:** + - Expired API key + - Wrong API key + - Missing `Authorization` header + - Incorrect auth format + + **Solutions:** + 1. 
Verify your API key is correct and not expired + 2. Check the header format: + - `Authorization: Bearer sk_live_...` (most APIs) + - `Authorization: Basic base64(user:pass)` (basic auth) + - `X-API-Key: your_key` (some APIs) + 3. Regenerate the API key if needed + + + + **What it means:** Authenticated, but not authorized for this action. + + **Common causes:** + - API key lacks required permissions + - IP address not whitelisted + - Rate limit exceeded + - Resource belongs to different account + + **Solutions:** + 1. Check API key permissions/scopes + 2. Whitelist Atoms IP addresses (if required) + 3. Wait if rate limited; consider reducing call frequency + + + + **What it means:** Endpoint doesn't exist or resource not found. + + **Common causes:** + - Typo in URL + - Wrong API version + - Resource doesn't exist (e.g., customer not in system) + + **Solutions:** + 1. Double-check the URL against API documentation + 2. Verify API version (e.g., `/v1/` vs `/v2/`) + 3. For lookups, handle "not found" as a valid response + + + + **What it means:** Request took too long. + + **Common causes:** + - Slow external API + - Network issues + - Complex query taking too long + + **Solutions:** + 1. Simplify the request if possible + 2. Increase timeout setting (if available) + 3. Implement retry logic + 4. Design fallback behavior for timeouts + + + + **What it means:** External API had an error. + + **Common causes:** + - Bug in the external system + - Invalid request data + - External service is down + + **Solutions:** + 1. Verify request body is valid JSON + 2. Check external service status page + 3. Try again later + 4. 
Implement retry logic with backoff + + + +--- + +## Request Issues + +### Invalid JSON Body + +**Symptom:** 400 Bad Request + +**Problem:** +```json +{ + "name": {{customer_name}}, // Missing quotes around variable + "phone": "{{caller_phone}}" +} +``` + +**Solution:** +```json +{ + "name": "{{customer_name}}", // Variables need quotes for strings + "phone": "{{caller_phone}}" +} +``` + +### Missing Required Fields + +**Symptom:** 400 Bad Request with message about missing fields + +**Solution:** Check API documentation for required fields and ensure all are included. + +### Wrong Content-Type + +**Symptom:** 415 Unsupported Media Type or garbled response + +**Solution:** Set correct Content-Type header: +- JSON APIs: `Content-Type: application/json` +- Form data: `Content-Type: application/x-www-form-urlencoded` + +--- + +## Response Issues + +### Variables Not Populated + +**Symptom:** Variables show as empty or literal `{{variable_name}}` + +**Causes:** +1. Response mapping path is wrong +2. API returned different structure +3. Field is null in response + +**Debug:** +1. Check actual response in logs +2. Verify JSON path matches response structure +3. Example: If response is `{"data": {"name": "Sarah"}}`, path is `$.data.name`, not `$.name` + +### Wrong Data Type + +**Symptom:** Numbers show as strings, or vice versa + +**Solution:** Most variables are strings. If you need numbers: +- For display: String is fine +- For calculations: May need backend processing + +--- + +## Timing Issues + +### Pre-Call API Taking Too Long + +**Symptom:** Caller waits several seconds before greeting + +**Solutions:** +1. Optimize the external API (faster queries) +2. Reduce data fetched (only get what you need) +3. Set aggressive timeout (5 seconds max) +4. Have fallback greeting for timeout cases + +### Mid-Call API Causing Awkward Pause + +**Symptom:** Silence during API call + +**Solutions:** +1. Add filler phrase: "Let me check that for you..." +2. 
Design flow to keep talking while API runs +3. Optimize API speed +4. Consider Pre-Call API for predictable data + +--- + +## Testing API Calls + +### Use the Test Panel + +1. Go to your API call configuration +2. Click **Test** +3. Enter test values for variables +4. Click **Send Request** +5. Verify response and mapping + +### Test with Real Call + +1. Make a test call to your agent +2. Trigger the API call scenario +3. Check **Conversation Logs** for: + - API request details + - Response received + - Variables populated + +### Check External Logs + +Your external system likely has logs too: +- API gateway logs +- Application logs +- Error tracking (Sentry, etc.) + +--- + +## Still Having Issues? + + + ### Gather Information + + Before contacting support, collect: + - API endpoint URL + - Request method and headers + - Request body (sanitize secrets) + - Error message + - Call ID where issue occurred + + ### Check External Service + + - Is the external API working? + - Check their status page + - Test the API directly + + ### Contact Support + + If you've tried everything: + - Provide the information above + - Include screenshots of configuration + - Share relevant call logs + + + + Contact the Atoms support team + diff --git a/fern/products/atoms/pages/platform/features/api-calls/what-are-api-calls.mdx b/fern/products/atoms/pages/platform/features/api-calls/what-are-api-calls.mdx new file mode 100644 index 0000000..18f6ff9 --- /dev/null +++ b/fern/products/atoms/pages/platform/features/api-calls/what-are-api-calls.mdx @@ -0,0 +1,202 @@ +--- +title: "API Calls" +sidebarTitle: "What Are API Calls?" +description: "Connect your agent to external systems — fetch data, trigger actions, and integrate with your tools." +--- + +**API Calls** let your agent communicate with external systems during conversations. Fetch customer data from your CRM, book appointments in your calendar, process payments, or trigger any action in your backend. + +--- + +## Why Use API Calls? 
+ + + + Your agent is limited to: + - Information in its prompt + - Data in the knowledge base + - What the caller tells it + + It can't check real-time data or take actions in your systems. + + + Your agent can: + - Fetch real-time customer data + - Check inventory or availability + - Book appointments + - Create support tickets + - Process orders + - Update CRM records + + ...and anything else your APIs support. + + + +--- + +## Types of API Calls + +Atoms supports several types of API calls, each for different points in the conversation: + +| Type | When It Runs | Best For | +|------|--------------|----------| +| **Pre-Call API** | Before the call starts | Loading customer context | +| **In-Call API** | During the conversation | Checking availability, booking | +| **Post-Call API** | After the call ends | Logging, follow-up triggers | + +--- + +## How API Calls Work + + + ### Configure the Request + + You define: + - **URL** — Where to send the request + - **Method** — GET, POST, PUT, DELETE + - **Headers** — Authentication, content type + - **Body** — Data to send (for POST/PUT) + + ### Include Variables + + Use variables in your request: + ``` + GET https://api.yourcrm.com/customers?phone={{caller_phone}} + ``` + + ### Map the Response + + Extract data from the API response into variables: + ```json + Response: { "name": "Sarah", "balance": "$150" } + + Maps to: + {{customer_name}} = "Sarah" + {{account_balance}} = "$150" + ``` + + ### Use in Conversation + + Your agent can now use this data: + ``` + "Hi {{customer_name}}, I see your current balance is {{account_balance}}." + ``` + + +--- + +## Simple Example: Customer Lookup + +**Scenario:** When a customer calls, look up their info based on phone number. 
+ +**Pre-Call API Configuration:** + +``` +URL: https://api.yourcrm.com/customers +Method: GET +Query Parameters: phone={{caller_phone}} +``` + +**Response:** +```json +{ + "name": "Sarah Johnson", + "account_number": "ACC-12345", + "status": "active", + "balance": 150.00 +} +``` + +**Variable Mapping:** +| Response Field | Variable | +|----------------|----------| +| `name` | `{{customer_name}}` | +| `account_number` | `{{account_number}}` | +| `status` | `{{account_status}}` | +| `balance` | `{{account_balance}}` | + +**Agent prompt uses these:** +``` +Customer: {{customer_name}} (Account: {{account_number}}) +Status: {{account_status}} +Balance: ${{account_balance}} + +Greet them by name and ask how you can help. +``` + +--- + +## What You Can Build + + + + - Customer account information + - Order status + - Appointment details + - Inventory levels + - Pricing and availability + + + + - Check calendar availability + - Book appointments + - Reschedule existing bookings + - Cancel reservations + + + + - Create new leads + - Update contact records + - Log call activities + - Trigger follow-up tasks + + + + - Check account balance + - Process payments + - Send payment links + - Update billing info + + + + - Create support tickets + - Check ticket status + - Update existing tickets + - Escalate to human agents + + + +--- + +## Single Prompt vs. 
Convo Flow + +API calls work differently depending on your agent type: + +| Aspect | Single Prompt | Convo Flow | +|--------|---------------|------------| +| **Pre-Call API** | ✓ Available | ✓ Available | +| **In-Call API** | Tool-based (agent decides when) | Node-based (you control when) | +| **Post-Call API** | ✓ Available | ✓ Available | +| **Control** | AI decides when to call | You design exact flow | + + + + How to set up API calls for Single Prompt agents + + + Using API Call nodes in workflows + + + +--- + +## Next Steps + + + + Step-by-step setup guide + + + Pre-built patterns for popular services + + diff --git a/fern/products/atoms/pages/platform/features/integrations.mdx b/fern/products/atoms/pages/platform/features/integrations.mdx new file mode 100644 index 0000000..60ebd3c --- /dev/null +++ b/fern/products/atoms/pages/platform/features/integrations.mdx @@ -0,0 +1,60 @@ +--- +title: "Integrations" +sidebarTitle: "Integrations" +description: "Connect your agent to third-party services." +--- + +Integrations let your agent connect directly to external services like CRMs — without custom API configuration. Click, authenticate, and you're connected. + +**Location:** Left Sidebar → Integrations + + + Integrations page + + +--- + +## Available Integrations + +We support integrations with Salesforce, Zendesk, Zoho, Fiserv, TSYS, and more. + +Some integrations (like Salesforce) are available to install directly. Others show **Talk to Sales** — reach out and we'll enable them for your account. + +--- + +## Connecting Salesforce + + + + Find Salesforce on the integrations page and click **Install**. + + + + Enter your Salesforce subdomain (the part before `.my.salesforce.com`). + + + Salesforce subdomain modal + + + + + Follow the prompts to log in and authorize the connection. + + + +Once connected, your agent can sync with Salesforce automatically. + +--- + +## Need Something Else? 
+ +For services not listed, use [API Calls](/atoms/atoms-platform/single-prompt-agents/configuration-panel/api-calls) to connect to any REST API. + + + + Connect to any external service + + + Send data when events happen + + diff --git a/fern/products/atoms/pages/platform/features/integrations/calendar.mdx b/fern/products/atoms/pages/platform/features/integrations/calendar.mdx new file mode 100644 index 0000000..cafc81e --- /dev/null +++ b/fern/products/atoms/pages/platform/features/integrations/calendar.mdx @@ -0,0 +1,251 @@ +--- +title: "Calendar Integration" +sidebarTitle: "Calendar" +description: "Connect Google Calendar to enable appointment booking through your agent." +--- + +Calendar integration lets your agent check availability and book appointments in real-time. No more back-and-forth — callers can schedule right on the call. + +--- + +## What You Can Do + +| Capability | Description | +|------------|-------------| +| **Check Availability** | See open slots in real-time | +| **Book Appointments** | Create events directly from calls | +| **Reschedule** | Move existing appointments | +| **Cancel** | Remove appointments when requested | + +--- + +## Supported Calendars + +| Calendar | Status | +|----------|--------| +| **Google Calendar** | ✓ Native integration | +| **Microsoft Outlook** | Via API calls | +| **Calendly** | Via API calls | +| **Cal.com** | Via API calls | + +--- + +## Connecting Google Calendar + + + ### Navigate to Integrations + + Go to **Settings** → **Integrations** → **Google Calendar**. + + ### Click "Connect Google" + + You'll be redirected to Google sign-in. + + ### Select Account and Calendars + + Choose which Google account and which calendars to access. + + ### Grant Permissions + + Atoms needs permission to: + - See your calendars + - View events + - Create events + + ### Verify Connection + + You should see your calendars listed in the integration settings. 
+ + +--- + +## Configuration + +### Select Booking Calendar + +Choose which calendar to book appointments on: + +| Setting | Description | +|---------|-------------| +| **Primary Calendar** | Where to create new events | +| **Check Calendars** | Which calendars to check for conflicts | +| **Buffer Time** | Minutes between appointments | + +### Business Hours + +Define when appointments can be booked: + +| Day | Hours | +|-----|-------| +| Monday - Friday | 9:00 AM - 5:00 PM | +| Saturday | 10:00 AM - 2:00 PM | +| Sunday | Closed | + +### Appointment Types + +Configure different appointment durations: + +| Type | Duration | Description | +|------|----------|-------------| +| Consultation | 30 min | Initial meeting | +| Follow-up | 15 min | Quick check-in | +| Deep Dive | 60 min | Extended session | + +--- + +## Using Calendar in Agents + +### Single Prompt Agent + +Enable calendar tools in your agent: + +``` +CALENDAR INTEGRATION: +Your agent can check availability and book appointments. + +When the customer wants to schedule: +1. Ask what type of appointment they need +2. Ask for their preferred date +3. Check availability and offer 2-3 options +4. Confirm their selection +5. Book the appointment +6. Confirm details and send to their email + +Available appointment types: +- Consultation (30 min) +- Follow-up (15 min) +``` + +### Convo Flow Agent + +Use API nodes to interact with calendar: + +``` +[Collect Preferred Date] + ↓ +[API: Check Availability] + ↓ + ┌────┴────┐ +Slots Found No Slots + ↓ ↓ +[Offer Options] [Suggest Alternatives] + ↓ +[Confirm Selection] + ↓ +[API: Book Appointment] + ↓ +[Confirm Booking] +``` + +--- + +## Example Conversation + +``` +Customer: "I'd like to schedule an appointment." + +Agent: "I'd be happy to help you schedule. What day works best for you?" + +Customer: "How about next Tuesday?" + +Agent: "Let me check Tuesday for you... I have openings at + 9:00 AM, 11:30 AM, and 2:00 PM. Which works best?" + +Customer: "2:00 PM works." 
+ +Agent: "Great! I've booked you for Tuesday, January 23rd at 2:00 PM. + You'll receive a calendar invite at the email on file. + Is there anything else I can help with?" +``` + +--- + +## Best Practices + + + + Don't overwhelm with every available slot: + + **Better:** "I have 9 AM, 2 PM, and 4:30 PM available." + + **Worse:** "I have 9, 9:30, 10, 10:30, 11, 11:30..." (too many) + + + + Before booking, confirm: + - Date and time + - Customer name + - Contact info (email/phone) + - Appointment type + + "Just to confirm: That's Tuesday the 23rd at 2 PM for a consultation. Is that correct?" + + + + Always clarify time zone: + + "That's 2 PM Eastern Time. Does that work for you?" + + Or detect from caller's phone number when possible. + + + + After booking: + - Send calendar invite + - Include video link if virtual + - Add any preparation instructions + + + +--- + +## Troubleshooting + + + + **Possible causes:** + - Business hours not configured + - Checking wrong calendar + - All slots booked + + **Solutions:** + 1. Verify business hours settings + 2. Check which calendars are being queried + 3. Expand date range + + + + **Possible causes:** + - Calendar permissions + - Wrong calendar selected + - API error + + **Solutions:** + 1. Reconnect Google Calendar + 2. Check primary calendar setting + 3. Verify API logs for errors + + + + **Possible causes:** + - Not checking all calendars + - Race condition (simultaneous bookings) + + **Solutions:** + 1. Include all relevant calendars in conflict check + 2. 
Atoms handles race conditions, but check settings + + + +--- + +## Next Steps + + + + Add customer context to bookings + + + Connect other calendar systems + + diff --git a/fern/products/atoms/pages/platform/features/integrations/convo-flow.mdx b/fern/products/atoms/pages/platform/features/integrations/convo-flow.mdx new file mode 100644 index 0000000..35a2c76 --- /dev/null +++ b/fern/products/atoms/pages/platform/features/integrations/convo-flow.mdx @@ -0,0 +1,278 @@ +--- +title: "Integrations with Convo Flow" +sidebarTitle: "Convo Flow" +description: "How to use integrations in Convo Flow agents with precise control." +--- + +Convo Flow agents give you precise control over when and how integrations are triggered. Use API nodes at specific points in your workflow for predictable, reliable integration behavior. + +--- + +## Integration Points + +``` +[Pre-Call API] → Load context before greeting + ↓ +[Greeting Node] → Use loaded context + ↓ +[API Node: Check Data] → Mid-conversation lookups + ↓ +[API Node: Take Action] → Book, create, update + ↓ +[End Call Node] + ↓ +[Post-Call API] → Log and sync +``` + +--- + +## Pre-Call Integration + +Load customer context before the conversation starts. + +### Setup + +1. Add **Pre-Call API Node** to your flow +2. It runs automatically when call connects +3. Variables are available in all subsequent nodes + +### Example: CRM Lookup + +**Configuration:** +``` +URL: https://crm.example.com/contacts?phone={{caller_phone}} +Method: GET +``` + +**Response Mapping:** +``` +$.name → customer_name +$.account_status → account_status +$.last_order → last_order_date +``` + +**Use in Greeting Node:** +``` +"Hi {{customer_name}}! Thanks for calling. How can I help you today?" +``` + +--- + +## Mid-Flow Integration Nodes + +Use API Call nodes at strategic points in your flow. 
+ +### Pattern: Check → Branch → Act + +``` +[Collect Information] + ↓ +[API: Check System] + ↓ ↓ + Found Not Found + ↓ ↓ +[Process] [Handle Missing] +``` + +### Example: Appointment Booking Flow + +``` +[Ask for Date] +"What day works best for you?" + ↓ +[Capture Date] → {{requested_date}} + ↓ +[API: Check Calendar Availability] +GET /availability?date={{requested_date}} + ↓ + ┌──┴──┐ + Slots No Slots + ↓ ↓ +[Offer Options] [Suggest Alternatives] +"I have 9 AM and 2 PM "That day is fully booked. +available." How about Wednesday?" + ↓ +[Capture Selection] → {{selected_time}} + ↓ +[API: Book Appointment] +POST /appointments +{date: {{requested_date}}, time: {{selected_time}}} + ↓ + ┌──┴──┐ + Success Failed + ↓ ↓ +[Confirm Booking] [Apologize & Offer Help] +``` + +--- + +## Node Configuration + +### API Call Node Settings + +| Setting | Description | Example | +|---------|-------------|---------| +| **Name** | Descriptive label | "Check Calendar" | +| **URL** | Endpoint | `https://api.calendar.com/availability` | +| **Method** | HTTP verb | GET, POST, PUT, DELETE | +| **Headers** | Auth & content type | `Authorization: Bearer {{token}}` | +| **Query/Body** | Request data | `date={{requested_date}}` | + +### Response Handling + +Map response fields to variables: + +| Response Path | Variable | Example Value | +|---------------|----------|---------------| +| `$.available` | `has_availability` | `true` | +| `$.slots[0]` | `first_available` | `"9:00 AM"` | +| `$.slots` | `all_slots` | `["9:00 AM", "2:00 PM"]` | + +### Branching on Results + +Create branches for different outcomes: + +| Condition | Route To | +|-----------|----------| +| `has_availability == true` | Offer Options Node | +| `has_availability == false` | No Availability Node | +| `API Error` | Error Handler Node | + +--- + +## Post-Call Integration + +Log call data after the conversation ends. + +### Setup + +1. Add **Post-Call API Node** to your flow +2. It runs after any End Call node +3. 
Include all relevant call data + +### Example: CRM Activity Log + +``` +URL: https://crm.example.com/activities +Method: POST + +Body: +{ + "contact_phone": "{{caller_phone}}", + "contact_name": "{{customer_name}}", + "call_duration": {{call_duration}}, + "outcome": "{{disposition}}", + "summary": "{{call_summary}}", + "appointment_booked": "{{appointment_date}}", + "nodes_visited": "{{conversation_path}}" +} +``` + +--- + +## Integration Patterns + +### Pattern 1: Personalized Greeting + +``` +[Pre-Call: CRM Lookup] → {{customer_name}}, {{account_type}} + ↓ + ┌────┴────┐ + VIP Customer Standard + ↓ ↓ +[VIP Greeting] [Standard Greeting] +"Hi {{name}}, "Hi {{name}}, thanks +as a valued for calling. How +customer..." can I help?" +``` + +### Pattern 2: Data Collection → API Action + +``` +[Collect Name] + ↓ +[Collect Email] + ↓ +[Collect Issue] + ↓ +[API: Create Ticket] +POST /tickets +{name, email, issue} + ↓ +[Confirm Ticket Number] +"Your ticket number is {{ticket_id}}" +``` + +### Pattern 3: Lookup → Conditional Logic + +``` +[API: Check Order Status] +GET /orders?phone={{caller_phone}} + ↓ + ┌─────┼─────┐ +No Order Shipped Processing + ↓ ↓ ↓ +[No Order] [Track] [Status] +Found Shipment Update +``` + +--- + +## Best Practices + + + + Don't call APIs with missing data. Add validation: + + ``` + [Collect Date] + ↓ + [Validate Format] → If invalid → [Ask Again] + ↓ + [API: Check Availability] + ``` + + + + Every API node should handle failures: + + - **Success** → Continue flow + - **Not Found** → Handle gracefully + - **Error** → Apologize, offer alternative + + Never leave API nodes without error handling. + + + + Each API call adds latency. 
Optimize: + + - **Batch when possible** — One call for multiple data points + - **Pre-call for context** — Load once, use throughout + - **Cache where appropriate** — Don't re-fetch unchanged data + + + + If API calls take time, keep the caller engaged: + + ``` + [Say: "Let me check that for you..."] + ↓ + [API: Long Running Call] + ↓ + [Say: "I found it! Here's what I have..."] + ``` + + + +--- + +## Next Steps + + + + Deep dive into API nodes + + + All node types explained + + diff --git a/fern/products/atoms/pages/platform/features/integrations/crm/hubspot.mdx b/fern/products/atoms/pages/platform/features/integrations/crm/hubspot.mdx new file mode 100644 index 0000000..109e954 --- /dev/null +++ b/fern/products/atoms/pages/platform/features/integrations/crm/hubspot.mdx @@ -0,0 +1,235 @@ +--- +title: "HubSpot Integration" +sidebarTitle: "HubSpot" +description: "Connect Atoms to HubSpot for contact lookup and engagement tracking." +--- + +Integrate your Atoms agents with HubSpot to look up contacts, log engagements, and create deals automatically. + +--- + +## What You Can Do + +| Capability | Description | +|------------|-------------| +| **Contact Lookup** | Find contacts by phone number | +| **Engagement Logging** | Log calls as HubSpot engagements | +| **Contact Creation** | Create new contacts for unknown callers | +| **Deal Updates** | Update deal stages based on call outcomes | + +--- + +## Connecting HubSpot + + + ### Navigate to Integrations + + Go to **Settings** → **Integrations** → **HubSpot**. + + ### Click "Connect HubSpot" + + You'll be redirected to HubSpot login. + + ### Authorize Access + + Log in and select your HubSpot account. Grant access to: + - Contacts + - Deals + - Engagements + - Timeline events + + ### Verify Connection + + You should see "Connected" status with your HubSpot account info. 
+ + +--- + +## Configuration Options + +### Contact Lookup + +| Setting | Description | Default | +|---------|-------------|---------| +| **Search Property** | Which property to search | phone | +| **Properties to Fetch** | Contact data to retrieve | firstname, lastname, email, company | +| **Create if Not Found** | Auto-create new contacts | Optional | + +### Engagement Logging + +| Setting | Description | Options | +|---------|-------------|---------| +| **Log Calls** | Create call engagements | Yes/No | +| **Engagement Type** | Type of engagement | CALL | +| **Include Recording** | Attach recording link | If available | + +--- + +## Using HubSpot Data in Agents + +### Pre-Call Lookup + +When a call connects, Atoms searches HubSpot: + +``` +POST /crm/v3/objects/contacts/search + +{ + "filterGroups": [{ + "filters": [{ + "propertyName": "phone", + "operator": "EQ", + "value": "{{caller_phone}}" + }] + }], + "properties": ["firstname", "lastname", "email", "company", "lifecyclestage"] +} +``` + +**Variables populated:** +- `{{first_name}}` — First name +- `{{last_name}}` — Last name +- `{{customer_email}}` — Email address +- `{{company_name}}` — Associated company +- `{{lifecycle_stage}}` — Lead, MQL, Customer, etc. + +### In Your Prompt + +``` +CONTACT CONTEXT (from HubSpot): +- Name: {{first_name}} {{last_name}} +- Company: {{company_name}} +- Stage: {{lifecycle_stage}} + +Greet them by first name. If they're a "customer" stage, +thank them for being a customer. If "lead" or "mql", +focus on qualifying their needs. +``` + +--- + +## Post-Call Engagement Logging + +After each call, Atoms creates a Call engagement: + +```json +{ + "engagement": { + "type": "CALL", + "timestamp": 1705766400000 + }, + "associations": { + "contactIds": [12345] + }, + "metadata": { + "body": "Call Summary:\nCustomer inquired about pricing for Enterprise plan. 
Sent follow-up email with quote.\n\nOutcome: Interested - Follow up scheduled", + "durationMilliseconds": 225000, + "status": "COMPLETED", + "toNumber": "+14155551234", + "disposition": "Connected" + } +} +``` + +--- + +## Creating Contacts for New Callers + +Enable auto-creation for callers not in HubSpot: + + + ### Enable "Create if Not Found" + + In HubSpot integration settings, turn on contact creation. + + ### Configure Required Fields + + During the call, ensure you collect: + - Name (at minimum) + - Email (recommended) + - Any other required fields + + ### Set Lifecycle Stage + + Choose what stage new contacts start at: + - Subscriber + - Lead + - Or your custom stage + + +### In Your Prompt + +``` +If the caller is new (not found in HubSpot): +1. Collect their name: "May I have your name please?" +2. Collect their email: "And what's the best email to reach you?" +3. Ask how they found us (optional): "How did you hear about us?" + +Store this information for the CRM. +``` + +--- + +## Deal Integration + +Update deals based on call outcomes: + +| Call Outcome | Deal Action | +|--------------|-------------| +| Interested | Move to "Qualified" | +| Demo Scheduled | Move to "Demo Scheduled" | +| Not Interested | Move to "Closed Lost" | +| Follow-up Needed | Add note, keep stage | + +--- + +## Troubleshooting + + + + **Possible causes:** + - Phone number format differs + - Contact has phone in different property + - Contact doesn't exist + + **Solutions:** + 1. Check phone format (country codes, formatting) + 2. Search multiple phone properties + 3. Enable "create if not found" + + + + **Possible causes:** + - Contact wasn't matched + - API permissions issue + - Rate limit exceeded + + **Solutions:** + 1. Verify contact lookup succeeded + 2. Check HubSpot API permissions + 3. Review rate limits in HubSpot + + + + **Possible causes:** + - OAuth token expired + - HubSpot permissions changed + + **Solutions:** + 1. Reconnect the integration + 2. 
Re-authorize access + + + +--- + +## Next Steps + + + + Set up Salesforce instead + + + Add appointment booking + + diff --git a/fern/products/atoms/pages/platform/features/integrations/crm/salesforce.mdx b/fern/products/atoms/pages/platform/features/integrations/crm/salesforce.mdx new file mode 100644 index 0000000..e0571cd --- /dev/null +++ b/fern/products/atoms/pages/platform/features/integrations/crm/salesforce.mdx @@ -0,0 +1,208 @@ +--- +title: "Salesforce Integration" +sidebarTitle: "Salesforce" +description: "Connect Atoms to Salesforce for customer lookup and activity logging." +--- + +Integrate your Atoms agents with Salesforce to look up customer data, log call activities, and keep your CRM updated automatically. + +--- + +## What You Can Do + +| Capability | Description | +|------------|-------------| +| **Contact Lookup** | Find customers by phone, populate agent with context | +| **Activity Logging** | Automatically create Tasks for completed calls | +| **Lead Creation** | Create new leads for unknown callers | +| **Record Updates** | Update contact fields based on call outcomes | + +--- + +## Prerequisites + +Before connecting: +- Salesforce account with API access +- Admin permissions (or contact your Salesforce admin) +- API usage within your org limits + +--- + +## Connecting Salesforce + + + ### Navigate to Integrations + + Go to **Settings** → **Integrations** → **Salesforce**. + + ### Click "Connect Salesforce" + + You'll be redirected to Salesforce login. + + ### Authorize Access + + Log in and click **Allow** to grant Atoms access to: + - Read contacts and leads + - Create and update tasks + - Access custom objects (if configured) + + ### Verify Connection + + You should see "Connected" status with your Salesforce org name. 
+ + +--- + +## Configuration Options + +### Contact Lookup + +Configure how Atoms finds customers: + +| Setting | Description | Default | +|---------|-------------|---------| +| **Search Field** | Which field to match phone numbers | Phone, MobilePhone | +| **Object Type** | Contact, Lead, or Both | Contact | +| **Fields to Retrieve** | What data to fetch | Name, Email, Account | + +### Activity Logging + +Configure what gets logged after calls: + +| Setting | Description | Options | +|---------|-------------|---------| +| **Create Task** | Log calls as Tasks | Yes/No | +| **Task Subject** | Template for subject line | "Voice Call - {{disposition}}" | +| **Include Summary** | Add call summary to description | Yes/No | +| **Task Status** | Status of created task | Completed | + +--- + +## Using Salesforce Data in Agents + +### Pre-Call Lookup + +When a call connects, Atoms can automatically query Salesforce: + +``` +Query: SELECT Name, Email, Account.Name, Owner.Name + FROM Contact + WHERE Phone = '{{caller_phone}}' + OR MobilePhone = '{{caller_phone}}' +``` + +**Variables populated:** +- `{{customer_name}}` — Contact's full name +- `{{customer_email}}` — Contact's email +- `{{account_name}}` — Related account name +- `{{owner_name}}` — Contact owner (for transfers) + +### In Your Prompt + +``` +CUSTOMER CONTEXT (from Salesforce): +- Name: {{customer_name}} +- Company: {{account_name}} +- Account Owner: {{owner_name}} + +Greet them by name. If they ask to speak with their rep, +you can reference {{owner_name}}. +``` + +--- + +## Post-Call Activity Logging + +After each call, Atoms creates a Task in Salesforce: + +```json +{ + "Subject": "Voice Call - Resolved", + "WhoId": "003XXXXXXXXXXXXXXX", + "Description": "Call Summary:\nCustomer called about billing question. Explained invoice charges. 
Issue resolved.\n\nDuration: 3 minutes 45 seconds\nAgent: Support Bot", + "Status": "Completed", + "CallType": "Inbound", + "CallDurationInSeconds": 225 +} +``` + +### Customizing the Log + +You can customize what's included: + +| Field | Source | Example | +|-------|--------|---------| +| Subject | Template | "Voice Call - {{disposition}}" | +| Description | Call summary + metadata | Call details | +| WhoId | Matched contact ID | Contact record | +| CallDurationInSeconds | System variable | 225 | +| Custom fields | Your variables | As configured | + +--- + +## Advanced: Custom Objects + +If you use custom Salesforce objects, you can configure Atoms to: + +1. **Query custom objects** — Look up data from any object +2. **Create custom records** — Log to custom objects instead of Tasks +3. **Update custom fields** — Write call data to custom fields + + +Custom object configuration requires some Salesforce knowledge. Contact support if you need help. + + +--- + +## Troubleshooting + + + + **Possible causes:** + - Salesforce org requires IP whitelisting + - User lacks API permissions + - OAuth token expired + + **Solutions:** + 1. Check Salesforce IP restrictions + 2. Verify user has "API Enabled" permission + 3. Reconnect the integration + + + + **Possible causes:** + - Phone number format mismatch + - Contact doesn't exist + - Searching wrong object type + + **Solutions:** + 1. Ensure phone formats match (with/without country code) + 2. Verify contact exists in Salesforce + 3. Check object type setting (Contact vs Lead) + + + + **Possible causes:** + - Contact not found (can't link activity) + - Task creation disabled + - API error + + **Solutions:** + 1. Check that contact was found during lookup + 2. Verify activity logging is enabled + 3. 
Check Salesforce API limits + + + +--- + +## Next Steps + + + + Set up HubSpot instead + + + Advanced Salesforce queries + + diff --git a/fern/products/atoms/pages/platform/features/integrations/overview.mdx b/fern/products/atoms/pages/platform/features/integrations/overview.mdx new file mode 100644 index 0000000..2923c68 --- /dev/null +++ b/fern/products/atoms/pages/platform/features/integrations/overview.mdx @@ -0,0 +1,205 @@ +--- +title: "Integrations" +sidebarTitle: "Overview" +description: "Connect your agent to CRMs, calendars, and other business tools." +--- + +Atoms integrations let you connect your voice agents to the tools you already use. Pull customer data from your CRM, book appointments in your calendar, and sync call outcomes automatically. + +--- + +## Available Integrations + + + + Sync contacts, log activities, update records + + + Contact lookup, deal updates, activity tracking + + + Check availability, book appointments + + + Connect to any system with an API + + + +--- + +## Why Use Integrations? + + + + - Agent has no customer context + - Bookings need manual entry + - Call data stays in Atoms + - Teams work with incomplete info + + + - Agent greets callers by name + - Appointments sync automatically + - CRM updated after every call + - Complete picture across tools + + + +--- + +## Integration Types + +### Native Integrations + +Pre-built connections that work out of the box: +- **Salesforce** — Full CRM integration +- **HubSpot** — Contact and deal management +- **Google Calendar** — Scheduling integration + +### Custom Integrations + +Use API Calls for any system with an API: +- Proprietary CRMs +- Custom booking systems +- Internal tools +- Any REST API + +--- + +## Setting Up Integrations + + + ### Navigate to Integrations + + Go to **Settings** → **Integrations** in your dashboard. + + ### Choose Your Service + + Click on the service you want to connect. 
+ + ### Authenticate + + For native integrations: + - Click **Connect** + - Log in to the service + - Authorize Atoms access + + For custom integrations: + - Enter API credentials + - Configure endpoints + + ### Configure Data Mapping + + Tell Atoms which fields to sync: + - What customer data to fetch + - What call data to send back + - How to match records + + ### Test the Connection + + Make a test call to verify: + - Data flows correctly + - Variables populate + - Updates appear in your tools + + +--- + +## What You Can Do + + + + When a call comes in: + 1. Atoms checks caller ID against your CRM + 2. Retrieves customer name, history, account status + 3. Agent uses this context in the conversation + + *"Hi Sarah! I see you're a gold member. How can I help today?"* + + + + During the call: + 1. Agent asks for preferred date/time + 2. Atoms checks calendar availability + 3. Books the appointment in your system + 4. Sends confirmation + + *"I've booked you for Tuesday at 2pm with Dr. Johnson. You'll receive a confirmation email shortly."* + + + + After every call: + 1. Atoms sends call details to your CRM + 2. Creates activity/note on contact record + 3. Includes duration, outcome, summary + 4. Updates any changed fields + + + + For new callers: + 1. Agent collects contact information + 2. Atoms creates a new lead/contact + 3. Assigns to appropriate queue + 4. Triggers follow-up workflows + + + +--- + +## Using Integrations with Agents + +### Single Prompt Agents + +Integrations work via Pre-Call and Post-Call APIs: + +``` +[Call Connects] + ↓ +[Pre-Call: Fetch from CRM] → Populates {{customer_name}}, etc. 
+ ↓ +[Conversation with context] + ↓ +[Post-Call: Log to CRM] +``` + + + Using integrations with Single Prompt agents + + +### Convo Flow Agents + +Integrations can be triggered at specific nodes: + +``` +[Pre-Call: Load Customer] + ↓ +[Greeting Node] + ↓ +[API Node: Check Calendar] + ↓ +[API Node: Book Appointment] + ↓ +[Post-Call: Update CRM] +``` + + + Using integrations with Convo Flow agents + + +--- + +## Next Steps + + + + Connect your Salesforce org + + + Connect your HubSpot account + + + Connect Google Calendar + + + Build custom integrations + + diff --git a/fern/products/atoms/pages/platform/features/integrations/single-prompt.mdx b/fern/products/atoms/pages/platform/features/integrations/single-prompt.mdx new file mode 100644 index 0000000..f424e8b --- /dev/null +++ b/fern/products/atoms/pages/platform/features/integrations/single-prompt.mdx @@ -0,0 +1,213 @@ +--- +title: "Integrations with Single Prompt" +sidebarTitle: "Single Prompt" +description: "How to use integrations effectively with Single Prompt agents." +--- + +Single Prompt agents use integrations through Pre-Call APIs, Tool Calls, and Post-Call APIs. Here's how to set them up for maximum effectiveness. + +--- + +## Integration Flow + +``` +[Call Connects] + ↓ +[Pre-Call API] → CRM lookup, load context + ↓ +[Agent Conversation] → Tool calls for real-time actions + ↓ +[Post-Call API] → Log to CRM, trigger follow-ups +``` + +--- + +## Pre-Call: Loading Customer Context + +Pre-Call APIs run before your agent speaks, loading context from your connected systems. + +### CRM Lookup + +``` +When call connects: +1. Atoms checks caller phone in CRM +2. Retrieves: name, email, account status, history +3. Populates variables for your prompt +4. 
Agent greets with full context +``` + +### Using Context in Your Prompt + +``` +CUSTOMER CONTEXT (automatically populated from CRM): +- Name: {{customer_name}} +- Account Status: {{account_status}} +- Last Contact: {{last_contact_date}} +- Open Issues: {{open_tickets}} + +INSTRUCTIONS: +- Greet {{customer_name}} by name +- If {{account_status}} is "vip", thank them for their loyalty +- If {{open_tickets}} > 0, ask if they're calling about an existing issue +``` + +--- + +## Mid-Call: Tool-Based Integrations + +Single Prompt agents can call integration tools during the conversation when the agent decides they're needed. + +### Available Tools + +Configure which integration tools your agent can use: + +| Tool | Trigger | Action | +|------|---------|--------| +| `check_availability` | Customer asks about scheduling | Queries calendar | +| `book_appointment` | Customer confirms time | Creates calendar event | +| `lookup_order` | Customer asks about order | Queries order system | +| `create_ticket` | New support issue | Creates CRM ticket | + +### Prompt Instructions + +Tell the agent when to use tools: + +``` +AVAILABLE TOOLS: + +1. check_availability + - Use when: Customer asks about appointment times + - Requires: Preferred date (ask if not provided) + +2. book_appointment + - Use when: Customer confirms a time slot + - Requires: Date, time, customer email + - Always confirm details before booking + +3. create_support_ticket + - Use when: Issue can't be resolved on the call + - Requires: Issue description, customer contact + - Tell customer their ticket number +``` + +### Example Flow + +``` +Customer: "I need to schedule an appointment for next week" + +Agent thinks: Need to check availability → use check_availability tool +Agent: "I'd be happy to help. What day next week works best?" 
+ +Customer: "Tuesday if possible" + +Agent calls: check_availability(date: "2024-01-23") +Response: {slots: ["9:00 AM", "2:00 PM", "4:30 PM"]} + +Agent: "I have 9 AM, 2 PM, and 4:30 PM available on Tuesday. Which works for you?" + +Customer: "2 PM please" + +Agent calls: book_appointment(date: "2024-01-23", time: "14:00", email: "sarah@email.com") +Response: {confirmed: true, event_id: "evt_123"} + +Agent: "Perfect! You're all set for Tuesday, January 23rd at 2 PM. + I've sent a calendar invite to your email." +``` + +--- + +## Post-Call: Automatic Logging + +After every call, Post-Call APIs sync data back to your systems. + +### What Gets Logged + +| Data | Destination | Purpose | +|------|-------------|---------| +| Call summary | CRM (Task/Note) | Record of conversation | +| Duration | CRM | Tracking engagement | +| Outcome | CRM | Pipeline/status updates | +| Next steps | CRM | Follow-up triggers | + +### Customizing Post-Call Data + +Control what's sent to your CRM: + +```json +{ + "contact_phone": "{{caller_phone}}", + "contact_name": "{{customer_name}}", + "call_duration": {{call_duration}}, + "call_summary": "{{call_summary}}", + "outcome": "{{disposition}}", + "next_action": "{{follow_up_action}}", + "booked_appointment": "{{appointment_date}}" +} +``` + +--- + +## Best Practices + + + + Not every caller will be in your CRM. Plan for it: + + ``` + If customer data is not found ({{customer_name}} is empty): + - Greet generically: "Hi there! Thanks for calling." + - Ask for their name early + - Offer to create a contact record + - Continue with full service + ``` + + + + Always confirm before taking actions: + + **Booking:** + "Just to confirm, I'll book you for Tuesday at 2 PM. Is that correct?" + + **Creating tickets:** + "I'll create a support ticket for this issue. Should I send updates to sarah@email.com?" + + + + Integrations can fail. 
Have fallbacks: + + ``` + If calendar check fails: + - Apologize: "I'm having trouble checking our calendar right now" + - Offer alternative: "Can I have someone call you back to schedule?" + - Or: "You can also book online at ourwebsite.com/schedule" + ``` + + + + Structure integration instructions clearly: + + ``` + === CUSTOMER CONTEXT === + [CRM data here] + + === AVAILABLE TOOLS === + [Tool descriptions here] + + === CONVERSATION GUIDELINES === + [How to use context and tools] + ``` + + + +--- + +## Next Steps + + + + More control with node-based flows + + + Configure custom API calls + + diff --git a/fern/products/atoms/pages/platform/features/knowledge-base.mdx b/fern/products/atoms/pages/platform/features/knowledge-base.mdx new file mode 100644 index 0000000..c58c840 --- /dev/null +++ b/fern/products/atoms/pages/platform/features/knowledge-base.mdx @@ -0,0 +1,102 @@ +--- +title: "Knowledge Base" +sidebarTitle: "Knowledge Base" +description: "Give your agents access to information they can reference during conversations." +--- + +A Knowledge Base is a repository of documents and information that your agent can search during conversations. Instead of stuffing everything into a prompt, you upload content and let the agent retrieve what's relevant when needed. + +**Location:** Left Sidebar → Knowledge Base + +--- + +## Your Knowledge Bases + +The Knowledge Base page shows all your KBs on the left. Click any KB to see its documents on the right. + + + Knowledge Base page + + +Each document shows its status — **completed** means it's ready for your agent to use. + +--- + +## Creating a Knowledge Base + +Click the **+** button next to "Knowledge Base" to create a new one. + + + Create Knowledge Base modal + + +| Field | Description | +|-------|-------------| +| **Name** | Descriptive name (max 40 characters) | +| **Description** | Optional notes about this KB (max 150 characters) | + +Click **Create Knowledge Base** to finish. 
+ +--- + +## Adding Documents + +Once your KB is created, click **+ Add Document** to add content. + + + Add Document dropdown + + +| Option | What it does | +|--------|--------------| +| **Upload File** | Upload PDFs and documents directly | +| **Extract from URL** | Pull content from a website by analyzing its sitemap | +| **Add Text** | Coming Soon — manually add text | + +After uploading, documents are processed and indexed. Wait for **completed** status before expecting the agent to find the content. + +--- + +## Connecting to Your Agent + +Once you have a Knowledge Base with content: + +1. Open your agent in the editor +2. Go to **Configuration Panel** (right sidebar) +3. Toggle **Knowledge Base** on +4. Select your KB from the dropdown + +The agent can now search this KB during conversations. + +--- + +## Tips + + + Outdated information leads to wrong answers. Review and update regularly — especially policies, pricing, and procedures. + + + + Use clear headings and concise paragraphs. Q&A format works well because it matches how callers actually ask questions. + + + + Focused, relevant content retrieves better than massive document dumps. Don't upload everything — upload what matters. + + + + Ask your agent questions that should use the new content. Verify it finds and uses the right information. + + +--- + +## Related + + + + Dynamic values in conversations + + + Connect to external services + + diff --git a/fern/products/atoms/pages/platform/features/knowledge-base/best-practices.mdx b/fern/products/atoms/pages/platform/features/knowledge-base/best-practices.mdx new file mode 100644 index 0000000..822bc82 --- /dev/null +++ b/fern/products/atoms/pages/platform/features/knowledge-base/best-practices.mdx @@ -0,0 +1,174 @@ +--- +title: "Knowledge Base Best Practices" +sidebarTitle: "Best Practices" +description: "Tips for creating effective Knowledge Bases." +--- + +A well-structured Knowledge Base improves agent accuracy. 
A poorly organized one can make things worse. Follow these best practices to get the most from your KB.

---

## Content Structure

### Use Clear Headings

Break content into sections with descriptive headings. The agent can better find relevant sections.

| Bad | Good |
|-----|------|
| Long paragraphs with no structure | ### Return Policy<br/>### Shipping Times<br/>### Refunds |

### Write in Q&A Format

Question-answer format matches how callers actually ask:

```
Q: How long do I have to return an item?
A: You have 30 days from purchase to return any item in original packaging.

Q: Can I return opened items?
A: Opened items can be returned if defective. For non-defective items,
   a 15% restocking fee applies.
```

### Be Concise

Long, rambling text reduces retrieval accuracy. Be direct:

| Bad | Good |
|-----|------|
| "In many cases, customers often wonder about our various return policies which have been developed over many years..." | "Return window: 30 days from purchase. Items must be in original packaging." |

---

## Content Quality

### Keep Information Current

| Action | Frequency |
|--------|-----------|
| Review for accuracy | Monthly |
| Update changed policies | Immediately |
| Remove obsolete content | As needed |

Outdated information leads to wrong answers and frustrated callers.

### Prioritize Accuracy

Everything in your KB should be 100% correct. Agents will cite it confidently.

### Include Edge Cases

Don't just cover the happy path:

```
Q: What if I lost my receipt?
A: We can look up your purchase using your credit card or loyalty account.
   If neither is available, we offer store credit at current price.
```

---

## Organization

### Use Logical Categories

Group related information:

```
Product Information/
├── Product X/
│   ├── Features
│   ├── Specifications
│   └── Troubleshooting
├── Product Y/
│   ├── Features
│   └── Specifications
└── Product Comparison
```

### Avoid Duplication

Same information in multiple places causes confusion:
- Which version is current?
- Which will the agent find?

One source of truth is better.
+ +### Name Files Descriptively + +| Bad | Good | +|-----|------| +| doc1.pdf | Product_X_User_Guide_2024.pdf | +| info.txt | Shipping_Rates_and_Zones.txt | + +--- + +## Retrieval Optimization + +### Use Keywords Callers Use + +Write content using the same words callers would say: + +| Technical | Caller Language | +|-----------|-----------------| +| "authentication failure" | "can't log in" | +| "remittance processing" | "payment" | +| "merchandise return" | "give back item" | + +Include both when possible. + +### Test Retrieval + +After uploading: + +1. Ask questions as callers would +2. Check if the right content is retrieved +3. Adjust content wording if not + +### Don't Over-Stuff + +Too much content can hurt: +- Retrieval becomes less precise +- Irrelevant content may surface +- Agent responses become confused + +Focus on information callers actually need. + +--- + +## Maintenance + +### Regular Audits + +Schedule periodic reviews: +- Is content still accurate? +- Are there gaps? +- Is anything unused? + +### Version Control + +When updating important documents: +- Note what changed +- Consider dating versions +- Archive old versions if needed + +### Monitor Conversations + +Review conversation logs to identify: +- Questions KB couldn't answer +- Wrong information surfaced +- Content gaps to fill + +--- + +## What's Next + + + + Learn about event notifications + + + Dynamic values in prompts + + diff --git a/fern/products/atoms/pages/platform/features/knowledge-base/creating.mdx b/fern/products/atoms/pages/platform/features/knowledge-base/creating.mdx new file mode 100644 index 0000000..6b0cddf --- /dev/null +++ b/fern/products/atoms/pages/platform/features/knowledge-base/creating.mdx @@ -0,0 +1,94 @@ +--- +title: "Creating a Knowledge Base" +sidebarTitle: "Creating a Knowledge Base" +description: "Set up a new Knowledge Base for your agents." +--- + +Creating a Knowledge Base is straightforward. This page walks you through the process. 
+ +--- + +## Location + +**Left Sidebar → Build → Knowledge Base** + +--- + +## Steps to Create + +→ **NEEDS PLATFORM INFO:** Detailed KB creation interface and steps + +### Step 1: Navigate to Knowledge Base + +In the left sidebar, under "Build," click **Knowledge Base**. + +### Step 2: Click Create + +Click the **"Create Knowledge Base"** button. + +### Step 3: Configure Settings + +| Field | Description | +|-------|-------------| +| **Name** | Descriptive name (e.g., "Product FAQ", "Support Docs") | +| **Type** | Local (single agent) or Global (shared) | +| **Description** | Optional notes about this KB | + +### Step 4: Create + +Click **Create** to finish setup. + +--- + +## Naming Best Practices + +Choose clear, descriptive names: + +| Good Names | Why | +|------------|-----| +| "Product Support FAQ" | Clear purpose | +| "Shipping Policies v2" | Includes version | +| "TechCorp Returns" | Company + topic | + +| Avoid | Why | +|-------|-----| +| "KB1" | Not descriptive | +| "Stuff" | Too vague | +| "New knowledge base" | Default-sounding | + +--- + +## Local vs Global + +Choose during creation: + +| Type | When to Use | +|------|-------------| +| **Local** | Information specific to one agent | +| **Global** | Information multiple agents should share | + +→ See: [Local vs Global](/atoms/atoms-platform/features/knowledge-base) + +--- + +## After Creation + +Your Knowledge Base is created but empty. Next steps: + +1. **Upload content** — Add documents and information +2. **Wait for processing** — System indexes content +3. **Attach to agent** — Connect in agent's Configuration Panel +4. 
**Test** — Verify retrieval works + +--- + +## What's Next + + + + Add documents to your KB + + + Understand KB scoping + + diff --git a/fern/products/atoms/pages/platform/features/knowledge-base/local-vs-global.mdx b/fern/products/atoms/pages/platform/features/knowledge-base/local-vs-global.mdx new file mode 100644 index 0000000..37a1399 --- /dev/null +++ b/fern/products/atoms/pages/platform/features/knowledge-base/local-vs-global.mdx @@ -0,0 +1,121 @@ +--- +title: "Local vs Global Knowledge Base" +sidebarTitle: "Local vs Global" +description: "Understand when to use Local versus Global Knowledge Bases." +--- + +Knowledge Bases can be scoped to a single agent (Local) or shared across multiple agents (Global). Choosing the right scope helps you organize information effectively. + +--- + +## Comparison + +| Aspect | Local KB | Global KB | +|--------|----------|-----------| +| **Scope** | One agent only | Multiple agents | +| **Visibility** | Attached agent | All agents you assign | +| **Use case** | Agent-specific info | Company-wide knowledge | +| **Management** | Per-agent | Centralized | + +--- + +## Local Knowledge Base + +A Local KB is attached to and used by only one agent. + +### When to Use + +- Information specific to one agent's purpose +- Content that would confuse other agents +- Specialized product or service details +- Agent-specific policies + +### Example + +**Product Support Agent** has a Local KB with: +- Technical specifications for Product X +- Troubleshooting guides for Product X +- Product X-specific FAQs + +Other agents don't need (and shouldn't have) this specialized information. + +--- + +## Global Knowledge Base + +A Global KB is shared across multiple agents. 
+ +### When to Use + +- Company-wide policies +- General FAQs that apply everywhere +- Brand guidelines +- Universal processes + +### Example + +**Shared Company KB** contains: +- Return policy +- Shipping information +- Company hours and locations +- General FAQ + +Multiple agents (Sales, Support, Billing) all reference this shared knowledge. + +--- + +## Best Practices + +### Use Global for Common Knowledge + +If multiple agents need the same information, make it Global: +- One source of truth +- Update once, applies everywhere +- Consistent answers across agents + +### Use Local for Specialized Knowledge + +If only one agent needs specific information, keep it Local: +- Cleaner retrieval (less noise) +- No confusion with irrelevant content +- Agent-specific accuracy + +### Combine Both + +Many setups use both: + +``` +Agent: Product X Support +├── Global KB: Company Policies +└── Local KB: Product X Documentation + +Agent: Product Y Support +├── Global KB: Company Policies (same) +└── Local KB: Product Y Documentation +``` + +Both agents share policies but have their own product docs. + +--- + +## Managing Multiple KBs + +As you add agents and knowledge bases: + +1. **Audit regularly** — Is content still in the right place? +2. **Avoid duplication** — Same content shouldn't be in multiple KBs +3. **Name clearly** — "Global - Company Policies" vs "Local - Product X" +4. 
**Document your structure** — Know which KB serves which purpose + +--- + +## What's Next + + + + Optimize your Knowledge Base + + + Back to overview + + diff --git a/fern/products/atoms/pages/dev/build/campaigns/troubleshooting.mdx b/fern/products/atoms/pages/platform/features/knowledge-base/troubleshooting.mdx similarity index 56% rename from fern/products/atoms/pages/dev/build/campaigns/troubleshooting.mdx rename to fern/products/atoms/pages/platform/features/knowledge-base/troubleshooting.mdx index e8accd7..4e02f46 100644 --- a/fern/products/atoms/pages/dev/build/campaigns/troubleshooting.mdx +++ b/fern/products/atoms/pages/platform/features/knowledge-base/troubleshooting.mdx @@ -1,6 +1,7 @@ --- title: "Troubleshooting" -description: "Campaign troubleshooting" --- # Troubleshooting + +*Content coming soon.* diff --git a/fern/products/atoms/pages/platform/features/knowledge-base/uploading.mdx b/fern/products/atoms/pages/platform/features/knowledge-base/uploading.mdx new file mode 100644 index 0000000..8a88206 --- /dev/null +++ b/fern/products/atoms/pages/platform/features/knowledge-base/uploading.mdx @@ -0,0 +1,146 @@ +--- +title: "Uploading Content" +sidebarTitle: "Uploading Content" +description: "Add documents, text, and URLs to your Knowledge Base." +--- + +After creating a Knowledge Base, you need to add content. This page covers how to upload different content types. + +--- + +## Supported Content Types + +→ **NEEDS PLATFORM INFO:** Exact supported formats and size limits + +| Type | Formats | Best For | +|------|---------|----------| +| **Documents** | PDF, DOCX, TXT | Existing documentation | +| **Text** | Direct input | Quick additions, FAQ entries | +| **URLs** | Web pages | Live web content | + +--- + +## Uploading Documents + +### Step 1: Open Your KB + +Navigate to your Knowledge Base from the Build menu. + +### Step 2: Click Upload + +Click the **"Upload"** or **"Add Content"** button. + +### Step 3: Select Files + +Choose files from your computer. 
+ +### Step 4: Wait for Processing + +Files are: +1. Uploaded to Atoms +2. Parsed (text extracted) +3. Indexed for search +4. Ready for use + +Processing time depends on file size and complexity. + +--- + +## Adding Text Directly + +For quick additions without creating a file: + +1. Click **"Add Text"** or similar +2. Paste or type your content +3. Save + +Good for: +- Individual FAQ entries +- Quick policy updates +- Short information snippets + +--- + +## Adding URLs + +To include web content: + +1. Click **"Add URL"** +2. Enter the web address +3. Atoms crawls the page +4. Content is extracted and indexed + +**Notes:** +- Some pages may not crawl due to access restrictions +- Dynamic content may not be captured +- Re-crawl periodically for fresh content + +--- + +## Content Best Practices + +### Structure for Searchability + +| Good | Bad | +|------|-----| +| Clear headings | Wall of text | +| Question-answer format | Rambling prose | +| Concise paragraphs | Endless sentences | +| Specific topics | Everything mixed together | + +### Keep Content Current + +Outdated content leads to wrong answers. Review and update regularly. + +### Don't Over-Upload + +Quality over quantity. Focused, relevant content retrieves better than massive dumps. + +### Test After Uploading + +Ask questions that should use new content. Verify the agent finds and uses it correctly. + +--- + +## Processing Status + +After uploading, content goes through processing: + +| Status | Meaning | +|--------|---------| +| **Processing** | Being indexed | +| **Ready** | Available for retrieval | +| **Error** | Processing failed | + +Wait for "Ready" status before expecting content to be found. + +--- + +## Managing Content + +### Viewing Uploaded Content + +See all items in your KB with their status. + +### Updating Content + +- Delete old version +- Upload new version +- Wait for processing + +### Deleting Content + +Remove items that are outdated or no longer needed. 
+ +--- + +## What's Next + + + + Understand KB scoping + + + Optimize your Knowledge Base + + diff --git a/fern/products/atoms/pages/platform/features/knowledge-base/using-with-agents.mdx b/fern/products/atoms/pages/platform/features/knowledge-base/using-with-agents.mdx new file mode 100644 index 0000000..ab9e8b3 --- /dev/null +++ b/fern/products/atoms/pages/platform/features/knowledge-base/using-with-agents.mdx @@ -0,0 +1,170 @@ +--- +title: "Using Knowledge Base with Agents" +sidebarTitle: "Using with Agents" +description: "Connect your knowledge base to agents so they can access your documents during calls." +--- + +After uploading documents, you need to connect the knowledge base to your agent. Here's how to set it up and get the most out of it. + +--- + +## Connecting a Knowledge Base + + + + When creating a new agent: + + 1. Scroll to the **Knowledge Base** section + 2. Click **Select Knowledge Base** + 3. Choose from your existing knowledge bases + 4. Continue with agent creation + + + For agents you've already created: + + 1. Open your agent in the editor + 2. Look for **Knowledge Base** in the right panel + 3. Click **Select** or **Change** + 4. Choose the knowledge base you want + 5. Save your changes + + + +--- + +## How Retrieval Works + +When a caller asks a question: + +``` +Caller: "What's your return policy?" + │ + ▼ +┌─────────────────────────────────┐ +│ 1. Question → Search Query │ +│ 2. Search Knowledge Base │ +│ 3. Retrieve Top 3-5 Chunks │ +│ 4. Inject into Agent Context │ +│ 5. Generate Response │ +└─────────────────────────────────┘ + │ + ▼ +Agent: "You can return any item within 30 days + for a full refund. Would you like me to + start a return for you?" +``` + + +The agent doesn't read your entire knowledge base for every question — it retrieves only the most relevant sections, making responses fast and focused. 
+ + +--- + +## Best Practices + +### Write Prompts That Use the Knowledge Base + +Your prompt should tell the agent *when* and *how* to use the knowledge base: + + + + ``` + You are a customer support agent. Help customers + with their questions. + ``` + + Agent might not prioritize KB content. + + + ``` + You are a customer support agent. When customers ask + about products, policies, or procedures, refer to the + knowledge base for accurate information. + + If you can't find the answer in the knowledge base, + say "I don't have that information, but I can connect + you with someone who does." + ``` + + Agent knows to check KB and how to handle missing info. + + + +### Match KB Content to Use Case + +| Agent Type | What to Include in KB | +|------------|----------------------| +| **Customer Support** | FAQs, troubleshooting guides, policies | +| **Sales** | Product specs, pricing, case studies | +| **Booking** | Service descriptions, provider info | +| **Collections** | Account lookup procedures, payment options | + +### Keep Content Current + +- **Review monthly:** Remove outdated information +- **Update after changes:** New pricing? New policies? Update the KB +- **Test regularly:** Call your agent and ask about recently updated content + +--- + +## One Agent, Multiple Knowledge Bases? + +Currently, each agent connects to **one knowledge base** at a time. + + + If you need information from multiple sources: + + 1. Create a single knowledge base for that agent + 2. Upload all relevant documents to it + 3. Use clear naming to organize (e.g., `support-faq.pdf`, `product-specs.pdf`) + + +--- + +## Testing KB Retrieval + +After connecting, test that your agent actually uses the knowledge base: + + + ### Ask a KB-Specific Question + + Ask something that's *only* in your knowledge base, not general knowledge: + - "What's your return policy?" (specific to your company) + - "How much does [specific product] cost?" + - "What are your business hours?" 
+ + ### Check the Response + + The answer should match your uploaded documents. If it doesn't: + - Verify the document uploaded successfully + - Check that the KB is connected to this agent + - Look at the document content — is the answer actually there? + + ### Review Call Logs + + In **Conversation Logs**, you can see if the agent retrieved KB content and what it found. + + +--- + +## Common Issues + +| Problem | Likely Cause | Fix | +|---------|--------------|-----| +| Agent doesn't use KB | KB not connected | Check agent settings | +| Wrong information | Outdated document | Update or replace the file | +| "I don't know" for KB content | Content not found | Check document formatting, try rephrasing | +| Generic answers | Prompt doesn't reference KB | Add KB instructions to prompt | + +--- + +## Next Steps + + + + Fix common knowledge base issues + + + Verify KB retrieval is working + + diff --git a/fern/products/atoms/pages/platform/features/knowledge-base/what-is-kb.mdx b/fern/products/atoms/pages/platform/features/knowledge-base/what-is-kb.mdx new file mode 100644 index 0000000..ad48970 --- /dev/null +++ b/fern/products/atoms/pages/platform/features/knowledge-base/what-is-kb.mdx @@ -0,0 +1,119 @@ +--- +title: "What is Knowledge Base" +sidebarTitle: "What is Knowledge Base" +description: "Give your agents access to information they can reference during conversations." +--- + +A Knowledge Base is a repository of documents, FAQs, and information that your agent can search and reference during conversations. Instead of trying to stuff everything into a prompt, you upload content and let the agent retrieve relevant information when needed. + +--- + +## Why Use Knowledge Base + +### More Accurate Answers + +Agents can cite actual documents rather than guessing or hallucinating. + +### Easy Updates + +Change a policy? Update the document. No prompt editing needed. + +### Scalable Knowledge + +Add hundreds of pages of content that would never fit in a prompt. 
+ +### Reduced Prompt Complexity + +Keep your prompt focused on behavior. Let the KB handle information. + +--- + +## How It Works + +1. **You upload content** — Documents, text, URLs +2. **Atoms processes it** — Content is indexed for search +3. **Caller asks a question** — Agent receives the query +4. **Agent searches KB** — Finds relevant sections +5. **Agent responds** — Uses KB content in the answer + +--- + +## What You Can Upload + +| Content Type | Examples | +|--------------|----------| +| **Documents** | PDFs, Word docs, text files | +| **Text** | Directly pasted content | +| **URLs** | Web pages to crawl | +| **FAQ lists** | Question-answer pairs | + +→ See: [Uploading Content](/atoms/atoms-platform/features/knowledge-base) for details + +--- + +## Local vs Global + +| Type | Scope | Use Case | +|------|-------|----------| +| **Local KB** | Single agent | Agent-specific information | +| **Global KB** | Multiple agents | Shared company knowledge | + +**Example:** +- Global KB: Company policies, general FAQ +- Local KB: Product-specific details for a product support agent + +→ See: [Local vs Global](/atoms/atoms-platform/features/knowledge-base) + +--- + +## Use Cases + +### Customer Support + +- Product documentation +- Troubleshooting guides +- Return/refund policies +- Shipping information + +### Sales + +- Product specifications +- Pricing sheets +- Competitive comparisons +- Case studies + +### Healthcare + +- Appointment policies +- Insurance information +- Procedure descriptions +- Location details + +### Any Industry + +- FAQs +- Policies +- Product information +- How-to guides + +--- + +## Getting Started + +1. **Create a Knowledge Base** — Give it a name and type +2. **Upload content** — Add your documents and information +3. **Attach to agent** — Connect KB in Configuration Panel +4. 
**Test** — Verify agent finds and uses the right information + +--- + +## What's Next + + + + Set up your first KB + + + Add documents and information + + diff --git a/fern/products/atoms/pages/platform/features/metrics/configuring.mdx b/fern/products/atoms/pages/platform/features/metrics/configuring.mdx new file mode 100644 index 0000000..303ed0f --- /dev/null +++ b/fern/products/atoms/pages/platform/features/metrics/configuring.mdx @@ -0,0 +1,295 @@ +--- +title: "Configuring Metrics" +sidebarTitle: "Configuring" +description: "Set up custom post-call metrics to track what matters for your business." +--- + +This guide walks you through creating custom metrics that are automatically extracted from every call. + +--- + +## Accessing Metric Configuration + + + ### Open Your Agent + + Navigate to the agent you want to configure metrics for. + + ### Find Post Call Metrics + + In the right panel (or settings), locate **Post Call Metrics**. + + ### Add or Edit Metrics + + Click **+ Add Metric** or edit existing ones. + + +--- + +## Creating a Metric + + + ### Choose Metric Type + + | Type | Use When | + |------|----------| + | **Categorical** | Fixed set of possible values | + | **Numeric** | Number or score | + | **Boolean** | Yes/No question | + | **Text** | Free-form extraction | + + ### Name Your Metric + + Give it a clear, descriptive name: + - ✅ "Lead Status" + - ✅ "Issue Resolved" + - ❌ "Metric1" + - ❌ "Status" + + ### Define the Extraction Prompt + + Tell the AI what to look for. Be specific: + + ``` + Based on the conversation, what is the caller's lead status? + + Consider: + - Did they express interest in purchasing? + - Do they have budget and timeline? + - Are they just researching? + + Respond with one of: Hot, Warm, Cold, Not Qualified + ``` + + ### Set Options (for Categorical) + + Define the possible values: + - Hot + - Warm + - Cold + - Not Qualified + + ### Save + + Click **Save** to enable the metric. 
+ + +--- + +## Metric Configuration Examples + +### Lead Status (Categorical) + +``` +Name: Lead Status +Type: Categorical +Options: Hot, Warm, Cold, Not Qualified + +Extraction Prompt: +Based on the conversation, classify this lead: + +HOT - Expressed strong interest, has budget, ready to buy soon +WARM - Interested but needs more info or time +COLD - Minimal interest, just gathering information +NOT QUALIFIED - Wrong fit, no budget, or not decision maker + +Only use one of these exact values. +``` + +### Issue Resolved (Boolean) + +``` +Name: Issue Resolved +Type: Boolean + +Extraction Prompt: +Was the caller's issue fully resolved during this call? + +Consider: +- Did the agent provide a solution? +- Did the caller confirm the issue was fixed? +- Did the call end without resolution? + +Respond with: true or false +``` + +### Satisfaction Score (Numeric) + +``` +Name: Implied Satisfaction +Type: Numeric (1-5) + +Extraction Prompt: +Based on the caller's tone and statements, estimate their +satisfaction level from 1 to 5: + +1 - Very Dissatisfied (angry, complained, threatened to leave) +2 - Dissatisfied (frustrated, unhappy with resolution) +3 - Neutral (matter-of-fact, no strong feelings expressed) +4 - Satisfied (thanked agent, issue resolved) +5 - Very Satisfied (expressed gratitude, praised service) + +Respond with just the number. +``` + +### Issue Summary (Text) + +``` +Name: Issue Summary +Type: Text + +Extraction Prompt: +Summarize the main issue or reason for this call in 1-2 sentences. +Focus on what the caller needed help with. +Be concise and factual. +``` + +### Pain Points (Text) + +``` +Name: Pain Points +Type: Text + +Extraction Prompt: +What problems, frustrations, or challenges did the caller mention? +List them as bullet points. If none mentioned, respond with "None identified." +``` + +--- + +## Best Practices + + + + Vague prompts → inconsistent results + + **Bad:** + ``` + What's the lead status? 
+ ``` + + **Good:** + ``` + Based on the conversation, classify the lead status: + - HOT: Strong buying signals, has budget, immediate need + - WARM: Interested but not urgent, may need nurturing + - COLD: Low interest, just researching + - NOT QUALIFIED: Wrong fit or no decision authority + ``` + + + + For categorical metrics: + - Use distinct, non-overlapping options + - Define what each option means + - Limit to 5-7 options max + + **Too many options:** + ``` + Very Hot, Hot, Somewhat Hot, Warm, Lukewarm, + Cool, Cold, Very Cold, Frozen, Not Qualified + ``` + + **Just right:** + ``` + Hot, Warm, Cold, Not Qualified + ``` + + + + Tell the AI what to do when things are unclear: + + ``` + If the sentiment cannot be determined (call too short, + no clear indicators), respond with "Neutral" + ``` + + + + After creating a metric: + 1. Process a few test calls + 2. Review the extracted values + 3. Adjust the prompt if needed + 4. Repeat until accurate + + + +--- + +## Metric Prompt Templates + +### Intent Classification + +``` +What was the primary reason for this call? + +BILLING - Questions about invoices, charges, payments +SUPPORT - Technical issues, troubleshooting, how-to +SALES - Product inquiries, pricing, purchasing +ACCOUNT - Password, access, account changes +GENERAL - Other inquiries + +Respond with one category. +``` + +### Sentiment Analysis + +``` +What was the caller's overall sentiment? + +POSITIVE - Happy, grateful, satisfied +NEUTRAL - Matter-of-fact, no strong emotion +NEGATIVE - Frustrated, angry, disappointed + +Consider their tone throughout the call, especially +at the end. Respond with one sentiment. +``` + +### Competitor Mentions + +``` +Did the caller mention any competitor products or companies? + +If yes, list the competitor names mentioned. +If no competitors were mentioned, respond with "None" +``` + +### Next Steps + +``` +What follow-up action, if any, was agreed upon? 
+ +DEMO_SCHEDULED - Demo meeting was booked +SEND_INFO - Agent will send information +CALLBACK - Agent or team will call back +ESCALATED - Transferred to specialist/manager +RESOLVED - No follow-up needed +NONE - Call ended without clear next step + +Respond with one value. +``` + +--- + +## Bulk Configuration + +If you have many metrics to set up: + +1. Configure on one agent +2. Export the configuration +3. Import to other agents + +Or use the API to programmatically create metrics across agents. + +--- + +## Next Steps + + + + See your metrics in the dashboard + + + Interpret your metrics effectively + + diff --git a/fern/products/atoms/pages/platform/features/metrics/understanding-data.mdx b/fern/products/atoms/pages/platform/features/metrics/understanding-data.mdx new file mode 100644 index 0000000..54a666b --- /dev/null +++ b/fern/products/atoms/pages/platform/features/metrics/understanding-data.mdx @@ -0,0 +1,262 @@ +--- +title: "Understanding Your Data" +sidebarTitle: "Understanding Data" +description: "How to interpret your post-call metrics and turn insights into action." +--- + +Collecting metrics is just the start. Here's how to interpret your data and use it to improve your agents and business outcomes. 
+ +--- + +## Key Metrics to Track + +### Call Efficiency + +| Metric | What It Tells You | Good Benchmark | +|--------|-------------------|----------------| +| **Average Duration** | How long calls take | Depends on use case | +| **First Call Resolution** | % resolved without follow-up | > 70% | +| **Transfer Rate** | % escalated to humans | < 20% | +| **Containment Rate** | % handled entirely by AI | > 80% | + +### Customer Experience + +| Metric | What It Tells You | Good Benchmark | +|--------|-------------------|----------------| +| **Sentiment** | How callers feel | > 60% positive | +| **Satisfaction** | Implied CSAT | > 4.0 / 5 | +| **Resolution Rate** | Issues actually solved | > 80% | + +### Business Outcomes + +| Metric | What It Tells You | Good Benchmark | +|--------|-------------------|----------------| +| **Conversion Rate** | Leads → customers | Varies by industry | +| **Booking Rate** | Calls → appointments | > 60% for booking agents | +| **Lead Quality** | % qualified leads | > 40% | + +--- + +## Interpreting Common Patterns + +### Pattern: High Transfer Rate + +``` +Transfer Rate: 35% (above 20% target) +``` + +**What it might mean:** +- Agent can't handle certain issue types +- Knowledge base missing information +- Callers requesting human explicitly +- Complex issues beyond AI capability + +**Actions:** +1. Review transferred call transcripts +2. Identify common transfer reasons +3. Update KB or prompts to address gaps +4. Consider if some transfers are appropriate + +--- + +### Pattern: Declining Sentiment + +``` +Week 1: 72% Positive → Week 4: 54% Positive +``` + +**What it might mean:** +- Product/service issues affecting customers +- Agent responses frustrating callers +- Wait times or call quality issues +- External factors (outages, recalls) + +**Actions:** +1. Review recent negative calls +2. Look for common complaints +3. Check for prompt/KB changes +4. 
Coordinate with product/service teams + +--- + +### Pattern: Long Average Duration + +``` +Average Duration: 8m 32s (vs. 4m target) +``` + +**What it might mean:** +- Complex issues requiring more discussion +- Agent going off-topic +- Callers asking many follow-up questions +- Slow API responses causing delays + +**Actions:** +1. Review longest calls +2. Check for unnecessary conversation loops +3. Ensure agent is efficient +4. Verify API response times + +--- + +### Pattern: Low Resolution Rate + +``` +Resolution Rate: 45% (target: 70%) +``` + +**What it might mean:** +- Knowledge base lacking information +- Issues requiring human intervention +- Agent not understanding requests +- Wrong disposition classification + +**Actions:** +1. Review "unresolved" calls +2. Categorize by issue type +3. Add missing KB content +4. Improve agent prompts + +--- + +## Segment Analysis + +Don't just look at overall numbers — segment your data. + +### By Issue Type + +| Issue Type | Volume | Resolution | Sentiment | +|------------|--------|------------|-----------| +| Billing | 234 | 82% | 71% pos | +| Technical | 189 | 45% | 52% pos | +| Account | 156 | 91% | 78% pos | +| Product | 98 | 73% | 65% pos | + +**Insight:** Technical issues have low resolution and sentiment → Focus improvement there. + +### By Time of Day + +| Time | Volume | Avg Duration | Transfer Rate | +|------|--------|--------------|---------------| +| 9am-12pm | 342 | 3m 45s | 12% | +| 12pm-5pm | 456 | 4m 12s | 18% | +| 5pm-9pm | 234 | 5m 30s | 28% | +| After hours | 89 | 2m 15s | 8% | + +**Insight:** Evening calls have higher transfer rates → Perhaps more complex issues or staffing needed. + +### By Customer Segment + +| Segment | Volume | Satisfaction | Resolution | +|---------|--------|--------------|------------| +| New | 287 | 3.8 | 72% | +| Regular | 456 | 4.2 | 85% | +| VIP | 78 | 4.5 | 92% | + +**Insight:** New customers need more support → Enhance onboarding-related responses. 
+ +--- + +## Setting Benchmarks + +### Start with Baselines + +Before optimizing, establish where you are: + +1. Run agents for 2-4 weeks +2. Collect baseline metrics +3. Set realistic targets based on baselines +4. Improve incrementally + +### Industry Benchmarks + +| Metric | Contact Centers | Voice AI (Realistic) | +|--------|-----------------|---------------------| +| Average Handle Time | 6 min | 4-5 min | +| First Call Resolution | 70-75% | 65-80% | +| Transfer Rate | 15-20% | 15-25% | +| CSAT | 4.0/5 | 3.8-4.2/5 | + +### Progressive Goals + +Set targets that improve over time: + +| Metric | Baseline | 30 Days | 90 Days | +|--------|----------|---------|---------| +| Resolution | 65% | 70% | 80% | +| Sentiment | 58% pos | 62% pos | 70% pos | +| Transfer | 28% | 24% | 18% | + +--- + +## Taking Action + +### Metric → Insight → Action Framework + +``` +Metric: Resolution rate dropped 10% + ↓ +Insight: New product launch causing unknown questions + ↓ +Action: Upload new product documentation to KB + ↓ +Measure: Track resolution rate for product questions +``` + +### Prioritization Matrix + +| Impact | Effort | Priority | +|--------|--------|----------| +| High resolution impact | Low effort | **Do First** | +| High impact | High effort | Plan carefully | +| Low impact | Low effort | Quick wins | +| Low impact | High effort | Skip or defer | + +--- + +## Reporting Best Practices + + + + What to review weekly: + - Call volume trends + - Resolution rate changes + - Sentiment shifts + - Top unresolved issues + + **Time:** 15-30 minutes + + + + What to analyze monthly: + - Segment performance + - Trend analysis + - Benchmark comparisons + - Strategic improvements + + **Time:** 1-2 hours + + + + What executives want to see: + - Call volume and cost savings + - Resolution rate and customer impact + - Business outcomes (conversions, bookings) + - Trend direction (improving/declining) + + **Keep it:** High-level with key takeaways + + + +--- + +## Next Steps + + + + Set 
up custom metrics + + + Drill into specific calls + + diff --git a/fern/products/atoms/pages/platform/features/metrics/viewing-analytics.mdx b/fern/products/atoms/pages/platform/features/metrics/viewing-analytics.mdx new file mode 100644 index 0000000..014e0c4 --- /dev/null +++ b/fern/products/atoms/pages/platform/features/metrics/viewing-analytics.mdx @@ -0,0 +1,260 @@ +--- +title: "Viewing Analytics" +sidebarTitle: "Viewing Analytics" +description: "Navigate the analytics dashboard to understand your call metrics." +--- + +The analytics dashboard gives you a real-time view of your call performance and custom metrics. + +--- + +## Accessing Analytics + + + ### Navigate to Analytics + + Click **Analytics** in the main navigation. + + ### Select Time Range + + Use the date picker to choose your analysis period: + - Today + - Last 7 days + - Last 30 days + - Custom range + + ### Filter by Agent + + View all agents or select specific ones to analyze. + + +--- + +## Dashboard Overview + +The analytics dashboard shows several sections: + +### Call Volume + +``` +┌─────────────────────────────────────────┐ +│ Total Calls │ Inbound │ Outbound │ +│ 1,234 │ 892 │ 342 │ +├─────────────────────────────────────────┤ +│ [Call Volume Over Time] │ +│ 📈 Daily/weekly trend chart │ +└─────────────────────────────────────────┘ +``` + +Key metrics: +- Total calls +- Inbound vs Outbound split +- Daily/weekly trends +- Peak call times + +### Duration Analytics + +``` +┌─────────────────────────────────────────┐ +│ Average Duration │ Total Minutes │ │ +│ 3m 42s │ 4,567 │ │ +├─────────────────────────────────────────┤ +│ [Duration Distribution] │ +│ Shows how calls distribute by length │ +└─────────────────────────────────────────┘ +``` + +Key metrics: +- Average call duration +- Total talk time +- Duration distribution +- Longest/shortest calls + +### Disposition Breakdown + +``` +┌─────────────────────────────────────────┐ +│ [Disposition Pie Chart] │ +│ │ +│ ● Resolved (68%) │ +│ ● 
Transferred (15%) │ +│ ● Follow-up Needed (12%) │ +│ ● Unresolved (5%) │ +└─────────────────────────────────────────┘ +``` + +See how calls ended: +- Success rate +- Transfer rate +- Resolution distribution + +--- + +## Custom Metrics Views + +### Categorical Metrics + +For metrics like "Lead Status" or "Issue Type": + +``` +┌─────────────────────────────────────────┐ +│ Lead Status Distribution │ +├──────────┬──────────┬──────────────────┤ +│ Hot │ ████████ │ 23% │ +│ Warm │ ██████████████ │ 45% │ +│ Cold │ ████████ │ 25% │ +│ DQ │ ██ │ 7% │ +└──────────┴──────────┴──────────────────┘ +``` + +Shows: +- Distribution across categories +- Percentage breakdown +- Trend over time + +### Numeric Metrics + +For scores and ratings: + +``` +┌─────────────────────────────────────────┐ +│ Satisfaction Score (1-5) │ +├─────────────────────────────────────────┤ +│ Average: 4.2 │ +│ [Distribution histogram] │ +│ 5 ★ ████████████████ 42% │ +│ 4 ★ ██████████ 28% │ +│ 3 ★ ████████ 20% │ +│ 2 ★ ██ 7% │ +│ 1 ★ █ 3% │ +└─────────────────────────────────────────┘ +``` + +Shows: +- Average value +- Distribution +- Trend over time + +### Boolean Metrics + +For yes/no metrics: + +``` +┌─────────────────────────────────────────┐ +│ Issue Resolved │ +├─────────────────────────────────────────┤ +│ ✓ Yes: 847 (68%) │ +│ ✗ No: 398 (32%) │ +│ [Trend line over time] │ +└─────────────────────────────────────────┘ +``` + +Shows: +- True vs False count +- Percentage +- Trend over time + +--- + +## Filtering & Segmentation + +### By Agent + +Compare performance across agents: + +| Agent | Calls | Avg Duration | Resolution Rate | +|-------|-------|--------------|-----------------| +| Support Bot | 523 | 4m 12s | 72% | +| Sales Bot | 342 | 6m 45s | 68% | +| Booking Bot | 287 | 2m 30s | 94% | + +### By Time Period + +Compare different periods: +- This week vs last week +- This month vs last month +- Custom comparison + +### By Metric Value + +Filter to see subsets: +- Only "Hot" leads +- Only 
"Negative" sentiment calls +- Only calls over 5 minutes + +--- + +## Exporting Data + +### Export to CSV + +1. Set your filters +2. Click **Export** +3. Choose columns to include +4. Download CSV file + +### Scheduled Reports + +Set up automatic reports: +1. Go to **Reports** → **Scheduled** +2. Click **Create Report** +3. Select metrics and filters +4. Choose frequency (daily/weekly/monthly) +5. Add email recipients + +### API Access + +Query metrics programmatically: + +```javascript +GET /api/v1/analytics/calls +?start_date=2024-01-01 +&end_date=2024-01-31 +&agent_id=agt_123 +&metrics=disposition,sentiment,lead_status +``` + +--- + +## Reading the Data + + + + Look for patterns: + + - **Rising resolution rate** → Agents improving + - **Increasing call duration** → More complex issues or better engagement + - **More "Hot" leads** → Better marketing targeting + - **Rising negative sentiment** → Investigate product/service issues + + + + When comparing periods: + + - Account for seasonality + - Consider day-of-week effects + - Note any external factors (promotions, outages) + - Look at percentage changes, not just absolutes + + + + Investigate unusual data: + + - Very long calls → Complex issues or problems? + - Spike in transfers → Agent capability gap? + - Drop in sentiment → What changed? + + + +--- + +## Next Steps + + + + Interpret metrics effectively + + + Drill into individual calls + + diff --git a/fern/products/atoms/pages/platform/features/metrics/what-are-metrics.mdx b/fern/products/atoms/pages/platform/features/metrics/what-are-metrics.mdx new file mode 100644 index 0000000..b0c4d7b --- /dev/null +++ b/fern/products/atoms/pages/platform/features/metrics/what-are-metrics.mdx @@ -0,0 +1,202 @@ +--- +title: "Post Call Metrics" +sidebarTitle: "What Are Metrics?" +description: "Measure what matters with custom metrics extracted from every call." 
+--- + +**Post Call Metrics** let you automatically extract and track specific data points from every conversation. Instead of manually reviewing calls, define what you want to measure and Atoms extracts it for you. + +--- + +## Why Use Post Call Metrics? + + + + To understand your calls, you have to: + - Listen to recordings manually + - Read through transcripts + - Manually categorize outcomes + - Build reports by hand + + This doesn't scale. + + + Atoms automatically: + - Extracts key data from every call + - Categorizes outcomes consistently + - Tracks trends over time + - Provides instant analytics + + Zero manual work. + + + +--- + +## How It Works + +After every call: + +``` +[Call Ends] + ↓ +[AI Analyzes Transcript] + ↓ +[Extracts Defined Metrics] + ↓ +[Stores for Analytics] + ↓ +[Available in Dashboard & API] +``` + +The AI reviews the entire conversation and extracts the metrics you've defined — no manual tagging required. + +--- + +## What You Can Measure + +### Categorical Metrics + +Extract categories or labels: + +| Metric | Example Values | +|--------|----------------| +| **Disposition** | Resolved, Escalated, Follow-up Needed | +| **Intent** | Billing, Support, Sales, General Inquiry | +| **Sentiment** | Positive, Neutral, Negative | +| **Lead Quality** | Hot, Warm, Cold, Not Qualified | +| **Outcome** | Booked, Interested, Not Interested | + +### Numeric Metrics + +Extract numbers or scores: + +| Metric | Example Values | +|--------|----------------| +| **Satisfaction Score** | 1-5 rating | +| **Lead Score** | 0-100 | +| **Urgency Level** | 1-3 | +| **Items Discussed** | Count | + +### Boolean Metrics + +Yes/No questions: + +| Metric | True/False | +|--------|------------| +| **Issue Resolved** | Was the problem solved? | +| **Appointment Booked** | Did they schedule? | +| **Follow-up Required** | Need to call back? | +| **Complaint** | Was this a complaint? 
| + +### Text Extraction + +Pull specific information: + +| Metric | Extracted Text | +|--------|---------------| +| **Issue Summary** | Brief description of the problem | +| **Product Mentioned** | Which product they asked about | +| **Competitor Named** | Any competitors mentioned | +| **Feedback** | Direct quotes of feedback | + +--- + +## Example: Sales Call Metrics + +For a sales team, you might track: + +| Metric | Type | Description | +|--------|------|-------------| +| Lead Status | Categorical | Hot / Warm / Cold / DQ | +| Budget Mentioned | Boolean | Did they discuss budget? | +| Timeline | Categorical | Immediate / This Quarter / Next Year / No Timeline | +| Decision Maker | Boolean | Is this the decision maker? | +| Next Steps | Categorical | Demo / Proposal / Follow-up / None | +| Pain Points | Text | What problems did they mention? | +| Competitor | Text | Any competitor names mentioned | +| Score | Numeric | Overall lead score (0-100) | + +--- + +## Example: Support Call Metrics + +For a support team: + +| Metric | Type | Description | +|--------|------|-------------| +| Issue Category | Categorical | Billing / Technical / Account / Product | +| Resolution | Categorical | Resolved / Escalated / Pending / Transferred | +| First Call Resolution | Boolean | Was it resolved on this call? 
| +| Sentiment | Categorical | Positive / Neutral / Negative | +| CSAT Implied | Numeric | Implied satisfaction (1-5) | +| Issue Summary | Text | Brief description of the issue | +| Action Taken | Text | What the agent did to help | + +--- + +## Built-in vs Custom Metrics + +### Built-in Metrics + +Always available automatically: + +| Metric | Description | +|--------|-------------| +| **Call Duration** | Length of call in seconds | +| **Direction** | Inbound or Outbound | +| **Disposition** | How the call ended | +| **Summary** | AI-generated call summary | +| **Transcript** | Full conversation text | + +### Custom Metrics + +You define based on your business needs: + +- What categories matter to you +- What questions to answer +- What data to extract +- How to score or rate calls + +--- + +## Where Metrics Appear + + + + Every call record shows its extracted metrics. Filter and search by metric values. + + + + See aggregated metrics over time: + - Disposition breakdown + - Sentiment trends + - Resolution rates + - Custom metric distributions + + + + Metrics are included in webhook payloads for real-time processing. + + + + Query metrics via API for custom reporting and integration. + + + + Export call data with all metrics to CSV for external analysis. + + + +--- + +## Next Steps + + + + Set up custom metrics for your agents + + + Understand your metrics dashboard + + diff --git a/fern/products/atoms/pages/platform/features/post-call-metrics.mdx b/fern/products/atoms/pages/platform/features/post-call-metrics.mdx new file mode 100644 index 0000000..478b3d2 --- /dev/null +++ b/fern/products/atoms/pages/platform/features/post-call-metrics.mdx @@ -0,0 +1,161 @@ +--- +title: "Post-Call Metrics" +sidebarTitle: "Post-Call Metrics" +description: "Extract structured data from every conversation automatically." +--- + +Post-call metrics let you pull specific insights from conversations after they end. 
Define what you want to know — satisfaction scores, call outcomes, issue categories — and Atoms analyzes each call to fill in the answers. 
+
+**Location:** Left Sidebar → Post Call Metrics
+
+
+  ![Post-call metrics list](../building-agents/images/post-call-metrics-list.png)
+
+
+---
+
+## How It Works
+
+1. **You define metrics** — What questions do you want answered about each call?
+2. **Call ends** — Conversation completes normally
+3. **AI analyzes** — Atoms reviews the transcript against your metrics
+4. **Data populated** — Your metrics get filled in automatically
+5. **Access anywhere** — View in logs, receive via webhook, export
+
+---
+
+## Creating a New Metric
+
+Click the **Add Metrics +** button to open the configuration panel. You'll see two options:
+
+
+
+
+      ![Disposition metrics](../building-agents/images/post-call-disposition.png)
+
+
+    Build a custom metric from scratch. Fill in the Identifier, Data Type, and Prompt — see details below.
+
+    Use **Add Another +** to create multiple metrics at once.
+
+
+      Don't forget to hit **Save** in the Disposition tab once you're done.
+
+
+
+
+
+      ![Metric templates](../building-agents/images/post-call-templates.png)
+
+
+    Choose from pre-built metrics for common use cases. Just select the ones you want — no manual configuration needed.
+
+
+      Don't forget to hit **Save** in the Disposition tab once you're done.
+
+
+
+
+---
+
+## Configuring a Metric
+
+Each metric needs three things:
+
+| Field | Required | Description |
+|-------|----------|-------------|
+| **Identifier** | Yes | Unique name for this metric. Lowercase, numbers, underscores only. |
+| **Data Type** | Yes | What kind of value: String, Integer, Boolean, Enum, or Datetime |
+| **Prompt** | Yes | The question you want answered about the call |
+
+### Identifier
+
+This is the key used to reference the metric in exports, webhooks, and the API.
+ +``` +customer_satisfaction +call_outcome +follow_up_needed +``` + + +**Naming rules:** Lowercase letters, numbers, and underscores only. No spaces or special characters. + + +### Data Type + +| Type | Use for | Example values | +|------|---------|----------------| +| **String** | Free text, categories | "resolved", "escalated", "billing issue" | +| **Boolean** | Yes/no questions | true, false | +| **Integer** | Whole numbers, scores | 1, 5, 10 | +| **Enum** | Fixed set of options | One of: "low", "medium", "high" | +| **Datetime** | Dates and times | "2024-01-15T10:30:00Z" | + +### Prompt + +This is the question the AI answers by analyzing the transcript. Be specific. + +**Good prompts:** +- "Did the agent acknowledge and respond to customer concerns effectively?" +- "Rate customer satisfaction from 1 to 5 based on tone and words used." +- "What was the primary reason for this call? Options: billing, technical, account, other" + +**Vague prompts to avoid:** +- "Was it good?" +- "Customer happy?" + + +**Start with 3-5 metrics.** Too many can slow analysis and clutter your data. Add more as you learn what insights matter most. + + +--- + +## Example Metrics + + + + | Field | Value | + |-------|-------| + | **Identifier** | `call_outcome` | + | **Data Type** | String | + | **Prompt** | "What was the outcome of this call? Options: resolved, escalated, transferred, abandoned, callback_scheduled" | + + + + | Field | Value | + |-------|-------| + | **Identifier** | `satisfaction_score` | + | **Data Type** | Integer | + | **Prompt** | "Rate the customer's apparent satisfaction from 1 to 5, based on their tone and language throughout the call." | + + + + | Field | Value | + |-------|-------| + | **Identifier** | `follow_up_needed` | + | **Data Type** | Boolean | + | **Prompt** | "Does this call require any follow-up action from the team?" 
|
+
+
+
+    | Field | Value |
+    |-------|-------|
+    | **Identifier** | `issue_category` |
+    | **Data Type** | Enum |
+    | **Prompt** | "What was the primary issue category? Options: billing, technical, account, product_info, complaint, other" |
+
+
+---
+
+## Related
+
+
+
+    View metrics for individual calls
+
+
+    See aggregated trends across calls
+
+
diff --git a/fern/products/atoms/pages/platform/features/variables-reference.mdx b/fern/products/atoms/pages/platform/features/variables-reference.mdx
new file mode 100644
index 0000000..101c68f
--- /dev/null
+++ b/fern/products/atoms/pages/platform/features/variables-reference.mdx
@@ -0,0 +1,69 @@
+---
+title: "Variables"
+sidebarTitle: "Variables"
+description: "Dynamic values for personalization and data flow."
+---
+
+Variables let you insert dynamic values into prompts, conditions, and API calls. Use them to personalize conversations, pass data between systems, and create dynamic flows.
+
+---
+
+## Syntax
+
+```
+{{variable_name}}
+```
+
+Variables are replaced with actual values at runtime.
+
+---
+
+## Variable Types
+
+| Type | Source | Example |
+|------|--------|---------|
+| **System** | Platform-provided | `{{caller_phone}}`, `{{call_duration}}` |
+| **User Defined** | You create | `{{company_name}}`, `{{promo_code}}` |
+| **API** | From API responses | `{{customer_name}}`, `{{account_tier}}` |
+
+---
+
+## System Variables
+
+| Variable | Description |
+|----------|-------------|
+| `{{caller_phone}}` | Caller's phone number |
+| `{{call_start_time}}` | When call started |
+| `{{call_duration}}` | Elapsed seconds |
+| `{{call_direction}}` | "inbound" or "outbound" |
+| `{{agent_id}}` | This agent's ID |
+| `{{call_id}}` | Unique call identifier |
+
+---
+
+## Default Values
+
+Handle missing variables with fallbacks:
+
+```
+Hello {{customer_name|there}}!
+```
+
+If `customer_name` is empty → "Hello there!"
+ +--- + +## Implementation + +Variables work in both agent types with slight differences in how they're configured. + + + + Variables in Configuration Panel + + + Deprecated + + Variables in Workflow Tab + + diff --git a/fern/products/atoms/pages/platform/features/variables/best-practices.mdx b/fern/products/atoms/pages/platform/features/variables/best-practices.mdx new file mode 100644 index 0000000..823f791 --- /dev/null +++ b/fern/products/atoms/pages/platform/features/variables/best-practices.mdx @@ -0,0 +1,217 @@ +--- +title: "Variable Best Practices" +sidebarTitle: "Best Practices" +description: "Tips and patterns for using variables effectively in your agents." +--- + +Variables are powerful, but using them well takes some thought. Here are best practices learned from thousands of agent deployments. + +--- + +## Naming Conventions + + + + | Variable | Why It's Good | + |----------|---------------| + | `{{customer_name}}` | Clear, lowercase, underscores | + | `{{appointment_date}}` | Descriptive, consistent format | + | `{{account_balance}}` | Obvious what it contains | + | `{{provider_first_name}}` | Specific when needed | + + + | Variable | Problem | + |----------|---------| + | `{{cn}}` | Too abbreviated | + | `{{CustomerName}}` | Inconsistent casing | + | `{{data1}}` | Meaningless | + | `{{the_customers_full_name}}` | Too long | + + + +**Rule of thumb:** Someone reading your prompt should understand what the variable contains without checking documentation. + +--- + +## Always Set Defaults + +Never assume a variable will have a value. Set defaults for graceful fallbacks: + + + + ``` + "Hi {{customer_name}}, thanks for calling!" + ``` + + If `customer_name` is empty: + → "Hi , thanks for calling!" (awkward gap) + + Or worse: + → "Hi {{customer_name}}, thanks for calling!" (literal text) + + + ``` + "Hi {{customer_name | default: 'there'}}, thanks for calling!" + ``` + + If `customer_name` is empty: + → "Hi there, thanks for calling!" 
(natural) + + + +### Common Default Patterns + +| Variable | Good Default | +|----------|--------------| +| `{{customer_name}}` | `"there"` or `"friend"` | +| `{{appointment_date}}` | `"your scheduled date"` | +| `{{account_number}}` | `"your account"` | +| `{{amount_due}}` | `"the amount"` | + +--- + +## Keep Variables Contextual + +Don't just insert variables — provide context so the agent uses them naturally: + + + + ``` + Customer name: {{customer_name}} + Phone: {{phone}} + Balance: {{balance}} + + Help the customer. + ``` + + Agent doesn't know *how* to use this info. + + + ``` + CUSTOMER CONTEXT: + - Name: {{customer_name}} (use to address them personally) + - Phone: {{phone}} (for verification if needed) + - Balance: {{balance}} (reference if they ask about billing) + + Greet them by name and ask how you can help. + If they have billing questions, you can reference their balance. + ``` + + Agent knows when and how to use each piece. + + + +--- + +## Validate Before Use + +For critical variables, add validation instructions: + +``` +IMPORTANT: Before discussing account details: +1. Verify caller identity by asking for last 4 digits of SSN +2. Only proceed if {{account_status}} is "active" +3. If {{account_status}} is "suspended", explain they need to contact support directly +``` + +--- + +## Handle Missing Data Gracefully + +Tell your agent what to do when data isn't available: + +``` +If you don't have the customer's name ({{customer_name}} is empty): +- Use "there" instead ("Hi there!") +- Ask for their name early in the conversation +- Store it for the rest of the call + +If appointment information is missing: +- Don't guess or make up details +- Say "I don't see your appointment details. Can you tell me + when your appointment was scheduled for?" 
+``` + +--- + +## Avoid Variable Overload + + + + ``` + Hi {{first_name}} {{last_name}}, this is {{agent_name}} + calling from {{company_name}} regarding your {{product_name}} + {{product_model}} purchased on {{purchase_date}} from + {{store_location}} with order number {{order_id}}... + ``` + + Overwhelming for caller. Too much to process. + + + ``` + Hi {{customer_name}}, this is Sarah from TechCorp + calling about your recent order. + ``` + + Personal but not overwhelming. Additional details + available if the caller asks. + + + +**Rule:** Use 2-3 personalization variables in your opening. Keep the rest available for when needed. + +--- + +## Test with Edge Cases + +Before going live, test what happens when: + +| Scenario | What to Test | +|----------|--------------| +| **Empty values** | All variables missing | +| **Long values** | Very long customer name | +| **Special characters** | Names with apostrophes, accents | +| **Numbers** | Currency, dates, phone numbers | +| **Null vs empty** | API returns null instead of "" | + +--- + +## Security Considerations + + +**Never put sensitive data in prompts that might be logged or visible:** +- Full SSN or government IDs +- Complete credit card numbers +- Passwords or security codes + +**Instead:** Use masked versions (`***-**-1234`) or reference without displaying. + + +--- + +## Variable Documentation + +Keep a reference of your variables: + +| Variable | Source | Type | Example | Default | +|----------|--------|------|---------|---------| +| `customer_name` | CRM API | String | "Sarah Johnson" | "there" | +| `account_balance` | Pre-call API | Currency | "$150.00" | "your balance" | +| `appointment_date` | Campaign CSV | Date | "Jan 21, 2024" | "your appointment" | + + +Keep this documentation updated. When variables change, update your prompts too. 
+ + +--- + +## Next Steps + + + + Populate variables from external systems + + + Use variables in outbound campaigns + + diff --git a/fern/products/atoms/pages/platform/features/variables/system-variables.mdx b/fern/products/atoms/pages/platform/features/variables/system-variables.mdx new file mode 100644 index 0000000..a0aed2f --- /dev/null +++ b/fern/products/atoms/pages/platform/features/variables/system-variables.mdx @@ -0,0 +1,141 @@ +--- +title: "System Variables" +sidebarTitle: "System Variables" +description: "Automatic variables that Atoms provides for every call." +--- + +System variables are automatically populated by Atoms. You don't need to set them up — they're available in every call. + +--- + +## Available System Variables + +### Caller Information + +| Variable | Description | Example Value | +|----------|-------------|---------------| +| `{{caller_phone}}` | Caller's phone number | `+14155551234` | +| `{{caller_country}}` | Country code (if detectable) | `US` | + +### Call Timing + +| Variable | Description | Example Value | +|----------|-------------|---------------| +| `{{call_start_time}}` | When the call began | `2024-01-20T14:30:00Z` | +| `{{current_time}}` | Current time during call | `2024-01-20T14:32:15Z` | +| `{{current_date}}` | Today's date | `2024-01-20` | +| `{{call_duration}}` | How long the call has lasted | `125` (seconds) | + +### Agent Information + +| Variable | Description | Example Value | +|----------|-------------|---------------| +| `{{agent_name}}` | Name of the agent | `Customer Support Bot` | +| `{{agent_id}}` | Unique agent identifier | `agt_abc123` | + +### Call Metadata + +| Variable | Description | Example Value | +|----------|-------------|---------------| +| `{{call_id}}` | Unique call identifier | `call_xyz789` | +| `{{call_direction}}` | Inbound or outbound | `inbound` | +| `{{phone_number}}` | Your Atoms phone number | `+18005551234` | + +--- + +## Using System Variables + +### In Prompts + +``` +You are a 
support agent. The caller's phone number is {{caller_phone}}. + +When ending the call, say: "This call started at {{call_start_time}} +and your reference number is {{call_id}}." +``` + +### In API Calls + +```json +{ + "phone": "{{caller_phone}}", + "call_id": "{{call_id}}", + "timestamp": "{{call_start_time}}" +} +``` + +### In Webhooks + +System variables are automatically included in webhook payloads for call events. + +--- + +## Common Use Cases + + + + Use `{{caller_phone}}` in a Pre-Call API to fetch customer data: + + ``` + GET https://api.yourcrm.com/customers?phone={{caller_phone}} + ``` + + Response populates user variables like `{{customer_name}}`. + + + + Give callers a reference number for follow-up: + + ``` + "Your reference number for this call is {{call_id}}. + Please save this in case you need to follow up." + ``` + + + + While you can't do complex logic with variables alone, you can reference time: + + ``` + "Thanks for calling at {{current_time}}. + Let me check our current availability..." + ``` + + + + Include call data in your Post-Call API: + + ```json + { + "call_id": "{{call_id}}", + "duration": "{{call_duration}}", + "caller": "{{caller_phone}}", + "agent": "{{agent_name}}" + } + ``` + + + +--- + +## Important Notes + + +**Privacy:** System variables may contain personally identifiable information (PII) like phone numbers. Handle them according to your privacy policies. + + + +**Formatting:** Date/time variables use ISO 8601 format. You may need to format them in your backend systems. 
+ + +--- + +## Next Steps + + + + Create your own custom variables + + + Use variables in API integrations + + diff --git a/fern/products/atoms/pages/platform/features/variables/user-variables.mdx b/fern/products/atoms/pages/platform/features/variables/user-variables.mdx new file mode 100644 index 0000000..ae59864 --- /dev/null +++ b/fern/products/atoms/pages/platform/features/variables/user-variables.mdx @@ -0,0 +1,194 @@ +--- +title: "User Variables" +sidebarTitle: "User Variables" +description: "Create custom variables to personalize conversations with your own data." +--- + +User variables are custom variables you define and populate. Use them to bring in customer data, track information collected during calls, and personalize every interaction. + +--- + +## Creating User Variables + + + + 1. Open your agent in the editor + 2. Look for **User Variables** in the right panel + 3. Click **+ Add Variable** + 4. Enter the variable name (e.g., `customer_name`) + 5. Optionally set a default value + 6. Save + + + 1. Select a node where you want to collect data + 2. In node settings, find **Output Variables** + 3. Define what the node should capture + 4. The variable becomes available in subsequent nodes + + + +--- + +## Populating User Variables + +User variables can get their values from several sources: + +### 1. Campaign Data (Outbound Calls) + +When running outbound campaigns, include variable values in your contact list: + +| phone | customer_name | appointment_date | appointment_time | +|-------|---------------|------------------|------------------| +| +14155551234 | Sarah Johnson | January 21, 2024 | 2:00 PM | +| +14155555678 | Mike Chen | January 22, 2024 | 10:00 AM | + +Each call automatically uses that row's data. + +### 2. 
Pre-Call API (Inbound Calls) + +Fetch data based on caller ID before the conversation starts: + +``` +GET https://api.yourcrm.com/customers?phone={{caller_phone}} + +Response: +{ + "name": "Sarah Johnson", + "account_status": "active", + "loyalty_tier": "gold" +} + +Maps to: +{{customer_name}} = "Sarah Johnson" +{{account_status}} = "active" +{{loyalty_tier}} = "gold" +``` + +### 3. During the Conversation + +Collect information from what the caller says: + +**Convo Flow:** Use node output variables to capture responses. + +**Single Prompt:** The agent can be instructed to remember information: +``` +When the caller provides their email address, +store it for use in the confirmation. +``` + +### 4. API Call Responses + +Mid-call API calls can populate variables: + +``` +GET https://api.calendar.com/availability?date={{requested_date}} + +Response: +{ + "available_slots": ["9:00 AM", "2:00 PM", "4:30 PM"] +} + +Maps to: +{{available_slots}} = ["9:00 AM", "2:00 PM", "4:30 PM"] +``` + +--- + +## Common User Variables + +Here are variables commonly used across different agent types: + + + + | Variable | Purpose | + |----------|---------| + | `{{customer_name}}` | Personalized greeting | + | `{{account_number}}` | Reference their account | + | `{{last_order_date}}` | Context for inquiries | + | `{{support_tier}}` | Determine escalation path | + + + | Variable | Purpose | + |----------|---------| + | `{{lead_name}}` | Personal connection | + | `{{company_name}}` | B2B context | + | `{{interest_area}}` | Focus the pitch | + | `{{lead_score}}` | Qualification routing | + + + | Variable | Purpose | + |----------|---------| + | `{{customer_name}}` | Personalized greeting | + | `{{appointment_date}}` | Confirm details | + | `{{appointment_time}}` | Confirm details | + | `{{provider_name}}` | Who they're seeing | + | `{{location}}` | Where to go | + + + | Variable | Purpose | + |----------|---------| + | `{{customer_name}}` | Required for compliance | + | `{{amount_due}}` | 
What they owe | + | `{{due_date}}` | When it's due | + | `{{account_number}}` | Reference | + + + +--- + +## Using User Variables in Prompts + +Once defined, use variables anywhere in your prompts: + +``` +You are a scheduling assistant for Premier Dental. + +IMPORTANT CONTEXT: +- Customer name: {{customer_name}} +- Appointment date: {{appointment_date}} +- Appointment time: {{appointment_time}} +- Provider: {{provider_name}} + +GREETING: +"Hi {{customer_name}}! I'm calling from Premier Dental +to confirm your appointment with {{provider_name}} on +{{appointment_date}} at {{appointment_time}}." +``` + +--- + +## Variable Defaults + +Set default values for when data isn't available: + +| Variable | Default | Result if Empty | +|----------|---------|-----------------| +| `{{customer_name}}` | `"there"` | "Hi there!" | +| `{{appointment_time}}` | `"your scheduled time"` | "...at your scheduled time" | + + +**Always set defaults** for variables that might be empty. Otherwise, the agent might say things like "Hi {{customer_name}}!" literally. + + +--- + +## Variable Scope + + +**Single Prompt:** Variables are available throughout the entire conversation. + +**Convo Flow:** Variables defined in a node are available in that node and all subsequent nodes in the flow. + + +--- + +## Next Steps + + + + Tips for effective variable usage + + + Fetch variables before calls start + + diff --git a/fern/products/atoms/pages/platform/features/variables/what-are-variables.mdx b/fern/products/atoms/pages/platform/features/variables/what-are-variables.mdx new file mode 100644 index 0000000..ce41e70 --- /dev/null +++ b/fern/products/atoms/pages/platform/features/variables/what-are-variables.mdx @@ -0,0 +1,158 @@ +--- +title: "Variables" +sidebarTitle: "What Are Variables?" +description: "Personalize conversations with dynamic data that changes for each caller." +--- + +**Variables** let you inject dynamic information into your agent's conversations. 
Instead of generic responses, your agent can greet callers by name, reference their account details, and personalize the entire experience. + +--- + +## Why Use Variables? + + + + ``` + Agent: "Hi, thanks for calling. How can I help you today?" + + Agent: "I see you have an appointment scheduled. + Would you like to confirm it?" + ``` + + Generic, impersonal, missing key details. + + + ``` + Agent: "Hi Sarah, thanks for calling Acme Support. + How can I help you today?" + + Agent: "I see you have an appointment on Tuesday at 2pm + with Dr. Johnson. Would you like to confirm it?" + ``` + + Personal, specific, builds trust immediately. + + + +--- + +## Variable Syntax + +Use double curly braces to insert variables: + +``` +Hi {{customer_name}}, thanks for calling! + +Your appointment is scheduled for {{appointment_date}} at {{appointment_time}}. + +Your account balance is {{account_balance}}. +``` + + +Variable names are case-sensitive. `{{Customer_Name}}` and `{{customer_name}}` are different. + + +--- + +## Two Types of Variables + +Atoms has two categories of variables: + +| Type | What It Is | Where It Comes From | +|------|------------|---------------------| +| **System Variables** | Automatic call data | Atoms provides these | +| **User Variables** | Your custom data | You define and populate these | + + + + Caller ID, timestamp, call duration, etc. + + + Customer name, account info, custom data + + + +--- + +## Where Can You Use Variables? 
+ +Variables work in: + +| Location | Example Use | +|----------|-------------| +| **Agent prompts** | "Greet the caller by {{customer_name}}" | +| **Node prompts** (Convo Flow) | "Your balance is {{account_balance}}" | +| **API call bodies** | `{"phone": "{{caller_phone}}"}` | +| **Webhook payloads** | Include call data in notifications | + +--- + +## How Variables Get Their Values + + + ### System Variables + + Populated automatically by Atoms when the call connects: + - `{{caller_phone}}` — The caller's phone number + - `{{call_start_time}}` — When the call began + - `{{agent_name}}` — Your agent's name + + ### User Variables (Outbound Calls) + + When making outbound calls via campaigns, you provide the data: + - Upload a CSV with customer info + - Each row populates variables for that call + - `{{customer_name}}`, `{{appointment_date}}`, etc. + + ### User Variables (Inbound Calls) + + For inbound calls, populate variables via: + - **Pre-Call API** — Fetch data based on caller ID + - **During the call** — Collect and store what the caller says + - **Integrations** — Pull from CRM based on phone number + + +--- + +## Quick Example + +**Prompt using variables:** +``` +You are a scheduling assistant for Premier Dental. + +When the call connects, greet the caller: +"Hi {{customer_name}}, this is a reminder about your +appointment on {{appointment_date}} at {{appointment_time}} +with {{provider_name}}." + +Ask if they'd like to confirm, reschedule, or cancel. +``` + +**For a call where:** +- `customer_name` = "Sarah" +- `appointment_date` = "Tuesday, January 21st" +- `appointment_time` = "2:00 PM" +- `provider_name` = "Dr. Johnson" + +**Agent says:** +``` +"Hi Sarah, this is a reminder about your appointment on +Tuesday, January 21st at 2:00 PM with Dr. Johnson. +Would you like to confirm, reschedule, or cancel?" 
+``` + +--- + +## Next Steps + + + + See all automatic variables available + + + Create and use your own variables + + + Tips for effective variable usage + + diff --git a/fern/products/atoms/pages/platform/features/webhooks.mdx b/fern/products/atoms/pages/platform/features/webhooks.mdx new file mode 100644 index 0000000..f76911b --- /dev/null +++ b/fern/products/atoms/pages/platform/features/webhooks.mdx @@ -0,0 +1,130 @@ +--- +title: "Webhooks" +sidebarTitle: "Webhooks" +description: "Send call data to your systems when events occur." +--- + +Webhooks notify your systems in real-time when events happen — a call starts, ends, or analytics complete. Atoms sends an HTTP POST to your URL with the relevant data. + +--- + +## Managing Webhooks + +All webhooks are created and managed from the dashboard. + +**Location:** Left Sidebar → Settings → Webhook + + + Webhooks dashboard + + +The table shows all your webhooks with their URL, assigned agents, created date, and status. + +--- + +## Creating a Webhook + +Click **Create** to add a new webhook endpoint. + + + Create webhook + + +| Field | Description | +|-------|-------------| +| **Endpoint URL** | Your server URL that receives the POST requests | +| **Description** | Optional note about what this webhook is for | + +The right panel shows example Flask code for handling webhooks with HMAC signature verification. + +Click **Add endpoint** to save. + +--- + +## Webhook Details + +Click any webhook to see its details and subscriptions. + + + Webhook details + + +| Field | What It Shows | +|-------|---------------| +| **Webhook URL** | The endpoint receiving events | +| **Description** | Your note | +| **Subscriptions** | Which agents are using this webhook and what events | +| **Status** | Enabled or disabled | +| **Signing Secret** | For verifying requests are from Atoms | + +--- + +## Adding to an Agent + +Once a webhook exists, connect it to your agent. 
+ +**Location:** Agent Editor → Agent Settings → Webhook tab + + + Webhook in agent + + +Select your webhook from the list. The agent will now send events to that endpoint. + +--- + +## Events + +| Event | When It Fires | +|-------|---------------| +| **pre-conversation** | Call is about to start | +| **analytics-completed** | Post-call analysis is ready | + +Subscribe to the events you need when connecting the webhook to an agent. + +--- + +## Payload Data + +Each event sends relevant data: + +**pre-conversation:** +- Caller phone number +- Agent ID +- Call direction +- Timestamp + +**analytics-completed:** +- Full transcript +- Call duration +- Post-call metrics +- Variables collected + +--- + +## Tips + + + Use the signing secret to verify requests actually come from Atoms. The example code in the Create modal shows how. + + + + If your endpoint is down, events may be lost. Log everything and consider retry logic. + + + + You can connect the same webhook to multiple agents. The payload includes the agent ID so you know which agent sent it. + + +--- + +## Related + + + + Connect Salesforce, Zendesk, and more + + + Make requests during conversations + + diff --git a/fern/products/atoms/pages/platform/features/webhooks/creating-endpoints.mdx b/fern/products/atoms/pages/platform/features/webhooks/creating-endpoints.mdx new file mode 100644 index 0000000..3e56885 --- /dev/null +++ b/fern/products/atoms/pages/platform/features/webhooks/creating-endpoints.mdx @@ -0,0 +1,141 @@ +--- +title: "Creating Webhook Endpoints" +sidebarTitle: "Creating Endpoints" +description: "Set up endpoints to receive webhook data from Atoms." +--- + +Before you can receive webhooks, you need to create endpoint configurations in Atoms. This page walks you through the process. + +--- + +## What You Need + +Before starting: + +1. **A URL that can receive POST requests** — Your server endpoint +2. **HTTPS** — Required for security +3. 
**200 OK response** — Endpoint must acknowledge receipt + +--- + +## Creating an Endpoint + +→ **NEEDS PLATFORM INFO:** Detailed endpoint creation interface + +### Step 1: Navigate to Webhooks + +In your Atoms dashboard, find the Webhooks section. + +### Step 2: Click Create + +Click **"Create Endpoint"** or **"Add Webhook"**. + +### Step 3: Configure Endpoint + +| Field | Description | Example | +|-------|-------------|---------| +| **Name** | Descriptive label | "CRM Integration" | +| **URL** | Your endpoint URL | https://api.yoursite.com/webhooks/atoms | +| **Secret** | (Optional) Signing secret | For payload verification | + +### Step 4: Save + +Save the endpoint configuration. + +--- + +## Endpoint Requirements + +### HTTPS Required + +Your endpoint must use HTTPS. HTTP endpoints are not supported for security. + +### Respond with 200 + +Your endpoint must return HTTP 200 to acknowledge receipt: + +```javascript +// Example Node.js endpoint +app.post('/webhooks/atoms', (req, res) => { + const payload = req.body; + + // Process the webhook... + processWebhook(payload); + + // Acknowledge receipt + res.status(200).send('OK'); +}); +``` + +### Timeout Handling + +Atoms waits a limited time for response. If your processing takes long: +1. Acknowledge immediately +2. Process asynchronously + +--- + +## Signing and Verification + +To verify webhooks are actually from Atoms: + +1. Set a **Secret** when creating the endpoint +2. Atoms signs payloads with this secret +3. Your endpoint verifies the signature + +→ **NEEDS PLATFORM INFO:** Signature verification details + +--- + +## Testing Your Endpoint + +### Before Connecting + +1. Create your endpoint on your server +2. Test it can receive POST requests +3. Verify it returns 200 + +### After Creating in Atoms + +1. Assign endpoint to an agent (Webhook Tab) +2. Select events to subscribe +3. Make a test call +4. 
Check your endpoint received the data + +--- + +## Multiple Endpoints + +You can create multiple endpoints for different purposes: + +| Endpoint | Purpose | +|----------|---------| +| CRM Integration | Send to Salesforce | +| Analytics | Send to data warehouse | +| Notifications | Trigger Slack alerts | + +Assign different endpoints to different agents as needed. + +--- + +## Troubleshooting + +| Issue | Solution | +|-------|----------| +| Not receiving webhooks | Verify endpoint URL, check firewall | +| Getting 401/403 | Check authentication requirements | +| Timeout errors | Process async, respond quickly | +| Missing data | Verify correct events subscribed | + +--- + +## What's Next + + + + Understand data schemas + + + Common webhook patterns + + diff --git a/fern/products/atoms/pages/platform/features/webhooks/event-payloads.mdx b/fern/products/atoms/pages/platform/features/webhooks/event-payloads.mdx new file mode 100644 index 0000000..d18f55e --- /dev/null +++ b/fern/products/atoms/pages/platform/features/webhooks/event-payloads.mdx @@ -0,0 +1,174 @@ +--- +title: "Event Payloads" +sidebarTitle: "Event Payloads" +description: "Data schemas for each webhook event type." +--- + +Each webhook event sends a JSON payload with relevant data. This page documents the structure of each event type. + +--- + +## Common Fields + +All webhooks include these base fields: + +```json +{ + "event": "call.start | call.end | analytics.completed", + "timestamp": "2024-07-15T14:30:00Z", + "agent_id": "agent_abc123", + "call_id": "call_xyz789" +} +``` + +| Field | Type | Description | +|-------|------|-------------| +| `event` | string | Event type identifier | +| `timestamp` | ISO 8601 | When the event occurred | +| `agent_id` | string | Agent that handled the call | +| `call_id` | string | Unique call identifier | + +--- + +## Call Start Event + +**Event:** `call.start` + +Sent when a call connects, before conversation begins. 
+ +```json +{ + "event": "call.start", + "timestamp": "2024-07-15T14:30:00Z", + "agent_id": "agent_abc123", + "call_id": "call_xyz789", + "call_direction": "inbound", + "caller_phone": "+15551234567", + "agent_phone": "+15559876543" +} +``` + +→ **NEEDS PLATFORM INFO:** Complete start event payload schema + +### Fields + +| Field | Type | Description | +|-------|------|-------------| +| `call_direction` | string | "inbound" or "outbound" | +| `caller_phone` | string | Caller's phone number | +| `agent_phone` | string | Phone number called/calling from | + +--- + +## Call End Event + +**Event:** `call.end` + +Sent when a call concludes. + +```json +{ + "event": "call.end", + "timestamp": "2024-07-15T14:35:00Z", + "agent_id": "agent_abc123", + "call_id": "call_xyz789", + "duration_seconds": 300, + "disposition": "successful", + "transcript": [ + {"role": "agent", "content": "Hello! How can I help?", "timestamp": "..."}, + {"role": "user", "content": "I have a question...", "timestamp": "..."} + ], + "variables": { + "customer_name": "Jane Smith", + "issue_type": "billing" + } +} +``` + +→ **NEEDS PLATFORM INFO:** Complete end event payload schema + +### Fields + +| Field | Type | Description | +|-------|------|-------------| +| `duration_seconds` | number | Call length in seconds | +| `disposition` | string | Outcome category | +| `transcript` | array | Full conversation | +| `variables` | object | Variables collected/set | + +--- + +## Analytics Completed Event + +**Event:** `analytics.completed` + +Sent when post-call analysis finishes. 
+ +```json +{ + "event": "analytics.completed", + "timestamp": "2024-07-15T14:36:00Z", + "agent_id": "agent_abc123", + "call_id": "call_xyz789", + "metrics": { + "satisfaction_score": 4, + "issue_resolved": true, + "follow_up_needed": false, + "product_discussed": "Widget Pro" + } +} +``` + +→ **NEEDS PLATFORM INFO:** Complete analytics event payload schema + +### Fields + +| Field | Type | Description | +|-------|------|-------------| +| `metrics` | object | Post-call metrics you configured | + +Metric fields depend on what you set up in Post-Call Metrics. + +--- + +## Handling Payloads + +### Parse JSON + +All payloads are JSON: + +```javascript +const payload = JSON.parse(req.body); +const eventType = payload.event; +``` + +### Route by Event Type + +Handle different events appropriately: + +```javascript +switch (payload.event) { + case 'call.start': + handleCallStart(payload); + break; + case 'call.end': + handleCallEnd(payload); + break; + case 'analytics.completed': + handleAnalytics(payload); + break; +} +``` + +--- + +## What's Next + + + + Common webhook patterns + + + Set up your endpoints + + diff --git a/fern/products/atoms/pages/platform/features/webhooks/setting-up.mdx b/fern/products/atoms/pages/platform/features/webhooks/setting-up.mdx new file mode 100644 index 0000000..94b1dbb --- /dev/null +++ b/fern/products/atoms/pages/platform/features/webhooks/setting-up.mdx @@ -0,0 +1,270 @@ +--- +title: "Setting Up Webhooks" +sidebarTitle: "Setting Up" +description: "Configure webhook endpoints to receive real-time event notifications." +--- + +This guide walks you through setting up webhooks to receive real-time notifications from your Atoms agents. 
+ +--- + +## Prerequisites + +Before setting up webhooks, you need: +- An HTTPS endpoint that can receive POST requests +- Ability to respond with 200 status within 30 seconds +- (Recommended) Way to verify webhook signatures + +--- + +## Creating a Webhook + + + ### Navigate to Webhooks + + Go to **Settings** → **Webhooks** in your dashboard. + + ### Click "Add Webhook" + + Click the **+ Add Webhook** button. + + ### Configure Endpoint + + | Field | Description | Example | + |-------|-------------|---------| + | **Name** | Descriptive label | "CRM Sync Webhook" | + | **URL** | Your HTTPS endpoint | `https://api.yoursite.com/webhooks/atoms` | + | **Description** | What it's for | "Syncs call data to Salesforce" | + + ### Select Events + + Choose which events trigger this webhook: + + - [ ] `call.started` — Call connects + - [x] `call.ended` — Call completes + - [ ] `call.transferred` — Transfer initiated + - [x] `recording.ready` — Recording available + - [x] `transcript.ready` — Transcript ready + + ### Configure Authentication (Optional) + + Add headers your endpoint requires: + + | Header | Value | + |--------|-------| + | `Authorization` | `Bearer your-secret-token` | + | `X-Custom-Header` | `custom-value` | + + ### Save and Test + + Click **Save**, then use **Test Webhook** to verify it works. + + +--- + +## Endpoint Requirements + +Your webhook endpoint must: + +### Accept POST Requests + +``` +POST https://your-endpoint.com/webhook +Content-Type: application/json + +{...webhook payload...} +``` + +### Respond Quickly + +- Return `200 OK` within **30 seconds** +- Do heavy processing asynchronously +- Queue data for later if needed + +### Handle Retries + +If your endpoint fails, Atoms retries: + +| Attempt | Delay | +|---------|-------| +| 1st retry | 1 minute | +| 2nd retry | 5 minutes | +| 3rd retry | 30 minutes | +| 4th retry | 2 hours | +| 5th retry | 24 hours | + +After 5 failures, the webhook is paused. 
+ +--- + +## Verifying Webhook Signatures + +Atoms signs all webhooks so you can verify they're authentic. + +### Signature Header + +Each request includes: +``` +X-Atoms-Signature: sha256=abc123... +``` + +### Verification Code + +```javascript +const crypto = require('crypto'); + +function verifySignature(payload, signature, secret) { + const expected = 'sha256=' + crypto + .createHmac('sha256', secret) + .update(payload) + .digest('hex'); + + return crypto.timingSafeEqual( + Buffer.from(signature), + Buffer.from(expected) + ); +} + +// In your handler: +app.post('/webhook', (req, res) => { + const signature = req.headers['x-atoms-signature']; + const isValid = verifySignature( + JSON.stringify(req.body), + signature, + process.env.WEBHOOK_SECRET + ); + + if (!isValid) { + return res.status(401).send('Invalid signature'); + } + + // Process webhook... + res.status(200).send('OK'); +}); +``` + +### Getting Your Secret + +Find your webhook signing secret in **Settings** → **Webhooks** → **Signing Secret**. + +--- + +## Testing Webhooks + +### Use the Test Button + +1. Go to your webhook configuration +2. Click **Test Webhook** +3. Select an event type +4. 
Click **Send Test** + +### Check Recent Deliveries + +View webhook delivery history: +- Successful deliveries ✓ +- Failed attempts ✗ +- Request/response details + +### Use a Testing Tool + +For development, try: +- [webhook.site](https://webhook.site) — Free public endpoints +- [ngrok](https://ngrok.com) — Tunnel to localhost +- [Beeceptor](https://beeceptor.com) — Mock endpoints + +--- + +## Example Endpoint (Node.js) + +```javascript +const express = require('express'); +const app = express(); + +app.use(express.json()); + +app.post('/webhooks/atoms', async (req, res) => { + // Respond immediately + res.status(200).send('OK'); + + // Process asynchronously + const { event, data, call_id } = req.body; + + switch (event) { + case 'call.ended': + await processCallEnded(data); + break; + case 'recording.ready': + await storeRecording(data.recording_url); + break; + case 'transcript.ready': + await processTranscript(data.transcript_url); + break; + } +}); + +async function processCallEnded(data) { + // Log to your CRM + await crm.createActivity({ + phone: data.caller_phone, + duration: data.duration_seconds, + summary: data.summary, + outcome: data.disposition + }); +} + +app.listen(3000); +``` + +--- + +## Troubleshooting + + + + **Check:** + 1. Endpoint URL is correct and HTTPS + 2. Endpoint is publicly accessible + 3. Firewall allows incoming requests + 4. Events are selected + + **Test:** Use the Test button and check delivery logs + + + + **Check:** + 1. Using correct signing secret + 2. Payload isn't modified before verification + 3. 
Using raw request body (not parsed JSON) + + + + **Fix:** + - Respond with 200 immediately + - Process webhook data asynchronously + - Don't do heavy work in the request handler + + + + **Why:** Retries can cause duplicates + + **Fix:** Use `call_id` as idempotency key: + ```javascript + if (await alreadyProcessed(call_id)) { + return res.status(200).send('Already processed'); + } + ``` + + + +--- + +## Next Steps + + + + Implementation examples + + + Fix common issues + + diff --git a/fern/products/atoms/pages/platform/features/webhooks/troubleshooting.mdx b/fern/products/atoms/pages/platform/features/webhooks/troubleshooting.mdx new file mode 100644 index 0000000..bff896f --- /dev/null +++ b/fern/products/atoms/pages/platform/features/webhooks/troubleshooting.mdx @@ -0,0 +1,318 @@ +--- +title: "Troubleshooting Webhooks" +sidebarTitle: "Troubleshooting" +description: "Fix common webhook issues and debugging tips." +--- + +Webhooks not working? Here's how to diagnose and fix common issues. + +--- + +## Checking Webhook Status + +### View Delivery Logs + +1. Go to **Settings** → **Webhooks** +2. Click on your webhook +3. View **Recent Deliveries** + +Each delivery shows: +- Timestamp +- Event type +- Status (success/failure) +- Response code +- Request/response bodies + +--- + +## Common Issues + + + + **Symptoms:** No delivery attempts in logs + + **Check:** + 1. **Webhook is active** — Not paused or disabled + 2. **Events selected** — The event type you expect is checked + 3. **Calls happening** — Verify calls are completing + 4. 
**Agent linked** — Some webhooks are agent-specific + + **Fix:** Re-enable webhook, verify event selection, test with manual call + + + + **Symptoms:** 400, 401, 403, 404 errors + + **400 Bad Request:** + - Your endpoint doesn't like the payload + - Check your handler accepts JSON + + **401 Unauthorized:** + - Auth header not matching + - Check custom headers configuration + + **403 Forbidden:** + - IP restrictions on your endpoint + - Whitelist Atoms IPs or remove restrictions + + **404 Not Found:** + - URL is wrong + - Endpoint doesn't exist + - Check for typos in URL + + + + **Symptoms:** 500, 502, 503, 504 errors + + **500 Internal Server Error:** + - Your code is throwing an error + - Check your server logs + - Handle edge cases in payload + + **502 Bad Gateway:** + - Upstream server issue + - Check your infrastructure + + **503 Service Unavailable:** + - Your server is overloaded + - Scale up or optimize + + **504 Gateway Timeout:** + - Response took > 30 seconds + - Respond immediately, process async + + + + **Symptoms:** Delivery fails with timeout + + **Cause:** Endpoint takes > 30 seconds to respond + + **Fix:** Respond with 200 immediately, then process async: + + ```javascript + app.post('/webhook', (req, res) => { + // Respond immediately + res.status(200).send('OK'); + + // Process in background + processWebhook(req.body).catch(console.error); + }); + ``` + + + + **Symptoms:** Your verification returns invalid + + **Check:** + 1. **Correct secret** — Using the signing secret from settings + 2. **Raw body** — Verify against raw body, not parsed JSON + 3. 
**No modifications** — Body isn't altered before verification + + **Debug:** + ```javascript + console.log('Received signature:', req.headers['x-atoms-signature']); + console.log('Raw body:', JSON.stringify(req.body)); + console.log('Computed:', computeSignature(req.body, secret)); + ``` + + + + **Symptoms:** Same event received multiple times + + **Cause:** Retries due to: + - Timeout (you didn't respond in time) + - Server error (5xx response) + - Network issue + + **Fix:** Implement idempotency: + ```javascript + const processedEvents = new Set(); + + app.post('/webhook', async (req, res) => { + const eventId = `${req.body.call_id}-${req.body.event}`; + + if (processedEvents.has(eventId)) { + return res.status(200).send('Already processed'); + } + + processedEvents.add(eventId); + res.status(200).send('OK'); + + // Process... + }); + ``` + + + + **Symptoms:** Expected fields are null or missing + + **Causes:** + - Data not collected during call + - Field is optional + - Different event type + + **Fix:** Always check for field existence: + ```javascript + const customerName = data.customer_name || 'Unknown'; + const summary = data.summary || 'No summary available'; + ``` + + + +--- + +## Testing Tips + +### Use webhook.site for Debugging + +1. Go to [webhook.site](https://webhook.site) +2. Copy your unique URL +3. Set as webhook endpoint temporarily +4. Make a test call +5. Inspect the payload + +### Local Development with ngrok + +```bash +# Start your local server +node server.js # Running on port 3000 + +# In another terminal, start ngrok +ngrok http 3000 + +# Use the ngrok URL as your webhook endpoint +# https://abc123.ngrok.io/webhook +``` + +### Manual Test from Dashboard + +1. Go to webhook settings +2. Click **Test Webhook** +3. Select event type +4. Click **Send** +5. 
Check delivery logs + +--- + +## Retry Behavior + +When delivery fails, Atoms retries: + +| Attempt | Delay After | +|---------|-------------| +| Initial | Immediate | +| Retry 1 | 1 minute | +| Retry 2 | 5 minutes | +| Retry 3 | 30 minutes | +| Retry 4 | 2 hours | +| Retry 5 | 24 hours | + +After 5 failed retries: +- Webhook is **paused** +- You receive notification +- Fix the issue and re-enable + +--- + +## Best Practices for Reliability + + + + ```javascript + // ✅ Good - respond immediately + app.post('/webhook', (req, res) => { + res.status(200).send('OK'); + processAsync(req.body); + }); + + // ❌ Bad - blocking response + app.post('/webhook', async (req, res) => { + await saveToDatabase(req.body); // Slow! + await sendEmail(req.body); // Even slower! + res.status(200).send('OK'); + }); + ``` + + + + For reliable processing, queue webhooks: + + ```javascript + app.post('/webhook', async (req, res) => { + await queue.add('process-webhook', req.body); + res.status(200).send('OK'); + }); + + // Worker processes queue + queue.process('process-webhook', async (job) => { + await processWebhook(job.data); + }); + ``` + + + + Don't fail on unexpected events: + + ```javascript + app.post('/webhook', (req, res) => { + res.status(200).send('OK'); + + switch (req.body.event) { + case 'call.ended': + handleCallEnded(req.body); + break; + case 'recording.ready': + handleRecording(req.body); + break; + default: + // Ignore unknown events gracefully + console.log('Unknown event:', req.body.event); + } + }); + ``` + + + + Log for debugging: + + ```javascript + app.post('/webhook', (req, res) => { + console.log('Webhook received:', { + event: req.body.event, + call_id: req.body.call_id, + timestamp: req.body.timestamp + }); + + res.status(200).send('OK'); + + processWebhook(req.body).catch(err => { + console.error('Webhook processing error:', err); + }); + }); + ``` + + + +--- + +## Getting Help + +If you've tried everything: + +1. 
**Gather info:** + - Webhook URL + - Event types selected + - Recent delivery logs + - Your server logs + +2. **Check status page:** + - Are there known issues? + +3. **Contact support:** + - Include webhook ID + - Include failed delivery IDs + - Describe expected vs actual behavior + + + Get help from the Atoms team + diff --git a/fern/products/atoms/pages/platform/features/webhooks/use-cases.mdx b/fern/products/atoms/pages/platform/features/webhooks/use-cases.mdx new file mode 100644 index 0000000..fda7fe7 --- /dev/null +++ b/fern/products/atoms/pages/platform/features/webhooks/use-cases.mdx @@ -0,0 +1,244 @@ +--- +title: "Webhook Use Cases" +sidebarTitle: "Use Cases" +description: "Common patterns for using webhooks effectively." +--- + +Webhooks enable powerful integrations. Here are common patterns and how to implement them. + +--- + +## CRM Integration + +### Goal + +Automatically log calls and update contact records in your CRM. + +### Implementation + +**On `call.end`:** + +1. Receive webhook with transcript and outcome +2. Look up contact by phone number +3. Create/update contact record +4. Add call notes with summary +5. Create follow-up tasks if needed + +### Example Flow + +``` +call.end webhook received + ↓ +Parse caller_phone from payload + ↓ +Search CRM for matching contact + ↓ +If exists: Update record +If not: Create new contact + ↓ +Add activity: "Voice call - {duration}min - {disposition}" + ↓ +If follow_up_needed: Create task +``` + +--- + +## Real-Time Dashboard + +### Goal + +Show live call activity and metrics on a dashboard. 
+ +### Implementation + +**On `call.start`:** +- Add to "active calls" count +- Show caller info in live feed + +**On `call.end`:** +- Remove from active calls +- Update completed calls count +- Recalculate metrics + +**On `analytics.completed`:** +- Update satisfaction scores +- Refresh aggregate statistics + +### Technology Options + +- WebSocket connection to frontend +- Push notifications +- Server-sent events +- Polling from dashboard + +--- + +## Email Follow-Up + +### Goal + +Send personalized follow-up emails based on call outcomes. + +### Implementation + +**On `analytics.completed`:** + +1. Receive metrics (satisfaction, resolution, etc.) +2. Determine email template: + - Satisfied + Resolved → "Thank you" email + - Unsatisfied → "We're sorry" + escalation + - Unresolved → "Additional resources" email +3. Queue or send email + +### Example Logic + +```javascript +if (metrics.satisfaction_score >= 4 && metrics.issue_resolved) { + sendEmail('thank_you_template', caller_email); +} else if (metrics.satisfaction_score <= 2) { + sendEmail('escalation_template', caller_email); + notifyManager(call_id); +} +``` + +--- + +## Ticket Creation + +### Goal + +Automatically create support tickets from calls. + +### Implementation + +**On `call.end`:** + +1. Check if issue was resolved +2. If unresolved, create ticket: + - Title: Issue type + - Description: Transcript summary + - Priority: Based on sentiment + - Customer: Caller info +3. Assign to appropriate queue + +--- + +## Data Warehouse + +### Goal + +Store all call data for analytics and reporting. + +### Implementation + +**On `call.end` and `analytics.completed`:** + +1. Receive payload +2. Transform to your schema +3. Insert into data warehouse +4. 
Available for BI tools + +### Data Points to Capture + +- Call metadata (time, duration, direction) +- Transcript (full or summarized) +- Outcome (disposition) +- Metrics (satisfaction, resolution) +- Variables (customer data collected) + +--- + +## Slack Notifications + +### Goal + +Alert team members about important call events. + +### Implementation + +**On `call.end`:** + +If certain conditions: +- Escalation requested +- Low satisfaction +- High-value customer + +Send Slack message with: +- Call summary +- Customer info +- Action needed + +### Example Triggers + +| Condition | Notification | +|-----------|--------------| +| Transfer requested | "#support - Transfer requested by [customer]" | +| Satisfaction ≤ 2 | "#alerts - Unhappy customer: [summary]" | +| VIP customer | "#vip - VIP call completed: [outcome]" | + +--- + +## Multi-System Sync + +### Goal + +Keep multiple systems updated with call data. + +### Implementation + +Your webhook endpoint fans out to multiple services: + +``` +Webhook received + ↓ + Process payload + ↓ + ├── Update CRM + ├── Update Analytics + ├── Update Helpdesk + └── Log to Database +``` + +Use async processing so one slow system doesn't block others. + +--- + +## Best Practices + +### Acknowledge Quickly + +Return 200 immediately, process asynchronously: + +```javascript +app.post('/webhook', (req, res) => { + res.status(200).send('OK'); // Acknowledge immediately + processAsync(req.body); // Process in background +}); +``` + +### Handle Failures + +What if your processing fails? +- Log the failure +- Queue for retry +- Alert if persistent + +### Idempotency + +Webhooks may be sent more than once. 
Handle duplicates: +- Track `call_id` to detect duplicates +- Use upsert operations +- Design for idempotency + +--- + +## What's Next + + + + Dynamic values in prompts + + + External API integration + + diff --git a/fern/products/atoms/pages/platform/features/webhooks/what-are-webhooks.mdx b/fern/products/atoms/pages/platform/features/webhooks/what-are-webhooks.mdx new file mode 100644 index 0000000..2b5593e --- /dev/null +++ b/fern/products/atoms/pages/platform/features/webhooks/what-are-webhooks.mdx @@ -0,0 +1,135 @@ +--- +title: "What Are Webhooks" +sidebarTitle: "What Are Webhooks" +description: "Automatically send call data to your systems when events occur." +--- + +Webhooks are HTTP callbacks that notify your systems when something happens in Atoms. When a call starts, ends, or analytics complete, Atoms sends data to your specified URL. + +--- + +## How Webhooks Work + +1. **Event occurs** — A call starts, ends, or analytics finish +2. **Atoms sends POST** — HTTP request to your endpoint +3. **Your system receives** — Payload with event data +4. 
**You process it** — Log, update CRM, trigger workflows + +--- + +## Why Use Webhooks + +### Real-Time Data + +Get call information instantly: +- Don't poll for updates +- React immediately +- Keep systems synchronized + +### Automation + +Trigger automatic workflows: +- Create CRM records +- Send follow-up emails +- Update dashboards +- Notify teams + +### Integration + +Connect Atoms to any system: +- Your CRM +- Analytics platforms +- Databases +- Third-party services +- Custom applications + +--- + +## Available Events + +| Event | When It Fires | Common Uses | +|-------|---------------|-------------| +| **Start** | Call begins | Start tracking, log initiation | +| **End** | Call concludes | Log transcript, update records | +| **Analytics Completed** | Analysis ready | Capture metrics, update scores | + +--- + +## What's in the Payload + +Each webhook includes relevant data: + +### Start Event + +- Caller phone number +- Agent ID +- Timestamp +- Call direction + +### End Event + +- Full transcript +- Call duration +- Disposition (outcome) +- Variables collected + +### Analytics Completed + +- Post-call metrics +- Sentiment analysis +- Extracted data +- Custom metrics + +→ See: [Event Payloads](/atoms/atoms-platform/features/webhooks) for detailed schemas + +--- + +## Example Use Cases + +### CRM Integration + +When call ends: +1. Webhook sends transcript and outcome +2. Your endpoint creates/updates contact record +3. Notes are added to the contact +4. Tasks are created for follow-up + +### Real-Time Dashboard + +When call starts: +1. Webhook notifies your system +2. Dashboard shows active call +3. Live stats update + +When call ends: +4. Dashboard updates completion stats +5. Metrics recalculate + +### Email Follow-Up + +When analytics complete: +1. Webhook sends satisfaction score +2. Your system evaluates score +3. Sends appropriate follow-up email + +--- + +## Getting Started + +1. **Create an endpoint** — URL that can receive POST requests +2. 
**Add to Atoms** — Configure in Webhooks settings +3. **Subscribe to events** — Choose which events trigger +4. **Test** — Make test calls and verify receipt + +--- + +## What's Next + + + + Set up your webhook endpoints + + + Understand data schemas + + diff --git a/fern/products/atoms/pages/platform/features/widget.mdx b/fern/products/atoms/pages/platform/features/widget.mdx new file mode 100644 index 0000000..b60d89a --- /dev/null +++ b/fern/products/atoms/pages/platform/features/widget.mdx @@ -0,0 +1,174 @@ +--- +title: "Widget" +sidebarTitle: "Widget" +description: "Embed your voice agent directly on your website." +--- + +The widget lets visitors talk to your agent without leaving your site — no phone call needed. They can either text or speak, right from their browser. + +**Location:** Left Sidebar → Widget + + + ![Widget configuration](../building-agents/images/widget-full.png) + + +--- + +## Embed Code + +At the top, you'll find your embed snippet. Copy this and paste it into your website's HTML. + +```html + + +``` + +That's it — the widget will appear on your site. Everything else on this page customizes how it looks and behaves. + +--- + +## Mode + +Choose how users interact with your widget: + +| Mode | Description | +|------|-------------| +| **Chat** | Users type messages. Good for quieter environments or when voice isn't practical. | +| **Voice** | Users speak directly. The full voice agent experience in the browser. | + +Pick based on your audience. Voice feels more natural for most support scenarios. Chat works better for quick questions or when users might be in public. + +--- + +## Allowlist + +By default, any website can embed your widget. That's convenient for testing, but risky for production. + + +**Set up an allowlist** before going live. Without one, anyone could embed your agent on their site, potentially running up your usage or misrepresenting your brand. 
+ + +Click **+ Add host** to specify which domains can use your widget: + +``` +yourdomain.com +app.yourdomain.com +``` + +Only these sites will be able to load the widget. + +--- + +## Appearance + +### Variant + +How much screen space should the widget take? + + + + + ![Tiny widget](../building-agents/images/widget-tiny.png) + + + Minimal footprint. Just a small button that expands when clicked. Best when you want the widget available but unobtrusive. + + + + + ![Compact widget](../building-agents/images/widget-compact.png) + + + Balanced size. Visible enough to invite interaction, but doesn't dominate the page. Good default for most sites. + + + + + ![Full widget](../building-agents/images/widget-full-variant.png) + + + Prominent and hard to miss. Use when the widget is a primary way users should interact with your site. + + + +### Placement + +Where the widget button appears on screen. Currently supports **Bottom-right**. + + +The preview on this page always shows bottom-right. The placement you choose applies when embedded on your actual site. + + +--- + +## Theme + +Make the widget match your brand. + +| Setting | What it affects | Default | +|---------|-----------------|---------| +| **Widget Background Color** | Main widget background | `#ffffff` | +| **Brand Accent Color** | Buttons, highlights | `#2d9d9f` | +| **Agent Message Background** | Agent response bubbles | `#f3f4f6` | +| **Text on Accent Color** | Text on accent-colored elements | `#FFFFFF` | +| **Primary Text Color** | Main text | `#111827` | +| **Secondary Text Color** | Subtitles, hints | `#6b7280` | + +Click any color to open a picker, or paste a hex code directly. + +--- + +## Avatar + +Upload an image to represent your agent in the widget. This appears in the chat interface and helps humanize the experience. + +| Spec | Value | +|------|-------| +| **Recommended size** | 172px × 172px | +| **Maximum file size** | 2MB | +| **Format** | PNG, JPG | + +Drag and drop or click to upload. 
+ +--- + +## Text Contents + +Customize the copy users see. + +| Field | What it controls | Default | +|-------|------------------|---------| +| **Start Button Text** | Button to begin conversation | "Start" | +| **End Button Text** | Button to end conversation | "End" | +| **Chat Placeholder** | Input field placeholder | "Type your message..." | +| **CTA Name** (optional) | Call-to-action text on widget button | "Talk to Atoms" | +| **Widget Name** (optional) | Name displayed in widget header | "Atoms" | + +Make these match your brand voice. "Chat with us" feels different than "Get help" or "Ask a question." + +--- + +## Terms & Conditions + +If you need users to agree to terms before using the widget, enable **Require consent**. + +When enabled, users see a checkbox they must tick before starting. This is useful for: +- GDPR compliance +- Recording consent +- Specific terms of service + +--- + +## Preview + +As you make changes, the widget preview updates in real-time in the bottom-right corner of the page. Test different settings before deploying. + +--- + +## Deploying + +Once configured, copy the embed code and add it to your website. 
For detailed installation instructions across different platforms: + + + Step-by-step deployment guide + diff --git a/fern/products/atoms/pages/platform/introduction/add-knowledge-base.mdx b/fern/products/atoms/pages/platform/introduction/add-knowledge-base.mdx new file mode 100644 index 0000000..0130bc7 --- /dev/null +++ b/fern/products/atoms/pages/platform/introduction/add-knowledge-base.mdx @@ -0,0 +1,7 @@ +--- +title: "Add Knowledge Base" +--- + +# Add Knowledge Base + +*Content coming soon.* diff --git a/fern/products/atoms/pages/platform/introduction/capabilities-overview.mdx b/fern/products/atoms/pages/platform/introduction/capabilities-overview.mdx new file mode 100644 index 0000000..894ea6d --- /dev/null +++ b/fern/products/atoms/pages/platform/introduction/capabilities-overview.mdx @@ -0,0 +1,7 @@ +--- +title: "Platform Capabilities Overview" +--- + +# Platform Capabilities Overview + +*Content coming soon.* diff --git a/fern/products/atoms/pages/platform/introduction/core-concepts/agent-types.mdx b/fern/products/atoms/pages/platform/introduction/core-concepts/agent-types.mdx new file mode 100644 index 0000000..276b6bb --- /dev/null +++ b/fern/products/atoms/pages/platform/introduction/core-concepts/agent-types.mdx @@ -0,0 +1,104 @@ +--- +title: "Agent Types" +sidebarTitle: "Agent Types" +description: "Choose between Single Prompt and Conversational Flow agents." +--- + +Atoms offers two types of voice agents, each designed for different conversation styles. Choosing the right type is one of the first decisions you'll make when building an agent. 
+ + + [IMAGE: Agent type selection modal showing both options with icons and descriptions] + + +--- + +## Two Agent Types + +| Type | Icon | Description | Best For | +|------|------|-------------|----------| +| **Single Prompt** | 📄 Document | Flexible, free-flowing conversations guided by one comprehensive prompt | FAQ, general support, open-ended discussions | +| **Conversational Flow** | 🔗 Connected nodes | Structured, goal-oriented conversations with visual workflow | Lead qualification, booking, multi-step processes | + +--- + +## Single Prompt Agents + +Single Prompt agents use one prompt to define their behavior. The AI interprets your instructions and handles conversations naturally, adapting to wherever the discussion goes. + +**Characteristics:** +- One main prompt defines everything +- Flexible conversation paths +- AI decides response approach +- Best for unpredictable conversations + +**Ideal for:** +- Customer FAQ bots +- General product information +- Basic support triage +- Creative or advisory interactions +- Simple information lookup + +**Not ideal for:** +- Strict multi-step workflows +- Lead qualification with specific criteria +- Processes requiring exact sequences + +--- + +## Conversational Flow Agents + +Conversational Flow agents use a visual workflow builder. You design each step as a node, connect them with branches, and control exactly how conversations progress. + +**Characteristics:** +- Visual drag-and-drop builder +- Defined conversation paths +- You control each step +- Best for predictable, goal-oriented conversations + +**Ideal for:** +- Lead qualification workflows +- Appointment scheduling +- Insurance or loan intake +- Support ticket routing +- Compliance-required conversations + +**Not ideal for:** +- Completely open-ended conversations +- Simple FAQ (overkill) +- When flexibility is more important than structure + +--- + +## Quick Decision Guide + +| If you need... 
| Choose | +|----------------|--------| +| Flexible, natural conversations | Single Prompt | +| Specific conversation paths | Conversational Flow | +| Quick setup | Single Prompt | +| Detailed control | Conversational Flow | +| FAQ handling | Single Prompt | +| Multi-step data collection | Conversational Flow | +| Lead scoring | Conversational Flow | +| General support | Single Prompt | + +--- + +## Important Note + +**Agent type is separate from creation method.** + +When you click "Create Agent," you first choose a creation method (Scratch, Template, or AI-assisted), then choose your agent type. The creation method is just how you start — the agent type is what you build. + +--- + +## What's Next + + + + Learn essential vocabulary + + + Build your first agent now + + diff --git a/fern/products/atoms/pages/platform/introduction/core-concepts/how-voice-ai-works.mdx b/fern/products/atoms/pages/platform/introduction/core-concepts/how-voice-ai-works.mdx new file mode 100644 index 0000000..041e986 --- /dev/null +++ b/fern/products/atoms/pages/platform/introduction/core-concepts/how-voice-ai-works.mdx @@ -0,0 +1,103 @@ +--- +title: "How Voice AI Works" +sidebarTitle: "How Voice AI Works" +description: "Understand the technology behind real-time voice conversations with AI." +--- + +Voice AI agents seem like magic — you speak, they understand, they respond naturally. But under the hood, there's a sophisticated pipeline making it all happen in real-time. Understanding this helps you build better agents. + +--- + +## The Voice AI Pipeline + +Every voice conversation flows through three core stages: + +``` +🎤 Speech-to-Text → 🧠 LLM Processing → 🔊 Text-to-Speech +``` + +### 1. Speech-to-Text (STT) + +When a caller speaks, their audio is captured and converted to text. This happens in real-time, word by word, so the system can start processing before the caller finishes speaking. 
+ +**Key factors:** +- Accuracy of transcription +- Handling accents and background noise +- Speed of processing + +### 2. LLM Processing + +The transcribed text goes to a language model that: +- Understands the intent behind words +- Considers conversation context +- Generates an appropriate response +- Follows your configured prompts and rules + +This is where your agent's "brain" lives — the prompts you write, the knowledge base you attach, and the logic you define. + +### 3. Text-to-Speech (TTS) + +The model's text response is converted back to natural-sounding speech. Modern TTS systems produce remarkably human-like voices with appropriate intonation, pacing, and emotion. + +--- + +## Why Latency Matters + +In voice conversations, **latency** is everything. Humans expect responses within 200-400ms — any longer feels unnatural. + +Traditional large language models struggle here: +- Processing takes 1-2+ seconds +- Users experience awkward pauses +- Conversations feel robotic + +**Atoms solves this with Small Language Models:** +- 100ms streaming responses +- Start speaking before processing completes +- Natural conversation rhythm + +--- + +## Real-Time Considerations + +Building voice AI is different from chatbots: + +| Chatbots | Voice AI | +|----------|----------| +| User can re-read responses | One-time delivery | +| Can send long paragraphs | Must be concise | +| User types carefully | User speaks naturally (with ums, interruptions) | +| Response time less critical | Every millisecond matters | +| Can show buttons/links | Voice-only interface | + +**Implications for your agents:** +- Keep responses short (under 30 words ideal) +- Anticipate interruptions +- Design for ears, not eyes +- Handle "um", "uh", and restarts gracefully + +--- + +## How Atoms Handles It + +Atoms manages the entire pipeline so you can focus on what your agent should say: + +1. **Telephony Integration** — Connects to phone networks +2. 
**Real-time STT** — Fast, accurate transcription +3. **Optimized LLM** — Small models, big speed +4. **Quality TTS** — Natural-sounding voices +5. **Turn Management** — Handles interruptions and pauses + +You configure the conversation logic. We handle the infrastructure. + +--- + +## What's Next + + + + Choose the right agent type for your use case + + + Learn the vocabulary of voice AI + + diff --git a/fern/products/atoms/pages/platform/introduction/core-concepts/key-terms.mdx b/fern/products/atoms/pages/platform/introduction/core-concepts/key-terms.mdx new file mode 100644 index 0000000..27462bf --- /dev/null +++ b/fern/products/atoms/pages/platform/introduction/core-concepts/key-terms.mdx @@ -0,0 +1,70 @@ +--- +title: "Key Terms" +sidebarTitle: "Key Terms" +description: "A glossary of essential terms used throughout Atoms." +--- + +This glossary defines the key terms you'll encounter when building voice AI agents on Atoms. Bookmark this page for quick reference. + +--- + +## Glossary + +| Term | Definition | +|------|------------| +| **Agent** | An AI-powered voice assistant built on Atoms. Agents handle phone calls or web conversations based on your configuration. | +| **Single Prompt** | An agent type using one comprehensive prompt for flexible, free-flowing conversations. | +| **Conversational Flow** | An agent type using a visual workflow for structured, goal-oriented conversations with defined paths. | +| **Node** | A single step in a Conversational Flow workflow. Types include Prompt, Transfer Call, API Call, End Call, Pre-Call API, and Post-Call API. | +| **Branch** | A connection between nodes in a Conversational Flow that determines the next step based on conditions. | +| **Cold Transfer** | A call transfer where the receiving party is connected immediately without any context or briefing. | +| **Warm Transfer** | A call transfer where the AI provides context to the receiving party before connecting the caller. 
| +| **Whisper Message** | A message spoken only to the transfer recipient (not heard by the caller) during warm transfer. | +| **Knowledge Base** | A repository of documents, FAQs, and information that agents can reference during conversations. | +| **Webhook** | An HTTP callback that sends data to your systems when specific events occur (call start, call end, etc.). | +| **PII Redaction** | Automatic removal or masking of personally identifiable information from transcripts and logs. | +| **Campaign** | An outbound calling program that contacts a list of phone numbers using your agent. | +| **Audience** | A contact list (phone numbers and optional data) used for outbound campaigns. | +| **Disposition** | The outcome category of a call (e.g., successful, unsuccessful, transferred). | +| **Post-Call Metrics** | Analytics and data extracted from completed calls (satisfaction scores, issue types, etc.). | +| **STT (Speech-to-Text)** | Technology that converts spoken audio into text for processing. | +| **TTS (Text-to-Speech)** | Technology that converts text responses into spoken audio. | +| **LLM (Large Language Model)** | The AI model that understands intent and generates responses. | +| **SLM (Small Language Model)** | Atoms' optimized models that deliver fast, efficient voice AI with lower latency and cost. | +| **Latency** | The delay between when a user finishes speaking and when the agent starts responding. | +| **Turn Detection** | The system that determines when a speaker has finished their turn and it's time to respond. | +| **Interruption** | When a caller speaks while the agent is talking, requiring the agent to stop and listen. 
| + +--- + +## Related Concepts + +### Call Types + +| Type | Description | +|------|-------------| +| **Inbound** | Calls initiated by customers to your agent's phone number | +| **Outbound** | Calls initiated by your agent to customer phone numbers (via campaigns) | +| **Web Call** | Voice conversation through a browser widget | +| **Telephony** | Traditional phone network calls | + +### Agent States + +| State | Description | +|-------|-------------| +| **Draft** | Agent is being edited and not deployed | +| **Locked** | Agent is protected from edits | +| **Live** | Agent is deployed and handling calls | + +--- + +## What's Next + + + + Set up your Atoms account + + + Start building immediately + + diff --git a/fern/products/atoms/pages/platform/introduction/create-account.mdx b/fern/products/atoms/pages/platform/introduction/create-account.mdx new file mode 100644 index 0000000..5e69f5d --- /dev/null +++ b/fern/products/atoms/pages/platform/introduction/create-account.mdx @@ -0,0 +1,7 @@ +--- +title: "Create Your Account" +--- + +# Create Your Account + +*Content coming soon.* diff --git a/fern/products/atoms/pages/platform/introduction/first-agent.mdx b/fern/products/atoms/pages/platform/introduction/first-agent.mdx new file mode 100644 index 0000000..ac11ee6 --- /dev/null +++ b/fern/products/atoms/pages/platform/introduction/first-agent.mdx @@ -0,0 +1,7 @@ +--- +title: "Your First Agent" +--- + +# Your First Agent + +*Content coming soon.* diff --git a/fern/products/atoms/pages/platform/introduction/how-voice-ai-works.mdx b/fern/products/atoms/pages/platform/introduction/how-voice-ai-works.mdx new file mode 100644 index 0000000..97077fd --- /dev/null +++ b/fern/products/atoms/pages/platform/introduction/how-voice-ai-works.mdx @@ -0,0 +1,7 @@ +--- +title: "How Voice AI Works" +--- + +# How Voice AI Works + +*Content coming soon.* diff --git a/fern/products/atoms/pages/platform/introduction/intro.mdx b/fern/products/atoms/pages/platform/introduction/intro.mdx 
new file mode 100644 index 0000000..4018cc1 --- /dev/null +++ b/fern/products/atoms/pages/platform/introduction/intro.mdx @@ -0,0 +1,104 @@ +--- +title: "Atoms Platform Overview" +sidebarTitle: "Platform Overview" +description: "The visual workspace for building production-ready voice AI agents." +--- + +Atoms Platform is where you build, test, and deploy voice AI agents — without writing code. Design conversations visually, connect your data, pick a voice, and go live. The platform handles real-time speech processing, AI reasoning, and phone infrastructure. + +--- + +## Get Started + + + Create a working voice agent in 5 minutes — no code required. + + +--- + +## Agents + +Two ways to build, depending on your use case. + + + + One prompt powers the entire conversation. Flexible, adaptive, handles unexpected turns. Best for support, FAQ, advisory. + + + Deprecated + + Visual workflow with nodes and branches. Structured paths, predictable outcomes. Best for qualification, booking, intake. + + + +--- + +## Features + + + + Upload docs, PDFs, URLs. Agents search and reference your content automatically. + + + Push call events to your systems in real-time — starts, ends, analytics. + + + Embed voice or chat on your website with one line of code. + + + Connect Salesforce, Zendesk, HubSpot. Sync data automatically. + + + Extract structured data from calls — disposition, satisfaction, custom fields. + + + Dynamic values for personalization — caller info, API data, custom values. + + + +--- + +## Deployment + +Get your agents on real phone lines and run outbound campaigns. + + + + Rent numbers in 40+ countries or bring your own via SIP. + + + Upload and manage contact lists for outbound calling. + + + Schedule outbound calls at scale. Track results in real-time. + + + +--- + +## Analytics + +Monitor performance and review every conversation. + + + + Call volume, duration, outcomes — see what's working. + + + Full transcripts, recordings, and call details. 
+ + + Web call, telephony, and chat testing before you go live. + + + Protect production agents from accidental edits. + + + +--- + +## Reference + + + Every term and concept, linked + \ No newline at end of file diff --git a/fern/products/atoms/pages/platform/introduction/make-it-live.mdx b/fern/products/atoms/pages/platform/introduction/make-it-live.mdx new file mode 100644 index 0000000..87b606a --- /dev/null +++ b/fern/products/atoms/pages/platform/introduction/make-it-live.mdx @@ -0,0 +1,7 @@ +--- +title: "Make It Live" +--- + +# Make It Live + +*Content coming soon.* diff --git a/fern/products/atoms/pages/platform/introduction/next-steps.mdx b/fern/products/atoms/pages/platform/introduction/next-steps.mdx new file mode 100644 index 0000000..274ddaa --- /dev/null +++ b/fern/products/atoms/pages/platform/introduction/next-steps.mdx @@ -0,0 +1,7 @@ +--- +title: "Next Steps" +--- + +# Next Steps + +*Content coming soon.* diff --git a/fern/products/atoms/pages/platform/introduction/platform-overview.mdx b/fern/products/atoms/pages/platform/introduction/platform-overview.mdx new file mode 100644 index 0000000..a39bbe2 --- /dev/null +++ b/fern/products/atoms/pages/platform/introduction/platform-overview.mdx @@ -0,0 +1,90 @@ +--- +title: "Platform Overview" +sidebarTitle: "Platform Overview" +description: "A visual tour of the Atoms dashboard and its key features." +--- + +When you log into Atoms, you land on the main dashboard — your command center for building and managing voice AI agents. This page walks you through every section so you know exactly where to find what you need. 
+ + + [IMAGE: Full dashboard with sidebar, agents list, and Create Agent button annotated] + + +--- + +## Dashboard Layout + +The dashboard is organized into three main areas: + +| Area | Location | Description | +|------|----------|-------------| +| **Header** | Top | Atoms logo and workspace selector | +| **Left Sidebar** | Left | Navigation to all platform sections | +| **Main Content** | Center | Agents list and primary workspace | + +--- + +## Left Sidebar Navigation + +The sidebar gives you access to every feature in Atoms: + +| Section | Items | Purpose | +|---------|-------|---------| +| **Our Models** | Speech-to-Text, Text-to-Speech | Access underlying AI models | +| **Build** | Agents, Knowledge Base | Create and configure agents | +| **Deploy** | Phone Numbers, Campaigns, Audiences | Launch agents to production | +| **Observe** | Analytics | Monitor performance | +| **Plan Info** | Plan badge, Balance, Manage Plan | Account and billing | +| **User Account** | Email (bottom) | Account settings | + +--- + +## Agents List Page + +The main content area shows your agents: + +**Header Controls:** +- **"Agents"** title with "How it works" help link +- **Sort by** dropdown — organize your list +- **"All"** filter dropdown — filter by agent type +- **Search bar** — "Search agents..." 
+- **"Create Agent"** button (green, top right) — start building + +**Agents Table:** + +| Column | Description | +|--------|-------------| +| **Agent** | Name + ID (e.g., "Campaign Agent" / "#7BB090") | +| **Created on** | When the agent was created | +| **Type** | Badge showing "Conversational Flow" or "Single Prompt" | +| **Total calls** | Number of calls handled | +| **Last modified** | Most recent edit timestamp | +| **Actions** | Three-dot menu for options | + +--- + +## Key Actions + +From the dashboard, you can: + +| Action | How | +|--------|-----| +| Create new agent | Click green "Create Agent" button | +| Edit existing agent | Click agent row | +| Search agents | Use search bar | +| Filter by type | Use "All" dropdown | +| Sort list | Use "Sort by" dropdown | +| Access settings | Click agent's three-dot menu | + +--- + +## What's Next + + + + Understand Single Prompt vs Conversational Flow + + + Create and test your first agent + + diff --git a/fern/products/atoms/pages/platform/introduction/quick-ref.mdx b/fern/products/atoms/pages/platform/introduction/quick-ref.mdx new file mode 100644 index 0000000..4e4cf0c --- /dev/null +++ b/fern/products/atoms/pages/platform/introduction/quick-ref.mdx @@ -0,0 +1,122 @@ +--- +title: "Quick Reference" +sidebarTitle: "Quick Reference" +description: "Every term, linked to where you need to go." +--- + +A glossary of everything in Atoms, with links to learn more. 
+ +--- + +## Agent Types + +| Term | What It Is | Learn More | +|------|------------|------------| +| **Single Prompt** | Agent powered by one prompt for flexible conversations | [Overview →](/atoms/atoms-platform/single-prompt-agents/overview) | +| **Conversational Flow** | Agent built with visual workflows for structured conversations | [Overview →](/atoms/atoms-platform/conversational-flow-agents/overview) | + +--- + +## Building Agents + +| Term | What It Is | Learn More | +|------|------------|------------| +| **Prompt** | Instructions that define your agent's behavior | [Prompt Editor →](/atoms/atoms-platform/single-prompt-agents/prompt-section/writing-prompts) | +| **Node** | A single step in a Conversational Flow | [Node Types →](/atoms/atoms-platform/conversational-flow-agents/workflow-tab/node-types) | +| **Branch** | Connection between nodes based on conditions | [Conditions →](/atoms/atoms-platform/conversational-flow-agents/workflow-tab/conditions) | +| **Model** | The AI that powers your agent (GPT-4o, Electron, etc.) 
| [Model Selection →](/atoms/atoms-platform/single-prompt-agents/prompt-section/model-selection) | +| **Voice** | How your agent sounds | [Voice Selection →](/atoms/atoms-platform/single-prompt-agents/prompt-section/voice-selection) | + +--- + +## Configuration + +| Term | What It Is | Learn More | +|------|------------|------------| +| **End Call** | When and how the agent terminates calls | [End Call →](/atoms/atoms-platform/single-prompt-agents/configuration-panel/end-call) | +| **Transfer Call** | Handing off to a human agent | [Transfer Call →](/atoms/atoms-platform/single-prompt-agents/configuration-panel/transfer-call) | +| **Knowledge Base** | Documents your agent can search | [Knowledge Base →](/atoms/atoms-platform/features/knowledge-base) | +| **Variables** | Dynamic values used in prompts | [Variables →](/atoms/atoms-platform/single-prompt-agents/configuration-panel/variables) | +| **API Calls** | External requests during conversations | [API Calls →](/atoms/atoms-platform/single-prompt-agents/configuration-panel/api-calls) | + +--- + +## Agent Settings + +| Term | What It Is | Learn More | +|------|------------|------------| +| **Voice Settings** | Speech speed, pronunciation, turn detection | [Voice Settings →](/atoms/atoms-platform/single-prompt-agents/agent-settings/voice-settings) | +| **Model Settings** | Language switching, speech formatting | [Model Settings →](/atoms/atoms-platform/single-prompt-agents/agent-settings/model-settings) | +| **Webhooks** | Send call data to your systems | [Webhooks →](/atoms/atoms-platform/features/webhooks) | +| **General Settings** | Timeout behavior | [General Settings →](/atoms/atoms-platform/single-prompt-agents/agent-settings/general-settings) | + +--- + +## Features + +| Term | What It Is | Learn More | +|------|------------|------------| +| **Widget** | Embeddable voice/chat for your website | [Widget →](/atoms/atoms-platform/features/widget) | +| **Integrations** | Connect Salesforce, Zendesk, etc. 
| [Integrations →](/atoms/atoms-platform/features/integrations) | +| **Post-Call Metrics** | Extract data from completed calls | [Post-Call Metrics →](/atoms/atoms-platform/features/post-call-metrics) | + +--- + +## Deployment + +| Term | What It Is | Learn More | +|------|------------|------------| +| **Phone Numbers** | Numbers your agent can receive/make calls on | [Phone Numbers →](/atoms/atoms-platform/deployment/phone-numbers) | +| **Campaigns** | Outbound calling programs | [Campaigns →](/atoms/atoms-platform/deployment/campaigns) | +| **Audiences** | Contact lists for campaigns | [Audiences →](/atoms/atoms-platform/deployment/audiences) | + +--- + +## Call Handling + +| Term | What It Is | +|------|------------| +| **Inbound** | Calls initiated by customers to your agent | +| **Outbound** | Calls initiated by your agent (via campaigns) | +| **Cold Transfer** | Transfer without briefing the recipient | +| **Warm Transfer** | Transfer with context provided to recipient | +| **Whisper Message** | Message only the transfer recipient hears | +| **PII Redaction** | Automatic removal of personal info from transcripts | + +--- + +## Analytics & Logs + +| Term | What It Is | Learn More | +|------|------------|------------| +| **Conversation Logs** | Full transcripts and call details | [Conversation Logs →](/atoms/atoms-platform/analytics-logs/conversation-logs) | +| **Testing** | Web call, telephony, and chat testing | [Testing →](/atoms/atoms-platform/analytics-logs/testing) | +| **Locking** | Protect agents from accidental edits | [Locking →](/atoms/atoms-platform/analytics-logs/locking) | +| **Analytics** | Call metrics and performance data | [Analytics →](/atoms/atoms-platform/analytics-logs/overview) | + +--- + +## Technical + +| Term | What It Is | +|------|------------| +| **STT** | Speech-to-Text — converts audio to text | +| **TTS** | Text-to-Speech — converts text to audio | +| **LLM** | Large Language Model — the AI brain | +| **SLM** | Small Language 
Model — Atoms' optimized models | +| **Latency** | Delay between user speaking and agent responding | + +--- + +## Next + + + + Build flexible, adaptive agents + + + Deprecated + + Build structured, visual workflows + + diff --git a/fern/products/atoms/pages/platform/introduction/quick-start.mdx b/fern/products/atoms/pages/platform/introduction/quick-start.mdx new file mode 100644 index 0000000..fd969bc --- /dev/null +++ b/fern/products/atoms/pages/platform/introduction/quick-start.mdx @@ -0,0 +1,156 @@ +--- +title: "Build Your First Agent" +sidebarTitle: "Build Your First Agent" +description: "Create and test a working voice agent in 5 minutes." +--- + +Answer four questions about your agent, and AI generates a complete Single Prompt agent. You can refine and test it before deploying. + +--- + +## Step 1: Sign in to the platform + +Sign in at [**app.smallest.ai**](https://app.smallest.ai/login?utm_source=documentation&utm_medium=docs). If you don't have an account yet, create one from the same page. + + + Sign up to the Atoms voice agent platform + + +Once you're in, you'll see your dashboard. + + + Welcome screen for the Atoms voice agent platform + + +--- + +## Step 2: Open Create Agent + +From your dashboard, click the green **Create Agent** button in the top right. + + + Create Agent modal with creation options + + +--- + +## Step 3: Choose Create with AI + +In the modal, select the **Create with AI** option (the third option). Then choose **Single Prompt** as your agent type.
+ + + Create with AI modal + + +--- + +## Step 4: Configure Your Agent (Left Panel) + +Before writing your prompts, set the basics in the left panel: + +| Field | What to choose | +|-------|----------------| +| **Agent Type** | Single Prompt (already selected) | +| **Call Direction** | **Inbound** if customers call in, **Outbound** if the agent makes calls | +| **Emotive Model** | Toggle on for more expressive voice (Beta), or leave off | +| **Voice** | Pick a voice from the library — use the preview to listen | +| **Knowledge Base** | Optionally attach an existing KB so the agent can use your docs/FAQs | + +--- + +## Step 5: Fill the Four Prompts (Right Panel) + +Describe your agent in four short prompts. Each needs at least 50 characters. The AI uses these to generate the full agent. + +**1. Role & Objective** — Who is this agent and what's their goal? +Example: *"You are Sam, a friendly support agent for TechStore. Your goal is to help customers with orders, returns, and product questions."* + +**2. Conversational Flow** — What steps should the agent follow? +Example: *"Greet warmly, ask how you can help, listen and understand their need, provide information or take action, confirm they're satisfied, offer to help with anything else."* + +**3. Dos, Don'ts & Fallbacks** — How should the agent behave in tricky situations? +Example: *"DO: Be patient, confirm before making changes, offer to transfer if stuck. DON'T: Share other customers' info or make promises you can't keep. If you don't know: say so and offer to find out or transfer."* + +**4. End Conditions** — When should the call end? +Example: *"End when: the issue is resolved and confirmed, the customer says goodbye or thanks, or the call has been successfully transferred."* + + +Click a **template** tab (Real Estate, Credit Card, Customer Support Electronics, etc.) to pre-fill all four prompts, then edit as needed. + + +--- + +## Step 6: Create the Agent + +Click **Create agent** in the top right. 
Atoms will generate your agent (about 30 seconds). When you see the success message, click **Go to Agent** to open the editor. + + + Single Prompt editor + + +Your prompt, voice, model, and Knowledge Base (if you added one) are already configured. Refine the prompt text if you like. + +--- + +## Step 7: Test Your Agent + +Click **Test Agent** in the top-right to start a test call. + +You can test your agent in three ways: + +- **Web Call** — talk to your agent through your browser microphone +- **Telephony Call** — enter a phone number and get a call from your agent +- **Chat** — text-based conversation with your agent + + + Test your agent via Web Call, Telephony, or Chat + + +Talk through a few scenarios: + +- Ask a normal question +- Ask something unexpected +- Interrupt mid-response + +Listen for clarity and confirm that the agent follows your guidelines. + +--- + +## You Built a Single Prompt Agent + +You now have a working **Single Prompt agent**. Here's what happens when someone calls: + +1. **Pulse** transcribes their speech in 64ms +2. **Your prompt** tells the AI how to respond +3. **Lightning** speaks the response in 175ms + +Total: under 800ms per turn. + +### Understand What You Built + + + How one prompt powers an entire conversation — and when to use it vs. Conversational Flow. + + +### Customize Your Agent + + + + Structure and improve your agent's instructions + + + Ground responses in your actual docs and data + + + Get a real phone number and go live + + + Voice, model, language, and behavior settings + + + +### Need Help? + + + Ask questions, share what you're building, and get help from the community.
+ \ No newline at end of file diff --git a/fern/products/atoms/pages/platform/introduction/quick-start/create-account.mdx b/fern/products/atoms/pages/platform/introduction/quick-start/create-account.mdx new file mode 100644 index 0000000..6b017e8 --- /dev/null +++ b/fern/products/atoms/pages/platform/introduction/quick-start/create-account.mdx @@ -0,0 +1,60 @@ +--- +title: "Creating Your Account" +sidebarTitle: "Creating Your Account" +description: "Set up your Atoms account and get ready to build voice AI agents." +--- + +Before you can build voice AI agents, you need an Atoms account. This page walks you through the signup process. + +--- + +## Getting Started + +→ **NEEDS PLATFORM INFO:** Account creation steps, signup flow, and screenshots + +--- + +## Account Setup Steps + +1. **Visit the Atoms Platform** + - Navigate to the Atoms signup page + - → NEEDS PLATFORM INFO: Exact URL + +2. **Create Your Account** + - Enter your email address + - Create a password + - → NEEDS PLATFORM INFO: Additional fields required + +3. **Verify Your Email** + - Check your inbox for verification email + - Click the verification link + - → NEEDS PLATFORM INFO: Verification process details + +4. **Complete Your Profile** + - → NEEDS PLATFORM INFO: Profile setup requirements + +5. 
**Choose Your Plan** + - → NEEDS PLATFORM INFO: Available plans and pricing + +--- + +## After Signup + +Once your account is created, you'll land on the main dashboard where you can: + +- Create your first agent +- Explore the platform features +- Set up your workspace + +--- + +## What's Next + + + + Build and test your first voice agent + + + Take a tour of the dashboard + + diff --git a/fern/products/atoms/pages/platform/introduction/quick-start/first-agent.mdx b/fern/products/atoms/pages/platform/introduction/quick-start/first-agent.mdx new file mode 100644 index 0000000..1a88c67 --- /dev/null +++ b/fern/products/atoms/pages/platform/introduction/quick-start/first-agent.mdx @@ -0,0 +1,117 @@ +--- +title: "Your First Agent" +sidebarTitle: "Your First Agent" +description: "Build and test your first voice AI agent in under 10 minutes." +--- + +Let's create your first voice AI agent. In this quick walkthrough, you'll build a simple support assistant and have a real conversation with it. + +--- + +## Step 1: Start Creating + +From your dashboard, click the green **"Create Agent"** button in the top right corner. + +--- + +## Step 2: Choose Your Method + +A modal appears with three options: + +| Option | Icon | When to Use | +|--------|------|-------------| +| **Start from scratch** | + | Build manually from blank | +| **Start with Template** | Grid | Use pre-built templates | +| **Create with AI** | Sparkle | AI-assisted creation | + +For this quickstart, select **"Start from scratch"**. + +--- + +## Step 3: Select Agent Type + +Choose **"Single Prompt"** — it's the simplest way to get started. + +--- + +## Step 4: Write Your Prompt + +You're now in the agent editor. In the main prompt area, write: + +``` +You are a friendly assistant for [Your Company Name]. + +Help users with basic questions about our products and services. +Be polite, concise, and helpful. +If you don't know something, say so honestly. +Keep responses under 30 words when possible. 
+``` + +Replace `[Your Company Name]` with your actual company name. + +--- + +## Step 5: Select a Voice + +In the top bar, click the **Voice** dropdown and select a voice. We recommend **Tiffany** for a friendly, professional sound. + +Click the play button to preview before selecting. + +--- + +## Step 6: Test Your Agent + +Click **"Test Agent"** in the top right corner. + +A modal appears with three test options: + +| Mode | Description | +|------|-------------| +| **Telephony Call** | Get a real phone call | +| **Web Call** | Talk through your browser | +| **Chat** | Text-only testing | + +Select **"Web Call"** for the quickest test. + +--- + +## Step 7: Have a Conversation + +Your browser will connect to your agent. Try saying: + +- "Hi, can you tell me about your company?" +- "What services do you offer?" +- "Thanks for your help!" + +Listen to how the agent responds. This is your first voice AI agent! + +--- + +## Congratulations + +You've just created and tested your first voice AI agent. It's that simple. + +From here, you can: +- Improve your prompt +- Add a knowledge base +- Configure voice settings +- Deploy to a phone number + +--- + +## What's Next + + + + Explore what you can do next + + + Master prompt writing + + + Give your agent information to reference + + + Fine-tune voice settings + + diff --git a/fern/products/atoms/pages/platform/introduction/quick-start/next-steps.mdx b/fern/products/atoms/pages/platform/introduction/quick-start/next-steps.mdx new file mode 100644 index 0000000..6a77653 --- /dev/null +++ b/fern/products/atoms/pages/platform/introduction/quick-start/next-steps.mdx @@ -0,0 +1,102 @@ +--- +title: "Next Steps" +sidebarTitle: "Next Steps" +description: "Where to go after creating your first agent." +--- + +You've created your first voice AI agent. Now let's explore what you can do to make it production-ready. 
+ +--- + +## Enhance Your Agent + +### Configure Voice Settings + +Fine-tune how your agent sounds — adjust speech speed, add pronunciation rules, enable background sounds, and more. + + + Configure voice and speech settings + + +### Add a Knowledge Base + +Give your agent access to documents, FAQs, and product information so it can answer questions accurately. + + + Learn about Knowledge Base + + +### Set Up Call Handling + +Configure what happens when calls end or need to be transferred to human agents. + + + + Configure call termination + + + Set up call transfers + + + +--- + +## Deploy Your Agent + +### Get a Phone Number + +Acquire a phone number so customers can call your agent directly. + + + Get a phone number + + +### Embed on Your Website + +Add a voice widget to your website so visitors can talk to your agent. + + + Get embed code + + +### Run Outbound Campaigns + +Have your agent proactively call customers for reminders, follow-ups, or outreach. + + + Learn about campaigns + + +--- + +## Learn from Examples + +Explore our cookbooks for complete examples tailored to specific use cases: + + + + Build a support agent + + + Qualify leads automatically + + + Schedule appointments + + + Browse all examples + + + +--- + +## What's Next + + + + Deep dive into agent building + + + Master the agent editor + + diff --git a/fern/products/atoms/pages/platform/introduction/quick-tour.mdx b/fern/products/atoms/pages/platform/introduction/quick-tour.mdx new file mode 100644 index 0000000..cbf070c --- /dev/null +++ b/fern/products/atoms/pages/platform/introduction/quick-tour.mdx @@ -0,0 +1,7 @@ +--- +title: "Quick Tour" +--- + +# Quick Tour + +*Content coming soon.* diff --git a/fern/products/atoms/pages/platform/introduction/understanding-agent-types.mdx b/fern/products/atoms/pages/platform/introduction/understanding-agent-types.mdx new file mode 100644 index 0000000..0046ba9 --- /dev/null +++ 
b/fern/products/atoms/pages/platform/introduction/understanding-agent-types.mdx @@ -0,0 +1,7 @@ +--- +title: "Understanding Agent Types" +--- + +# Understanding Agent Types + +*Content coming soon.* diff --git a/fern/products/atoms/pages/platform/introduction/voice-quality.mdx b/fern/products/atoms/pages/platform/introduction/voice-quality.mdx new file mode 100644 index 0000000..5127553 --- /dev/null +++ b/fern/products/atoms/pages/platform/introduction/voice-quality.mdx @@ -0,0 +1,7 @@ +--- +title: "Voice & Quality" +--- + +# Voice & Quality + +*Content coming soon.* diff --git a/fern/products/atoms/pages/platform/introduction/what-is-atoms-platform.mdx b/fern/products/atoms/pages/platform/introduction/what-is-atoms-platform.mdx new file mode 100644 index 0000000..7b4cae2 --- /dev/null +++ b/fern/products/atoms/pages/platform/introduction/what-is-atoms-platform.mdx @@ -0,0 +1,7 @@ +--- +title: "What is Atoms Platform?" +--- + +# What is Atoms Platform? + +*Content coming soon.* diff --git a/fern/products/atoms/pages/platform/introduction/what-is-atoms.mdx b/fern/products/atoms/pages/platform/introduction/what-is-atoms.mdx new file mode 100644 index 0000000..3a006df --- /dev/null +++ b/fern/products/atoms/pages/platform/introduction/what-is-atoms.mdx @@ -0,0 +1,75 @@ +--- +title: "What is Atoms" +sidebarTitle: "What is Atoms" +description: "Build production-ready voice AI agents with the power of small language models." +--- + +Atoms is a voice AI agent builder platform by smallest.ai, designed to help businesses create, deploy, and manage intelligent voice assistants at scale. Whether you need a customer support bot, sales qualifier, or appointment scheduler, Atoms provides the tools to build voice experiences that feel natural and responsive. + +What makes Atoms different is its foundation on **Small Language Models (SLMs)** — purpose-built AI models that deliver enterprise-grade performance without the overhead of traditional large language models. 
+ +--- + +## Why Small Language Models? + +Traditional large language models are powerful but come with significant drawbacks: high latency, expensive compute costs, and challenging deployment requirements. Atoms takes a different approach. + +### Small Models, Big Impact + +Smaller, more focused models enable easier integrations and faster fine-tuning cycles. You get the intelligence you need without the bloat. + +### Hyper Personalization + +Our SLMs actively learn from interactions, offering unmatched expertise tailored to your specific use case and customer base. + +### Minimal Latency + +With **sub-800ms end-to-end turn times**, your voice agents respond naturally without awkward pauses. Conversations feel human because they flow like human conversations. + +### On Edge Deployment + +Deploy anywhere — from mobile devices to enterprise clouds. Your agents work where you need them, not just where massive GPU clusters exist. + +### Low Cost + +Our compute is **10x cheaper** than traditional large models. No expensive GPU infrastructure required to run production workloads. + +--- + +## Who Is Atoms For? + +Atoms is built for businesses that need voice AI: + +- **Customer Support Teams** — Automate routine inquiries while maintaining quality +- **Sales Organizations** — Qualify leads and book meetings 24/7 +- **Healthcare Providers** — Handle intake, scheduling, and follow-ups +- **Financial Services** — Process inquiries with compliance built-in +- **Any Business** — That wants to scale voice interactions without scaling headcount + +--- + +## What Can You Build? 
+ +With Atoms, you can create voice agents that: + +- Answer customer questions using your knowledge base +- Qualify leads and route to sales teams +- Book and manage appointments +- Collect survey responses and feedback +- Process payments and reminders +- Handle complex multi-step workflows + +The platform gives you two approaches: **Single Prompt** agents for flexible, open-ended conversations, and **Conversational Flow** agents for structured, goal-oriented interactions. + +--- + +## What's Next + + + + Take a visual tour of the Atoms dashboard + + + Build and test your first voice agent in 10 minutes + + diff --git a/fern/products/atoms/pages/platform/single-prompt/ai-assisted.mdx b/fern/products/atoms/pages/platform/single-prompt/ai-assisted.mdx new file mode 100644 index 0000000..cd7db4d --- /dev/null +++ b/fern/products/atoms/pages/platform/single-prompt/ai-assisted.mdx @@ -0,0 +1,118 @@ +--- +title: "Create with AI" +sidebarTitle: "Create with AI" +description: "Describe what you need, and AI generates a complete agent for you." +--- + +Don't want to start from scratch? Answer four questions about your agent, and AI generates a complete Single Prompt agent — ready for you to customize and test. + +--- + +## Step 1: Open Create Agent + +From your dashboard, click the green **Create Agent** button in the top right. + + + Create Agent modal with creation options + + +--- + +## Step 2: Choose Create with AI + +In the modal, select the **Create with AI** option (the third option). Then choose **Single Prompt** as your agent type. 
+ + + Create with AI modal + + +--- + +## Step 3: Configure Your Agent (Left Panel) + +Before writing your prompts, set the basics in the left panel: + +| Field | What to choose | +|-------|----------------| +| **Agent Type** | Single Prompt (already selected) | +| **Call Direction** | **Inbound** if customers call in, **Outbound** if the agent makes calls | +| **Emotive Model** | Toggle on for more expressive voice (Beta), or leave off | +| **Voice** | Pick a voice from the library — use the preview to listen | +| **Knowledge Base** | Optionally attach an existing KB so the agent can use your docs/FAQs | + +--- + +## Step 4: Fill the Four Prompts (Right Panel) + +Describe your agent in four short prompts. Each needs at least 50 characters. The AI uses these to generate the full agent. + +**1. Role & Objective** — Who is this agent and what's their goal? +Example: *"You are Sam, a friendly support agent for TechStore. Your goal is to help customers with orders, returns, and product questions."* + +**2. Conversational Flow** — What steps should the agent follow? +Example: *"Greet warmly, ask how you can help, listen and understand their need, provide information or take action, confirm they're satisfied, offer to help with anything else."* + +**3. Dos, Don'ts & Fallbacks** — How should the agent behave in tricky situations? +Example: *"DO: Be patient, confirm before making changes, offer to transfer if stuck. DON'T: Share other customers' info or make promises you can't keep. If you don't know: say so and offer to find out or transfer."* + +**4. End Conditions** — When should the call end? +Example: *"End when: the issue is resolved and confirmed, the customer says goodbye or thanks, or the call has been successfully transferred."* + + +Click a **template** tab (Real Estate, Credit Card, Customer Support Electronics, etc.) to pre-fill all four prompts, then edit as needed. Use **Clear All** to reset and start fresh. 
 + + +--- + +## Step 5: Create the Agent + +Click **Create agent** in the top right. Atoms will generate your agent (about 30 seconds). When you see the success message, click **Go to Agent** to open the editor. + + + Single Prompt editor + + +Your prompt, voice, model, and Knowledge Base (if you added one) are already configured. Refine the prompt text if you like. + +--- + +## Step 6: Test Your Agent + +Click **Test Agent** in the top-right to start a test call. + +You can test your agent in three ways: + +- **Web Call** — talk to your agent through your browser microphone +- **Telephony Call** — enter a phone number and get a call from your agent +- **Chat** — text-based conversation with your agent + + + Test your agent via Web Call, Telephony, or Chat + + +Talk through a few scenarios: + +- Ask a normal question +- Ask something unexpected +- Interrupt mid-response + +Listen for clarity and confirm that the agent follows your guidelines. + +--- + +## What's Next + + + + Structure and improve your agent's instructions + + + Ground responses in your actual docs and data + + + Get a real phone number and go live + + + Voice, model, language, and behavior settings + + + diff --git a/fern/products/atoms/pages/platform/single-prompt/config/api-calls.mdx b/fern/products/atoms/pages/platform/single-prompt/config/api-calls.mdx new file mode 100644 index 0000000..2ed5e6a --- /dev/null +++ b/fern/products/atoms/pages/platform/single-prompt/config/api-calls.mdx @@ -0,0 +1,158 @@ +--- +title: "API Calls" +sidebarTitle: "API Calls" +description: "Connect your agent to external services and data." +--- + +API Calls let your agent fetch and send data during conversations. Look up customer information, check order status, book appointments, create tickets — all in real-time while the conversation is happening. + +**Location:** Config Panel (right sidebar) → API Calls toggle + +--- + +## Setup + +1. Toggle **API Calls** ON +2. Click ⚙️ to open settings +3.
Click **+ Add API Call** + +--- + +## Configuration + +The API Call modal has two main sections. + +### Basic Setup & API Configuration + + + ![API call basic setup](../../building-agents/images/api-call-basic.png) + + +| Field | Required | Description | +|-------|----------|-------------| +| **Name** | Yes | Identifier for this API call (e.g., `lookup_customer`) | +| **Description** | Yes | Tells the AI when to trigger this call | +| **LLM Parameters** | No | Parameters the LLM can populate dynamically | +| **URL** | Yes | The API endpoint | +| **Method** | Yes | GET, POST, PUT, or DELETE | +| **Timeout (ms)** | Yes | How long to wait before failing (default: 5000) | + +### Headers, Parameters & Response Extraction + + + ![API call advanced setup](../../building-agents/images/api-call-advanced.png) + + +| Field | Purpose | +|-------|---------| +| **Headers** | Request headers (Authorization, Content-Type, etc.) | +| **Query Parameters** | URL parameters for GET requests | +| **Response Variable Extraction** | Map response fields to variables you can use in prompts | + +--- + +## LLM Parameters + +These are values the AI determines during conversation and passes to your API. Click **+ Add Parameter** to define what the AI should collect. + +For example, if you need an order ID to look up status, add a parameter called `order_id` — the AI will extract it from the conversation and include it in the request. + +--- + +## Response Variable Extraction + +This is where the magic happens. Map fields from the API response to variables you can use in your prompts. + +Click **+ Add Variable** and specify: +- The path in the JSON response (e.g., `customer.name`) +- The variable name (e.g., `customer_name`) + +Then use `{{api.customer_name}}` in your prompt. 
+ +--- + +## Example: Customer Lookup + + + + | Field | Value | + |-------|-------| + | **Name** | `lookup_customer` | + | **Description** | "Look up customer information when the call starts" | + | **URL** | `https://crm.example.com/api/customers` | + | **Method** | GET | + | **Query Parameters** | `phone: {{caller_phone}}` | + + + + | Response Path | Variable | + |---------------|----------| + | `customer.name` | `{{api.customer_name}}` | + | `customer.tier` | `{{api.tier}}` | + | `customer.last_order` | `{{api.last_order}}` | + + + + ``` + Hello {{api.customer_name}}! I see you're a {{api.tier}} + member. Your last order is {{api.last_order}}. + ``` + + + +--- + +## Using Variables in API Calls + +You can use variables anywhere in your API configuration: + +**In URL:** +``` +https://api.example.com/orders/{{collected.order_id}} +``` + +**In Headers:** +``` +Authorization: Bearer {{api_key}} +``` + +**In Query Parameters:** +``` +phone: {{caller_phone}} +``` + +--- + +## Tips + + + The description tells the AI when to make this call. Be specific: + + | Vague | Specific | + |-------|----------| + | "Get customer" | "Look up customer information using their phone number when the call starts" | + | "Check order" | "Fetch order status when the customer provides an order number" | + + + + APIs can fail. In your prompt, tell the agent what to do: + + "If customer data isn't available, greet generically and ask for their name. Don't mention that a lookup failed." + + + + Default is 5000ms (5 seconds). For slow APIs, increase this — but remember the caller is waiting. 
+ + +--- + +## Related + + + + Use API response data in prompts + + + Send data after calls complete + + diff --git a/fern/products/atoms/pages/platform/single-prompt/config/end-call.mdx b/fern/products/atoms/pages/platform/single-prompt/config/end-call.mdx new file mode 100644 index 0000000..73419b8 --- /dev/null +++ b/fern/products/atoms/pages/platform/single-prompt/config/end-call.mdx @@ -0,0 +1,116 @@ +--- +title: "End Call" +sidebarTitle: "End Call" +description: "Configure when and how your agent ends calls." +--- + +End Call tells your agent when to hang up. Without proper end conditions, calls may drag on awkwardly or never conclude. Define clear triggers so conversations wrap up naturally. + +**Location:** Config Panel (right sidebar) → End Call toggle + +--- + +## Setup + +1. Toggle **End Call** ON +2. Click the ⚙️ icon to open settings +3. Click **+ Add End Call** to create a function + +--- + +## Configuration + + + ![Add end call](../../building-agents/images/end-call-modal.png) + + +Each end call function has two fields: + +| Field | Purpose | Example | +|-------|---------|---------| +| **Name** | Identifier (used internally) | `end_call_resolved` | +| **Description** | Tells the AI when to trigger | "End the call when the customer confirms their issue is resolved" | + +The Description is critical — it's what the AI uses to decide when to end the call. Be specific. + +--- + +## Common End Call Functions + + + + | Field | Value | + |-------|-------| + | **Name** | `end_resolved` | + | **Description** | "End the call when the customer confirms their issue is completely resolved and they have no additional questions." | + + + + | Field | Value | + |-------|-------| + | **Name** | `end_goodbye` | + | **Description** | "End the call when the customer says goodbye, thank you, or indicates they're done." 
| + + + + | Field | Value | + |-------|-------| + | **Name** | `end_transferred` | + | **Description** | "End the AI portion of the call after successfully transferring to a human agent." | + + + + | Field | Value | + |-------|-------| + | **Name** | `end_no_response` | + | **Description** | "End the call if the caller doesn't respond after three attempts to re-engage them." | + + + +--- + +## Tips + + + | Vague | Specific | + |-------|----------| + | "When done" | "When customer confirms satisfaction" | + | "If goodbye" | "When customer says goodbye, thanks, or indicates they're finished" | + + + + Don't just handle the happy path: + + - Customer explicitly asks to end + - Customer becomes unresponsive after 3 prompts + - Customer requests callback instead + + + + If your main prompt says "end when resolved," your end call description should match that language. + + + + Think through every way a call might conclude: + + | Function | Scenario | + |----------|----------| + | `end_resolved` | Issue fixed, customer happy | + | `end_goodbye` | Customer says goodbye | + | `end_transfer` | After transferring | + | `end_no_response` | Caller stopped responding | + | `end_out_of_scope` | Can't help, directed elsewhere | + + +--- + +## Related + + + + Hand off to human agents + + + Configure idle timeouts + + diff --git a/fern/products/atoms/pages/platform/single-prompt/config/general-settings.mdx b/fern/products/atoms/pages/platform/single-prompt/config/general-settings.mdx new file mode 100644 index 0000000..aa531c1 --- /dev/null +++ b/fern/products/atoms/pages/platform/single-prompt/config/general-settings.mdx @@ -0,0 +1,52 @@ +--- +title: "General Settings" +sidebarTitle: "General Settings" +description: "Configure first message and timeout behavior for your agent." +--- + +General Settings let you configure the agent's opening message and how long it waits before prompting idle callers. 
+ +**Location:** Left Sidebar → Agent Settings → General tab + + + ![General settings](../../building-agents/images/general-settings.png) + + +--- + +## First Message + +Set a static first message that the agent speaks when a conversation begins. This is useful for campaigns at scale since it avoids generating the opening message via the LLM each time, reducing costs significantly. + +If left empty, the agent falls back to generating the first message from the LLM as usual. + + + You can use variables in the first message with double curly braces, e.g. `{{agent_name}}` or `{{company}}`. + + +--- + +## LLM Idle Timeout Settings + +Configure how long the agent waits for user response before sending an inactivity message. After 3 attempts with no response, the conversation automatically ends. + +| Setting | Default | Description | +|---------|---------|-------------| +| **Chat Timeout** | 60 sec | For text chat conversations | +| **Webcall Timeout** | 20 sec | For browser-based voice calls | +| **Telephony Timeout** | 20 sec | For phone calls | + +Each timeout triggers an inactivity prompt. If the user still doesn't respond after 3 prompts, the agent ends the conversation gracefully. + +--- + +## Related + + + + Speech speed and detection tuning + + + Configure call termination + + diff --git a/fern/products/atoms/pages/platform/single-prompt/config/knowledge-base.mdx b/fern/products/atoms/pages/platform/single-prompt/config/knowledge-base.mdx new file mode 100644 index 0000000..ae194b1 --- /dev/null +++ b/fern/products/atoms/pages/platform/single-prompt/config/knowledge-base.mdx @@ -0,0 +1,30 @@ +--- +title: "Knowledge Base" +sidebarTitle: "Knowledge Base" +description: "Give your agent access to reference documents." +--- + +A Knowledge Base lets your agent search and retrieve information during conversations — product details, policies, FAQs, anything too detailed for the prompt itself. 
+ +**Location:** Configuration Panel (right sidebar) → Knowledge Base toggle + +--- + +## Connecting a Knowledge Base + +1. Toggle **Knowledge Base** ON +2. Select a KB from the dropdown + +That's it. The agent automatically searches the KB when callers ask questions that need it. + + +You need to create a Knowledge Base first before you can attach it here. + + +--- + +## Learn More + + + Creating KBs, uploading documents, and best practices + diff --git a/fern/products/atoms/pages/platform/single-prompt/config/language-selection.mdx b/fern/products/atoms/pages/platform/single-prompt/config/language-selection.mdx new file mode 100644 index 0000000..be37e0e --- /dev/null +++ b/fern/products/atoms/pages/platform/single-prompt/config/language-selection.mdx @@ -0,0 +1,36 @@ +--- +title: "Language Selection" +sidebarTitle: "Language Selection" +description: "Set the primary language for your agent." +--- + +This sets the language your agent speaks by default. Simple as that. + +**Location:** Agent Settings → Languages tab + + + ![Languages settings](../../building-agents/images/language-settings.png) + + +--- + +## How It Works + +Click the dropdown, pick your language. The options you see depend on which voice you've selected — different voices support different languages. If you don't see the language you need, try a different voice first. + + +**Need multi-language support?** If your agent should switch languages mid-conversation based on what the caller speaks, configure that in [Model Settings](/atoms/atoms-platform/single-prompt-agents/agent-settings/model-settings). 
+ + +--- + +## Related Settings + + + + Pick your agent's voice + + + Language switching & fine-tuning + + diff --git a/fern/products/atoms/pages/platform/single-prompt/config/model-selection.mdx b/fern/products/atoms/pages/platform/single-prompt/config/model-selection.mdx new file mode 100644 index 0000000..50ec3dd --- /dev/null +++ b/fern/products/atoms/pages/platform/single-prompt/config/model-selection.mdx @@ -0,0 +1,50 @@ +--- +title: "Model Selection" +sidebarTitle: "Model Selection" +description: "Choose the right AI model for your agent." +--- + +The model is your agent's brain. It's what understands what callers say, figures out how to respond, and generates the words your agent speaks. Different models have different strengths — some are faster, some are more expressive, some are more affordable. + +**Location:** Prompt Section (top bar) → Model dropdown + + + ![Model dropdown](../../building-agents/images/model-dropdown.png) + + +--- + +## Available Models + +| Model | Provider | Type | Credit Usage | +|-------|----------|------|--------------| +| **Electron** | Smallest AI | Traditional | Lower | +| **GPT-4o** | OpenAI | Traditional | Moderate | +| **GPT-4.1** | OpenAI | Traditional | Moderate | +| **GPT Realtime** | OpenAI | Emotive | Higher | +| **GPT Realtime Mini** | OpenAI | Emotive | Higher | + +**Emotive models** have emotional awareness — they pick up on caller tone and respond with natural expression. + + +**Getting started?** GPT-4o is a great all-rounder — reliable, capable, and well-tested. + + + +**Try Electron.** It's our own model, built specifically for voice. Fast, affordable, and optimized for real-time conversations. + + +You can switch models anytime — nothing breaks when you do. 
+ +--- + +## Related Settings + + + + Pick your agent's voice + + + Language switching & fine-tuning + + diff --git a/fern/products/atoms/pages/platform/single-prompt/config/model-settings.mdx b/fern/products/atoms/pages/platform/single-prompt/config/model-settings.mdx new file mode 100644 index 0000000..74f57fc --- /dev/null +++ b/fern/products/atoms/pages/platform/single-prompt/config/model-settings.mdx @@ -0,0 +1,71 @@ +--- +title: "Model Settings" +sidebarTitle: "Model Settings" +description: "Configure AI model, language, and speech behavior." +--- + +Model Settings control how the AI behaves — the model powering responses, language handling, and formatting preferences. + +**Location:** Left Sidebar → Agent Settings → Model tab + + + Model settings + + +--- + +## AI Model + +Choose the LLM powering your agent and its primary language. + +| Setting | Description | +|---------|-------------| +| **LLM Model** | The AI model (Electron, GPT-4o, etc.) | +| **Language** | Primary language for responses | + +You can also set the model in the [Prompt Section](/atoms/atoms-platform/single-prompt-agents/prompt-section/model-selection) dropdown at the top of the editor. + +--- + +## Speech Formatting + +When enabled (default: ON), the system automatically formats transcripts for readability — adding punctuation, paragraphs, and proper formatting for dates, times, and numbers. + +--- + +## Language Switching + +Enable your agent to switch languages mid-conversation based on what the caller speaks (default: ON). 
+ +### Advanced Settings + +When Language Switching is enabled, you can fine-tune detection: + +| Setting | Range | Default | What it does | +|---------|-------|---------|--------------| +| **Minimum Words for Detection** | 1-10 | 2 | How many words before considering a switch | +| **Strong Signal Threshold** | 0-1 | 0.7 | Confidence level for immediate switch | +| **Weak Signal Threshold** | 0-1 | 0.3 | Confidence level for tentative detection | +| **Consecutive Weak Signals** | 1-8 | 2 | How many weak signals needed to switch | + +**Understanding the thresholds:** + +- **Strong Signal:** Very confident the caller switched → switches immediately +- **Weak Signal:** Somewhat confident → waits for more evidence +- **Higher thresholds** = More certain before switching (fewer false switches) +- **Lower thresholds** = Quicker to switch (more responsive) + +For most cases, the defaults work well. Adjust only if you're seeing unwanted switching behavior. + +--- + +## Related + + + + Speech speed, pronunciation, and detection + + + Where you write your agent's instructions + + diff --git a/fern/products/atoms/pages/platform/single-prompt/config/phone-number.mdx b/fern/products/atoms/pages/platform/single-prompt/config/phone-number.mdx new file mode 100644 index 0000000..1aa4a4a --- /dev/null +++ b/fern/products/atoms/pages/platform/single-prompt/config/phone-number.mdx @@ -0,0 +1,38 @@ +--- +title: "Phone Number" +sidebarTitle: "Phone Number" +description: "Assign a phone number to your agent." +--- + +The Phone Number tab lets you connect your agent to a phone number for inbound and outbound calls. Once assigned, callers to that number will reach this agent. + +**Location:** Left Sidebar → Agent Settings → Phone Number tab + + + ![Phone number settings](../../building-agents/images/phone-number-settings.png) + + +--- + +## Select Phone Numbers + +Click the dropdown to choose from your available phone numbers. 
If you haven't set up any numbers yet, you'll see "No phone numbers selected." + +You can assign multiple numbers to the same agent if needed. + + +You need to purchase or configure phone numbers before they appear here. See [Phone Numbers](/atoms/atoms-platform/deployment/phone-numbers) to get started. + + +--- + +## Related + + + + Get and manage phone numbers + + + Set up outbound calling + + diff --git a/fern/products/atoms/pages/platform/single-prompt/config/transfer-call.mdx b/fern/products/atoms/pages/platform/single-prompt/config/transfer-call.mdx new file mode 100644 index 0000000..d3b1e75 --- /dev/null +++ b/fern/products/atoms/pages/platform/single-prompt/config/transfer-call.mdx @@ -0,0 +1,164 @@ +--- +title: "Transfer Call" +sidebarTitle: "Transfer Call" +description: "Configure call transfers to human agents." +--- + +Transfer Call lets your agent hand off conversations to humans when needed. You can do a cold transfer (immediate connection) or a warm transfer (AI briefs the human first). + +**Location:** Config Panel (right sidebar) → Transfer Call toggle + +--- + +## Setup + +1. Toggle **Transfer Call** ON +2. Click ⚙️ to open settings +3. Click **+ Add Transfer Call** + +--- + +## Configuration + + + ![Transfer call modal](../../building-agents/images/transfer-call-modal.png) + + +| Field | Required | Description | +|-------|----------|-------------| +| **Name** | Yes | Identifier (e.g., `transfer_to_sales`) | +| **Description** | Yes | When to trigger this transfer | +| **Transfer Number** | Yes | Destination with country code | +| **Type** | Yes | Cold or Warm | + +--- + +## Transfer Types + + + + Immediate handoff. The caller connects directly to the destination without any briefing. + + | Pros | Cons | + |------|------| + | Fast | No context for receiving agent | + | Simple | Caller may repeat themselves | + + **Best for:** Simple escalations, high call volume, when speed matters most. + + + + AI briefs the human first. 
The receiving agent gets context before the caller joins. + + | Pros | Cons | + |------|------| + | Human has context | Slightly longer | + | Better experience | More configuration | + + **Best for:** Complex issues, VIP callers, when continuity matters. + + + +--- + +## Warm Transfer Options + +When you select Warm Transfer, additional settings appear: + + + ![Warm transfer options](../../building-agents/images/transfer-call-warm.png) + + +### During Transfer + +| Setting | Purpose | +|---------|---------| +| **On-hold Music** | What the caller hears while waiting | + +### During Agent Connection + +| Setting | Description | +|---------|-------------| +| **Whisper Message** | Message the agent hears privately before connecting | +| **Handoff Message** | What the AI says to brief the agent (can be dynamic or static) | + +### After Connection + +| Setting | Description | +|---------|-------------| +| **Three-way Message** | Message both parties hear when connected | + +--- + +## Examples + + + + | Field | Value | + |-------|-------| + | **Name** | `transfer_sales` | + | **Description** | "Transfer when the caller expresses strong purchase intent and wants to speak with sales." | + | **Type** | Cold | + + + + | Field | Value | + |-------|-------| + | **Name** | `transfer_support` | + | **Description** | "Transfer when the issue requires manual intervention or the caller requests a human." | + | **Type** | Warm | + + + + | Field | Value | + |-------|-------| + | **Name** | `transfer_manager` | + | **Description** | "Transfer when the caller is upset and asks for a manager or supervisor." 
| + | **Type** | Warm | + + + +--- + +## Tips + + + | Vague | Specific | + |-------|----------| + | "When they ask for help" | "When the caller requests to speak with a human or the issue requires manual intervention" | + | "For sales" | "When the caller expresses strong purchase intent and wants to proceed" | + + + + Only the agent hears this — give them everything they need: + + "Incoming transfer: Customer John calling about a billing dispute. He's been charged twice for order #12345. Already verified his identity." + + + + Both parties hear this — use it for smooth handoffs: + + "I've connected you with Sarah from our billing team. Sarah, John is calling about a duplicate charge on order #12345." + + + + | Transfer | Destination | When | + |----------|-------------|------| + | `transfer_sales` | Sales team | Purchase interest | + | `transfer_support` | Support team | Technical issues | + | `transfer_billing` | Billing team | Payment questions | + | `transfer_manager` | Manager | Escalations | + + +--- + +## Related + + + + Configure call termination + + + Get notified on transfers + + diff --git a/fern/products/atoms/pages/platform/single-prompt/config/variables.mdx b/fern/products/atoms/pages/platform/single-prompt/config/variables.mdx new file mode 100644 index 0000000..277c16e --- /dev/null +++ b/fern/products/atoms/pages/platform/single-prompt/config/variables.mdx @@ -0,0 +1,154 @@ +--- +title: "Variables" +sidebarTitle: "Variables" +description: "Use dynamic values in your prompts and conversations." +--- + +Variables let you personalize conversations with dynamic data. Instead of static text, insert values that change based on the caller, API responses, or information collected during the call. + +**Location:** Config Panel (right sidebar) → Variables + +--- + +## Variable Types + + + + + ![User defined variables](../../building-agents/images/variables-user.png) + + + Variables you create with default values. 
+ + | Example | Use Case | + |---------|----------| + | `{{company_name}}` | Your company name | + | `{{support_hours}}` | Operating hours | + | `{{promo_code}}` | Current promotion | + + + + + ![System variables](../../building-agents/images/variables-system.png) + + + Predefined variables provided by the platform. They are generated at runtime, read-only, and always available. + + | Variable | Description | + |----------|-------------| + | `{{caller_phone}}` | Caller's phone number | + | `{{call_time}}` | When call started | + | `{{call_duration}}` | Elapsed seconds | + | `{{call_direction}}` | "inbound" or "outbound" | + | `{{agent_id}}` | This agent's ID | + | `{{call_id}}` | Unique call identifier | + + + + + ![API variables](../../building-agents/images/variables-api.png) + + + Variables populated from API responses. + + | Syntax | Source | + |--------|--------| + | `{{api.customer_name}}` | API response field | + | `{{api.account_tier}}` | API response field | + + → See [API Calls](/atoms/atoms-platform/single-prompt-agents/configuration-panel/api-calls) + + + +--- + +## Syntax + +Use double curly braces: + +``` +{{variable_name}} +``` + +--- + +## Example + +**In your prompt:** + +``` +Hello {{customer_name}}! Thanks for calling {{company_name}}. + +I see you're a {{api.tier}} member. Your last order was +{{api.last_order_status}}. + +How can I help you today? +``` + +**At runtime:** + +``` +Hello Sarah! Thanks for calling Acme Corp. + +I see you're a Premium member. Your last order was +shipped on Monday. + +How can I help you today? +``` + +--- + +## Default Values + +Handle missing variables gracefully: + +``` +Hello {{customer_name|there}}! +``` + +If `customer_name` is empty → "Hello there!" + +--- + +## Creating User Variables + +1. Open the Variables panel +2. Go to **User Defined** tab +3. Click **+ Add Variable** +4. Enter name and default value +5. 
Use in prompts with `{{name}}` syntax + +--- + +## Best Practices + + + | Good | Bad | + |------|-----| + | `{{customer_first_name}}` | `{{n}}` | + | `{{appointment_date}}` | `{{d1}}` | + + + + What if the variable is empty? + + - Use default values: `{{name|there}}` + - Handle in prompt: "If customer name is unknown, greet generically" + + + + Verify variables replace correctly. Check Convo Logs to see actual values. + + +--- + +## Related + + + + Fetch data from external APIs + + + Use variables in your prompts + + diff --git a/fern/products/atoms/pages/platform/single-prompt/config/voice-selection.mdx b/fern/products/atoms/pages/platform/single-prompt/config/voice-selection.mdx new file mode 100644 index 0000000..fae1331 --- /dev/null +++ b/fern/products/atoms/pages/platform/single-prompt/config/voice-selection.mdx @@ -0,0 +1,45 @@ +--- +title: "Voice Selection" +sidebarTitle: "Voice Selection" +description: "Choose and preview voices for your agent." +--- + +Your agent's voice is often the first thing callers notice. It sets the tone before a single word of your prompt matters. A voice that fits your brand builds immediate trust — one that doesn't can undermine even the best conversation design. + +**Location:** Prompt Section (top bar) → Voice dropdown + + + ![Voice picker](../../building-agents/images/voice-picker.png) + + +--- + +## Finding the Right Voice + +Click the Voice dropdown to open the picker. You'll see: + +| Section | What it does | +|---------|--------------| +| **Search** | Find voices by name or characteristic | +| **Filters** | Narrow by language, accent, age, gender, or model type | +| **Currently Used** | Your selected voice at the top | +| **All Voices** | The full library to browse | + +Take your time here. Filter by the language you need, then explore. Every voice has a personality — some are warm and reassuring, others confident and direct. + + +**Always preview.** Click the ▶️ button next to any voice to hear a sample. 
Listen with your prompt in mind — does this voice sound like the agent you've written? + + +--- + +## Related Settings + + + + Speed, pronunciation & turn-taking + + + Set the primary language + + diff --git a/fern/products/atoms/pages/platform/single-prompt/config/voice-settings.mdx b/fern/products/atoms/pages/platform/single-prompt/config/voice-settings.mdx new file mode 100644 index 0000000..8857af8 --- /dev/null +++ b/fern/products/atoms/pages/platform/single-prompt/config/voice-settings.mdx @@ -0,0 +1,166 @@ +--- +title: "Voice Settings" +sidebarTitle: "Voice Settings" +description: "Fine-tune speech behavior, pronunciation, and voice detection." +--- + +Voice Settings give you precise control over how your agent sounds and listens. From speech speed to background ambiance, pronunciation rules to turn-taking — this is where you shape the audio experience. + +**Location:** Left Sidebar → Agent Settings → Voice tab + + + ![Voice settings](../../building-agents/images/voice-settings.png) + + +--- + +## Voice + +Select the voice for your agent. Click the dropdown to browse available voices — you can preview each one before selecting. + +--- + +## Speech Settings + +### Speech Speed + +Control how fast your agent speaks. + +| Control | Range | Default | +|---------|-------|---------| +| Slider | Slow ↔ Fast | 1 | + +Slide left for a more measured, deliberate pace. Slide right for quicker delivery. Find the sweet spot that matches your use case — slower often works better for complex information, faster for simple confirmations. + +--- + +## Pronunciation & Background + +### Pronunciation Dictionaries + +Add custom pronunciations for words that aren't pronounced correctly by the default voice. + +This is especially useful for: +- Brand names +- Technical terms +- Proper nouns +- Industry-specific jargon + +**To add a pronunciation:** Click **Add Pronunciation** to open the modal. 
+ + + ![Add pronunciation](../../building-agents/images/add-pronunciation.png) + + +| Field | Description | +|-------|-------------| +| **Word** | The word as written | +| **Pronunciation** | How it should sound | + +### Background Sound + +Add ambient audio behind your agent's voice for a more natural feel. + +| Option | Description | +|--------|-------------| +| **None** | Silent background (default) | +| **Office** | Subtle office ambiance | +| **Call Center** | Busy call center sounds | +| **Static** | Light static noise | +| **Cafe** | Coffee shop atmosphere | + +--- + +## Advanced Voice Settings + +### Mute User Until First Bot Response + +When enabled, the user's audio is muted until the agent's first response is complete. Useful for preventing early interruptions during the greeting. + +### Voicemail Detection + +Detects when a call goes to voicemail instead of reaching a live person. + + +Voicemail detection may not work as expected if **Release Time** is less than 0.6 seconds. + + +### Personal Info Redaction (PII) + +Automatically redacts sensitive personal information from transcripts and logs. + +### Denoising + +Filters out background noise and improves voice clarity before processing. This helps reduce false detections caused by environmental sounds — useful when callers are in noisy environments. + +--- + +## Voice Detection + +Fine-tune how your agent recognizes when someone is speaking. + +### Confidence + +Defines how strict the system is when deciding if detected sound is speech. + +- **Higher values** → Less likely to trigger on background noise +- **Lower values** → More sensitive to quiet speech + +| Default | Range | +|---------|-------| +| 0.70 | 0 – 1 | + +### Min Volume + +The minimum volume level required to register as speech. 
+ +| Default | Range | +|---------|-------| +| 0.60 | 0 – 1 | + +### Trigger Time (Seconds) + +How long the system waits after detecting the start of user speech (and after the bot has finished speaking) before processing. This helps avoid overlapping speech and false triggers. + +| Default | Range | +|---------|-------| +| 0.10 | 0 – 1 | + +### Release Time (Seconds) + +How long the system waits after the user stops speaking before the bot begins its response. This ensures the user has completely finished their thought. + +| Default | Range | +|---------|-------| +| 0.30 | 0 – 1+ | + + +**Start with defaults.** Only adjust these if you're experiencing specific issues like missed words or premature responses. + + +--- + +## Smart Turn Detection + +Intelligent detection of when the caller is done speaking. When enabled, the agent uses context and speech patterns — not just silence — to determine when it's time to respond. + +--- + +## Interruption Backoff Timer + +Time in seconds to prevent interruptions after the bot starts speaking (default: 0, disabled). + +This helps prevent conversation loops when the user and bot interrupt each other — the agent will wait this duration before allowing itself to be interrupted again. + +--- + +## Related + + + + Configure AI model and language behavior + + + Choose and preview voices + + diff --git a/fern/products/atoms/pages/platform/single-prompt/config/webhooks.mdx b/fern/products/atoms/pages/platform/single-prompt/config/webhooks.mdx new file mode 100644 index 0000000..982e0ed --- /dev/null +++ b/fern/products/atoms/pages/platform/single-prompt/config/webhooks.mdx @@ -0,0 +1,29 @@ +--- +title: "Webhooks" +sidebarTitle: "Webhooks" +description: "Connect your agent to webhook endpoints." +--- + +Webhooks push real-time data to your systems when call events happen — starts, ends, analytics ready. Use them to update CRMs, create tickets, trigger workflows, or feed analytics pipelines. 
+ +**Location:** Agent Settings → Webhooks + +--- + +## Adding to Your Agent + +Once a webhook endpoint exists, connect it to your agent here. + + + ![Webhook in agent](../../building-agents/images/webhook-agent.png) + + +Select your webhook from the dropdown. The agent will now send events to that endpoint. + +--- + +## Next + + + Create endpoints, manage subscriptions, and view payload details + diff --git a/fern/products/atoms/pages/platform/single-prompt/from-template.mdx b/fern/products/atoms/pages/platform/single-prompt/from-template.mdx new file mode 100644 index 0000000..f828327 --- /dev/null +++ b/fern/products/atoms/pages/platform/single-prompt/from-template.mdx @@ -0,0 +1,84 @@ +--- +title: "From Template" +sidebarTitle: "Start with Template" +description: "Jumpstart your Single Prompt agent with pre-built templates." +--- + +Templates give you a proven starting point. Pick one that matches your use case, customize it, and you're ready to go. + +--- + +## Step 1: Choose Start with Template + +From your dashboard, click the green **Create Agent** button in the top right, then select **Start with Template**. + + + Choose Start with Template from the Create Agent modal + + +--- + +## Step 2: Browse and Select a Template + +Browse the template gallery and pick one that matches your use case. Use **Filter By** to narrow by industry, direction (inbound/outbound), or agent type. Click any template to select it, then hit **Create**. + + + Browse and select a template that fits your use case + + +--- + +## Step 3: Customize Your Template + +The editor opens with everything pre-filled — prompt, voice, and structure ready to customize. + + + The editor opens with the template pre-configured + + +Templates are starting points. Always replace the placeholders with your specifics: + +- **Company name and details** — Replace `[Company]` with your actual business +- **Policies and rules** — Update return windows, hours, pricing, etc. 
+- **Tone adjustments** — Match the personality to your brand + + +**Keep the structure.** Templates are organized intentionally. Replace the content, but keep the section headers — they help both you and the AI stay organized. + + +--- + +## Step 4: Test Your Agent + +Click **Test Agent** in the top-right to start a test call. + +You can test your agent in three ways: + +- **Web Call** — talk to your agent through your browser microphone +- **Telephony Call** — enter a phone number and get a call from your agent +- **Chat** — text-based conversation with your agent + + + Test your agent via Web Call, Telephony, or Chat + + +Talk through a few scenarios — ask a normal question, ask something unexpected, and interrupt mid-response. Listen for clarity and that the agent follows your guidelines. + +--- + +## What's Next + + + + Structure and improve your agent's instructions + + + Ground responses in your actual docs and data + + + Get a real phone number and go live + + + Voice, model, language, and behavior settings + + diff --git a/fern/products/atoms/pages/platform/single-prompt/manual-setup.mdx b/fern/products/atoms/pages/platform/single-prompt/manual-setup.mdx new file mode 100644 index 0000000..e263e61 --- /dev/null +++ b/fern/products/atoms/pages/platform/single-prompt/manual-setup.mdx @@ -0,0 +1,105 @@ +--- +title: "Manual Setup" +sidebarTitle: "Start from Scratch" +description: "Build a Single Prompt agent from a blank canvas." +--- + +Starting from scratch gives you complete control. You'll configure every detail yourself — model, voice, prompt, and behavior. + +--- + +## Step 1: Click Start From Scratch + +From your dashboard, click the green **Create Agent** button in the top right, then select **Start from Scratch**. + + + Start From Scratch button in Create Agent modal + + +--- + +## Step 2: Select Single Prompt + +Choose **Single Prompt** as your agent type. 
+ + + Select Single Prompt agent type + + +--- + +## Step 3: The Editor Opens + +The editor opens with a blank prompt and default settings — a clean slate for you to configure from the ground up. + + + The Single Prompt agent editor + + +--- + +## Step 4: Configure Your Agent (Left Panel) + +Before writing your prompt, set the basics in the left panel: + +| Field | What to choose | +|-------|----------------| +| **Agent Type** | Single Prompt (already selected) | +| **Call Direction** | **Inbound** if customers call in, **Outbound** if the agent makes calls | +| **Emotive Model** | Toggle on for more expressive voice (Beta), or leave off | +| **Voice** | Pick a voice from the library — use the preview to listen | +| **Knowledge Base** | Optionally attach an existing KB so the agent can use your docs/FAQs | + +--- + +## Step 5: Write Your Prompt + +The right panel is your prompt editor. This is the heart of your agent — it tells the AI exactly how to behave on every call. + +Write clear instructions covering: + +- **Role & Objective** — Who is this agent and what's their goal? +- **Conversational Flow** — What steps should the agent follow? +- **Dos, Don'ts & Fallbacks** — How should the agent behave in tricky situations? +- **End Conditions** — When should the call end? + + +**First time?** Start simple. Write a few sentences about who the agent is and what it should do. You can refine everything else as you go. + + +--- + +## Step 6: Test Your Agent + +Click **Test Agent** in the top-right to start a test call. + +You can test your agent in three ways: + +- **Web Call** — talk to your agent through your browser microphone +- **Telephony Call** — enter a phone number and get a call from your agent +- **Chat** — text-based conversation with your agent + + + Test your agent via Web Call, Telephony, or Chat + + +Talk through a few scenarios — ask a normal question, ask something unexpected, and interrupt mid-response. Listen for clarity and that the agent follows your guidelines.
+ +--- + +## What's Next + + + + Structure and improve your agent's instructions + + + Ground responses in your actual docs and data + + + Get a real phone number and go live + + + Voice, model, language, and behavior settings + + diff --git a/fern/products/atoms/pages/platform/single-prompt/overview.mdx b/fern/products/atoms/pages/platform/single-prompt/overview.mdx new file mode 100644 index 0000000..df531d2 --- /dev/null +++ b/fern/products/atoms/pages/platform/single-prompt/overview.mdx @@ -0,0 +1,107 @@ +--- +title: "Single Prompt Agents" +sidebarTitle: "Overview" +description: "Build flexible, conversational agents powered by a single prompt." +--- + + +New to Atoms? Start with [Build Your First Agent](/atoms/atoms-platform/get-started/quick-start) — it creates a Single Prompt agent in 5 minutes. + + +A Single Prompt agent runs on one set of instructions. You write a prompt that defines who the agent is, what it knows, and how it should behave — and that prompt governs the entire conversation. The AI interprets your instructions and applies them dynamically, adapting to whatever direction the caller takes. + +--- + +## When to Use + +Single Prompt is ideal for **open-ended, flexible conversations** — customer support, general inquiries, FAQs. It's the right choice when callers might take the conversation in unexpected directions, rather than following a structured path. + +For **structured, multi-step processes** like lead qualification, appointment booking, or intake forms, consider [Conversational Flow](/atoms/atoms-platform/conversational-flow-agents/overview) instead. + +--- + +## How It Works + +Think of your prompt as a briefing. You're telling the agent: here's your role, here's what you know, here's how to handle situations. The AI internalizes this once and then uses that understanding for every exchange. + +When a caller speaks, the agent doesn't follow a script. It reasons through the conversation based on your instructions. 
This is why Single Prompt agents feel natural — they're not jumping between pre-written responses, they're thinking through each moment. + +--- + +## Capabilities + +**Dynamic tool usage.** You can connect your agent to APIs, databases, and external services. The agent decides when to use them based on the conversation. If a caller asks about their order, the agent can look it up. If they want to book something, it can check availability. + +**Conversation memory.** Everything said in the call stays in context. The agent remembers details from earlier in the conversation and can reference them naturally. + +**Handling the unexpected.** Without a rigid flow, the agent adapts to topic changes, follow-up questions, and tangents. Real conversations rarely follow a straight line — Single Prompt agents are designed for that reality. + +--- + +## Building a Single Prompt Agent + +You'll create three things: + +**1. The Prompt** + +This is the core. Your prompt should cover: +- **Identity** — Who is this agent? What's their name, role, personality? +- **Knowledge** — What do they know? Products, policies, FAQs, context. +- **Behavior** — How should they sound? What's off-limits? How do they handle edge cases? +- **Endings** — When should the call wrap up? When should they transfer? + +**2. Tools** (optional) + +If you want the agent to take actions — look up records, check calendars, create tickets — you'll configure the tools it can access and describe when to use them. + +**3. Voice and Model** + +Pick the voice your agent speaks with and the AI model that powers its reasoning. + +--- + +## The Editor + +Once you create a Single Prompt agent, you land in the editor — your workspace for everything. 
+ + + ![Single Prompt editor](../building-agents/images/sp-editor.png) + + +| Area | Location | What It Does | +|------|----------|--------------| +| **Agent Setup** | Left sidebar | Navigate between [Prompt](/atoms/atoms-platform/single-prompt-agents/prompt-section/writing-prompts), [Agent Settings](/atoms/atoms-platform/single-prompt-agents/agent-settings/voice-settings), [Widget](/atoms/atoms-platform/features/widget), [Integrations](/atoms/atoms-platform/features/integrations), [Post Call Metrics](/atoms/atoms-platform/features/post-call-metrics) | +| **[Prompt Section](/atoms/atoms-platform/single-prompt-agents/prompt-section/writing-prompts)** | Top bar | Model, voice, and language dropdowns | +| **Prompt Editor** | Center | Where you [write your agent's instructions](/atoms/atoms-platform/single-prompt-agents/prompt-section/writing-prompts) | +| **[Configuration Panel](/atoms/atoms-platform/single-prompt-agents/configuration-panel/end-call)** | Right sidebar | End Call, Transfer Call, Knowledge Base, Variables, API Calls | +| **Actions** | Top right | [Convo Logs](/atoms/atoms-platform/analytics-logs/conversation-logs), [Lock Agent](/atoms/atoms-platform/analytics-logs/locking), [Test Agent](/atoms/atoms-platform/analytics-logs/testing) | + +--- + +## After You Launch + +Once your agent is live, refinement happens in a few places: + +**Prompt updates.** You'll review call logs, find where the agent struggled, and tighten your instructions. Most improvements come from prompt iteration. + +**Voice tuning.** Adjust speech speed, add pronunciation rules for tricky words, tweak turn-taking behavior. + +**Tool adjustments.** Add new capabilities, modify API connections, or change when tools get triggered. + +**Configuration.** Fine-tune end call conditions, transfer settings, timeout behavior, and more. 
+ +--- + +## Get Started + + + + Blank canvas with full control over every setting + + + Pre-built prompts for common use cases + + + Describe what you want, AI generates the prompt + + \ No newline at end of file diff --git a/fern/products/atoms/pages/platform/single-prompt/writing-prompts.mdx b/fern/products/atoms/pages/platform/single-prompt/writing-prompts.mdx new file mode 100644 index 0000000..7ed05f6 --- /dev/null +++ b/fern/products/atoms/pages/platform/single-prompt/writing-prompts.mdx @@ -0,0 +1,105 @@ +--- +title: "The Prompt Editor" +sidebarTitle: "Prompt Editor" +description: "Where you write your agent's instructions — a markdown editor with section navigation." +--- + + +Looking for prompt best practices? See the [Prompting Guide](/atoms/developer-guide/build/agents/llm/prompts) for voice-specific techniques, common mistakes, and a complete example blueprint. + + +The center of the Single Prompt editor is where you tell your agent who to be and how to behave. It's a **markdown editor**, so you can use headings, lists, bold text, and other formatting to organize your instructions clearly. + +--- + +## Section Navigation + +Click the **Prompt Section** dropdown (above the editor) to jump directly to any section of your prompt. + + + Prompt editor + + +The dropdown reads your markdown headings and creates a navigation menu. This is especially useful for longer prompts — instead of scrolling, you can jump straight to: + +- Role & Objective +- Personality & Tone +- Conversational Flow +- Common Questions +- End Conditions +- Or any custom sections you create + + +**Templates are structured this way.** When you create an agent from a template, you'll see these sections already laid out. Even starting from scratch, using clear markdown headings keeps your prompt organized and easy to navigate. 
+ + +--- + +## Writing in Markdown + +Use standard markdown formatting: + +| Format | Syntax | Use For | +|--------|--------|---------| +| **Headings** | `## Section Name` | Organize major sections (shows in dropdown) | +| **Bullet lists** | `- item` | Personality traits, guidelines, rules | +| **Numbered lists** | `1. step` | Conversational flows, procedures | +| **Bold** | `**text**` | Emphasis on key points | + +### Example Structure + +```markdown +## Role & Objective + +You are Sarah, a claims support specialist for Acme Insurance. +Your goal is to help policyholders check claim status and understand next steps. + +## Personality & Tone + +- Warm and empathetic — claims are stressful +- Clear and jargon-free +- Patient and never rushed + +## Conversational Flow + +1. Greet and verify identity +2. Ask how you can help +3. Look up their claim +4. Provide status update +5. Answer follow-up questions +6. Confirm next steps and close + +## End Conditions + +End the call when: +- Caller confirms they have what they need +- They say goodbye +- No response after 3 attempts +``` + +--- + +## Using Variables + +Make your prompts dynamic with variables: + +```markdown +Hello {{customer_name}}! I see you're calling about order #{{order_id}}. +``` + +Variables get replaced at runtime with actual values from your data or API responses. 
+ +→ [Learn more about Variables](/atoms/atoms-platform/single-prompt-agents/configuration-panel/variables) + +--- + +## Best Practices + + + + See how well-structured prompts are organized + + + Deep dive into crafting effective prompts + + diff --git a/fern/products/atoms/pages/platform/testing-logs/conversation-logs.mdx b/fern/products/atoms/pages/platform/testing-logs/conversation-logs.mdx new file mode 100644 index 0000000..f6367f0 --- /dev/null +++ b/fern/products/atoms/pages/platform/testing-logs/conversation-logs.mdx @@ -0,0 +1,166 @@ +--- +title: "Conversation Logs" +sidebarTitle: "Conversation Logs" +description: "Review detailed transcripts and debug agent behavior." +--- + +Conversation Logs provide a detailed record of every conversation your agent has. Use them to debug issues, understand caller behavior, and improve your agent over time. + +--- + +## Location + +**Top right → "Convo Logs" button** + +--- + +## What's Included + +Each conversation log contains: + +| Data | Description | +|------|-------------| +| **Full Transcript** | Every word spoken by caller and agent | +| **Timestamps** | When each message occurred | +| **Caller Information** | Phone number, call duration | +| **Outcome** | How the call ended (disposition) | +| **Variables** | Values at each stage | +| **Node Path** | (Convo Flow) Which nodes were visited | +| **Post-Call Metrics** | Extracted analysis data | + +--- + +## Accessing Logs + +1. Click **"Convo Logs"** button (top right of agent editor) +2. Browse list of conversations +3. Click any conversation to see details + +→ **NEEDS PLATFORM INFO:** Screenshot of conversation logs interface + +--- + +## Using Logs for Debugging + +| Issue | What to Look For | +|-------|------------------| +| **Wrong responses** | Check what caller said vs. 
agent understanding | +| **Unexpected end** | Review end call trigger and timing | +| **Transfer problems** | Verify transfer triggered at right moment | +| **Variable issues** | Check variable values throughout | +| **Branch mistakes** | (Convo Flow) See which condition matched | + +### Example Debug Flow + +1. Caller says call ended too early +2. Open conversation log for that call +3. Find the end point in transcript +4. Check what triggered end call +5. Adjust end call conditions or prompt +6. Test again + +--- + +## Filtering and Searching + +Find specific conversations by: + +| Filter | Examples | +|--------|----------| +| **Date range** | Last 24 hours, last week, custom | +| **Duration** | Short calls, long calls | +| **Outcome** | Successful, unsuccessful, transferred | +| **Caller** | Specific phone number | +| **Search** | Keywords in transcript | + +--- + +## Analyzing Patterns + +Review logs regularly to identify: + +### Common Questions + +What do callers frequently ask? Add to your prompt or knowledge base. + +### Failure Points + +Where do conversations break down? Improve handling for those scenarios. + +### Unexpected Topics + +Are callers asking about things you didn't anticipate? Expand coverage. + +### Successful Patterns + +What makes good conversations work? Reinforce those approaches. + +--- + +## Conversation Log Details + +When you open a specific conversation: + +### Transcript View + +Full conversation with: +- Caller messages +- Agent responses +- Timestamps +- Turn indicators + +### Variable Tracking + +See how variables changed: +``` +{{customer_name}} → set to "John Smith" (Pre-Call API) +{{issue_type}} → set to "billing" (from conversation) +{{resolved}} → set to "true" (agent determined) +``` + +### Node Path (Convo Flow) + +Visual or list showing: +``` +Start → Greeting → Issue Type → Billing Help → Resolution → End Call +``` + +Helps identify if conversation took expected path. 
+ +--- + +## Best Practices + +### Review Regularly + +Don't wait for complaints. Proactively review logs to catch issues early. + +### Sample Across Scenarios + +Look at: +- Successful calls +- Failed calls +- Long calls +- Short calls +- Different times of day + +### Take Notes + +Document patterns you find. Create a list of improvements to make. + +### Close the Loop + +After making changes, review new conversations to verify improvements. + +--- + +## What's Next + + + + Protect production agents + + + Testing best practices + + diff --git a/fern/products/atoms/pages/platform/testing-logs/locking.mdx b/fern/products/atoms/pages/platform/testing-logs/locking.mdx new file mode 100644 index 0000000..08e5886 --- /dev/null +++ b/fern/products/atoms/pages/platform/testing-logs/locking.mdx @@ -0,0 +1,145 @@ +--- +title: "Locking Your Agent" +sidebarTitle: "Locking Your Agent" +description: "Prevent accidental edits to production agents." +--- + +Locking protects your agent from unintended changes. Once locked, no edits can be made until you explicitly unlock. This is essential for production agents handling real calls. + + + [IMAGE: Lock Agent toggle in both states] + + +--- + +## Location + +**Top right → "Lock Agent" toggle** + +--- + +## Why Lock Agents + +### Protect Production + +A typo in a prompt could affect thousands of calls. Locking prevents accidental changes. + +### Team Safety + +Multiple team members can view a locked agent without risk of conflicting edits. + +### Version Stability + +Lock agents to preserve a working version before experimenting with changes. + +### Deployment Confidence + +Know exactly what configuration is handling calls — it can't change unexpectedly. + +--- + +## How to Lock + +1. Finish configuring your agent +2. Test thoroughly +3. Toggle **"Lock Agent"** to ON +4. 
Agent is now locked + +--- + +## What Happens When Locked + +| Feature | Locked State | +|---------|--------------| +| Edit prompt | ❌ Disabled | +| Change voice/model | ❌ Disabled | +| Modify configuration | ❌ Disabled | +| Test Agent | ✅ Still works | +| View Convo Logs | ✅ Still works | +| Make/receive calls | ✅ Still works | + +Testing is still available — it doesn't change the agent configuration. + +--- + +## How to Unlock + +1. Toggle **"Lock Agent"** to OFF +2. Make your changes +3. Test the changes +4. Lock again when done + +--- + +## Best Practices + +### Lock Production Agents + +Any agent handling real customer calls should be locked. + +### Unlock → Change → Test → Lock + +Follow this cycle: +1. Unlock the agent +2. Make your changes +3. Test thoroughly +4. Lock again + +### Communicate with Team + +If multiple people work on agents, communicate before unlocking: +- "I'm unlocking Agent X to make prompt changes" +- "Done — Agent X is locked again" + +### Don't Leave Unlocked + +After making changes, always lock again. An unlocked production agent is a risk. + +--- + +## Locked vs Unlocked Indicators + +The interface clearly shows lock state: + +| State | Visual | +|-------|--------| +| **Locked** | Toggle is ON, may show lock icon | +| **Unlocked** | Toggle is OFF, editable | + +--- + +## When to Lock + +| Scenario | Lock? | +|----------|-------| +| Active in production | ✅ Yes | +| Connected to phone number | ✅ Yes | +| Used in campaigns | ✅ Yes | +| Still developing | ❌ Not yet | +| Testing new changes | ❌ Not until ready | + +--- + +## Version Control Alternative + +For more sophisticated versioning: + +1. **Clone the agent** — Create a copy for development +2. **Make changes to copy** — Experiment freely +3. **Test the copy** — Verify it works +4. **Replace production** — Swap when ready + +This keeps your production agent locked at all times. 
+ +--- + +## What's Next + + + + Deploy to a phone number + + + Run outbound campaigns + + diff --git a/fern/products/atoms/pages/platform/testing-logs/testing.mdx b/fern/products/atoms/pages/platform/testing-logs/testing.mdx new file mode 100644 index 0000000..4f18177 --- /dev/null +++ b/fern/products/atoms/pages/platform/testing-logs/testing.mdx @@ -0,0 +1,178 @@ +--- +title: "Testing Your Agent" +sidebarTitle: "Testing Your Agent" +description: "Test your agent before deployment to ensure quality conversations." +--- + +Testing is essential before deploying your agent. Atoms provides multiple testing modes so you can verify behavior across different scenarios. + + + [IMAGE: Test Agent button/modal showing three test mode options] + + +--- + +## Location + +**Top right → "Test Agent" button** + +--- + +## Test Modes + +| Mode | Description | Best For | +|------|-------------|----------| +| **Telephony Call** | Real phone call to your device | Full phone experience, final testing | +| **Web Call** | Browser voice call | Quick voice testing, iterating | +| **Chat** | Text conversation | Testing logic only, fast iteration | + +--- + +## How to Test + +1. Click **"Test Agent"** button (top right) +2. Select your testing mode +3. Start the test +4. Have a conversation as a real caller would +5. Try different scenarios and paths + +--- + +## What to Test + +### Prompt Understanding + +| Test | What to Check | +|------|---------------| +| Does the agent understand your questions? | Correct interpretation | +| Are responses appropriate? | Relevant to what was asked | +| Does it follow your prompt instructions? | Behavioral alignment | + +### Voice Quality + +| Test | What to Check | +|------|---------------| +| Is speech clear? | No audio issues | +| Natural pacing? | Not too fast or slow | +| Correct pronunciation? | Especially for names/terms | +| Appropriate tone? 
| Matches intended personality | + +### End Call Behavior + +| Test | What to Check | +|------|---------------| +| Say goodbye | Does call end appropriately? | +| Confirm resolution | Does it end when satisfied? | +| Timeout | Does it handle silence correctly? | + +### Transfer Behavior + +| Test | What to Check | +|------|---------------| +| Request transfer | Does it trigger correctly? | +| Whisper message | Is context accurate? | +| Hold experience | Does music/message play? | + +### Variables + +| Test | What to Check | +|------|---------------| +| Are variables substituted? | `{{name}}` → actual name | +| Missing variables? | Graceful handling | + +### Edge Cases + +| Test | What to Check | +|------|---------------| +| Unexpected inputs | Graceful responses | +| Interruptions | Handles mid-sentence breaks | +| Confusion | Asks for clarification | +| Off-topic | Redirects appropriately | + +### Conversational Flow (if applicable) + +| Test | What to Check | +|------|---------------| +| Every branch | All paths work | +| All conditions | Correct routing | +| Dead ends | No stuck states | +| Loops | Don't trap callers | + +--- + +## Testing Tips + +### Test Like a Real Caller + +Don't just test happy paths. Real callers will: +- Interrupt +- Say unexpected things +- Ask off-topic questions +- Be confused +- Speak unclearly + +### Test All Three Modes + +Each mode has different characteristics: +- **Chat** is fastest for logic testing +- **Web Call** adds voice layer +- **Telephony** is most realistic + +### Use Conversation Logs + +After each test, review the logs to see: +- Exact transcripts +- AI's interpretation +- Where things went wrong + +### Iterate + +1. Test +2. Find issues +3. Update prompt/config +4. Test again +5. 
Repeat + +--- + +## Testing Conversational Flow Agents + +For workflow-based agents, test every path: + +``` +Start → Path A → End ✓ +Start → Path B → End ✓ +Start → Path C → Path D → End ✓ +Start → Path C → Path E → End ✓ +Start → Fallback → Clarify → Path A ✓ +``` + +Missing paths are invisible bugs until a real caller hits them. + +--- + +## Pre-Deployment Checklist + +Before going live: + +- [ ] Tested all happy paths +- [ ] Tested error handling +- [ ] Tested edge cases +- [ ] Tested with different voices/accents +- [ ] Verified end call triggers +- [ ] Verified transfer works +- [ ] Reviewed conversation logs +- [ ] Made final prompt adjustments + +--- + +## What's Next + + + + Debug with detailed logs + + + Protect production agents + + diff --git a/fern/products/atoms/pages/platform/troubleshooting/common-issues/agent-issues.mdx b/fern/products/atoms/pages/platform/troubleshooting/common-issues/agent-issues.mdx new file mode 100644 index 0000000..d7a389a --- /dev/null +++ b/fern/products/atoms/pages/platform/troubleshooting/common-issues/agent-issues.mdx @@ -0,0 +1,140 @@ +--- +title: "Agent Issues" +sidebarTitle: "Agent Issues" +description: "Troubleshoot common problems with voice AI agents." +--- + +This page covers common issues with agent behavior and how to resolve them. + +--- + +## Agent Gives Wrong Answers + +### Symptoms +- Agent provides incorrect information +- Answers don't match your knowledge base +- Agent makes things up + +### Solutions + +| Cause | Solution | +|-------|----------| +| Knowledge Base not attached | Attach KB in Configuration Panel | +| KB content incorrect | Update KB with correct info | +| KB not indexed yet | Wait for processing to complete | +| Prompt overriding KB | Adjust prompt to reference KB | + +### Steps +1. Check KB is attached and shows "Ready" status +2. Test KB search directly +3. Review prompt for conflicting information +4. 
Update and retest + +--- + +## Agent Doesn't Understand Callers + +### Symptoms +- Agent misinterprets what callers say +- Wrong branches taken (Convo Flow) +- Agent responds to wrong intent + +### Solutions + +| Cause | Solution | +|-------|----------| +| Unclear conditions | Use more specific condition language | +| Background noise | Enable denoising in Voice Tab | +| Accent issues | Test with diverse speakers | +| Speech recognition errors | Review transcripts in logs | + +### Steps +1. Review conversation logs for actual transcripts +2. Compare what caller said vs. what agent heard +3. Adjust conditions or prompt for better matching +4. Enable denoising if environmental noise + +--- + +## Agent Ends Calls Too Early + +### Symptoms +- Calls end unexpectedly +- Agent hangs up mid-conversation +- End call triggers incorrectly + +### Solutions + +| Cause | Solution | +|-------|----------| +| End conditions too broad | Make conditions more specific | +| Misinterpreted phrase | Add exceptions to end conditions | +| Timeout too short | Increase timeout in General Tab | + +### Steps +1. Review logs to see what triggered end +2. Identify the phrase or condition +3. Adjust end call configuration +4. 
Test the specific scenario + +--- + +## Agent Doesn't End Calls + +### Symptoms +- Calls keep going after resolution +- Agent doesn't recognize goodbye +- Infinite conversation loops + +### Solutions + +| Cause | Solution | +|-------|----------| +| Missing end conditions | Add appropriate end call functions | +| End conditions too specific | Broaden condition descriptions | +| No goodbye recognition | Add "user says goodbye" condition | + +--- + +## Agent Sounds Robotic + +### Symptoms +- Unnatural speech patterns +- Lack of conversational flow +- Stilted responses + +### Solutions + +| Cause | Solution | +|-------|----------| +| Prompt too formal | Add natural language guidance | +| Wrong voice selection | Try different voice | +| Speech speed wrong | Adjust in Voice Tab | +| Missing personality | Add tone/personality to prompt | + +--- + +## Agent Speaks Too Fast/Slow + +### Symptoms +- Speech speed doesn't match expectations +- Hard to understand +- Unnatural pacing + +### Solutions +1. Go to Agent Settings → Voice Tab +2. Adjust Speech Speed slider +3. Test and fine-tune + +--- + +## What's Next + + + + Audio and connection problems + + + Setup and settings problems + + diff --git a/fern/products/atoms/pages/platform/troubleshooting/common-issues/call-quality.mdx b/fern/products/atoms/pages/platform/troubleshooting/common-issues/call-quality.mdx new file mode 100644 index 0000000..baa3d17 --- /dev/null +++ b/fern/products/atoms/pages/platform/troubleshooting/common-issues/call-quality.mdx @@ -0,0 +1,146 @@ +--- +title: "Call Quality Issues" +sidebarTitle: "Call Quality Issues" +description: "Troubleshoot audio and connection problems." +--- + +This page covers common call quality issues and how to resolve them. 
+ +--- + +## Poor Audio Quality + +### Symptoms +- Garbled or distorted audio +- Choppy speech +- Echo or feedback + +### Solutions + +| Cause | Solution | +|-------|----------| +| Network issues | Check internet connection | +| Caller environment | Enable denoising | +| Audio settings | Review Voice Tab settings | + +### For Web Calls +1. Check browser compatibility +2. Ensure microphone permissions +3. Test with different browser +4. Check internet speed + +### For Phone Calls +- Usually network-related +- Check carrier signal +- Try from different phone + +--- + +## Latency/Delay + +### Symptoms +- Long pause before agent responds +- Conversation feels slow +- Overlapping speech + +### Solutions + +| Cause | Solution | +|-------|----------| +| Model selection | Try faster model | +| API calls mid-conversation | Optimize API endpoints | +| Complex prompt | Simplify prompt | +| Turn detection settings | Adjust wait time | + +### Steps +1. Check which model you're using +2. Review any API calls for latency +3. Test with simpler prompt +4. Adjust Smart Turn Detection settings + +--- + +## Calls Not Connecting + +### Symptoms +- Calls don't go through +- Immediate disconnection +- No ring/answer + +### Solutions + +| Cause | Solution | +|-------|----------| +| Phone number not assigned | Assign in Phone Number Tab | +| Agent not locked/published | Verify agent is ready | +| Phone number issue | Check number status | + +### Steps +1. Verify phone number is assigned to agent +2. Check phone number status in Dashboard +3. Try test call from Test Agent +4. Contact support if persistent + +--- + +## Caller Hangs Up Immediately + +### Symptoms +- Calls end in first seconds +- No conversation captured + +### Solutions + +| Cause | Solution | +|-------|----------| +| Poor greeting | Improve opening | +| Voice off-putting | Try different voice | +| Long silence before greeting | Check start configuration | +| Audio quality | Test call quality | + +### Steps +1. 
Review call logs for patterns
+ +--- + +## Changes Not Taking Effect + +### Symptoms +- Updated prompt but behavior unchanged +- New settings not reflected in calls + +### Solutions + +| Cause | Solution | +|-------|----------| +| Unsaved changes | Click Save button | +| Agent is locked | Unlock, make changes, relock | +| Caching | Wait a few minutes, test again | + +### Steps +1. Look for "You have unsaved changes" message +2. Click Save if present +3. Check if agent is locked +4. Test again after saving + +--- + +## Knowledge Base Not Working + +### Symptoms +- Agent doesn't use KB content +- Wrong answers despite correct KB + +### Solutions + +| Cause | Solution | +|-------|----------| +| KB not attached | Attach in Configuration Panel | +| KB still processing | Wait for "Ready" status | +| Wrong KB selected | Verify correct KB | +| Content not indexed well | Improve content structure | + +### Steps +1. Verify KB toggle is ON +2. Check selected KB is correct +3. Verify KB status is "Ready" +4. Test with specific questions + +--- + +## Webhook Not Receiving Data + +### Symptoms +- No webhook calls received +- Missing event data + +### Solutions + +| Cause | Solution | +|-------|----------| +| Endpoint not configured | Set up in Webhook Tab | +| Wrong events selected | Check event subscriptions | +| Endpoint returning errors | Verify endpoint is working | +| URL incorrect | Double-check endpoint URL | + +### Steps +1. Go to Agent Settings → Webhook Tab +2. Verify endpoint is selected +3. Verify events are checked +4. Test endpoint independently +5. Make test call and check + +--- + +## API Calls Failing + +### Symptoms +- API data not appearing in conversation +- Errors in conversation logs + +### Solutions + +| Cause | Solution | +|-------|----------| +| Wrong URL | Verify endpoint URL | +| Authentication | Check API key/token | +| Response format | Verify JSON structure | +| Timeout | Endpoint too slow | + +### Steps +1. Check conversation logs for error details +2. 
Test API endpoint independently +3. Verify headers and authentication +4. Check response mapping + +--- + +## Variables Not Substituting + +### Symptoms +- See `{{variable_name}}` in output +- Variable values not appearing + +### Solutions + +| Cause | Solution | +|-------|----------| +| Variable not defined | Define in Variables panel | +| Typo in variable name | Check exact spelling | +| Variable not populated | Check data source | +| Wrong syntax | Use `{{double_braces}}` | + +### Steps +1. Verify variable name in prompt matches definition +2. Check Variables panel for value +3. For API variables, check response mapping +4. Test with known values + +--- + +## Phone Number Not Working + +### Symptoms +- Can't make or receive calls +- Number shows inactive + +### Solutions + +| Cause | Solution | +|-------|----------| +| Not assigned to agent | Assign in Phone Number Tab | +| Number inactive | Check number status | +| Billing issue | Check account balance | + +--- + +## Widget Not Appearing + +### Symptoms +- Widget doesn't show on website +- Button missing + +### Solutions + +| Cause | Solution | +|-------|----------| +| Code not added | Add embed code to site | +| JavaScript error | Check browser console | +| Wrong agent ID | Verify code is correct | +| Page caching | Clear cache | + +--- + +## What's Next + + + + Common questions + + + Contact support + + diff --git a/fern/products/atoms/pages/platform/troubleshooting/common-issues/feature-issues.mdx b/fern/products/atoms/pages/platform/troubleshooting/common-issues/feature-issues.mdx new file mode 100644 index 0000000..92c58c6 --- /dev/null +++ b/fern/products/atoms/pages/platform/troubleshooting/common-issues/feature-issues.mdx @@ -0,0 +1,7 @@ +--- +title: "Feature Issues" +--- + +# Feature Issues + +*Content coming soon.* diff --git a/fern/products/atoms/pages/platform/troubleshooting/faq.mdx b/fern/products/atoms/pages/platform/troubleshooting/faq.mdx new file mode 100644 index 0000000..4a4e29c --- 
/dev/null +++ b/fern/products/atoms/pages/platform/troubleshooting/faq.mdx @@ -0,0 +1,127 @@ +--- +title: "Frequently Asked Questions" +sidebarTitle: "FAQ" +description: "Answers to common questions about Atoms." +--- + +## General Questions + +### What is Atoms? + +Atoms is a voice AI platform by Smallest AI. It lets you create, deploy, and manage AI-powered voice agents that can handle phone calls and web conversations. + +### What languages are supported? + +Atoms supports multiple languages including English, Spanish, French, German, Portuguese, Hindi, and more. Available languages depend on your selected voice. + +### What's the difference between Single Prompt and Conversational Flow? + +**Single Prompt** uses one comprehensive prompt for flexible, open-ended conversations. + +**Conversational Flow** uses a visual workflow builder for structured, step-by-step conversations. + +--- + +## Pricing & Plans + +### How is pricing calculated? + +Atoms uses usage-based pricing. Costs are typically based on: +- Per-minute call usage +- Phone number rental fees +- Feature access by plan tier + +Visit the [Pricing page](https://smallest.ai/pricing?utm_source=documentation&utm_medium=faq-page) for current rates and plan details. + +### Is there a free trial? + +New accounts receive free credits to test the platform. Sign up at [atoms.smallest.ai](https://atoms.smallest.ai?utm_source=documentation&utm_medium=faq-page) to get started. + +### What payment methods are accepted? + +Atoms accepts major credit cards and offers invoice billing for enterprise accounts. Manage your payment methods in Settings → Billing. + +--- + +## Technical Questions + +### Can I use my own phone number? + +Yes. You can bring your own number via SIP trunk integration. Navigate to Deploy → Phone Numbers → Import and select "Import via SIP" to configure your existing number. + +### What LLM models are available? + +GPT-4o, Electron models, and others. Available models may vary by plan. 
+ +### Is my data secure? + +Yes. Atoms uses TLS 1.3 encryption in transit and AES-256 encryption at rest. PII redaction can be enabled for sensitive data. Atoms is SOC 2 Type II certified, GDPR compliant, and offers HIPAA compliance for enterprise accounts. See our [Security Overview](https://security.smallest.ai/?tab=overview) for details. + +### Can I integrate with my CRM? + +Salesforce integration is available. HubSpot and others are coming soon. Custom integrations are possible via API calls and webhooks. + +--- + +## Agent Questions + +### How many agents can I create? + +Agent limits depend on your plan. Free and starter plans have a limited number of agents, while growth and enterprise plans offer higher or unlimited agent counts. Check your current plan in Settings → Billing. + +### Can one agent handle multiple phone numbers? + +Each phone number is assigned to one agent. However, you can clone agents for different numbers. + +### Can I have multiple agents handle one number? + +No, each phone number connects to one specific agent. + +### How do I make my agent available 24/7? + +Agents are available 24/7 by default once deployed to a phone number. + +--- + +## Deployment Questions + +### How do I get a phone number? + +Navigate to Deploy → Phone Numbers and follow the purchase process. + +### Can I use the widget on multiple websites? + +Yes, the same widget code can be used on multiple domains. + +### What happens if an agent fails mid-call? + +The platform includes error handling. Configure fallback behavior (transfer, graceful end) for edge cases. + +--- + +## Still Have Questions? + + + + Ask the community and our team in real time. + + + Reach us at **support@smallest.ai** for detailed help. + + + See real projects built with Atoms. 
+ + + +--- + +## What's Next + + + + Key terms defined + + + Contact support + + diff --git a/fern/products/atoms/pages/platform/troubleshooting/getting-help.mdx b/fern/products/atoms/pages/platform/troubleshooting/getting-help.mdx new file mode 100644 index 0000000..08d4a29 --- /dev/null +++ b/fern/products/atoms/pages/platform/troubleshooting/getting-help.mdx @@ -0,0 +1,105 @@ +--- +title: "Getting Help" +sidebarTitle: "Getting Help" +description: "How to get support when you need it." +--- + +If you can't find the answer in our documentation, here's how to get help. + + + + Get real-time help from the community and the Smallest AI team. + + + See what others have built and get inspiration for your projects. + + + +--- + +## Self-Service Resources + + + + Search these docs — use the search bar at the top or browse the sidebar navigation. + + + Step-by-step code examples and templates for common use cases. + + + Answers to frequently asked questions about Atoms. + + + +--- + +## Contact Support + + + + **support@smallest.ai** — General questions, technical issues, and feature requests. + + + **billing@smallest.ai** — Payment, subscription, and plan questions. + + + **security@smallest.ai** — Report security concerns or vulnerabilities. + + + **sales@smallest.ai** — Enterprise inquiries and custom plans. + + + +--- + +## Reporting Issues + +When reaching out, include as much detail as possible so we can help faster. + + + + - Agent ID + - Agent type (Single Prompt or Conversational Flow) + - Description of expected vs. 
actual behavior + - Steps to reproduce + - Sample conversation transcript (if available) + + + - Call ID + - Phone number involved + - Time of call + - Description of the issue + - Your location (for regional troubleshooting) + + + - Browser and version + - Steps to reproduce + - Screenshots + - Any error messages + + + +--- + +## Enterprise Support + +Enterprise customers have access to: +- Dedicated support team +- Priority response times +- Custom onboarding +- Technical consultation + +Contact your account manager or email **sales@smallest.ai** for details. + +--- + +## What's Next + + + + Common questions answered + + + Key terms defined + + diff --git a/fern/products/atoms/pages/platform/troubleshooting/glossary.mdx b/fern/products/atoms/pages/platform/troubleshooting/glossary.mdx new file mode 100644 index 0000000..e47db94 --- /dev/null +++ b/fern/products/atoms/pages/platform/troubleshooting/glossary.mdx @@ -0,0 +1,170 @@ +--- +title: "Glossary" +sidebarTitle: "Glossary" +description: "Definitions of key terms used in Atoms." +--- + +## A + +**Agent** +An AI-powered voice assistant built on Atoms that handles phone calls or web conversations. + +**API Call** +A request made by your agent to an external service during a conversation to fetch or send data. + +**Audience** +A contact list used for outbound campaigns. + +## B + +**Branch** +A connection between nodes in Conversational Flow that determines conversation path based on conditions. + +## C + +**Campaign** +An outbound calling program that contacts a list of phone numbers using your agent. + +**Cold Transfer** +A call transfer where the receiving party is connected immediately without any context or briefing. + +**Condition** +A rule in Conversational Flow that determines which branch to follow based on user response. + +**Conversational Flow** +An agent type that uses a visual workflow builder for structured conversations. 
+
+## D
+
+**Denoising**
+Audio processing that removes background noise from caller audio for clearer speech recognition.
+
+**Disposition**
+The outcome category of a call (e.g., successful, unsuccessful, transferred).
+
+## E
+
+**Electron**
+Smallest AI's voice-optimized Small Language Model (SLM) that handles reasoning and response generation with sub-500ms latency.
+
+**Emotive Model**
+A speech-to-speech AI model with emotional tone and natural expression (Beta).
+
+**End Call**
+The function that terminates a conversation based on defined conditions.
+
+## G
+
+**Global KB**
+A Knowledge Base shared across multiple agents.
+
+## I
+
+**Inbound Call**
+A call initiated by a customer to your agent's phone number.
+
+## K
+
+**Knowledge Base (KB)**
+A repository of documents and information that agents can reference during conversations.
+
+## L
+
+**Latency**
+The delay between when a user finishes speaking and when the agent starts responding.
+
+**Lightning**
+Smallest AI's Text-to-Speech model that synthesizes studio-quality voice at 44kHz with 175ms latency. Supports voice cloning and multiple languages.
+
+**LLM (Large Language Model)**
+The AI model that understands intent and generates responses.
+
+**Local KB**
+A Knowledge Base attached to and used by only one agent.
+
+**Lock Agent**
+A feature that prevents edits to an agent's configuration.
+
+## N
+
+**Node**
+A single step in a Conversational Flow workflow (e.g., Prompt, Transfer, API Call, End).
+
+## O
+
+**Outbound Call**
+A call initiated by your agent to a customer (via campaigns).
+
+## P
+
+**PII Redaction**
+Automatic removal or masking of personally identifiable information from transcripts.
+
+**Pulse**
+Smallest AI's Speech-to-Text model with 64ms time-to-first-transcript latency. Supports 32 languages with automatic detection and industry-leading accuracy.
+
+**Post-Call Metrics**
+Analytics and data extracted from completed calls. 
+ +**Pre-Call API** +An API call that executes before the conversation starts. + +**Post-Call API** +An API call that executes after the conversation ends. + +## S + +**Single Prompt** +An agent type using one comprehensive prompt for flexible conversations. + +**SLM (Small Language Model)** +Atoms' optimized models that deliver fast, efficient voice AI. + +**Smart Turn Detection** +Intelligent handling of when the agent should start speaking. + +**STT (Speech-to-Text)** +Technology that converts spoken audio into text. + +## T + +**Transfer Call** +Handing a conversation off to a human agent or another phone number. + +**TTS (Text-to-Speech)** +Technology that converts text responses into spoken audio. + +**Turn Detection** +The system that determines when a speaker has finished and it's time to respond. + +## V + +**Variable** +Dynamic values that can be used in prompts (e.g., `{{customer_name}}`). + +## W + +**Warm Transfer** +A call transfer where the AI provides context to the receiving party before connecting. + +**Webhook** +An HTTP callback that sends data when specific events occur. + +**Whisper Message** +A message spoken only to the transfer recipient during warm transfer. + +**Widget** +An embeddable voice interface for websites. + +--- + +## What's Next + + + + Contact support + + + Core concepts overview + + diff --git a/fern/products/atoms/pages/qa-credit-deduction-worker.md b/fern/products/atoms/pages/qa-credit-deduction-worker.md new file mode 100644 index 0000000..5c95948 --- /dev/null +++ b/fern/products/atoms/pages/qa-credit-deduction-worker.md @@ -0,0 +1,1364 @@ +# Credit Deduction Worker - QA Testing Document + +## Document Information + +| Field | Value | +| ------------ | ----------------------- | +| Feature | Credit Deduction Worker | +| Version | 1.0 | +| Last Updated | January 2026 | +| PR | #328 | + +--- + +## 1. Feature Overview + +### What is the Credit Deduction Worker? 
+ +The Credit Deduction Worker is a background service that processes usage events and reports them to Stigg for billing. It runs as part of `console-backend` and consumes messages from RabbitMQ. + +### Key Responsibilities + +1. **Consume usage events** from RabbitMQ queue (`credit-deduction-queue`) +2. **Validate customer entitlements** via Stigg Sidecar cache (sub-millisecond latency) +3. **Report usage to Stigg** for billing and credit deduction +4. **Log all transactions** to ClickHouse for audit trail +5. **Handle failures** with retry logic and Dead Letter Queue (DLQ) + +### Design Philosophy + +| Principle | Description | +| ------------------------- | -------------------------------------------------------------- | +| Validate Before Reporting | Verify entitlements before reporting usage to Stigg | +| No planId Required | Stigg automatically knows customer's active plan | +| Stigg Handles Overage | Usage beyond limits is allowed - Stigg manages overage billing | +| Never Lose Data | Failed events retry, then go to DLQ for manual review | +| No Auto-Provisioning | Subscriptions must exist before usage can be reported | + +--- + +## 2. 
Architecture + +### System Components + +``` +┌─────────────────────────────────────────────────────────────────────────────┐ +│ EVENT PRODUCERS │ +│ │ +│ ┌──────────────────┐ ┌──────────────────┐ │ +│ │ atoms-backend │ │ waves-backend │ │ +│ │ (AI Agents) │ │ (Voice Calls) │ │ +│ └────────┬─────────┘ └────────┬─────────┘ │ +│ │ │ │ +└──────────────┼──────────────────────────────────┼────────────────────────────┘ + │ │ + ▼ ▼ +┌─────────────────────────────────────────────────────────────────────────────┐ +│ RABBITMQ │ +│ │ +│ ┌────────────────────────────┐ ┌────────────────────────────┐ │ +│ │ credit-deduction-queue │ │ credit-deduction-dlq │ │ +│ │ (Main Queue) │ │ (Dead Letter Queue) │ │ +│ └─────────────┬──────────────┘ └────────────────────────────┘ │ +│ │ ▲ │ +└───────────────────┼──────────────────────────────┼───────────────────────────┘ + │ │ + ▼ │ (failed after 2 retries) +┌─────────────────────────────────────────────────────────────────────────────┐ +│ CONSOLE-BACKEND │ +│ │ +│ ┌─────────────────────────────────────────────────────────────────┐ │ +│ │ CreditDeductionWorker │ │ +│ │ │ │ +│ │ 1. Parse & Validate Payload (Zod) │ │ +│ │ 2. Check Entitlement (Stigg Sidecar) │ │ +│ │ 3. Report Usage (Stigg) │ │ +│ │ 4. Log to ClickHouse │ │ +│ │ 5. 
Emit Metrics (New Relic) │ │ +│ └──────────────────────┬──────────────────────────────────────────┘ │ +│ │ │ +└────────────────────────────┼─────────────────────────────────────────────────┘ + │ + ┌──────────────────┼──────────────────┐ + ▼ ▼ ▼ +┌──────────────────┐ ┌──────────────────┐ ┌──────────────────┐ +│ stigg-proxy │ │ ClickHouse │ │ New Relic │ +│ (HTTP → gRPC) │ │ (Audit Logs) │ │ (Metrics) │ +└────────┬─────────┘ └──────────────────┘ └──────────────────┘ + │ + ▼ +┌──────────────────┐ +│ stigg-sidecar │ +│ (gRPC, Cache) │ +└────────┬─────────┘ + │ + ▼ +┌──────────────────┐ +│ Stigg Cloud │ +│ (Billing) │ +└──────────────────┘ +``` + +### Queue Configuration + +| Queue | Purpose | TTL | +| ------------------------ | --------------------------------- | ------ | +| `credit-deduction-queue` | Main processing queue | None | +| `credit-deduction-dlq` | Failed messages for manual review | 7 days | + +--- + +## 3. Processing Flow + +### Step-by-Step Flow + +``` +Message Received + │ + ▼ +┌──────────────────┐ +│ 1. Parse JSON │───► Invalid JSON? ───► DLQ (unparseable) +└────────┬─────────┘ + │ + ▼ +┌──────────────────┐ +│ 2. Validate │───► Invalid payload? ───► DLQ (not retryable) +│ Payload (Zod) │ +└────────┬─────────┘ + │ + ▼ +┌──────────────────┐ +│ 3. Check │───► No access? ─┬─► Retryable? ───► Retry (max 2) +│ Entitlement │ │ +│ (Sidecar) │ └─► Not retryable? ───► DLQ +└────────┬─────────┘ + │ + ▼ +┌──────────────────┐ +│ 4. Report Usage │───► API error? ───► Retry (max 2) ───► DLQ +│ (Stigg) │ +└────────┬─────────┘ + │ + ▼ +┌──────────────────┐ +│ 5. Log to │───► ClickHouse down? ───► Log to console (fallback) +│ ClickHouse │ +└────────┬─────────┘ + │ + ▼ +┌──────────────────┐ +│ 6. ACK Message │ +│ (Success!) 
│ +└──────────────────┘ +``` + +### Retry Behavior + +| Attempt | Delay | Total Wait | +| --------- | ----------- | ---------- | +| 1st retry | 2 seconds | 2s | +| 2nd retry | 4 seconds | 6s | +| After 2nd | Send to DLQ | - | + +> **Note:** `maxRetries: 3` means 3 total attempts (initial + 2 retries), not 3 retries. + +--- + +## 4. Message Format + +### WorkerTask Wrapper (Required) + +All messages must be wrapped in a `WorkerTask` structure: + +```json +{ + "taskId": "unique-task-id", + "taskType": "REPORT_USAGE", + "payload": { + // Actual usage data here + }, + "createdAt": "2026-01-22T12:00:00.000Z" +} +``` + +### Payload Fields + +| Field | Type | Required | Validation | Description | +| ---------------- | ------ | -------- | ----------------------- | ----------------------------------------------- | +| `eventId` | string | Yes | Non-empty, trimmed | Unique event ID (used as idempotency key) | +| `customerId` | string | Yes | Non-empty, trimmed | Customer/Organization ID in Stigg | +| `featureId` | string | Yes | Non-empty, trimmed | Feature ID (e.g., `feature-create-with-ai-raw`) | +| `usageValue` | number | Yes | Positive, max 1 billion | Raw usage value | +| `eventTimestamp` | string | Yes | ISO 8601 datetime | When the usage occurred | +| `subscriptionId` | string | No | - | Subscription ID (optional) | +| `source` | string | No | - | Event source (e.g., `atoms-backend`) | +| `resourceId` | string | No | Max 50 chars | Resource ID for per-resource tracking | +| `metadata` | object | No | - | Additional metadata for debugging | + +### Sample Complete Message + +```json +{ + "taskId": "task-20260128-001", + "taskType": "REPORT_USAGE", + "payload": { + "eventId": "evt-20260128-001", + "customerId": "test-payg-user-1", + "featureId": "feature-create-with-ai-raw", + "usageValue": 10, + "eventTimestamp": "2026-01-28T10:00:00.000Z", + "source": "atoms-backend", + "resourceId": "agent_abc123", + "metadata": { + "agentName": "Sales Bot", + "callId": "call_xyz" + } 
+ }, + "createdAt": "2026-01-28T10:00:00.000Z" +} +``` + +--- + +## 5. Error Codes and Handling + +### Worker Error Codes + +| Error Code | Description | Retryable | Action | +| ------------------------ | ----------------------------------------- | --------- | ----------------------- | +| `INVALID_PAYLOAD` | Missing or invalid fields in payload | No | Sent to DLQ immediately | +| `NO_ACTIVE_SUBSCRIPTION` | Customer has no subscription or expired | Yes | Retry 2 times, then DLQ | +| `ENTITLEMENT_DENIED` | Customer/feature not found or not in plan | No | Sent to DLQ immediately | +| `STIGG_ERROR` | Network/API error calling Stigg | Yes | Retry 2 times, then DLQ | + +### Stigg AccessDeniedReason Codes + +| Code | Name | Meaning | Retryable | +| ---- | ---------------------- | -------------------------------------- | --------- | +| 0 | UNSPECIFIED | Unknown reason | Yes | +| 1 | NO_SUBSCRIPTION | Customer has no subscription | Yes | +| 2 | NO_FEATURE_ENTITLEMENT | Feature not in customer's plan | No | +| 3 | USAGE_LIMIT_EXCEEDED | Usage exceeded limit (allowed through) | N/A | +| 4 | FEATURE_NOT_FOUND | Feature doesn't exist in Stigg | No | +| 5 | SUBSCRIPTION_EXPIRED | Customer's subscription expired | Yes | +| 6 | NOT_IN_TRIAL | Customer not in trial period | Yes | +| 7 | TRIAL_EXPIRED | Trial period expired | Yes | +| 8 | CUSTOMER_NOT_FOUND | Customer doesn't exist in Stigg | No | +| 9 | UNKNOWN | Unknown error | Yes | + +--- + +## 6. 
Test Environment Setup + +### Test Customers (Dev Environment) + +| Customer Name | Customer ID | Has Subscription | +| ---------------- | ------------------ | ---------------- | +| Test PAYG User 1 | `test-payg-user-1` | Yes (active) | +| Test PAYG User 2 | `test-payg-user-2` | No | +| Test PAYG User 3 | `test-payg-user-3` | Expired | + +### Available Features + +| Feature Name | Feature ID | Type | Credits/Unit | +| --------------------------- | ---------------------------- | ------- | ------------ | +| Create with AI (Raw Events) | `feature-create-with-ai-raw` | Metered | 0.5 credits | +| TTS V2 Usage (Raw) | `feature-tts-v2-usage-raw` | Metered | N/A | +| TTS V3 Usage (Raw) | `feature-tts-v3-usage-raw` | Metered | N/A | +| STT Usage (Raw) | `feature-stt-usage-raw` | Metered | N/A | +| Atoms Voice V2 (Raw) | `feature-atoms-voice-v2-raw` | Metered | N/A | +| Atoms Voice V3 (Raw) | `feature-atoms-voice-v3-raw` | Metered | N/A | + +### Access Points + +| Service | Dev URL | Port | +| ------------------- | ------------------------- | ----- | +| RabbitMQ Management | http://rabbitmq-dev:15672 | 15672 | +| ClickHouse | https://clickhouse-dev | 8443 | +| New Relic | https://one.newrelic.com | - | +| Stigg Dashboard | https://app.stigg.io | - | + +--- + +## 7. 
Test Cases + +### Category A: Happy Path Tests + +#### TC-A01: Basic Usage Report (Active Subscription) + +| Field | Value | +| ----------------- | --------------------------------------------------- | +| **Test ID** | TC-A01 | +| **Priority** | P0 - Critical | +| **Prerequisites** | Customer `test-payg-user-1` has active subscription | + +**Test Message:** + +```json +{ + "taskId": "tc-a01-task", + "taskType": "REPORT_USAGE", + "payload": { + "eventId": "tc-a01-evt-001", + "customerId": "test-payg-user-1", + "featureId": "feature-create-with-ai-raw", + "usageValue": 10, + "eventTimestamp": "2026-01-28T10:00:00.000Z", + "source": "qa-test" + }, + "createdAt": "2026-01-28T10:00:00.000Z" +} +``` + +**Expected Results:** + +- Message processed successfully +- Usage reported to Stigg (10 units = 5 credits) +- ClickHouse audit log: `status = 'success'` +- New Relic: `EventProcessed` with `success = true` + +**Verification:** + +```sql +-- ClickHouse +SELECT * FROM credit_audit WHERE event_id = 'tc-a01-evt-001'; +``` + +--- + +#### TC-A02: Multiple Usage Reports (Same Customer) + +| Field | Value | +| ----------------- | --------------------------------------------------- | +| **Test ID** | TC-A02 | +| **Priority** | P0 - Critical | +| **Prerequisites** | Customer `test-payg-user-1` has active subscription | + +**Test Messages (send both):** + +Message 1: + +```json +{ + "taskId": "tc-a02-task-1", + "taskType": "REPORT_USAGE", + "payload": { + "eventId": "tc-a02-evt-001", + "customerId": "test-payg-user-1", + "featureId": "feature-create-with-ai-raw", + "usageValue": 5, + "eventTimestamp": "2026-01-28T10:01:00.000Z", + "source": "qa-test" + }, + "createdAt": "2026-01-28T10:01:00.000Z" +} +``` + +Message 2: + +```json +{ + "taskId": "tc-a02-task-2", + "taskType": "REPORT_USAGE", + "payload": { + "eventId": "tc-a02-evt-002", + "customerId": "test-payg-user-1", + "featureId": "feature-create-with-ai-raw", + "usageValue": 3, + "eventTimestamp": "2026-01-28T10:02:00.000Z", + 
"source": "qa-test" + }, + "createdAt": "2026-01-28T10:02:00.000Z" +} +``` + +**Expected Results:** + +- Both messages processed successfully +- Total usage: 8 units = 4 credits deducted +- 2 entries in ClickHouse with `status = 'success'` + +--- + +#### TC-A03: Usage with Resource ID + +| Field | Value | +| ----------------- | --------------------------------------------------- | +| **Test ID** | TC-A03 | +| **Priority** | P1 - High | +| **Prerequisites** | Customer `test-payg-user-1` has active subscription | + +**Test Message:** + +```json +{ + "taskId": "tc-a03-task", + "taskType": "REPORT_USAGE", + "payload": { + "eventId": "tc-a03-evt-001", + "customerId": "test-payg-user-1", + "featureId": "feature-create-with-ai-raw", + "usageValue": 1, + "resourceId": "agent_sales_bot_001", + "eventTimestamp": "2026-01-28T10:03:00.000Z", + "source": "qa-test", + "metadata": { + "agentName": "Sales Bot", + "department": "Sales" + } + }, + "createdAt": "2026-01-28T10:03:00.000Z" +} +``` + +**Expected Results:** + +- Message processed successfully +- ClickHouse audit includes `resource_id = 'agent_sales_bot_001'` +- Metadata preserved in audit log + +--- + +#### TC-A04: Usage with Subscription ID + +| Field | Value | +| ----------------- | ---------------------------------- | +| **Test ID** | TC-A04 | +| **Priority** | P2 - Medium | +| **Prerequisites** | Customer has known subscription ID | + +**Test Message:** + +```json +{ + "taskId": "tc-a04-task", + "taskType": "REPORT_USAGE", + "payload": { + "eventId": "tc-a04-evt-001", + "customerId": "test-payg-user-1", + "subscriptionId": "sub_12345", + "featureId": "feature-create-with-ai-raw", + "usageValue": 2, + "eventTimestamp": "2026-01-28T10:04:00.000Z", + "source": "qa-test" + }, + "createdAt": "2026-01-28T10:04:00.000Z" +} +``` + +**Expected Results:** + +- Message processed successfully +- Subscription ID included in ClickHouse audit + +--- + +### Category B: Validation Failure Tests + +#### TC-B01: Missing customerId + +| 
Field | Value | +| ----------------- | --------- | +| **Test ID** | TC-B01 | +| **Priority** | P1 - High | +| **Prerequisites** | None | + +**Test Message:** + +```json +{ + "taskId": "tc-b01-task", + "taskType": "REPORT_USAGE", + "payload": { + "eventId": "tc-b01-evt-001", + "customerId": "", + "featureId": "feature-create-with-ai-raw", + "usageValue": 10, + "eventTimestamp": "2026-01-28T10:05:00.000Z" + }, + "createdAt": "2026-01-28T10:05:00.000Z" +} +``` + +**Expected Results:** + +- Validation fails immediately +- Message sent to DLQ (no retries) +- Error: `INVALID_PAYLOAD` + +--- + +#### TC-B02: Missing featureId + +| Field | Value | +| ----------------- | --------- | +| **Test ID** | TC-B02 | +| **Priority** | P1 - High | +| **Prerequisites** | None | + +**Test Message:** + +```json +{ + "taskId": "tc-b02-task", + "taskType": "REPORT_USAGE", + "payload": { + "eventId": "tc-b02-evt-001", + "customerId": "test-payg-user-1", + "featureId": "", + "usageValue": 10, + "eventTimestamp": "2026-01-28T10:06:00.000Z" + }, + "createdAt": "2026-01-28T10:06:00.000Z" +} +``` + +**Expected Results:** + +- Validation fails immediately +- Message sent to DLQ (no retries) + +--- + +#### TC-B03: Invalid usageValue (Negative) + +| Field | Value | +| ----------------- | --------- | +| **Test ID** | TC-B03 | +| **Priority** | P1 - High | +| **Prerequisites** | None | + +**Test Message:** + +```json +{ + "taskId": "tc-b03-task", + "taskType": "REPORT_USAGE", + "payload": { + "eventId": "tc-b03-evt-001", + "customerId": "test-payg-user-1", + "featureId": "feature-create-with-ai-raw", + "usageValue": -5, + "eventTimestamp": "2026-01-28T10:07:00.000Z" + }, + "createdAt": "2026-01-28T10:07:00.000Z" +} +``` + +**Expected Results:** + +- Validation fails (usageValue must be positive) +- Message sent to DLQ + +--- + +#### TC-B04: Invalid usageValue (Zero) + +| Field | Value | +| ----------------- | --------- | +| **Test ID** | TC-B04 | +| **Priority** | P1 - High | +| **Prerequisites** | 
None | + +**Test Message:** + +```json +{ + "taskId": "tc-b04-task", + "taskType": "REPORT_USAGE", + "payload": { + "eventId": "tc-b04-evt-001", + "customerId": "test-payg-user-1", + "featureId": "feature-create-with-ai-raw", + "usageValue": 0, + "eventTimestamp": "2026-01-28T10:08:00.000Z" + }, + "createdAt": "2026-01-28T10:08:00.000Z" +} +``` + +**Expected Results:** + +- Validation fails (usageValue must be positive) +- Message sent to DLQ + +--- + +#### TC-B05: Invalid eventTimestamp Format + +| Field | Value | +| ----------------- | ----------- | +| **Test ID** | TC-B05 | +| **Priority** | P2 - Medium | +| **Prerequisites** | None | + +**Test Message:** + +```json +{ + "taskId": "tc-b05-task", + "taskType": "REPORT_USAGE", + "payload": { + "eventId": "tc-b05-evt-001", + "customerId": "test-payg-user-1", + "featureId": "feature-create-with-ai-raw", + "usageValue": 10, + "eventTimestamp": "2026/01/28 10:00:00" + }, + "createdAt": "2026-01-28T10:09:00.000Z" +} +``` + +**Expected Results:** + +- Validation fails (must be ISO 8601) +- Message sent to DLQ + +--- + +### Category C: Entitlement Failure Tests + +#### TC-C01: Customer Not Found in Stigg + +| Field | Value | +| ----------------- | ---------------------------- | +| **Test ID** | TC-C01 | +| **Priority** | P0 - Critical | +| **Prerequisites** | Use non-existent customer ID | + +**Test Message:** + +```json +{ + "taskId": "tc-c01-task", + "taskType": "REPORT_USAGE", + "payload": { + "eventId": "tc-c01-evt-001", + "customerId": "customer-does-not-exist-xyz", + "featureId": "feature-create-with-ai-raw", + "usageValue": 10, + "eventTimestamp": "2026-01-28T10:10:00.000Z", + "source": "qa-test" + }, + "createdAt": "2026-01-28T10:10:00.000Z" +} +``` + +**Expected Results:** + +- Entitlement check fails: `CUSTOMER_NOT_FOUND` +- Message sent to DLQ (not retryable) +- Error code: `ENTITLEMENT_DENIED` + +--- + +#### TC-C02: Customer Has No Subscription + +| Field | Value | +| ----------------- | 
---------------------------------------------------------- | +| **Test ID** | TC-C02 | +| **Priority** | P0 - Critical | +| **Prerequisites** | Customer `test-payg-user-2` exists but has no subscription | + +**Test Message:** + +```json +{ + "taskId": "tc-c02-task", + "taskType": "REPORT_USAGE", + "payload": { + "eventId": "tc-c02-evt-001", + "customerId": "test-payg-user-2", + "featureId": "feature-create-with-ai-raw", + "usageValue": 10, + "eventTimestamp": "2026-01-28T10:11:00.000Z", + "source": "qa-test" + }, + "createdAt": "2026-01-28T10:11:00.000Z" +} +``` + +**Expected Results:** + +- Entitlement check fails: `NO_SUBSCRIPTION` +- Retries 2 times with exponential backoff (2s, 4s) +- After retries (3 total attempts), sent to DLQ +- Error code: `NO_ACTIVE_SUBSCRIPTION` + +--- + +#### TC-C03: Feature Not Found in Stigg + +| Field | Value | +| ----------------- | --------------------------- | +| **Test ID** | TC-C03 | +| **Priority** | P1 - High | +| **Prerequisites** | Use non-existent feature ID | + +**Test Message:** + +```json +{ + "taskId": "tc-c03-task", + "taskType": "REPORT_USAGE", + "payload": { + "eventId": "tc-c03-evt-001", + "customerId": "test-payg-user-1", + "featureId": "feature-does-not-exist", + "usageValue": 10, + "eventTimestamp": "2026-01-28T10:12:00.000Z", + "source": "qa-test" + }, + "createdAt": "2026-01-28T10:12:00.000Z" +} +``` + +**Expected Results:** + +- Entitlement check fails: `FEATURE_NOT_FOUND` +- Message sent to DLQ (not retryable) +- Error code: `ENTITLEMENT_DENIED` + +--- + +#### TC-C04: Feature Not In Customer's Plan + +| Field | Value | +| ----------------- | ----------------------------------------- | +| **Test ID** | TC-C04 | +| **Priority** | P0 - Critical | +| **Prerequisites** | Feature exists but not in customer's plan | + +**Test Message:** + +```json +{ + "taskId": "tc-c04-task", + "taskType": "REPORT_USAGE", + "payload": { + "eventId": "tc-c04-evt-001", + "customerId": "test-payg-user-1", + "featureId": 
"feature-enterprise-only", + "usageValue": 10, + "eventTimestamp": "2026-01-28T10:13:00.000Z", + "source": "qa-test" + }, + "createdAt": "2026-01-28T10:13:00.000Z" +} +``` + +**Expected Results:** + +- Entitlement check fails: `NO_FEATURE_ENTITLEMENT` +- Message sent to DLQ (not retryable) +- Error code: `ENTITLEMENT_DENIED` + +--- + +#### TC-C05: Subscription Expired + +| Field | Value | +| ----------------- | ---------------------------------------------------- | +| **Test ID** | TC-C05 | +| **Priority** | P1 - High | +| **Prerequisites** | Customer `test-payg-user-3` has expired subscription | + +**Test Message:** + +```json +{ + "taskId": "tc-c05-task", + "taskType": "REPORT_USAGE", + "payload": { + "eventId": "tc-c05-evt-001", + "customerId": "test-payg-user-3", + "featureId": "feature-create-with-ai-raw", + "usageValue": 10, + "eventTimestamp": "2026-01-28T10:14:00.000Z", + "source": "qa-test" + }, + "createdAt": "2026-01-28T10:14:00.000Z" +} +``` + +**Expected Results:** + +- Entitlement check fails: `SUBSCRIPTION_EXPIRED` +- Retries 2 times with exponential backoff (2s, 4s) +- After retries (3 total attempts), sent to DLQ +- Error code: `NO_ACTIVE_SUBSCRIPTION` + +--- + +### Category D: Idempotency Tests + +#### TC-D01: Duplicate eventId (Same Message Twice) + +| Field | Value | +| ----------------- | -------------------------------- | +| **Test ID** | TC-D01 | +| **Priority** | P0 - Critical | +| **Prerequisites** | Customer has active subscription | + +**Test Message (send TWICE with same eventId):** + +```json +{ + "taskId": "tc-d01-task", + "taskType": "REPORT_USAGE", + "payload": { + "eventId": "tc-d01-idempotent-event", + "customerId": "test-payg-user-1", + "featureId": "feature-create-with-ai-raw", + "usageValue": 100, + "eventTimestamp": "2026-01-28T10:15:00.000Z", + "source": "qa-test-idempotency" + }, + "createdAt": "2026-01-28T10:15:00.000Z" +} +``` + +**Expected Results:** + +- First message: Usage deducted (100 units) +- Second message: Stigg 
returns cached result, NO double deduction +- ClickHouse has 2 audit entries (both `status = 'success'`) +- Stigg dashboard shows only 100 units consumed (not 200) + +--- + +#### TC-D02: Same eventId with Different usageValue + +| Field | Value | +| ----------------- | -------------------------------- | +| **Test ID** | TC-D02 | +| **Priority** | P1 - High | +| **Prerequisites** | Customer has active subscription | + +**Message 1:** + +```json +{ + "taskId": "tc-d02-task-1", + "taskType": "REPORT_USAGE", + "payload": { + "eventId": "tc-d02-same-event-id", + "customerId": "test-payg-user-1", + "featureId": "feature-create-with-ai-raw", + "usageValue": 50, + "eventTimestamp": "2026-01-28T10:16:00.000Z", + "source": "qa-test" + }, + "createdAt": "2026-01-28T10:16:00.000Z" +} +``` + +**Message 2 (same eventId, different usageValue):** + +```json +{ + "taskId": "tc-d02-task-2", + "taskType": "REPORT_USAGE", + "payload": { + "eventId": "tc-d02-same-event-id", + "customerId": "test-payg-user-1", + "featureId": "feature-create-with-ai-raw", + "usageValue": 75, + "eventTimestamp": "2026-01-28T10:17:00.000Z", + "source": "qa-test" + }, + "createdAt": "2026-01-28T10:17:00.000Z" +} +``` + +**Expected Results:** + +- First message's value (50) is used +- Second message is deduplicated by Stigg (ignored) +- Only 50 units charged to customer + +--- + +### Category E: Edge Cases + +#### TC-E01: Maximum usageValue + +| Field | Value | +| ----------------- | -------------------------------- | +| **Test ID** | TC-E01 | +| **Priority** | P2 - Medium | +| **Prerequisites** | Customer has active subscription | + +**Test Message:** + +```json +{ + "taskId": "tc-e01-task", + "taskType": "REPORT_USAGE", + "payload": { + "eventId": "tc-e01-evt-001", + "customerId": "test-payg-user-1", + "featureId": "feature-create-with-ai-raw", + "usageValue": 999999999, + "eventTimestamp": "2026-01-28T10:18:00.000Z", + "source": "qa-test" + }, + "createdAt": "2026-01-28T10:18:00.000Z" +} +``` + 
+**Expected Results:** + +- Message processed successfully (if within limit) +- Large usage value recorded correctly + +--- + +#### TC-E02: Exceeds Maximum usageValue + +| Field | Value | +| ----------------- | ----------- | +| **Test ID** | TC-E02 | +| **Priority** | P2 - Medium | +| **Prerequisites** | None | + +**Test Message:** + +```json +{ + "taskId": "tc-e02-task", + "taskType": "REPORT_USAGE", + "payload": { + "eventId": "tc-e02-evt-001", + "customerId": "test-payg-user-1", + "featureId": "feature-create-with-ai-raw", + "usageValue": 1000000001, + "eventTimestamp": "2026-01-28T10:19:00.000Z", + "source": "qa-test" + }, + "createdAt": "2026-01-28T10:19:00.000Z" +} +``` + +**Expected Results:** + +- Validation fails (exceeds 1 billion limit) +- Message sent to DLQ + +--- + +#### TC-E03: Maximum resourceId Length + +| Field | Value | +| ----------------- | -------------------------------- | +| **Test ID** | TC-E03 | +| **Priority** | P2 - Medium | +| **Prerequisites** | Customer has active subscription | + +**Test Message:** + +```json +{ + "taskId": "tc-e03-task", + "taskType": "REPORT_USAGE", + "payload": { + "eventId": "tc-e03-evt-001", + "customerId": "test-payg-user-1", + "featureId": "feature-create-with-ai-raw", + "usageValue": 1, + "resourceId": "12345678901234567890123456789012345678901234567890", + "eventTimestamp": "2026-01-28T10:20:00.000Z", + "source": "qa-test" + }, + "createdAt": "2026-01-28T10:20:00.000Z" +} +``` + +**Expected Results:** + +- Message processed successfully (exactly 50 chars) + +--- + +#### TC-E04: Exceeds resourceId Length + +| Field | Value | +| ----------------- | ----------- | +| **Test ID** | TC-E04 | +| **Priority** | P2 - Medium | +| **Prerequisites** | None | + +**Test Message:** + +```json +{ + "taskId": "tc-e04-task", + "taskType": "REPORT_USAGE", + "payload": { + "eventId": "tc-e04-evt-001", + "customerId": "test-payg-user-1", + "featureId": "feature-create-with-ai-raw", + "usageValue": 1, + "resourceId": 
"123456789012345678901234567890123456789012345678901", + "eventTimestamp": "2026-01-28T10:21:00.000Z", + "source": "qa-test" + }, + "createdAt": "2026-01-28T10:21:00.000Z" +} +``` + +**Expected Results:** + +- Validation fails (exceeds 50 char limit) +- Message sent to DLQ + +--- + +### Category F: Usage Limit Tests + +#### TC-F01: Usage Limit Exceeded (Overage Allowed) + +| Field | Value | +| ----------------- | --------------------------------- | +| **Test ID** | TC-F01 | +| **Priority** | P1 - High | +| **Prerequisites** | Customer has used all their quota | + +**Test Message:** + +```json +{ + "taskId": "tc-f01-task", + "taskType": "REPORT_USAGE", + "payload": { + "eventId": "tc-f01-evt-001", + "customerId": "test-payg-user-1", + "featureId": "feature-create-with-ai-raw", + "usageValue": 999999, + "eventTimestamp": "2026-01-28T10:22:00.000Z", + "source": "qa-test-overage" + }, + "createdAt": "2026-01-28T10:22:00.000Z" +} +``` + +**Expected Results:** + +- Entitlement check may return `USAGE_LIMIT_EXCEEDED` +- Worker allows this through (Stigg handles overage) +- Usage is reported and logged +- Check Stigg dashboard for overage billing + +--- + +### Category G: Retry and DLQ Tests + +#### TC-G01: Verify Retry Behavior + +| Field | Value | +| ----------------- | -------------------------------------------------- | +| **Test ID** | TC-G01 | +| **Priority** | P1 - High | +| **Prerequisites** | Customer without subscription (to trigger retries) | + +**Test Message:** + +```json +{ + "taskId": "tc-g01-task", + "taskType": "REPORT_USAGE", + "payload": { + "eventId": "tc-g01-evt-001", + "customerId": "test-payg-user-2", + "featureId": "feature-create-with-ai-raw", + "usageValue": 10, + "eventTimestamp": "2026-01-28T10:23:00.000Z", + "source": "qa-test-retry" + }, + "createdAt": "2026-01-28T10:23:00.000Z" +} +``` + +**Expected Results:** + +- First attempt fails +- Retry after 2 seconds +- Retry after 4 seconds (6s total) +- Sent to DLQ after 2 retries (3 total 
attempts) +- Total time: ~6 seconds + +**Verification:** + +- Watch logs for retry messages +- Check DLQ after ~10 seconds + +--- + +#### TC-G02: Verify DLQ Message Format + +| Field | Value | +| ----------------- | --------------------------------- | +| **Test ID** | TC-G02 | +| **Priority** | P1 - High | +| **Prerequisites** | Message in DLQ from previous test | + +**Steps:** + +1. Go to RabbitMQ Management UI +2. Navigate to `credit-deduction-dlq` +3. Get message from queue + +**Expected DLQ Message Headers:** + +- `x-dlq-failure-reason`: Error description +- `x-dlq-failed-at`: ISO timestamp +- `x-dlq-worker`: `CreditDeductionWorker` + +**Expected DLQ Message Body:** + +- Original message preserved exactly (can be replayed) + +--- + +#### TC-G03: DLQ Message Replay + +| Field | Value | +| ----------------- | ----------------------------------------- | +| **Test ID** | TC-G03 | +| **Priority** | P2 - Medium | +| **Prerequisites** | Message in DLQ, cause of failure resolved | + +**Steps:** + +1. Get message from `credit-deduction-dlq` +2. Update `attemptCount` to 0 (or remove it) +3. Publish to `credit-deduction-queue` +4. Verify message processes successfully + +--- + +## 8. 
Verification Queries + +### New Relic NRQL Queries + +#### All Usage Events (Last Hour) + +```sql +SELECT * FROM EventProcessed +WHERE workerName = 'credit-deduction' +SINCE 1 hour ago +``` + +#### Success vs Failure Rate + +```sql +SELECT count(*) FROM EventProcessed +WHERE workerName = 'credit-deduction' +FACET success +SINCE 1 hour ago +``` + +#### Failed Events by Reason + +```sql +SELECT count(*) FROM EventProcessed +WHERE workerName = 'credit-deduction' AND success = false +FACET failureReason +SINCE 1 hour ago +``` + +#### External API Latency + +```sql +SELECT average(latencyMs), percentile(latencyMs, 95) +FROM ExternalApiCall +FACET service, operation +SINCE 1 hour ago TIMESERIES +``` + +#### Worker Heartbeat (Health Check) + +```sql +SELECT latest(timestamp) FROM WorkerHeartbeat +WHERE workerName = 'CreditDeductionWorker' +SINCE 5 minutes ago +``` + +### ClickHouse SQL Queries + +#### All Audit Entries (Recent) + +```sql +SELECT * FROM credit_audit +ORDER BY processed_at DESC +LIMIT 100; +``` + +#### Events by Status + +```sql +SELECT status, count(*) as count +FROM credit_audit +WHERE processed_at > now() - INTERVAL 1 HOUR +GROUP BY status; +``` + +#### Failed Events with Errors + +```sql +SELECT event_id, customer_id, feature_id, error_message, processed_at +FROM credit_audit +WHERE status = 'failed' +ORDER BY processed_at DESC +LIMIT 50; +``` + +#### Usage by Customer + +```sql +SELECT customer_id, sum(usage_value) as total_usage, count(*) as event_count +FROM credit_audit +WHERE status = 'success' +GROUP BY customer_id +ORDER BY total_usage DESC; +``` + +#### Specific Event Lookup + +```sql +SELECT * FROM credit_audit +WHERE event_id = 'YOUR_EVENT_ID_HERE'; +``` + +--- + +## 9. How to Publish Test Messages + +### Option 1: RabbitMQ Management UI + +1. Open RabbitMQ Management UI (port 15672) +2. Navigate to **Queues** tab +3. Click on `credit-deduction-queue` +4. Expand **Publish message** section +5. 
Set **Properties**: + - `content_type: application/json` +6. Paste JSON message in **Payload** +7. Click **Publish message** + +### Option 2: Command Line (rabbitmqadmin) + +```bash +rabbitmqadmin publish \ + exchange=amq.default \ + routing_key=credit-deduction-queue \ + properties='{"content_type":"application/json"}' \ + payload='' +``` + +### Option 3: Port Forward to Local + +```bash +# Port forward RabbitMQ +kubectl port-forward svc/rabbitmq 15672:15672 -n smallest-dev-aps1 + +# Access at http://localhost:15672 +``` + +--- + +## 10. Test Execution Checklist + +### Pre-Test Setup + +| Step | Action | Status | +| ---- | ------------------------------------ | ------ | +| 1 | Verify console-backend is running | [ ] | +| 2 | Verify RabbitMQ is accessible | [ ] | +| 3 | Verify ClickHouse is accessible | [ ] | +| 4 | Verify Stigg sidecar is running | [ ] | +| 5 | Verify test customers exist in Stigg | [ ] | +| 6 | Clear any old test data (optional) | [ ] | + +### Test Execution + +| Test ID | Test Name | Priority | Status | Notes | +| ------- | ----------------------------- | -------- | ------ | ----- | +| TC-A01 | Basic Usage Report | P0 | [ ] | | +| TC-A02 | Multiple Usage Reports | P0 | [ ] | | +| TC-A03 | Usage with Resource ID | P1 | [ ] | | +| TC-A04 | Usage with Subscription ID | P2 | [ ] | | +| TC-B01 | Missing customerId | P1 | [ ] | | +| TC-B02 | Missing featureId | P1 | [ ] | | +| TC-B03 | Invalid usageValue (Negative) | P1 | [ ] | | +| TC-B04 | Invalid usageValue (Zero) | P1 | [ ] | | +| TC-B05 | Invalid eventTimestamp | P2 | [ ] | | +| TC-C01 | Customer Not Found | P0 | [ ] | | +| TC-C02 | No Subscription | P0 | [ ] | | +| TC-C03 | Feature Not Found | P1 | [ ] | | +| TC-C04 | Feature Not In Plan | P0 | [ ] | | +| TC-C05 | Subscription Expired | P1 | [ ] | | +| TC-D01 | Duplicate eventId | P0 | [ ] | | +| TC-D02 | Same eventId, Different Value | P1 | [ ] | | +| TC-E01 | Maximum usageValue | P2 | [ ] | | +| TC-E02 | Exceeds Maximum usageValue | P2 | [ ] | 
| +| TC-E03 | Maximum resourceId Length | P2 | [ ] | | +| TC-E04 | Exceeds resourceId Length | P2 | [ ] | | +| TC-F01 | Usage Limit Exceeded | P1 | [ ] | | +| TC-G01 | Verify Retry Behavior | P1 | [ ] | | +| TC-G02 | Verify DLQ Message Format | P1 | [ ] | | +| TC-G03 | DLQ Message Replay | P2 | [ ] | | + +### Post-Test Verification + +| Step | Action | Status | +| ---- | -------------------------------------------- | ------ | +| 1 | Verify all expected ClickHouse entries | [ ] | +| 2 | Verify New Relic metrics | [ ] | +| 3 | Verify Stigg usage in dashboard | [ ] | +| 4 | Verify DLQ contains expected failed messages | [ ] | +| 5 | Clean up test data | [ ] | + +--- + +## 11. Troubleshooting + +### Messages Not Being Processed + +1. Check if worker is running: + ```bash + kubectl logs -f deployment/console-backend -n smallest-dev-aps1 | grep CreditDeductionWorker + ``` +2. Look for "Started successfully" message +3. Verify queue has consumers in RabbitMQ UI + +### Messages Going to DLQ Unexpectedly + +1. Check DLQ message headers for `x-dlq-failure-reason` +2. Look up error code in Error Codes table +3. Verify customer exists in Stigg +4. Verify feature is in customer's plan + +### ClickHouse Not Receiving Logs + +1. Check for "CLICKHOUSE_AUDIT_FALLBACK" in console logs +2. Verify ClickHouse connectivity +3. Check console-backend logs for ClickHouse errors + +### Stigg API Errors + +1. Check stigg-proxy logs: + ```bash + kubectl logs -f deployment/stigg-proxy -n smallest-dev-aps1 + ``` +2. Verify stigg-sidecar is running +3. Check API key configuration + +--- + +## 12. 
Contact + +| Role | Name | Contact | +| ------------- | --------- | --------------------- | +| Feature Owner | Pratiksha | pratiksha@smallest.ai | +| Backend Team | - | #backend-team | +| DevOps | - | #devops | diff --git a/fern/products/atoms/pages/testing-credit-deduction-worker.md b/fern/products/atoms/pages/testing-credit-deduction-worker.md new file mode 100644 index 0000000..76650ec --- /dev/null +++ b/fern/products/atoms/pages/testing-credit-deduction-worker.md @@ -0,0 +1,634 @@ +# Credit Deduction Worker - Testing Guide + +## Overview + +The credit deduction worker processes usage events from RabbitMQ and reports them to Stigg. + +**Design Philosophy: Validate Before Reporting** + +- ✅ **Entitlement check** - Verify feature is in customer's plan (via sidecar cache) +- ✅ **No planId in payload** - Stigg knows the customer's plan automatically +- ✅ **Stigg handles overage** - Usage beyond limits is handled by Stigg +- ✅ **Usage is never lost** - Events retry and go to DLQ for manual review +- ❌ **NO auto-provisioning** - Subscriptions must exist beforehand + +--- + +## Queue Names + +- **Main Queue**: `credit-deduction-queue` +- **Dead Letter Queue**: `credit-deduction-dlq` + +--- + +## Message Format + +**IMPORTANT:** Messages must be wrapped in a `WorkerTask` structure: + +```json +{ + "taskId": "unique-task-id", + "taskType": "REPORT_USAGE", + "payload": { + // Your actual data here + }, + "createdAt": "2026-01-22T12:00:00.000Z" +} +``` + +| Field | Type | Required | Description | +| -------------- | ------------ | -------- | ---------------------------- | +| `taskId` | string | ✅ | Unique task identifier | +| `taskType` | string | ✅ | Task type (`REPORT_USAGE`) | +| `payload` | object | ✅ | The actual usage report data | +| `createdAt` | ISO datetime | ✅ | When the task was created | +| `attemptCount` | number | ❌ | Retry count (default: 0) | +| `priority` | number | ❌ | Message priority | + +--- + +## Validation Checks + +**Optimized Flow (single sidecar 
call):** + +1. Validate payload fields (sync, instant) +2. **Entitlement check** (Stigg Sidecar cache - <1ms!) - covers ALL checks +3. Report usage to Stigg + +| Check | Error Code | Retryable? | Description | +| ----------------------- | ------------------------ | ---------- | ------------------------------------------------------------- | +| Basic payload | `INVALID_PAYLOAD` | ❌ No | Missing customerId, featureId, eventId, or invalid usageValue | +| Customer not found | `ENTITLEMENT_DENIED` | ❌ No | Customer doesn't exist in Stigg | +| No subscription | `NO_ACTIVE_SUBSCRIPTION` | ✅ Yes | Customer has no subscription | +| Subscription expired | `NO_ACTIVE_SUBSCRIPTION` | ✅ Yes | Customer's subscription expired | +| Feature not found | `ENTITLEMENT_DENIED` | ❌ No | Feature doesn't exist in Stigg | +| **Feature not in plan** | `ENTITLEMENT_DENIED` | ❌ No | Feature not included in customer's plan | +| Usage limit exceeded | ⚠️ Allowed | N/A | Stigg handles overage automatically | +| Sidecar errors | `STIGG_ERROR` | ✅ Yes | Network/API errors when calling Stigg | + +**What we DON'T block on:** + +- ❌ planId in payload - Stigg applies usage to customer's active subscription +- ❌ Usage limits - Stigg handles overage automatically + +--- + +## Your Stigg Customers (Dev) + +| Name | Customer ID (use this in `customerId`) | +| ---------------- | -------------------------------------- | +| Test PAYG User 1 | `test-payg-user-1` | +| Test PAYG User 2 | `test-payg-user-2` | +| Test PAYG User 3 | `test-payg-user-3` | + +--- + +## Available Features (plan-payg) + +| Feature Name | Feature ID | Type | +| --------------------------- | ----------------------------- | ------------------------------ | +| Create with AI (Raw Events) | `feature-create-ai-agent-raw` | Metered (0.5 credits/creation) | +| TTS V2 Access | `feature-tts-v2-access` | Boolean | + +--- + +## Test Scenarios + +### Scenario 1: Happy Path - Customer with Active Subscription + +Test a customer who has an active 
subscription and reports usage. +**Prerequisite:** Customer must have an active subscription in Stigg! + +```json +{ + "taskId": "test-task-001", + "taskType": "REPORT_USAGE", + "payload": { + "eventId": "test-evt-001", + "customerId": "test-payg-user-1", + "featureId": "feature-create-ai-agent-raw", + "usageValue": 10, + "eventTimestamp": "2026-01-22T12:00:00.000Z", + "source": "manual-test" + }, + "createdAt": "2026-01-22T12:00:00.000Z" +} +``` + +**Expected Behavior:** + +- `getEntitlement` returns `hasAccess: true` +- Usage reported to Stigg (10 creations × 0.5 credits = 5 credits) +- ClickHouse audit log with `status: success` +- New Relic event: `UsageReportEvent` with `status: success` + +--- + +### Scenario 2: No Active Subscription (Failure Case) + +Test a customer who has NO active subscription - should fail and retry. + +```json +{ + "taskId": "test-task-002", + "taskType": "REPORT_USAGE", + "payload": { + "eventId": "test-evt-002", + "customerId": "test-payg-user-2", + "featureId": "feature-create-ai-agent-raw", + "usageValue": 5, + "eventTimestamp": "2026-01-22T12:05:00.000Z", + "source": "manual-test" + }, + "createdAt": "2026-01-22T12:05:00.000Z" +} +``` + +**Expected Behavior (when customer has NO subscription):** + +- Validation fails with `NO_ACTIVE_SUBSCRIPTION` error code +- Message retries 3 times (2s, 4s, 8s exponential backoff) +- After retries exhausted → goes to DLQ +- New Relic event: `UsageValidationFailed` with `errorCode: NO_ACTIVE_SUBSCRIPTION` +- ClickHouse audit log with `status: failed` + +**To make this test pass:** First create a subscription for the customer in Stigg! 
+ +--- + +### Scenario 3: Multiple Usage Reports (Same Customer) + +Report multiple usage events for the same customer: + +**First Event:** + +```json +{ + "taskId": "test-task-003a", + "taskType": "REPORT_USAGE", + "payload": { + "eventId": "test-evt-003a", + "customerId": "test-payg-user-1", + "featureId": "feature-create-ai-agent-raw", + "usageValue": 1, + "eventTimestamp": "2026-01-22T12:10:00.000Z", + "source": "manual-test" + }, + "createdAt": "2026-01-22T12:10:00.000Z" +} +``` + +**Second Event:** + +```json +{ + "taskId": "test-task-003b", + "taskType": "REPORT_USAGE", + "payload": { + "eventId": "test-evt-003b", + "customerId": "test-payg-user-1", + "featureId": "feature-create-ai-agent-raw", + "usageValue": 2, + "eventTimestamp": "2026-01-22T12:11:00.000Z", + "source": "manual-test" + }, + "createdAt": "2026-01-22T12:11:00.000Z" +} +``` + +**Expected Behavior:** + +- Both events processed successfully +- Total usage: 3 creations = 1.5 credits consumed + +--- + +### Scenario 4: Idempotency Test (Same Event ID) + +Send the **same message twice** to verify Stigg's deduplication: + +```json +{ + "taskId": "test-task-004", + "taskType": "REPORT_USAGE", + "payload": { + "eventId": "test-evt-004-duplicate", + "customerId": "test-payg-user-1", + "featureId": "feature-create-ai-agent-raw", + "usageValue": 100, + "eventTimestamp": "2026-01-22T12:15:00.000Z", + "source": "manual-test-idempotency" + }, + "createdAt": "2026-01-22T12:15:00.000Z" +} +``` + +**Expected Behavior:** + +- First message: Usage deducted (100 creations = 50 credits) +- Second message: Stigg returns cached result, NO double deduction +- ClickHouse will have 2 entries (audit only), but Stigg only charged once + +--- + +### Scenario 5: Invalid Customer (Failure Case) + +Test with a non-existent customer to trigger retry/DLQ: + +```json +{ + "taskId": "test-task-005", + "taskType": "REPORT_USAGE", + "payload": { + "eventId": "test-evt-005", + "customerId": "invalid_customer_xyz", + "featureId": 
"feature-create-ai-agent-raw", + "usageValue": 10, + "eventTimestamp": "2026-01-22T12:20:00.000Z", + "source": "manual-test-failure" + }, + "createdAt": "2026-01-22T12:20:00.000Z" +} +``` + +**Expected Behavior:** + +- Will retry 3 times with exponential backoff (2s, 4s, 8s) +- After max retries, sent to DLQ: `credit-deduction-dlq` +- New Relic event: `UsageReportDLQ` +- ClickHouse logs with `status: failed` + +--- + +### Scenario 6: Invalid Feature ID (Feature Doesn't Exist in Stigg) + +Test with a non-existent feature - Stigg will return an error: + +```json +{ + "taskId": "test-task-006", + "taskType": "REPORT_USAGE", + "payload": { + "eventId": "test-evt-006", + "customerId": "test-payg-user-1", + "featureId": "feature-does-not-exist", + "usageValue": 10, + "eventTimestamp": "2026-01-22T12:25:00.000Z", + "source": "manual-test-invalid-feature" + }, + "createdAt": "2026-01-22T12:25:00.000Z" +} +``` + +**Expected Behavior:** + +- Entitlement check fails: `hasAccess: false`, `accessDeniedReason: FEATURE_NOT_FOUND (4)` +- Validation fails with `ENTITLEMENT_DENIED` error code +- Message goes to DLQ (not retryable) +- ClickHouse logs with `status: failed` + +--- + +### Scenario 7: Feature NOT in Customer's Plan (Critical!) 
+ +Test with a feature that **exists in Stigg** but is **NOT in the customer's plan**: + +```json +{ + "taskId": "test-task-007", + "taskType": "REPORT_USAGE", + "payload": { + "eventId": "test-evt-007", + "customerId": "test-payg-user-1", + "featureId": "feature-enterprise-only", + "usageValue": 10, + "eventTimestamp": "2026-01-22T12:27:00.000Z", + "source": "manual-test-feature-not-in-plan" + }, + "createdAt": "2026-01-22T12:27:00.000Z" +} +``` + +**Expected Behavior:** + +- Entitlement check fails: `hasAccess: false`, `accessDeniedReason: NO_FEATURE_ENTITLEMENT (2)` +- Validation fails with `ENTITLEMENT_DENIED` error code +- Message goes to DLQ (not retryable - feature won't magically appear in plan) +- New Relic event: `UsageValidationFailed` with details +- ClickHouse logs with `status: failed` + +**⚠️ Why This Matters:** Without this check, usage would be reported to Stigg even for features not in the plan! + +--- + +### Scenario 8: Usage Limit Exceeded (Overage) + +Test when customer has exceeded their usage quota: + +```json +{ + "taskId": "test-task-008", + "taskType": "REPORT_USAGE", + "payload": { + "eventId": "test-evt-008", + "customerId": "test-payg-user-1", + "featureId": "feature-create-ai-agent-raw", + "usageValue": 999999, + "eventTimestamp": "2026-01-22T12:28:00.000Z", + "source": "manual-test-overage" + }, + "createdAt": "2026-01-22T12:28:00.000Z" +} +``` + +**Expected Behavior (depends on plan configuration):** + +- If plan allows overage: ✅ Success, Stigg tracks overage +- If plan blocks overage: Entitlement check fails with `accessDeniedReason: USAGE_LIMIT_EXCEEDED (3)` +- Check Stigg dashboard for overage webhook + +--- + +### Scenario 9: Invalid Payload (Missing Required Fields) + +Test with missing required fields: + +```json +{ + "taskId": "test-task-009", + "taskType": "REPORT_USAGE", + "payload": { + "eventId": "test-evt-009", + "customerId": "", + "featureId": "feature-create-ai-agent-raw", + "usageValue": -5, + "eventTimestamp": 
"2026-01-22T12:29:00.000Z" + }, + "createdAt": "2026-01-22T12:29:00.000Z" +} +``` + +**Expected Behavior:** + +- Validation fails immediately with `INVALID_PAYLOAD` error +- NOT retryable (bad data won't fix itself) +- Goes to DLQ immediately +- ClickHouse logs with error details + +--- + +### Scenario 10: Resource-Specific Usage + +Test with a resource ID (e.g., per-agent usage): + +```json +{ + "taskId": "test-task-010", + "taskType": "REPORT_USAGE", + "payload": { + "eventId": "test-evt-010", + "customerId": "test-payg-user-1", + "featureId": "feature-create-ai-agent-raw", + "usageValue": 5, + "resourceId": "agent_abc123", + "eventTimestamp": "2026-01-22T12:30:00.000Z", + "source": "manual-test", + "metadata": { + "agentName": "Sales Bot", + "callId": "call_xyz" + } + }, + "createdAt": "2026-01-22T12:30:00.000Z" +} +``` + +**Expected Behavior:** + +- Entitlement check passes +- Usage reported to Stigg with resource ID +- ClickHouse audit includes `resource_id` + +--- + +## Publishing Messages to RabbitMQ + +### Via RabbitMQ Management Console + +1. Go to RabbitMQ Management UI (usually port 15672) +2. Navigate to **Queues** → `credit-deduction-queue` +3. Click **Publish message** +4. Set **Properties**: `content_type: application/json` +5. 
Paste the JSON payload + +### Via CLI + +**Note:** CLI-published messages must also use the `WorkerTask` wrapper described in the Message Format section — a bare usage payload will fail validation. + +```bash +rabbitmqadmin publish exchange=amq.default routing_key=credit-deduction-queue \ +  properties='{"content_type":"application/json"}' \ +  payload='{"taskId":"cli-test-001","taskType":"REPORT_USAGE","payload":{"eventId":"test-001","customerId":"test-payg-user-1","featureId":"feature-create-ai-agent-raw","usageValue":10,"eventTimestamp":"2026-01-21T12:00:00.000Z"},"createdAt":"2026-01-21T12:00:00.000Z"}' +``` + +--- + +## New Relic Queries (NRQL) + +### All Usage Report Events + +```sql +SELECT * FROM UsageReportEvent +WHERE appName = 'console-backend' +SINCE 1 hour ago +``` + +### Success vs Failure Rate + +```sql +SELECT count(*) FROM UsageReportEvent +FACET status +SINCE 1 hour ago +``` + +### Auto-Provisioned Subscriptions + +> NOTE: auto-provisioning is disabled by design (see Design Philosophy above), so this query should normally return no results — any rows indicate a misconfiguration. + +```sql +SELECT * FROM SubscriptionProvisioned +WHERE appName = 'console-backend' +SINCE 1 hour ago +``` + +### DLQ Messages (Failed after retries) + +```sql +SELECT * FROM UsageReportDLQ +WHERE appName = 'console-backend' +SINCE 1 hour ago +``` + +### Stigg API Health + +```sql +SELECT average(duration), percentage(count(*), WHERE success = true) as 'Success Rate' +FROM StiggApiCall +FACET operation +SINCE 1 hour ago TIMESERIES +``` + +### Processing Latency by Feature + +```sql +SELECT average(duration), percentile(duration, 95) +FROM UsageReportEvent +WHERE status = 'success' +FACET featureId +SINCE 1 hour ago +``` + +### Worker Heartbeat (Health Check) + +```sql +SELECT latest(timestamp) FROM WorkerHeartbeat +WHERE workerName = 'CreditDeductionWorker' +SINCE 5 minutes ago +``` + +### Usage by Customer + +```sql +SELECT sum(usageValue) FROM UsageReportEvent +WHERE status = 'success' +FACET customerId +SINCE 1 hour ago +``` + +--- + +## ClickHouse Queries (Audit Logs) + +### All Events + +```sql +SELECT * FROM credit_audit +ORDER BY processed_at DESC +LIMIT 100; +``` + +### Success vs Failure + +```sql +SELECT status, count(*) +FROM credit_audit +WHERE processed_at > now() - INTERVAL 1 HOUR +GROUP BY status; +``` + +### Auto-provisioned Subscriptions + +> NOTE: auto-provisioning is disabled by design, so `wasProvisioned` should never be true; any rows returned here warrant investigation. + +```sql +SELECT * FROM credit_audit +WHERE JSONExtractBool(metadata, 'wasProvisioned') = true +ORDER BY processed_at DESC;
+``` + +### Failed Events by Customer + +```sql +SELECT customer_id, count(*), max(error_message) +FROM credit_audit +WHERE status = 'failed' +GROUP BY customer_id; +``` + +--- + +## Test Checklist + +| # | Test | Task ID | Expected | NR Event | +| --- | -------------------------------- | ---------------- | ----------------------------- | ------------------------------------------------ | +| 1 | Happy path (active subscription) | test-task-001 | ✅ Success | `UsageReportEvent` | +| 2 | No active subscription | test-task-002 | ❌ Fail → DLQ | `UsageValidationFailed` (NO_ACTIVE_SUBSCRIPTION) | +| 3 | Multiple usage reports | test-task-003a/b | ✅ Success x2 | `UsageReportEvent` x2 | +| 4 | Idempotency (same eventId x2) | test-task-004 x2 | ✅ Success (no double charge) | `UsageReportEvent` x2 | +| 5 | Invalid customer | test-task-005 | ❌ Fail → DLQ | `UsageValidationFailed` (NO_ACTIVE_SUBSCRIPTION) | +| 6 | Feature doesn't exist in Stigg | test-task-006 | ❌ Fail → DLQ | `UsageValidationFailed` (ENTITLEMENT_DENIED) | +| 7 | **Feature NOT in plan** | test-task-007 | ❌ Fail → DLQ | `UsageValidationFailed` (ENTITLEMENT_DENIED) | +| 8 | Usage limit exceeded | test-task-008 | ⚠️ Depends on plan | `UsageReportEvent` or `UsageValidationFailed` | +| 9 | Invalid payload | test-task-009 | ❌ Fail → DLQ | `UsageValidationFailed` (INVALID_PAYLOAD) | +| 10 | With resource ID | test-task-010 | ✅ Success | `UsageReportEvent` | + +**AccessDeniedReason Codes (from Stigg):** +| Code | Reason | Retryable? 
| +|------|--------|------------| +| 1 | NO_SUBSCRIPTION | ✅ Yes | +| 2 | NO_FEATURE_ENTITLEMENT | ❌ No | +| 3 | USAGE_LIMIT_EXCEEDED | ❌ No | +| 4 | FEATURE_NOT_FOUND | ❌ No | +| 5 | SUBSCRIPTION_EXPIRED | ✅ Yes | +| 8 | CUSTOMER_NOT_FOUND | ❌ No | + +--- + +## Troubleshooting + +### Queues Not Created + +- Check pod is running: `kubectl get pods -n dev` +- Check logs: `kubectl logs -f deployment/console-backend -n dev` +- Verify RabbitMQ URL in secrets + +### Messages Not Processing + +- Check worker started: Look for `[CreditDeductionWorker] Started successfully` in logs +- Check Stigg sidecar connectivity +- Verify ClickHouse connectivity + +### DLQ Building Up + +- Check New Relic for `UsageReportDLQ` events +- Review error messages in ClickHouse +- Common issues: Invalid customer, Stigg API errors, network issues + +--- + +## Message Payload Schema + +```typescript +{ + // Required fields + eventId: string; // Unique ID - used as Stigg idempotency key + customerId: string; // Customer/Organization ID + featureId: string; // Feature ID (e.g., 'api-calls', 'tts-characters') + usageValue: number; // Raw usage value (positive number) + eventTimestamp: string; // ISO 8601 datetime + + // Optional fields + subscriptionId?: string; // Subscription ID (if known) + source?: string; // Event source (e.g., 'atoms-backend', 'waves-backend') + resourceId?: string; // Resource ID for per-resource tracking + metadata?: object; // Additional metadata +} +``` + +--- + +## Architecture + +``` +┌─────────────────┐ ┌──────────────────────┐ ┌─────────────────┐ +│ atoms-backend │─────▶│ credit-deduction │─────▶│ Stigg Sidecar │ +│ waves-backend │ │ queue (RabbitMQ) │ │ (gRPC) │ +└─────────────────┘ └──────────────────────┘ └─────────────────┘ + │ │ + ▼ │ + ┌──────────────────┐ │ + │ CreditDeduction │◀─────────────────┘ + │ Worker │ + └──────────────────┘ + │ + ┌──────────────┼──────────────┐ + ▼ ▼ ▼ + ┌─────────────┐ ┌──────────┐ ┌──────────────┐ + │ ClickHouse │ │ New Relic│ │ 
Stigg Backend│ + │ (Audit Log) │ │ (Metrics)│ │ (Provision) │ + └─────────────┘ └──────────┘ └──────────────┘ +``` + +# Trigger build diff --git a/fern/products/waves/pages/audio/stt-sample-audio.wav b/fern/products/waves/pages/audio/stt-sample-audio.wav new file mode 100644 index 0000000..5f9e2f4 Binary files /dev/null and b/fern/products/waves/pages/audio/stt-sample-audio.wav differ diff --git a/fern/products/waves/pages/audio/tts-sample-hello.wav b/fern/products/waves/pages/audio/tts-sample-hello.wav new file mode 100644 index 0000000..ade03e7 Binary files /dev/null and b/fern/products/waves/pages/audio/tts-sample-hello.wav differ diff --git a/fern/products/waves/pages/getting-started/models.mdx b/fern/products/waves/pages/getting-started/models.mdx deleted file mode 100644 index 2a0cfa8..0000000 --- a/fern/products/waves/pages/getting-started/models.mdx +++ /dev/null @@ -1,28 +0,0 @@ ---- -title: "Models" -description: "Find detailed description of each model along with their capabilities and supported languages." -icon: "cube" ---- - - - - Our fastest model, optimized for low-latency applications. It can generate 10 seconds of audio in just 100 milliseconds, making it ideal for real-time applications such as voicebots and interactive systems. - - - Offers more emotional depth and expressiveness compared to the Lightning model. It supports voice cloning and has a latency of just under 300 milliseconds, making it suitable for applications requiring high-quality, expressive speech. - - - - -## Model Overview - -| Model ID | Description | Languages Supported | -|-----------------------|-----------------------------------------------------------------------------|---------------------| -| **lightning** | Fastest model with an RTF of 0.01, generating 10 seconds of audio in 100 ms. | English, Hindi | -| **lightning-large** | More emotional depth and expressiveness, supports voice cloning, latency under 300 ms. 
| English, Hindi | -| **lightning-multilingual** | Supports 30 languages, currently in beta. | 30 languages | - - -## Pricing - -Our pricing model is designed to be flexible and scalable, catering to different usage needs. For detailed pricing information, please visit our [pricing page](https://smallest.ai/text-to-speech) or contact our sales team at [support@smallest.ai](mailto:support@smallest.ai). \ No newline at end of file diff --git a/fern/products/waves/pages/images/agent-dashboard-conversions.png b/fern/products/waves/pages/images/agent-dashboard-conversions.png new file mode 100644 index 0000000..edf540d Binary files /dev/null and b/fern/products/waves/pages/images/agent-dashboard-conversions.png differ diff --git a/fern/products/waves/pages/images/agent-dashboard.png b/fern/products/waves/pages/images/agent-dashboard.png new file mode 100644 index 0000000..888291a Binary files /dev/null and b/fern/products/waves/pages/images/agent-dashboard.png differ diff --git a/fern/products/waves/pages/images/api-keys-page-create-button.png b/fern/products/waves/pages/images/api-keys-page-create-button.png new file mode 100644 index 0000000..d7ffbcb Binary files /dev/null and b/fern/products/waves/pages/images/api-keys-page-create-button.png differ diff --git a/fern/products/waves/pages/images/checks-passed.png b/fern/products/waves/pages/images/checks-passed.png new file mode 100644 index 0000000..3303c77 Binary files /dev/null and b/fern/products/waves/pages/images/checks-passed.png differ diff --git a/fern/products/waves/pages/images/console-api-keys.png b/fern/products/waves/pages/images/console-api-keys.png new file mode 100644 index 0000000..f6e52e7 Binary files /dev/null and b/fern/products/waves/pages/images/console-api-keys.png differ diff --git a/fern/products/waves/pages/images/conversions-list.png b/fern/products/waves/pages/images/conversions-list.png new file mode 100644 index 0000000..aa46a94 Binary files /dev/null and 
b/fern/products/waves/pages/images/conversions-list.png differ diff --git a/fern/products/waves/pages/images/create-api-key-modal.png b/fern/products/waves/pages/images/create-api-key-modal.png new file mode 100644 index 0000000..b5f77c8 Binary files /dev/null and b/fern/products/waves/pages/images/create-api-key-modal.png differ diff --git a/fern/products/waves/pages/images/create-audience.png b/fern/products/waves/pages/images/create-audience.png new file mode 100644 index 0000000..e1e062d Binary files /dev/null and b/fern/products/waves/pages/images/create-audience.png differ diff --git a/fern/products/waves/pages/images/create-campaign.png b/fern/products/waves/pages/images/create-campaign.png new file mode 100644 index 0000000..a7efb9d Binary files /dev/null and b/fern/products/waves/pages/images/create-campaign.png differ diff --git a/fern/products/waves/pages/images/create-conversion.png b/fern/products/waves/pages/images/create-conversion.png new file mode 100644 index 0000000..2e95316 Binary files /dev/null and b/fern/products/waves/pages/images/create-conversion.png differ diff --git a/fern/products/waves/pages/images/download.svg b/fern/products/waves/pages/images/download.svg new file mode 100644 index 0000000..f41f94e --- /dev/null +++ b/fern/products/waves/pages/images/download.svg @@ -0,0 +1,3 @@ + + + diff --git a/fern/products/waves/pages/images/hero-dark.svg b/fern/products/waves/pages/images/hero-dark.svg new file mode 100644 index 0000000..c6a30e8 --- /dev/null +++ b/fern/products/waves/pages/images/hero-dark.svg @@ -0,0 +1,161 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/fern/products/waves/pages/images/hero-light.svg 
b/fern/products/waves/pages/images/hero-light.svg new file mode 100644 index 0000000..297d68f --- /dev/null +++ b/fern/products/waves/pages/images/hero-light.svg @@ -0,0 +1,155 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/fern/products/waves/pages/images/ivc-image-1.png b/fern/products/waves/pages/images/ivc-image-1.png new file mode 100644 index 0000000..5c18503 Binary files /dev/null and b/fern/products/waves/pages/images/ivc-image-1.png differ diff --git a/fern/products/waves/pages/images/ivc-image-2.png b/fern/products/waves/pages/images/ivc-image-2.png new file mode 100644 index 0000000..b4d3f32 Binary files /dev/null and b/fern/products/waves/pages/images/ivc-image-2.png differ diff --git a/fern/products/waves/pages/images/ivc-image-3.png b/fern/products/waves/pages/images/ivc-image-3.png new file mode 100644 index 0000000..0cf238e Binary files /dev/null and b/fern/products/waves/pages/images/ivc-image-3.png differ diff --git a/fern/products/waves/pages/images/ivc-image-4.png b/fern/products/waves/pages/images/ivc-image-4.png new file mode 100644 index 0000000..5ecf5d6 Binary files /dev/null and b/fern/products/waves/pages/images/ivc-image-4.png differ diff --git a/fern/products/waves/pages/images/lightning_cover.png b/fern/products/waves/pages/images/lightning_cover.png new file mode 100644 index 0000000..acaebb8 Binary files /dev/null and b/fern/products/waves/pages/images/lightning_cover.png differ diff --git a/fern/products/waves/pages/images/platform-switcher-settings.png b/fern/products/waves/pages/images/platform-switcher-settings.png new file mode 100644 index 0000000..3ffcdc4 Binary files /dev/null and b/fern/products/waves/pages/images/platform-switcher-settings.png differ 
diff --git a/fern/products/waves/pages/images/pvc_page.png b/fern/products/waves/pages/images/pvc_page.png new file mode 100644 index 0000000..8bf1c5e Binary files /dev/null and b/fern/products/waves/pages/images/pvc_page.png differ diff --git a/fern/products/waves/pages/images/save-campaign.png b/fern/products/waves/pages/images/save-campaign.png new file mode 100644 index 0000000..972b0df Binary files /dev/null and b/fern/products/waves/pages/images/save-campaign.png differ diff --git a/fern/products/waves/pages/images/sign-up-page.png b/fern/products/waves/pages/images/sign-up-page.png new file mode 100644 index 0000000..897301e Binary files /dev/null and b/fern/products/waves/pages/images/sign-up-page.png differ diff --git a/fern/products/waves/pages/images/smallest_cover.jpeg b/fern/products/waves/pages/images/smallest_cover.jpeg new file mode 100644 index 0000000..2f584f4 Binary files /dev/null and b/fern/products/waves/pages/images/smallest_cover.jpeg differ diff --git a/fern/products/waves/pages/images/started-campaign.png b/fern/products/waves/pages/images/started-campaign.png new file mode 100644 index 0000000..0c1857a Binary files /dev/null and b/fern/products/waves/pages/images/started-campaign.png differ diff --git a/fern/products/waves/pages/images/test-agent.png b/fern/products/waves/pages/images/test-agent.png new file mode 100644 index 0000000..f90fbec Binary files /dev/null and b/fern/products/waves/pages/images/test-agent.png differ diff --git a/fern/products/waves/pages/images/thunder.png b/fern/products/waves/pages/images/thunder.png new file mode 100644 index 0000000..2a64ee9 Binary files /dev/null and b/fern/products/waves/pages/images/thunder.png differ diff --git a/fern/products/waves/pages/images/thunder.svg b/fern/products/waves/pages/images/thunder.svg new file mode 100644 index 0000000..35f6a0e --- /dev/null +++ b/fern/products/waves/pages/images/thunder.svg @@ -0,0 +1,26 @@ + + + + + + + + + + image/svg+xml + + + + + Openclipart + + + 
+ + + + + + + + diff --git a/fern/products/waves/pages/api-references/add-voice-api.mdx b/fern/products/waves/pages/v2.2.0/api-references/add-voice-api.mdx similarity index 82% rename from fern/products/waves/pages/api-references/add-voice-api.mdx rename to fern/products/waves/pages/v2.2.0/api-references/add-voice-api.mdx index fa4fc8c..b1911c3 100644 --- a/fern/products/waves/pages/api-references/add-voice-api.mdx +++ b/fern/products/waves/pages/v2.2.0/api-references/add-voice-api.mdx @@ -1,7 +1,7 @@ --- title: "Add your Voice" description: "Add your voice using the Waves API." -openapi: "POST /api/v1/lightning-large/add_voice" +openapi: "POST /waves/v1/lightning-large/add_voice" hideApiMarker: False --- @@ -9,7 +9,7 @@ hideApiMarker: False ## Sample cURL Example ```bash -curl -X POST https://waves-api.smallest.ai/api/v1/lightning-large/add_voice \ +curl -X POST https://api.smallest.ai/waves/v1/lightning-large/add_voice \ -H "Authorization: Bearer YOUR_API_KEY" \ -F "displayName=my voice" \ -F "file=@my_voice.wav;type=audio/wav" @@ -20,7 +20,7 @@ Here is a Python example using the `requests` library: ```python python import requests -url = "https://waves-api.smallest.ai/api/v1/lightning-large/add_voice" +url = "https://api.smallest.ai/waves/v1/lightning-large/add_voice" payload = {'displayName': 'my voice'} files=[ ('file', ('my_voice.wav', open('my_voice.wav','rb'), 'audio/wav')) diff --git a/fern/products/waves/pages/api-references/authentication.mdx b/fern/products/waves/pages/v2.2.0/api-references/authentication.mdx similarity index 82% rename from fern/products/waves/pages/api-references/authentication.mdx rename to fern/products/waves/pages/v2.2.0/api-references/authentication.mdx index 4b4bc89..e6f5e5c 100644 --- a/fern/products/waves/pages/api-references/authentication.mdx +++ b/fern/products/waves/pages/v2.2.0/api-references/authentication.mdx @@ -1,6 +1,7 @@ --- title: Authentication description: Learn how to authenticate requests using API keys. 
+icon: key --- # Authentication @@ -9,7 +10,7 @@ Our API requires authentication using API keys to ensure secure access. ## Obtaining Your API Key -To access our API, you need an API key. You can find your API key by visiting [Smallest AI Console](https://console.smallest.ai/apikeys). +To access our API, you need an API key. You can find your API key by visiting [Smallest AI Console](https://app.smallest.ai/dashboard/settings/apikeys). ## Using API Keys @@ -30,4 +31,4 @@ Authorization: Bearer YOUR_API_KEY_HERE - **Use environment variables**: Store API keys securely instead of hardcoding them in your source code. - **Monitor API usage**: Keep track of your API calls to detect any unauthorized access. -For more details, visit our [API Documentation](/docs/authentication). +For more details, visit our [API Documentation](/v2.2.0/content/getting-started/authentication). diff --git a/fern/products/waves/pages/api-references/delete-cloned-voice.mdx b/fern/products/waves/pages/v2.2.0/api-references/delete-cloned-voice.mdx similarity index 73% rename from fern/products/waves/pages/api-references/delete-cloned-voice.mdx rename to fern/products/waves/pages/v2.2.0/api-references/delete-cloned-voice.mdx index 29b3853..a9d01e8 100644 --- a/fern/products/waves/pages/api-references/delete-cloned-voice.mdx +++ b/fern/products/waves/pages/v2.2.0/api-references/delete-cloned-voice.mdx @@ -1,6 +1,6 @@ --- title: 'Delete Cloned Voice' description: 'Delete a cloned voice using the new Waves API.' 
-openapi: 'DELETE /api/v1/lightning-large' +openapi: 'DELETE /waves/v1/lightning-large' hideApiMarker: False --- diff --git a/fern/products/waves/pages/api-references/get-cloned-voices-api.mdx b/fern/products/waves/pages/v2.2.0/api-references/get-cloned-voices-api.mdx similarity index 63% rename from fern/products/waves/pages/api-references/get-cloned-voices-api.mdx rename to fern/products/waves/pages/v2.2.0/api-references/get-cloned-voices-api.mdx index 6658398..397899e 100644 --- a/fern/products/waves/pages/api-references/get-cloned-voices-api.mdx +++ b/fern/products/waves/pages/v2.2.0/api-references/get-cloned-voices-api.mdx @@ -1,6 +1,6 @@ --- title: "Get your cloned Voices" description: "Retrieve your cloned voices." -openapi: "GET /api/v1/lightning-large/get_cloned_voices" +openapi: "GET /waves/v1/lightning-large/get_cloned_voices" hideApiMarker: False --- \ No newline at end of file diff --git a/fern/products/waves/pages/api-references/get-voices-api.mdx b/fern/products/waves/pages/v2.2.0/api-references/get-voices-api.mdx similarity index 74% rename from fern/products/waves/pages/api-references/get-voices-api.mdx rename to fern/products/waves/pages/v2.2.0/api-references/get-voices-api.mdx index 3b25253..6491278 100644 --- a/fern/products/waves/pages/api-references/get-voices-api.mdx +++ b/fern/products/waves/pages/v2.2.0/api-references/get-voices-api.mdx @@ -1,6 +1,6 @@ --- title: "Get Voices" description: "Get voices supported for a given model using the new Waves API." 
-openapi: "GET /api/v1/{model}/get_voices" +openapi: "GET /waves/v1/{model}/get_voices" hideApiMarker: False --- \ No newline at end of file diff --git a/fern/products/waves/versions/v4.0.0/api-references/lighntning-tts.mdx b/fern/products/waves/pages/v2.2.0/api-references/lighntning-tts.mdx similarity index 75% rename from fern/products/waves/versions/v4.0.0/api-references/lighntning-tts.mdx rename to fern/products/waves/pages/v2.2.0/api-references/lighntning-tts.mdx index ca23c1e..34c162f 100644 --- a/fern/products/waves/versions/v4.0.0/api-references/lighntning-tts.mdx +++ b/fern/products/waves/pages/v2.2.0/api-references/lighntning-tts.mdx @@ -2,6 +2,6 @@ title: 'Text to speech' sidebarTitle: 'Text to speech' description: 'Get speech for given text using the Waves API' -openapi: 'POST /api/v1/lightning/get_speech' +openapi: 'POST /waves/v1/lightning/get_speech' hideApiMarker: False --- diff --git a/fern/products/waves/pages/api-references/lightning-large-stream.mdx b/fern/products/waves/pages/v2.2.0/api-references/lightning-large-stream.mdx similarity index 96% rename from fern/products/waves/pages/api-references/lightning-large-stream.mdx rename to fern/products/waves/pages/v2.2.0/api-references/lightning-large-stream.mdx index 9e98093..b8909cf 100644 --- a/fern/products/waves/pages/api-references/lightning-large-stream.mdx +++ b/fern/products/waves/pages/v2.2.0/api-references/lightning-large-stream.mdx @@ -2,7 +2,7 @@ title: 'Text to Speech' sidebarTitle: 'Text to Speech (SSE)' description: 'Stream speech for given text using the Lightning-Large SSE API' -openapi: 'POST /api/v1/lightning-large/stream' +openapi: 'POST /waves/v1/lightning-large/stream' --- ## Overview diff --git a/fern/products/waves/pages/v2.2.0/api-references/lightning-large-ws.mdx b/fern/products/waves/pages/v2.2.0/api-references/lightning-large-ws.mdx new file mode 100644 index 0000000..f86af89 --- /dev/null +++ b/fern/products/waves/pages/v2.2.0/api-references/lightning-large-ws.mdx @@ 
-0,0 +1,203 @@ +--- +title: 'Text to Speech' +sidebarTitle: 'Text to Speech (WSS)' +description: 'Stream speech for given text using the Lightning-Large WebSocket API' +hideApiMarker: True +--- + +## Lightning-Large WebSocket API + +The Lightning-Large WebSocket API allows you to stream high-quality text-to-speech audio in real-time. This is particularly useful for applications requiring low-latency audio generation with superior voice quality. + +### Connection + +Connect to the WebSocket endpoint: + +```javascript +const socket = new WebSocket( + 'wss://api.smallest.ai/waves/v1/lightning-large/stream' +); +``` + +### Authentication + +Authentication is required. Include your API key in the connection headers: + +```javascript +const socket = new WebSocket( + 'wss://api.smallest.ai/waves/v1/lightning-large/stream', + { + headers: { + Authorization: 'Bearer YOUR_API_KEY' + } + } +); +``` + +### Request Format + +Send a JSON message with the following structure: + +```json +{ + "text": "Your text to be converted to speech", + "voice_id": "voice_id_here", + "speed": 1, + "sample_rate": 24000 +} +``` + +#### Parameters + +| Parameter | Type | Required | Description | +| ------------- | ------- | -------- | ------------------------------------------------------------------------- | +| `text` | string | Yes | The text to convert to speech (max 1000 characters) | +| `voice_id` | string | Yes | ID of the voice to use | +| `speed` | number | No | Speech speed multiplier (default: 1) | +| `sample_rate` | number | No | Audio sample rate in Hz (default: 24000) | +| `consistency` | number | No | Consistency of the speech (default: 0.5) | +| `similarity` | number | No | Similarity to the reference audio (default: 0) | +| `enhancement` | boolean | No | Enhances speech quality at the cost of increased latency (default: false) | + +### Response Format + +The WebSocket will stream responses in the following formats: + +#### Chunk Response + +```json +{ + "request_id": 
"047c9091-b770-41d8-b96b-907d1c8406c0", + "status": "chunk", + "data": { + "audio": "base64_encoded_audio_data" + } +} +``` + +The audio data is base64-encoded and can be decoded and played in the browser. + +#### Complete Response + +When all chunks have been sent: + +```json +{ + "request_id": "047c9091-b770-41d8-b96b-907d1c8406c0", + "status": "complete", + "message": "All chunks sent", + "done": true +} +``` + +#### Error Response + +If an error occurs: + +```json +{ + "status": "error", + "message": "Error message", + "errors": [ + /* detailed error information */ + ] +} +``` + +### Javascript Example + +```javascript +const WebSocket = require('ws'); + +const ws = new WebSocket( + 'wss://api.smallest.ai/waves/v1/lightning-large/get_speech/stream', + { + headers: { + Authorization: 'Bearer YOUR_API_KEY' + } + } +); + +ws.onopen = () => { + const request = { + text: 'Hello world! This is a test of the Lightning TTS WebSocket API.', + voice_id: 'blofeld', + speed: 1, + sample_rate: 24000 + }; + + ws.send(JSON.stringify(request)); +}; + +// Handle incoming audio chunks +ws.onmessage = (event) => { + const response = JSON.parse(event.data); + + if (response.status === 'chunk') { + // Decode and play audio + const audioData = Buffer.from(response.data.audio, 'base64'); + // Process audio data... 
+ } else if (response.status === 'complete' && response.done) { + console.log('All audio chunks received'); + ws.close(); + } else if (response.status === 'error') { + console.error('Error:', response.message); + ws.close(); + } +}; + +ws.onerror = (error) => { + console.error('WebSocket error:', error); +}; + +ws.onclose = () => { + console.log('WebSocket connection closed'); +}; +``` + +### Python Example + +```python +import asyncio +import websockets +import json +import base64 + +API_KEY = "YOUR_API_KEY" +WS_URL = "wss://api.smallest.ai/waves/v1/lightning-large/get_speech/stream" + +async def text_to_speech(): + async with websockets.connect(WS_URL, extra_headers={"Authorization": f"Bearer {API_KEY}"}) as ws: + request = { + "text": "Hello world! This is a test of the Lightning-Large TTS WebSocket API.", + "voice_id": "erica", + "speed": 1, + "sample_rate": 24000 + } + + await ws.send(json.dumps(request)) + + while True: + response = await ws.recv() + response_data = json.loads(response) + + if response_data["status"] == "chunk": + audio_data = base64.b64decode(response_data["data"]["audio"]) + print("Received audio chunk") + elif response_data["status"] == "complete" and response_data.get("done", False): + print("All audio chunks received") + break + elif response_data["status"] == "error": + print("Error:", response_data["message"]) + break + +asyncio.run(text_to_speech()) +``` + +### Notes + +- The Lightning-Large model provides higher quality audio than the standard Lightning model +- The API automatically chunks long text and streams each chunk separately +- Credits are deducted based on the length of the input text +- The WebSocket connection will remain open until all chunks are sent or an error occurs +- For optimal performance, keep individual requests under 1000 characters diff --git a/fern/products/waves/pages/v2.2.0/api-references/lightning-large.mdx b/fern/products/waves/pages/v2.2.0/api-references/lightning-large.mdx new file mode 100644
index 0000000..d4fc470 --- /dev/null +++ b/fern/products/waves/pages/v2.2.0/api-references/lightning-large.mdx @@ -0,0 +1,6 @@ +--- +title: 'Text to Speech' +description: 'Get speech for given text using the Waves API' +openapi: 'POST /waves/v1/lightning-large/get_speech' +hideApiMarker: False +--- diff --git a/fern/products/waves/pages/api-references/lightning-tts-ws.mdx b/fern/products/waves/pages/v2.2.0/api-references/lightning-tts-ws.mdx similarity index 96% rename from fern/products/waves/pages/api-references/lightning-tts-ws.mdx rename to fern/products/waves/pages/v2.2.0/api-references/lightning-tts-ws.mdx index b5cf4f9..7cfc53e 100644 --- a/fern/products/waves/pages/api-references/lightning-tts-ws.mdx +++ b/fern/products/waves/pages/v2.2.0/api-references/lightning-tts-ws.mdx @@ -15,7 +15,7 @@ Connect to the SSE endpoint: ```javascript const eventSource = new EventSource( - 'https://api.smallest.ai/api/v1/lightning/stream', + 'https://api.smallest.ai/waves/v1/lightning/stream', { headers: { Authorization: 'Bearer YOUR_API_KEY' @@ -50,7 +50,7 @@ Send a POST request with the following JSON structure: ```javascript const eventSource = new EventSource( - 'https://api.smallest.ai/api/v1/lightning/stream', + 'https://api.smallest.ai/waves/v1/lightning/stream', { headers: { Authorization: 'Bearer YOUR_API_KEY' diff --git a/fern/products/waves/pages/v2.2.0/api-references/lightning-v2-stream.mdx b/fern/products/waves/pages/v2.2.0/api-references/lightning-v2-stream.mdx new file mode 100644 index 0000000..007f89d --- /dev/null +++ b/fern/products/waves/pages/v2.2.0/api-references/lightning-v2-stream.mdx @@ -0,0 +1,25 @@ +--- +title: 'Text to Speech' +sidebarTitle: 'Text to Speech (SSE)' +description: 'Stream speech for given text using the Lightning v2 SSE API' +openapi: 'POST /waves/v1/lightning-v2/stream' +--- + +## Overview + +The Lightning v2 SSE API provides real-time text-to-speech streaming capabilities with high-quality voice synthesis. 
This API uses Server-Sent Events (SSE) to deliver audio chunks as they're generated, enabling low-latency audio playback without waiting for the entire audio file to process. +For an end-to-end example of how to use the Lightning v2 SSE API, check out [Text to Speech (SSE) Example](https://github.com/smallest-inc/waves-examples/blob/main/lightning_v2/http_streaming/http_streaming_api.py) + +## When to Use + +- **Interactive Applications**: Perfect for chatbots, virtual assistants, and other applications requiring immediate voice responses +- **Long-Form Content**: Efficiently stream audio for articles, stories, or other long-form content without buffering delays +- **Voice User Interfaces**: Create natural-sounding voice interfaces with minimal perceived latency +- **Accessibility Solutions**: Provide real-time audio versions of written content for users with visual impairments + +## How It Works + +1. **Make a POST Request**: Send your text and voice settings to the API endpoint +2. **Receive Audio Chunks**: The API processes your text and streams audio back as base64-encoded chunks with 1024 byte size +3. **Process the Stream**: Handle the SSE events to decode and play audio chunks sequentially +4. **End of Stream**: The API sends a completion event when all audio has been delivered diff --git a/fern/products/waves/pages/v2.2.0/api-references/lightning-v2-ws.mdx b/fern/products/waves/pages/v2.2.0/api-references/lightning-v2-ws.mdx new file mode 100644 index 0000000..a035d69 --- /dev/null +++ b/fern/products/waves/pages/v2.2.0/api-references/lightning-v2-ws.mdx @@ -0,0 +1,6 @@ +--- +title: 'Text to Speech (WebSocket)' +description: The Lightning v2 WebSocket API provides real-time text-to-speech streaming capabilities with high-quality voice synthesis. This API uses WebSocket to deliver audio chunks as they're generated, enabling low-latency audio playback without waiting for the entire audio file to process. 
Perfect for interactive applications, voice assistants, and real-time communication systems that require immediate audio feedback. For an end-to-end example of how to use the Lightning v2 WebSocket API, check out [Text to Speech (WS) Example](https://github.com/smallest-inc/waves-examples/tree/main/lightning_v2/ws_streaming) +asyncapi: "/asyncapi-spec/lightning-v2-ws.json /waves/v1/lightning-v2/get_speech/stream" +--- + diff --git a/fern/products/waves/pages/api-references/lightning-large.mdx b/fern/products/waves/pages/v2.2.0/api-references/lightning-v2.mdx similarity index 69% rename from fern/products/waves/pages/api-references/lightning-large.mdx rename to fern/products/waves/pages/v2.2.0/api-references/lightning-v2.mdx index 69784e5..51853d1 100644 --- a/fern/products/waves/pages/api-references/lightning-large.mdx +++ b/fern/products/waves/pages/v2.2.0/api-references/lightning-v2.mdx @@ -1,6 +1,6 @@ --- title: 'Text to Speech' description: 'Get speech for given text using the Waves API' -openapi: 'POST /api/v1/lightning-large/get_speech' +openapi: 'POST /waves/v1/lightning-v2/get_speech' hideApiMarker: False --- diff --git a/fern/products/waves/pages/api-references/lightning.mdx b/fern/products/waves/pages/v2.2.0/api-references/lightning.mdx similarity index 75% rename from fern/products/waves/pages/api-references/lightning.mdx rename to fern/products/waves/pages/v2.2.0/api-references/lightning.mdx index ca23c1e..34c162f 100644 --- a/fern/products/waves/pages/api-references/lightning.mdx +++ b/fern/products/waves/pages/v2.2.0/api-references/lightning.mdx @@ -2,6 +2,6 @@ title: 'Text to speech' sidebarTitle: 'Text to speech' description: 'Get speech for given text using the Waves API' -openapi: 'POST /api/v1/lightning/get_speech' +openapi: 'POST /waves/v1/lightning/get_speech' hideApiMarker: False --- diff --git a/fern/products/waves/pages/api-references/websocket.mdx b/fern/products/waves/pages/v2.2.0/api-references/websocket.mdx similarity index 68% rename 
from fern/products/waves/pages/api-references/websocket.mdx rename to fern/products/waves/pages/v2.2.0/api-references/websocket.mdx index 7d9e893..716fdad 100644 --- a/fern/products/waves/pages/api-references/websocket.mdx +++ b/fern/products/waves/pages/v2.2.0/api-references/websocket.mdx @@ -2,12 +2,13 @@ title: WebSocket Support for TTS API sidebarTitle: WebSocket description: Learn about WebSocket support for our Text-to-Speech (TTS) API, how it works, and when to use it. +icon: arrow-down-wide-short --- -# WebSocket Support for TTS API - Our Text-to-Speech (TTS) API supports WebSocket communication, providing a real-time, low-latency streaming experience for applications that require instant speech synthesis. WebSockets allow continuous data exchange, making them ideal for use cases that demand uninterrupted audio generation. +--- + ## When to Use WebSockets ### 1. **Real-Time Streaming** @@ -22,6 +23,8 @@ For voice assistants, chatbots, and live transcription services, WebSockets ensu A persistent WebSocket connection reduces the need for repeated request-response cycles, significantly improving performance for applications requiring rapid audio generation. +--- + ## How It Works 1. **Establish a Connection**: The client opens a WebSocket connection to our TTS API. @@ -30,9 +33,30 @@ A persistent WebSocket connection reduces the need for repeated request-response 4. **Receive Audio Stream**: As each chunk is processed, it is sent back to the client as a base64-encoded audio buffer. 5. **Completion**: Once all chunks are processed, a complete message is sent to indicate the end of the stream. +--- + +## Timeout Behavior + +By default, the WebSocket connection enforces a **20-second inactivity timeout**. This means that if the client does not send any data within 20 seconds, the server will automatically close the connection to free up resources. 
+ +To support longer sessions for use cases where clients need more time (e.g., long pauses between messages), the timeout can be extended up to **60 seconds**. + +### To extend the timeout: + +You can include the `timeout` parameter in the WebSocket URL like so: + +```json +wss://api.smallest.ai/waves/v1/lightning-v2/get_speech/stream?timeout=60 +``` + + +This sets the inactivity timeout to 60 seconds. Valid values range from **20 (default)** to **60 seconds**. + +--- + ## Implementation Details -The WebSocket TTS API is optimized to handle real-time text-to-speech conversions efficiently. Here are some key implementation aspects: +The WebSocket TTS API is optimized to handle real-time text-to-speech conversions efficiently. Key aspects include: - **Input Validation**: Ensures the provided text and voice ID are valid before processing. - **Chunk Processing**: Long texts are split into smaller chunks (e.g., 240 characters) to optimize processing. @@ -40,9 +64,12 @@ The WebSocket TTS API is optimized to handle real-time text-to-speech conversion - **Task Queue System**: Tasks are pushed to a Redis-based queue for efficient processing and real-time audio generation. - **Error Handling**: If any chunk fails, an error message is logged and sent to the client. +--- + ## Example Request Flow 1. The client sends a WebSocket message: + ```json { "text": "Hello, world!", @@ -50,27 +77,30 @@ The WebSocket TTS API is optimized to handle real-time text-to-speech conversion "speed": 1.0, "sample_rate": 24000 } - ``` + 2. The API validates the request and retrieves the voice settings. + 3. The text is split into chunks and processed in the background. -4. The client receives responses like: - ```json - { - "request_id": "047c9091-b770-41d8-b96b-907d1c8406c0", - "status": "chunk", - "data": { - "audio": "" - } - } - ``` -5. 
Once all chunks are sent, a final message is sent: - ```json - { - "request_id": "047c9091-b770-41d8-b96b-907d1c8406c0", - "status": "complete", - "message": "All chunks sent", - "done": true - } - ``` -For implementation details, check our [WebSocket API documentation](/docs/websockets). +4. The client receives responses like: +```json +{ + "request_id": "047c9091-b770-41d8-b96b-907d1c8406c0", + "status": "chunk", + "data": { + "audio": "" + } +} +``` + +5. Once all chunks are sent, a final message is returned: +```json +{ + "request_id": "047c9091-b770-41d8-b96b-907d1c8406c0", + "status": "complete", + "message": "All chunks sent", + "done": true +} +``` + +For implementation details, check our [WebSocket API documentation](/v2.2.0/content/api-references/lightning-v2-ws). diff --git a/fern/products/waves/pages/best-practices/pvc-best-practices.mdx b/fern/products/waves/pages/v2.2.0/best-practices/pvc-best-practices.mdx similarity index 94% rename from fern/products/waves/pages/best-practices/pvc-best-practices.mdx rename to fern/products/waves/pages/v2.2.0/best-practices/pvc-best-practices.mdx index 3d7b8c4..39dcc20 100644 --- a/fern/products/waves/pages/best-practices/pvc-best-practices.mdx +++ b/fern/products/waves/pages/v2.2.0/best-practices/pvc-best-practices.mdx @@ -6,7 +6,7 @@ icon: "user-tie" To get the most accurate and natural voice clone, it's essential to provide **high-quality reference audio**.
The best practices for recording remain the same as those for **Instant Voice Cloning**, which you can find here: -🔗 **[Instant Voice Cloning - Best Practices](/content/best-practices/vc-best-practices)** +🔗 **[Instant Voice Cloning - Best Practices](/v2.2.0/content/best-practices/vc-best-practices)** However, **Professional Voice Cloning (PVC) significantly improves upon Instant Voice Cloning** in the following ways: diff --git a/fern/products/waves/pages/best-practices/tts-best-practices.mdx b/fern/products/waves/pages/v2.2.0/best-practices/tts-best-practices.mdx similarity index 100% rename from fern/products/waves/pages/best-practices/tts-best-practices.mdx rename to fern/products/waves/pages/v2.2.0/best-practices/tts-best-practices.mdx diff --git a/fern/products/waves/pages/best-practices/vc-best-practices.mdx b/fern/products/waves/pages/v2.2.0/best-practices/vc-best-practices.mdx similarity index 97% rename from fern/products/waves/pages/best-practices/vc-best-practices.mdx rename to fern/products/waves/pages/v2.2.0/best-practices/vc-best-practices.mdx index 000f86d..749e283 100644 --- a/fern/products/waves/pages/best-practices/vc-best-practices.mdx +++ b/fern/products/waves/pages/v2.2.0/best-practices/vc-best-practices.mdx @@ -6,7 +6,7 @@ icon: "clone" To achieve the best results when cloning your voice, it's essential to provide high-quality reference audio. Below are some best practices, dos and don'ts, and examples to guide you. -Ready to Clone Your Voice? Try it out on our platform [waves.smallest.ai](https://waves.smallest.ai/studio/create) +Ready to Clone Your Voice? 
Try it out on our platform [platform](https://app.smallest.ai/waves/studio/create) --- diff --git a/fern/products/waves/pages/v2.2.0/best-practices/video/angry_gen_t.mp4 b/fern/products/waves/pages/v2.2.0/best-practices/video/angry_gen_t.mp4 new file mode 100644 index 0000000..0b3628e Binary files /dev/null and b/fern/products/waves/pages/v2.2.0/best-practices/video/angry_gen_t.mp4 differ diff --git a/fern/products/waves/pages/v2.2.0/best-practices/video/angry_ref_t.mp4 b/fern/products/waves/pages/v2.2.0/best-practices/video/angry_ref_t.mp4 new file mode 100644 index 0000000..aca4571 Binary files /dev/null and b/fern/products/waves/pages/v2.2.0/best-practices/video/angry_ref_t.mp4 differ diff --git a/fern/products/waves/pages/v2.2.0/best-practices/video/bg_ref_t.mp4 b/fern/products/waves/pages/v2.2.0/best-practices/video/bg_ref_t.mp4 new file mode 100644 index 0000000..7d1d723 Binary files /dev/null and b/fern/products/waves/pages/v2.2.0/best-practices/video/bg_ref_t.mp4 differ diff --git a/fern/products/waves/pages/v2.2.0/best-practices/video/fast_gen_t.mp4 b/fern/products/waves/pages/v2.2.0/best-practices/video/fast_gen_t.mp4 new file mode 100644 index 0000000..9c66deb Binary files /dev/null and b/fern/products/waves/pages/v2.2.0/best-practices/video/fast_gen_t.mp4 differ diff --git a/fern/products/waves/pages/v2.2.0/best-practices/video/fast_ref_t.mp4 b/fern/products/waves/pages/v2.2.0/best-practices/video/fast_ref_t.mp4 new file mode 100644 index 0000000..1be0d9c Binary files /dev/null and b/fern/products/waves/pages/v2.2.0/best-practices/video/fast_ref_t.mp4 differ diff --git a/fern/products/waves/pages/v2.2.0/best-practices/video/good_ref_t.mp4 b/fern/products/waves/pages/v2.2.0/best-practices/video/good_ref_t.mp4 new file mode 100644 index 0000000..8b16dec Binary files /dev/null and b/fern/products/waves/pages/v2.2.0/best-practices/video/good_ref_t.mp4 differ diff --git a/fern/products/waves/pages/v2.2.0/best-practices/video/inconsistent_ref_t.mp4 
b/fern/products/waves/pages/v2.2.0/best-practices/video/inconsistent_ref_t.mp4 new file mode 100644 index 0000000..dec6109 Binary files /dev/null and b/fern/products/waves/pages/v2.2.0/best-practices/video/inconsistent_ref_t.mp4 differ diff --git a/fern/products/waves/pages/v2.2.0/best-practices/video/overlap_ref_t.mp4 b/fern/products/waves/pages/v2.2.0/best-practices/video/overlap_ref_t.mp4 new file mode 100644 index 0000000..ebc3285 Binary files /dev/null and b/fern/products/waves/pages/v2.2.0/best-practices/video/overlap_ref_t.mp4 differ diff --git a/fern/products/waves/pages/v2.2.0/best-practices/video/whisper_gen_t.mp4 b/fern/products/waves/pages/v2.2.0/best-practices/video/whisper_gen_t.mp4 new file mode 100644 index 0000000..2c6188a Binary files /dev/null and b/fern/products/waves/pages/v2.2.0/best-practices/video/whisper_gen_t.mp4 differ diff --git a/fern/products/waves/pages/v2.2.0/best-practices/video/whisper_ref_t.mp4 b/fern/products/waves/pages/v2.2.0/best-practices/video/whisper_ref_t.mp4 new file mode 100644 index 0000000..6a2edeb Binary files /dev/null and b/fern/products/waves/pages/v2.2.0/best-practices/video/whisper_ref_t.mp4 differ diff --git a/fern/products/waves/pages/changelog/announcements.mdx b/fern/products/waves/pages/v2.2.0/changelog/announcements.mdx similarity index 64% rename from fern/products/waves/pages/changelog/announcements.mdx rename to fern/products/waves/pages/v2.2.0/changelog/announcements.mdx index a5c5807..333f8ed 100644 --- a/fern/products/waves/pages/changelog/announcements.mdx +++ b/fern/products/waves/pages/v2.2.0/changelog/announcements.mdx @@ -4,6 +4,18 @@ description: "New updates and improvements from Smallest AI." mode: "center" --- + + ## Introducing Lightning v2 + + We are thrilled to announce the release of our Lightning v2 model. This model supports 16 languages, providing high-quality speech synthesis across multiple languages. 
Key features include: + + - **Multilingual Support**: High-quality speech synthesis in 16 languages with voice cloning. + - **100ms TTFB**: Superfast and scalable to support your realtime applications. + - **0.05 per 10K characters**: 3x cheaper than other providers. + + Experience the new capabilities of Lightning v2 on our [Platform](https://app.smallest.ai/waves/studio/create). + + ## Introducing Lightning Multilingual - Now in Beta @@ -13,7 +25,7 @@ mode: "center" - **Versatile Applications**: Ideal for global applications requiring diverse language support. - **Beta Stage**: Currently in beta, with ongoing improvements and updates. - Experience the new capabilities of Lightning Multilingual on our [Platform](https://waves.smallest.ai). + Experience the new capabilities of Lightning Multilingual on our [Platform](https://app.smallest.ai/waves/studio/create). @@ -23,17 +35,17 @@ mode: "center" - **Enhanced Voice Cloning**: More accurate and lifelike voice clones. - **Best-in-Market Latency**: Instant voice cloning with a latency of under 300ms. - Experience the new capabilities of Lightning Large via our [Platform](https://waves.smallest.ai) or the [API](/content/api-references/waves-api). + Experience the new capabilities of Lightning Large via our [Platform](https://app.smallest.ai/waves/studio/create) or the [API](/v2.2.0/content/api-references/lightning). ## Introducing Waves - [Waves](https://waves.smallest.ai) is our new unified platform for speech synthesis. It supports various models designed for real-time applications such as voicebots. + [Waves](https://app.smallest.ai/waves/studio/create) is our new unified platform for speech synthesis. It supports various models designed for real-time applications such as voicebots. However, waves will also be supporting various features such as voice cloning, accent cloning, style transfer, speech to speech etc in the near future, making it a ubiquitous platform for anyone who needs AI generated speech. 
- The voices available on waves platform are available via the [Waves API](/content/api-references/waves-api) + The voices available on waves platform are available via the [Waves API](/v2.2.0/content/api-references/lightning) @@ -42,7 +54,7 @@ mode: "center" Read more about lightning in our release post [here](https://smallest.ai/blog/lightning-fast-text-to-speech). - You can access lightning via the [Waves API](/content/api-references/waves-api) + You can access lightning via the [Waves API](/v2.2.0/content/api-references/lightning) *A lot more coming up, very soon* \ No newline at end of file diff --git a/fern/products/waves/pages/client-libraries/overview.mdx b/fern/products/waves/pages/v2.2.0/client-libraries/overview.mdx similarity index 100% rename from fern/products/waves/pages/client-libraries/overview.mdx rename to fern/products/waves/pages/v2.2.0/client-libraries/overview.mdx diff --git a/fern/products/waves/pages/getting-started/authentication.mdx b/fern/products/waves/pages/v2.2.0/getting-started/authentication.mdx similarity index 95% rename from fern/products/waves/pages/getting-started/authentication.mdx rename to fern/products/waves/pages/v2.2.0/getting-started/authentication.mdx index a12a7a1..15413b5 100644 --- a/fern/products/waves/pages/getting-started/authentication.mdx +++ b/fern/products/waves/pages/v2.2.0/getting-started/authentication.mdx @@ -25,7 +25,7 @@ Authorization: Bearer YOUR_API_KEY Test the API with this curl command by replacing `YOUR_API_KEY` with your actual key: ```bash -curl 'https://waves-api.smallest.ai/api/v1/lightning/get_voices' \ +curl 'https://api.smallest.ai/waves/v1/lightning/get_voices' \ -H 'Content-Type: application/json' \ -H 'Authorization: Bearer YOUR_API_KEY' ``` diff --git a/fern/products/waves/pages/v2.2.0/getting-started/http-stream.mdx b/fern/products/waves/pages/v2.2.0/getting-started/http-stream.mdx new file mode 100644 index 0000000..72fcd86 --- /dev/null +++ 
b/fern/products/waves/pages/v2.2.0/getting-started/http-stream.mdx @@ -0,0 +1,70 @@ +--- +title: "HTTP vs HTTP Streaming vs Websockets" +description: "What should you use?" +icon: "handshake-angle" +--- + +--- + +### Choosing the Right Protocol for Your TTS Application: HTTP, HTTP Streaming, or WebSocket? + +If you’re integrating Waves TTS into your application, one important decision is how to connect to the TTS engine. We support three protocols: HTTP, HTTP Streaming, and WebSocket, each tailored to different use cases. In this post, we’ll break down the strengths of each and help you choose the best fit for your needs. + +## HTTP: Best for Simplicity and Short Requests +**What it is**: +A classic REST-style interaction. You send a complete request (e.g., the full text to be converted to speech), and receive the synthesized audio as a downloadable response. + +**When to use it**: +- You have short or moderate-length texts. +- You want a simple integration, such as from a browser, mobile app, or backend job. +- You don’t need real-time feedback or streaming audio. + +**Pros and Cons**: + +| Pros | Cons | +|-----------------------------------------------|--------------------------------------------------------| +| Simple to integrate with standard HTTP tools | Full audio is returned only after complete synthesis | +| Easy to debug and monitor | Not suitable for real-time or long-form audio | +| Stateless; good for serverless environments | Reconnect needed for each request | +| Works well with caching and CDNs | Higher latency compared to streaming methods | + + +## HTTP Streaming: Best for Faster Playback Without Complexity + +**What it is**: +An enhancement of standard HTTP. The client sends a complete request, but the server streams back the audio as it's being generated, no need to wait for the full file. + +**When to use it**: +- You want faster playback with lower perceived latency. +- You send full input text but need audio to start as soon as possible. 
+- You want low-latency audio delivery without handling connection persistence. + +**Pros and Cons**: + +| Pros | Cons | +|------------------------------------------------|--------------------------------------------------------| +| Lower latency than regular HTTP | Only one-way communication (client → server) | +| Compatible with standard HTTP infrastructure | Full input must still be sent before synthesis starts | +| Audio starts playing as it's generated | No partial or live input updates | +| Easy to adopt with minimal changes | Slightly more complex than basic HTTP | + + +## WebSocket: Best for Real-Time, Interactive Applications + +**What it is**: +A full-duplex, persistent connection that allows two-way communication between the client and server. You can send text dynamically and receive streaming audio back continuously. + +**When to use it**: +- You need real-time, interactive TTS responses. +- Input is dynamic or arrives in chunks (e.g., live typing, conversation). +- You want persistent connections with minimal overhead per message. + +**Pros and Cons**: + +| Pros | Cons | +|----------------------------------------------------|---------------------------------------------------------| +| Ultra low latency | More complex to implement and manage | +| Supports real-time, chunked input and responses | Requires persistent connection management | +| Bi-directional communication | Not ideal for simple or infrequent tasks | +| Great for chatbots, live agents, or dictation apps | May require additional libraries or WebSocket support | + diff --git a/fern/products/waves/pages/v2.2.0/getting-started/models.mdx b/fern/products/waves/pages/v2.2.0/getting-started/models.mdx new file mode 100644 index 0000000..deeeb0a --- /dev/null +++ b/fern/products/waves/pages/v2.2.0/getting-started/models.mdx @@ -0,0 +1,42 @@ +--- +title: "Models" +description: "Find detailed description of each model along with their capabilities and supported languages." 
+icon: "cube" +--- + + + + Our fastest model, optimized for low-latency applications. It can generate 10 seconds of audio in just 100 milliseconds, making it ideal for real-time applications such as voicebots and interactive systems. + + + An upgrade from the Lightning Large model, offering improved performance and quality. It supports 16 languages, making it suitable for a wider range of applications requiring expressive and high-quality speech synthesis. + + + Offers more emotional depth and expressiveness compared to the Lightning model. It supports voice cloning and has a latency of just under 300 milliseconds, making it suitable for applications requiring high-quality, expressive speech. + + + +## Geo-location Based Routing +Waves intelligently routes every request to the nearest server cluster to ensure the lowest possible latency for your applications. We currently operate server clusters in: +- 🇮🇳 India (Mumbai) +- 🇺🇸 USA (Oregon) + +Our routing system automatically detects the client's geographical location and connects them to the optimal server based on network proximity and latency. This process is fully automated, no manual configuration is required on your side. + + +## Model Overview + +| Model ID | Description | Languages Supported | +|-----------------------|-----------------------------------------------------------------------------|---------------------| +| **lightning** | Fastest model with an RTF of 0.01, generating 10 seconds of audio in 100 ms. | `English`
`Hindi` | +| **lightning-large** | More emotional depth and expressiveness, supports voice cloning, latency under 300 ms. | `English`
`Hindi` | +| **lightning-v2** | 100ms TTFB, Supports 16 languages with voice cloning. | `English`
`Hindi`
`Tamil`
`Kannada`
`Gujarati`
`Bengali`
`Marathi`
`German`
`French`
`Spanish`
`Italian`
`Polish`
`Dutch`
`Russian`
`Arabic`
`Hebrew` | + + + +Note: The API uses [ISO 639-1 language codes - Set 1](https://en.wikipedia.org/wiki/List_of_ISO_639_language_codes) (2-letter codes) to specify supported languages. + + +## Pricing + +Our pricing model is designed to be flexible and scalable, catering to different usage needs. For detailed pricing information, please visit our [pricing page](https://smallest.ai/text-to-speech) or contact our sales team at [support@smallest.ai](mailto:support@smallest.ai). \ No newline at end of file diff --git a/fern/products/waves/pages/getting-started/quickstart.mdx b/fern/products/waves/pages/v2.2.0/getting-started/quickstart.mdx similarity index 92% rename from fern/products/waves/pages/getting-started/quickstart.mdx rename to fern/products/waves/pages/v2.2.0/getting-started/quickstart.mdx index 1619c94..96c37bd 100644 --- a/fern/products/waves/pages/getting-started/quickstart.mdx +++ b/fern/products/waves/pages/v2.2.0/getting-started/quickstart.mdx @@ -6,7 +6,7 @@ icon: "rocket" ## Step 1: Sign Up & get the API Key -1. Visit [waves.smallest.ai](https://waves.smallest.ai/) and sign up for an account or log in if you already have an account. +1. Visit [platform](https://app.smallest.ai/waves/studio/create) and sign up for an account or log in if you already have an account. 2. Navigate to `API Key` tab in your account dashboard. 3. Create a new API Key and copy it. 4. Export the API Key in your environment with the name `SMALLEST_API_KEY`, ensuring that your application can access it securely for authentication. @@ -20,7 +20,7 @@ pip install smallestai When using an SDK in your application, make sure to pin to at least the major version (e.g., ==1.*). This helps ensure your application remains stable and avoids potential issues from breaking changes in future updates. 
## Step 3: Make Your First API Call -Here is a basic example of how to use the Python SDK to convert text to speech: +Here is a basic example of how to use the Python SDK to convert text to speech: If you are using a `voice_id` corresponding to a voice clone, you should explicitly set the `model` parameter to `"lightning-large"` in the `Smallest` client or payload. diff --git a/fern/products/waves/pages/integrations/livekit.mdx b/fern/products/waves/pages/v2.2.0/integrations/livekit.mdx similarity index 100% rename from fern/products/waves/pages/integrations/livekit.mdx rename to fern/products/waves/pages/v2.2.0/integrations/livekit.mdx diff --git a/fern/products/waves/pages/integrations/plivo.mdx b/fern/products/waves/pages/v2.2.0/integrations/plivo.mdx similarity index 100% rename from fern/products/waves/pages/integrations/plivo.mdx rename to fern/products/waves/pages/v2.2.0/integrations/plivo.mdx diff --git a/fern/products/waves/pages/integrations/vonage.mdx b/fern/products/waves/pages/v2.2.0/integrations/vonage.mdx similarity index 100% rename from fern/products/waves/pages/integrations/vonage.mdx rename to fern/products/waves/pages/v2.2.0/integrations/vonage.mdx diff --git a/fern/products/waves/pages/introduction/introduction.mdx b/fern/products/waves/pages/v2.2.0/introduction/introduction.mdx similarity index 77% rename from fern/products/waves/pages/introduction/introduction.mdx rename to fern/products/waves/pages/v2.2.0/introduction/introduction.mdx index 8f93570..7f779bf 100644 --- a/fern/products/waves/pages/introduction/introduction.mdx +++ b/fern/products/waves/pages/v2.2.0/introduction/introduction.mdx @@ -6,7 +6,7 @@ icon: "globe" ## About Waves -Welcome to [Waves](https://waves.smallest.ai/), the cutting-edge platform designed to deliver real-time, hyper-realistic text-to-speech solutions. 
Our platform leverages advanced AI models to provide seamless and natural-sounding speech synthesis, catering to a wide range of applications including voicebots, interactive systems, and more. +Welcome to [Waves](https://app.smallest.ai/waves/studio/create), the cutting-edge platform designed to deliver real-time, hyper-realistic text-to-speech solutions. Our platform leverages advanced AI models to provide seamless and natural-sounding speech synthesis, catering to a wide range of applications including voicebots, interactive systems, and more. ### Key Features @@ -16,7 +16,7 @@ Welcome to [Waves](https://waves.smallest.ai/), the cutting-edge platform design ### Stay Updated -We are committed to continuous improvement and innovation. All new releases and updates are announced [here](/content/changelog/announcements). +We are committed to continuous improvement and innovation. All new releases and updates are announced [here](/v2.2.0/content/changelog/announcements). ### Get in Touch diff --git a/fern/products/waves/pages/product/projects.mdx b/fern/products/waves/pages/v2.2.0/product/projects.mdx similarity index 98% rename from fern/products/waves/pages/product/projects.mdx rename to fern/products/waves/pages/v2.2.0/product/projects.mdx index 1d603fe..752a469 100644 --- a/fern/products/waves/pages/product/projects.mdx +++ b/fern/products/waves/pages/v2.2.0/product/projects.mdx @@ -112,7 +112,7 @@ Welcome to the official documentation for our text-to-speech (TTS) project. Our ### Installation & Setup -1. Register for an account and +1. Register for an account and log into the platform. 2. Create a new project or open an existing one. 3. Add or paste your text content to the project. 
diff --git a/fern/products/waves/pages/v2.2.0/text-to-speech/get-voice-models-langs.mdx b/fern/products/waves/pages/v2.2.0/text-to-speech/get-voice-models-langs.mdx new file mode 100644 index 0000000..da9f543 --- /dev/null +++ b/fern/products/waves/pages/v2.2.0/text-to-speech/get-voice-models-langs.mdx @@ -0,0 +1,74 @@ +--- +title: "Get available Voices, Models and Languages" +description: "Learn how to retrieve available voices, models, and languages." +icon: "toolbox" +--- + +In this tutorial, you will learn how to use the Smallest AI platform to fetch the available languages, models, and voices. By the end of this guide, you'll be able to retrieve and display this information using the Smallest AI SDK. + +## Requirements + +Before you begin, ensure you have the following: + +- Python (3.9 or higher) installed on your machine. +- An API key from the Smallest AI platform (sign up [here](https://app.smallest.ai/waves/studio/create)). +- The Smallest AI Python SDK installed. If you haven't installed it yet, follow the instructions below: + +### Install the SDK +```bash +pip install smallestai +``` + +Set your API key as an environment variable: +```bash +export SMALLEST_API_KEY=YOUR_API_KEY +``` + +## Fetch Available Voices, Models, and Languages + +The Smallest AI SDK allows you to query the available languages, voices, and models for your TTS needs. Here's how you can do it: + +If you are using a `voice_id` corresponding to a voice clone, you should explicitly set the `model` parameter to `"lightning-large"` in the `Smallest` client or payload. 
+ + + +```python python +from smallest import Smallest + +def main(): + client = Smallest(api_key="YOUR_API_KEY") + + # Get available languages + languages = client.get_languages() + print(f"Available Languages: {languages}") + + # Get available voices for the "lightning" model, alternatively `lightning-large` + voices = client.get_voices(model="lightning") + print(f"Available Voices (Model: 'lightning'): {voices}") + + # Get user-specific cloned voices + cloned_voices = client.get_cloned_voices() + print(f"Available Cloned Voices: {cloned_voices}") + + # Get available models + models = client.get_models() + print(f"Available Models: {models}") + +if __name__ == "__main__": + main() +``` + + +## Explanation of Functions + +- `get_languages()`: Retrieves the list of supported languages for Text-to-Speech. +- `get_voices(model="model_name")`: Retrieves the voices available for a specific model (e.g., "lightning"). +- `get_cloned_voices()`: Fetches all user-specific cloned voices. +- `get_models()`: Retrieves the TTS models on the platform available through API. + + +## Need Help? + +If you have any questions or encounter issues, our community is here to help! +- Join our [Discord server](https://discord.gg/ywShEyXHBW) to connect with other developers and get real-time support. +- Contact us via email: [support@smallest.ai](mailto:support@smallest.ai). 
\ No newline at end of file diff --git a/fern/products/waves/pages/text-to-speech/how-to-tts.mdx b/fern/products/waves/pages/v2.2.0/text-to-speech/how-to-tts.mdx similarity index 99% rename from fern/products/waves/pages/text-to-speech/how-to-tts.mdx rename to fern/products/waves/pages/v2.2.0/text-to-speech/how-to-tts.mdx index 23a4c7b..9cd33fa 100644 --- a/fern/products/waves/pages/text-to-speech/how-to-tts.mdx +++ b/fern/products/waves/pages/v2.2.0/text-to-speech/how-to-tts.mdx @@ -13,7 +13,7 @@ You can access the source code for the Python SDK on our [GitHub repository](htt Before you begin, ensure you have the following: - Python (3.9 or higher) installed on your machine. -- An API key from the Smallest AI platform (sign up [here](https://waves.smallest.ai)). +- An API key from the Smallest AI platform (sign up [here](https://app.smallest.ai/waves/studio/create)). ## Setup diff --git a/fern/products/waves/pages/text-to-speech/llm-to-tts.mdx b/fern/products/waves/pages/v2.2.0/text-to-speech/llm-to-tts.mdx similarity index 100% rename from fern/products/waves/pages/text-to-speech/llm-to-tts.mdx rename to fern/products/waves/pages/v2.2.0/text-to-speech/llm-to-tts.mdx diff --git a/fern/products/waves/pages/voice-cloning/how-to-delete-vc.mdx b/fern/products/waves/pages/v2.2.0/voice-cloning/how-to-delete-vc.mdx similarity index 97% rename from fern/products/waves/pages/voice-cloning/how-to-delete-vc.mdx rename to fern/products/waves/pages/v2.2.0/voice-cloning/how-to-delete-vc.mdx index 4bedc84..dc7e5ce 100644 --- a/fern/products/waves/pages/voice-cloning/how-to-delete-vc.mdx +++ b/fern/products/waves/pages/v2.2.0/voice-cloning/how-to-delete-vc.mdx @@ -12,7 +12,7 @@ You can access the source code for the Python SDK on our [GitHub repository](htt Before you begin, ensure you have the following: - Python (3.9 or higher) installed on your machine. -- An API key from the Smallest AI platform (sign up [here](https://waves.smallest.ai)). 
+- An API key from the Smallest AI platform (sign up [here](https://app.smallest.ai/waves/studio/create)). ## Setup diff --git a/fern/products/waves/pages/voice-cloning/how-to-pvc.mdx b/fern/products/waves/pages/v2.2.0/voice-cloning/how-to-pvc.mdx similarity index 87% rename from fern/products/waves/pages/voice-cloning/how-to-pvc.mdx rename to fern/products/waves/pages/v2.2.0/voice-cloning/how-to-pvc.mdx index 20c3a3c..87108b2 100644 --- a/fern/products/waves/pages/voice-cloning/how-to-pvc.mdx +++ b/fern/products/waves/pages/v2.2.0/voice-cloning/how-to-pvc.mdx @@ -9,7 +9,7 @@ In this tutorial, you will learn how to use the Smallest AI platform to create a # Creating a Professional Voice Clone 1. **Go to the Smallest AI Platform** - Navigate to [smallest.ai](https://waves.smallest.ai/voice-clone) and click on **Create New**. In the modal that appears, select **Professional Voice Clone**. This will direct you to the setup page: + Navigate to [smallest.ai](https://app.smallest.ai/waves/voice-cloning) and click on **Create New**. In the modal that appears, select **Professional Voice Clone**. This will direct you to the setup page: ![Voice Clone Setup](../../images/pvc_page.png) diff --git a/fern/products/waves/pages/voice-cloning/how-to-vc-ui.mdx b/fern/products/waves/pages/v2.2.0/voice-cloning/how-to-vc-ui.mdx similarity index 86% rename from fern/products/waves/pages/voice-cloning/how-to-vc-ui.mdx rename to fern/products/waves/pages/v2.2.0/voice-cloning/how-to-vc-ui.mdx index 1e97f5b..51cc4be 100644 --- a/fern/products/waves/pages/voice-cloning/how-to-vc-ui.mdx +++ b/fern/products/waves/pages/v2.2.0/voice-cloning/how-to-vc-ui.mdx @@ -8,7 +8,7 @@ In this tutorial, you will learn how to use the **Smallest AI** platform to crea # Creating an Instant Voice Clone 1️. **Go to the Smallest AI Platform** - - Navigate to **[smallest.ai](https://waves.smallest.ai/voice-clone)** and click on **Create New**. 
+ - Navigate to **[smallest.ai](https://app.smallest.ai/waves/voice-cloning)** and click on **Create New**. - In the modal that appears, select **Instant Voice Clone**. ![Voice Clone Setup](../../images/ivc-image-1.png) @@ -16,7 +16,7 @@ In this tutorial, you will learn how to use the **Smallest AI** platform to crea 2️. **Upload Your Clean Reference Audio** - Select a **short, high-quality** audio clip (5-15 seconds). - Ensure the recording is **clear and noise-free** for the best results. - - Follow the recommended **[best practices](/content/best-practices/vc-best-practices)** to maximize quality. + - Follow the recommended **[best practices](/v2.2.0/content/best-practices/vc-best-practices)** to maximize quality. ![Upload your clean reference audio](../../images/ivc-image-2.png) diff --git a/fern/products/waves/pages/voice-cloning/how-to-vc.mdx b/fern/products/waves/pages/v2.2.0/voice-cloning/how-to-vc.mdx similarity index 98% rename from fern/products/waves/pages/voice-cloning/how-to-vc.mdx rename to fern/products/waves/pages/v2.2.0/voice-cloning/how-to-vc.mdx index 534c085..cb1a1d6 100644 --- a/fern/products/waves/pages/voice-cloning/how-to-vc.mdx +++ b/fern/products/waves/pages/v2.2.0/voice-cloning/how-to-vc.mdx @@ -12,7 +12,7 @@ You can access the source code for the Python SDK on our [GitHub repository](htt Before you begin, ensure you have the following: - Python (3.9 or higher) installed on your machine. -- An API key from the Smallest AI platform (sign up [here](https://waves.smallest.ai)). +- An API key from the Smallest AI platform (sign up [here](https://app.smallest.ai/waves/studio/create)). 
## Setup diff --git a/fern/products/waves/pages/v2.2.0/voice-cloning/images/agent-dashboard-conversions.png b/fern/products/waves/pages/v2.2.0/voice-cloning/images/agent-dashboard-conversions.png new file mode 100644 index 0000000..edf540d Binary files /dev/null and b/fern/products/waves/pages/v2.2.0/voice-cloning/images/agent-dashboard-conversions.png differ diff --git a/fern/products/waves/pages/v2.2.0/voice-cloning/images/agent-dashboard.png b/fern/products/waves/pages/v2.2.0/voice-cloning/images/agent-dashboard.png new file mode 100644 index 0000000..888291a Binary files /dev/null and b/fern/products/waves/pages/v2.2.0/voice-cloning/images/agent-dashboard.png differ diff --git a/fern/products/waves/pages/v2.2.0/voice-cloning/images/checks-passed.png b/fern/products/waves/pages/v2.2.0/voice-cloning/images/checks-passed.png new file mode 100644 index 0000000..3303c77 Binary files /dev/null and b/fern/products/waves/pages/v2.2.0/voice-cloning/images/checks-passed.png differ diff --git a/fern/products/waves/pages/v2.2.0/voice-cloning/images/conversions-list.png b/fern/products/waves/pages/v2.2.0/voice-cloning/images/conversions-list.png new file mode 100644 index 0000000..aa46a94 Binary files /dev/null and b/fern/products/waves/pages/v2.2.0/voice-cloning/images/conversions-list.png differ diff --git a/fern/products/waves/pages/v2.2.0/voice-cloning/images/create-audience.png b/fern/products/waves/pages/v2.2.0/voice-cloning/images/create-audience.png new file mode 100644 index 0000000..e1e062d Binary files /dev/null and b/fern/products/waves/pages/v2.2.0/voice-cloning/images/create-audience.png differ diff --git a/fern/products/waves/pages/v2.2.0/voice-cloning/images/create-campaign.png b/fern/products/waves/pages/v2.2.0/voice-cloning/images/create-campaign.png new file mode 100644 index 0000000..a7efb9d Binary files /dev/null and b/fern/products/waves/pages/v2.2.0/voice-cloning/images/create-campaign.png differ diff --git 
a/fern/products/waves/pages/v2.2.0/voice-cloning/images/create-conversion.png b/fern/products/waves/pages/v2.2.0/voice-cloning/images/create-conversion.png new file mode 100644 index 0000000..2e95316 Binary files /dev/null and b/fern/products/waves/pages/v2.2.0/voice-cloning/images/create-conversion.png differ diff --git a/fern/products/waves/pages/v2.2.0/voice-cloning/images/download.svg b/fern/products/waves/pages/v2.2.0/voice-cloning/images/download.svg new file mode 100644 index 0000000..f41f94e --- /dev/null +++ b/fern/products/waves/pages/v2.2.0/voice-cloning/images/download.svg @@ -0,0 +1,3 @@ + + + diff --git a/fern/products/waves/pages/v2.2.0/voice-cloning/images/hero-dark.svg b/fern/products/waves/pages/v2.2.0/voice-cloning/images/hero-dark.svg new file mode 100644 index 0000000..c6a30e8 --- /dev/null +++ b/fern/products/waves/pages/v2.2.0/voice-cloning/images/hero-dark.svg @@ -0,0 +1,161 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/fern/products/waves/pages/v2.2.0/voice-cloning/images/hero-light.svg b/fern/products/waves/pages/v2.2.0/voice-cloning/images/hero-light.svg new file mode 100644 index 0000000..297d68f --- /dev/null +++ b/fern/products/waves/pages/v2.2.0/voice-cloning/images/hero-light.svg @@ -0,0 +1,155 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/fern/products/waves/pages/v2.2.0/voice-cloning/images/ivc-image-1.png 
b/fern/products/waves/pages/v2.2.0/voice-cloning/images/ivc-image-1.png new file mode 100644 index 0000000..5c18503 Binary files /dev/null and b/fern/products/waves/pages/v2.2.0/voice-cloning/images/ivc-image-1.png differ diff --git a/fern/products/waves/pages/v2.2.0/voice-cloning/images/ivc-image-2.png b/fern/products/waves/pages/v2.2.0/voice-cloning/images/ivc-image-2.png new file mode 100644 index 0000000..b4d3f32 Binary files /dev/null and b/fern/products/waves/pages/v2.2.0/voice-cloning/images/ivc-image-2.png differ diff --git a/fern/products/waves/pages/v2.2.0/voice-cloning/images/ivc-image-3.png b/fern/products/waves/pages/v2.2.0/voice-cloning/images/ivc-image-3.png new file mode 100644 index 0000000..0cf238e Binary files /dev/null and b/fern/products/waves/pages/v2.2.0/voice-cloning/images/ivc-image-3.png differ diff --git a/fern/products/waves/pages/v2.2.0/voice-cloning/images/ivc-image-4.png b/fern/products/waves/pages/v2.2.0/voice-cloning/images/ivc-image-4.png new file mode 100644 index 0000000..5ecf5d6 Binary files /dev/null and b/fern/products/waves/pages/v2.2.0/voice-cloning/images/ivc-image-4.png differ diff --git a/fern/products/waves/pages/v2.2.0/voice-cloning/images/lightning_cover.png b/fern/products/waves/pages/v2.2.0/voice-cloning/images/lightning_cover.png new file mode 100644 index 0000000..acaebb8 Binary files /dev/null and b/fern/products/waves/pages/v2.2.0/voice-cloning/images/lightning_cover.png differ diff --git a/fern/products/waves/pages/v2.2.0/voice-cloning/images/pvc_page.png b/fern/products/waves/pages/v2.2.0/voice-cloning/images/pvc_page.png new file mode 100644 index 0000000..8bf1c5e Binary files /dev/null and b/fern/products/waves/pages/v2.2.0/voice-cloning/images/pvc_page.png differ diff --git a/fern/products/waves/pages/v2.2.0/voice-cloning/images/save-campaign.png b/fern/products/waves/pages/v2.2.0/voice-cloning/images/save-campaign.png new file mode 100644 index 0000000..972b0df Binary files /dev/null and 
b/fern/products/waves/pages/v2.2.0/voice-cloning/images/save-campaign.png differ diff --git a/fern/products/waves/pages/v2.2.0/voice-cloning/images/smallest_cover.jpeg b/fern/products/waves/pages/v2.2.0/voice-cloning/images/smallest_cover.jpeg new file mode 100644 index 0000000..2f584f4 Binary files /dev/null and b/fern/products/waves/pages/v2.2.0/voice-cloning/images/smallest_cover.jpeg differ diff --git a/fern/products/waves/pages/v2.2.0/voice-cloning/images/started-campaign.png b/fern/products/waves/pages/v2.2.0/voice-cloning/images/started-campaign.png new file mode 100644 index 0000000..0c1857a Binary files /dev/null and b/fern/products/waves/pages/v2.2.0/voice-cloning/images/started-campaign.png differ diff --git a/fern/products/waves/pages/v2.2.0/voice-cloning/images/test-agent.png b/fern/products/waves/pages/v2.2.0/voice-cloning/images/test-agent.png new file mode 100644 index 0000000..f90fbec Binary files /dev/null and b/fern/products/waves/pages/v2.2.0/voice-cloning/images/test-agent.png differ diff --git a/fern/products/waves/pages/v2.2.0/voice-cloning/images/thunder.png b/fern/products/waves/pages/v2.2.0/voice-cloning/images/thunder.png new file mode 100644 index 0000000..2a64ee9 Binary files /dev/null and b/fern/products/waves/pages/v2.2.0/voice-cloning/images/thunder.png differ diff --git a/fern/products/waves/pages/v2.2.0/voice-cloning/images/thunder.svg b/fern/products/waves/pages/v2.2.0/voice-cloning/images/thunder.svg new file mode 100644 index 0000000..35f6a0e --- /dev/null +++ b/fern/products/waves/pages/v2.2.0/voice-cloning/images/thunder.svg @@ -0,0 +1,26 @@ + + + + + + + + + + image/svg+xml + + + + + Openclipart + + + + + + + + + + + diff --git a/fern/products/waves/pages/voice-cloning/types-of-clone.mdx b/fern/products/waves/pages/v2.2.0/voice-cloning/types-of-clone.mdx similarity index 100% rename from fern/products/waves/pages/voice-cloning/types-of-clone.mdx rename to fern/products/waves/pages/v2.2.0/voice-cloning/types-of-clone.mdx diff 
--git a/fern/products/waves/pages/v3.0.1/api-references/add-voice-api.mdx b/fern/products/waves/pages/v3.0.1/api-references/add-voice-api.mdx new file mode 100644 index 0000000..b1911c3 --- /dev/null +++ b/fern/products/waves/pages/v3.0.1/api-references/add-voice-api.mdx @@ -0,0 +1,37 @@ +--- +title: "Add your Voice" +description: "Add your voice using the Waves API." +openapi: "POST /waves/v1/lightning-large/add_voice" +hideApiMarker: False +--- + +> **Note:** The Mintlify web UI currently does not correctly upload files in the API request. Below, we have provided code examples in Python and curl to help you test the API. + +## Sample cURL Example +```bash +curl -X POST https://api.smallest.ai/waves/v1/lightning-large/add_voice \ + -H "Authorization: Bearer YOUR_API_KEY" \ + -F "displayName=my voice" \ + -F "file=@my_voice.wav;type=audio/wav" +``` +## Sample Code Example + +Here is a Python example using the `requests` library: + +```python python +import requests +url = "https://api.smallest.ai/waves/v1/lightning-large/add_voice" +payload = {'displayName': 'my voice'} +files=[ + ('file', ('my_voice.wav', open('my_voice.wav','rb'), 'audio/wav')) +] +headers = { + 'Authorization': 'Bearer YOUR_API_KEY' +} +response = requests.request("POST", url, headers=headers, data=payload, files=files) +print(response.text) +``` + + +Replace `YOUR_API_KEY` with your actual API key and `example.wav` with the path to your audio file. + diff --git a/fern/products/waves/pages/v3.0.1/api-references/authentication.mdx b/fern/products/waves/pages/v3.0.1/api-references/authentication.mdx new file mode 100644 index 0000000..531aa54 --- /dev/null +++ b/fern/products/waves/pages/v3.0.1/api-references/authentication.mdx @@ -0,0 +1,34 @@ +--- +title: Authentication +description: Learn how to authenticate requests using API keys. +icon: key +--- + +# Authentication + +Our API requires authentication using API keys to ensure secure access. 
+ +## Obtaining Your API Key + +To access our API, you need an API key. You can find your API key by visiting [Smallest AI Console](https://app.smallest.ai/dashboard/settings/apikeys). + +## Using API Keys + +Once you have an API key, you must include it in the `Authorization` header of each request using the **Bearer Token** scheme. + +### Example Request + +```http +GET /v1/some-endpoint HTTP/1.1 +Host: api.smallest.ai +Authorization: Bearer YOUR_API_KEY_HERE +``` + +## Security Best Practices + +- **Keep your API key private**: Do not expose your key in client-side applications. +- **Rotate keys periodically**: Regularly regenerate your API keys to enhance security. +- **Use environment variables**: Store API keys securely instead of hardcoding them in your source code. +- **Monitor API usage**: Keep track of your API calls to detect any unauthorized access. + +For more details, visit our [API Documentation](/v3.0.1/content/getting-started/authentication). diff --git a/fern/products/waves/pages/v3.0.1/api-references/delete-cloned-voice.mdx b/fern/products/waves/pages/v3.0.1/api-references/delete-cloned-voice.mdx new file mode 100644 index 0000000..a9d01e8 --- /dev/null +++ b/fern/products/waves/pages/v3.0.1/api-references/delete-cloned-voice.mdx @@ -0,0 +1,6 @@ +--- +title: 'Delete Cloned Voice' +description: 'Delete a cloned voice using the new Waves API.' +openapi: 'DELETE /waves/v1/lightning-large' +hideApiMarker: False +--- diff --git a/fern/products/waves/pages/v3.0.1/api-references/get-cloned-voices-api.mdx b/fern/products/waves/pages/v3.0.1/api-references/get-cloned-voices-api.mdx new file mode 100644 index 0000000..397899e --- /dev/null +++ b/fern/products/waves/pages/v3.0.1/api-references/get-cloned-voices-api.mdx @@ -0,0 +1,6 @@ +--- +title: "Get your cloned Voices" +description: "Retrieve your cloned voices." 
+openapi: "GET /waves/v1/lightning-large/get_cloned_voices" +hideApiMarker: False +--- \ No newline at end of file diff --git a/fern/products/waves/pages/v3.0.1/api-references/get-voices-api.mdx b/fern/products/waves/pages/v3.0.1/api-references/get-voices-api.mdx new file mode 100644 index 0000000..6491278 --- /dev/null +++ b/fern/products/waves/pages/v3.0.1/api-references/get-voices-api.mdx @@ -0,0 +1,6 @@ +--- +title: "Get Voices" +description: "Get voices supported for a given model using the new Waves API." +openapi: "GET /waves/v1/{model}/get_voices" +hideApiMarker: False +--- \ No newline at end of file diff --git a/fern/products/waves/pages/api-references/lighntning-tts.mdx b/fern/products/waves/pages/v3.0.1/api-references/lighntning-tts.mdx similarity index 75% rename from fern/products/waves/pages/api-references/lighntning-tts.mdx rename to fern/products/waves/pages/v3.0.1/api-references/lighntning-tts.mdx index ca23c1e..34c162f 100644 --- a/fern/products/waves/pages/api-references/lighntning-tts.mdx +++ b/fern/products/waves/pages/v3.0.1/api-references/lighntning-tts.mdx @@ -2,6 +2,6 @@ title: 'Text to speech' sidebarTitle: 'Text to speech' description: 'Get speech for given text using the Waves API' -openapi: 'POST /api/v1/lightning/get_speech' +openapi: 'POST /waves/v1/lightning/get_speech' hideApiMarker: False --- diff --git a/fern/products/waves/pages/v3.0.1/api-references/lightning-large-stream.mdx b/fern/products/waves/pages/v3.0.1/api-references/lightning-large-stream.mdx new file mode 100644 index 0000000..e314d6e --- /dev/null +++ b/fern/products/waves/pages/v3.0.1/api-references/lightning-large-stream.mdx @@ -0,0 +1,25 @@ +--- +title: 'Text to Speech' +sidebarTitle: 'Text to Speech (SSE)' +description: 'Stream speech for given text using the Lightning-Large SSE API' +openapi: 'POST /waves/v1/lightning-large/stream' +--- + +## Overview + +The Lightning-Large SSE API provides real-time text-to-speech streaming capabilities with high-quality voice 
synthesis. This API uses Server-Sent Events (SSE) to deliver audio chunks as they're generated, enabling low-latency audio playback without waiting for the entire audio file to process. +For an end-to-end example of how to use the Lightning-Large SSE API, check out [Text to Speech (SSE) Example](https://github.com/smallest-inc/waves-examples/blob/main/lightning_large/http_streaming/http_streaming_api.py) + +## When to Use + +- **Interactive Applications**: Perfect for chatbots, virtual assistants, and other applications requiring immediate voice responses +- **Long-Form Content**: Efficiently stream audio for articles, stories, or other long-form content without buffering delays +- **Voice User Interfaces**: Create natural-sounding voice interfaces with minimal perceived latency +- **Accessibility Solutions**: Provide real-time audio versions of written content for users with visual impairments + +## How It Works + +1. **Make a POST Request**: Send your text and voice settings to the API endpoint +2. **Receive Audio Chunks**: The API processes your text and streams audio back as base64-encoded chunks with 1024 byte size +3. **Process the Stream**: Handle the SSE events to decode and play audio chunks sequentially +4. 
**End of Stream**: The API sends a completion event when all audio has been delivered diff --git a/fern/products/waves/pages/api-references/lightning-large-ws.mdx b/fern/products/waves/pages/v3.0.1/api-references/lightning-large-ws.mdx similarity index 94% rename from fern/products/waves/pages/api-references/lightning-large-ws.mdx rename to fern/products/waves/pages/v3.0.1/api-references/lightning-large-ws.mdx index 68a7cd0..9660c2b 100644 --- a/fern/products/waves/pages/api-references/lightning-large-ws.mdx +++ b/fern/products/waves/pages/v3.0.1/api-references/lightning-large-ws.mdx @@ -15,7 +15,7 @@ Connect to the WebSocket endpoint: ```javascript const socket = new WebSocket( - 'wss://api.smallest.ai/api/v1/lightning-large/stream' + 'wss://api.smallest.ai/waves/v1/lightning-large/stream' ); ``` @@ -25,7 +25,7 @@ Authentication is required. Include your API key in the connection headers: ```javascript const socket = new WebSocket( - 'wss://api.smallest.ai/api/v1/lightning-large/stream', + 'wss://api.smallest.ai/waves/v1/lightning-large/stream', { headers: { Authorization: 'Bearer YOUR_API_KEY' @@ -99,7 +99,7 @@ If an error occurs: "status": "error", "message": "Error message", "errors": [ - /* detailed error information */ + error information ] } ``` @@ -110,7 +110,7 @@ If an error occurs: const WebSocket = require('ws'); const ws = new WebSocket( - 'wss://waves-api.smallest.ai/api/v1/lightning-large/get_speech/stream', + 'wss://api.smallest.ai/waves/v1/lightning-large/get_speech/stream', { headers: { Authorization: 'Bearer YOUR_API_KEY' @@ -164,7 +164,7 @@ import json import base64 API_KEY = "YOUR_API_KEY" -WS_URL = "wss://waves-api.smallest.ai/api/v1/lightning-large/get_speech/stream" +WS_URL = "wss://api.smallest.ai/waves/v1/lightning-large/get_speech/stream" async def text_to_speech(): async with websockets.connect(WS_URL, extra_headers={"Authorization": f"Bearer {API_KEY}"}) as ws: diff --git 
a/fern/products/waves/pages/v3.0.1/api-references/lightning-large.mdx b/fern/products/waves/pages/v3.0.1/api-references/lightning-large.mdx new file mode 100644 index 0000000..d4fc470 --- /dev/null +++ b/fern/products/waves/pages/v3.0.1/api-references/lightning-large.mdx @@ -0,0 +1,6 @@ +--- +title: 'Text to Speech' +description: 'Get speech for given text using the Waves API' +openapi: 'POST /waves/v1/lightning-large/get_speech' +hideApiMarker: False +--- diff --git a/fern/products/waves/pages/v3.0.1/api-references/lightning-tts-ws.mdx b/fern/products/waves/pages/v3.0.1/api-references/lightning-tts-ws.mdx new file mode 100644 index 0000000..bfb2538 --- /dev/null +++ b/fern/products/waves/pages/v3.0.1/api-references/lightning-tts-ws.mdx @@ -0,0 +1,129 @@ +--- +title: 'Server-Sent Events API' +sidebarTitle: 'Text to Speech (SSE)' +description: 'Stream speech for given text using the Lightning SSE API' +hideApiMarker: false +--- + +## Lightning SSE API + +The Lightning SSE API allows you to stream text-to-speech audio in real-time. This is particularly useful for applications requiring low-latency audio generation. 
+ +### Connection + +Connect to the SSE endpoint: + +```javascript +const eventSource = new EventSource( + 'https://api.smallest.ai/waves/v1/lightning/stream', + { + headers: { + Authorization: 'Bearer YOUR_API_KEY' + } + } +); +``` + +### Request Format + +Send a POST request with the following JSON structure: + +```json +{ + "text": "Your text to be converted to speech", + "voice_id": "voice_id_here", + "speed": 1, + "sample_rate": 24000 +} +``` + +#### Parameters + +| Parameter | Type | Required | Description | +| ------------- | ------ | -------- | --------------------------------------------------- | +| `text` | string | Yes | The text to convert to speech (max 1000 characters) | +| `voice_id` | string | Yes | ID of the voice to use | +| `speed` | number | No | Speech speed multiplier (default: 1) | +| `sample_rate` | number | No | Audio sample rate in Hz (default: 24000) | + +### Example Usage + +```javascript +const eventSource = new EventSource( + 'https://api.smallest.ai/waves/v1/lightning/stream', + { + headers: { + Authorization: 'Bearer YOUR_API_KEY' + } + } +); + +// Handle incoming audio chunks +eventSource.onmessage = (event) => { + const response = JSON.parse(event.data); + + if (response.status === 'chunk') { + // Decode and play audio + const audioData = atob(response.data.audio); + // Process audio data... 
+ } else if (response.status === 'complete' && response.done) { + console.log('All audio chunks received'); + eventSource.close(); + } else if (response.status === 'error') { + console.error('Error:', response.message); + eventSource.close(); + } +}; + +eventSource.onerror = (error) => { + console.error('SSE error:', error); + eventSource.close(); +}; +``` + +### Response Events + +The server will send events with the following formats: + +#### Chunk Event + +```json +{ + "request_id": "047c9091-b770-41d8-b96b-907d1c8406c0", + "status": "chunk", + "data": { + "audio": "base64_encoded_audio_data" + } +} +``` + +#### Complete Event + +```json +{ + "request_id": "047c9091-b770-41d8-b96b-907d1c8406c0", + "status": "complete", + "message": "All chunks sent", + "done": true +} +``` + +#### Error Event + +```json +{ + "request_id": "047c9091-b770-41d8-b96b-907d1c8406c0", + "status": "error", + "message": "Error message", + "errors": [ + /* detailed error information */ + ] +} +``` + +### Notes + +- The API automatically chunks long text and streams each chunk separately +- Credits are deducted based on the length of the input text +- The SSE connection will remain open until all chunks are sent or an error occurs +- For optimal performance, keep individual requests under 1000 characters diff --git a/fern/products/waves/pages/v3.0.1/api-references/lightning-v2-stream.mdx b/fern/products/waves/pages/v3.0.1/api-references/lightning-v2-stream.mdx new file mode 100644 index 0000000..007f89d --- /dev/null +++ b/fern/products/waves/pages/v3.0.1/api-references/lightning-v2-stream.mdx @@ -0,0 +1,25 @@ +--- +title: 'Text to Speech' +sidebarTitle: 'Text to Speech (SSE)' +description: 'Stream speech for given text using the Lightning v2 SSE API' +openapi: 'POST /waves/v1/lightning-v2/stream' +--- + +## Overview + +The Lightning v2 SSE API provides real-time text-to-speech streaming capabilities with high-quality voice synthesis. 
This API uses Server-Sent Events (SSE) to deliver audio chunks as they're generated, enabling low-latency audio playback without waiting for the entire audio file to process. +For an end-to-end example of how to use the Lightning v2 SSE API, check out [Text to Speech (SSE) Example](https://github.com/smallest-inc/waves-examples/blob/main/lightning_v2/http_streaming/http_streaming_api.py) + +## When to Use + +- **Interactive Applications**: Perfect for chatbots, virtual assistants, and other applications requiring immediate voice responses +- **Long-Form Content**: Efficiently stream audio for articles, stories, or other long-form content without buffering delays +- **Voice User Interfaces**: Create natural-sounding voice interfaces with minimal perceived latency +- **Accessibility Solutions**: Provide real-time audio versions of written content for users with visual impairments + +## How It Works + +1. **Make a POST Request**: Send your text and voice settings to the API endpoint +2. **Receive Audio Chunks**: The API processes your text and streams audio back as base64-encoded chunks with 1024 byte size +3. **Process the Stream**: Handle the SSE events to decode and play audio chunks sequentially +4. **End of Stream**: The API sends a completion event when all audio has been delivered diff --git a/fern/products/waves/pages/v3.0.1/api-references/lightning-v2-ws.mdx b/fern/products/waves/pages/v3.0.1/api-references/lightning-v2-ws.mdx new file mode 100644 index 0000000..f7d82d7 --- /dev/null +++ b/fern/products/waves/pages/v3.0.1/api-references/lightning-v2-ws.mdx @@ -0,0 +1,6 @@ +--- +title: 'Text to Speech (WebSocket)' +description: The Lightning v2 WebSocket API provides real-time text-to-speech streaming capabilities with high-quality voice synthesis. This API uses WebSocket to deliver audio chunks as they're generated, enabling low-latency audio playback without waiting for the entire audio file to process. 
For an end-to-end example of how to use the Lightning v2 WebSocket API, check out [Text to Speech (WS) Example](https://github.com/smallest-inc/waves-examples/tree/main/lightning_v2/ws_streaming) +asyncapi: "/asyncapi-spec/lightning-v2-ws.json /waves/v1/lightning-v2/get_speech/stream" +--- + diff --git a/fern/products/waves/pages/v3.0.1/api-references/lightning-v2.mdx b/fern/products/waves/pages/v3.0.1/api-references/lightning-v2.mdx new file mode 100644 index 0000000..51853d1 --- /dev/null +++ b/fern/products/waves/pages/v3.0.1/api-references/lightning-v2.mdx @@ -0,0 +1,6 @@ +--- +title: 'Text to Speech' +description: 'Get speech for given text using the Waves API' +openapi: 'POST /waves/v1/lightning-v2/get_speech' +hideApiMarker: False +--- diff --git a/fern/products/waves/pages/v3.0.1/api-references/lightning.mdx b/fern/products/waves/pages/v3.0.1/api-references/lightning.mdx new file mode 100644 index 0000000..34c162f --- /dev/null +++ b/fern/products/waves/pages/v3.0.1/api-references/lightning.mdx @@ -0,0 +1,7 @@ +--- +title: 'Text to speech' +sidebarTitle: 'Text to speech' +description: 'Get speech for given text using the Waves API' +openapi: 'POST /waves/v1/lightning/get_speech' +hideApiMarker: False +--- diff --git a/fern/products/waves/pages/v3.0.1/api-references/websocket.mdx b/fern/products/waves/pages/v3.0.1/api-references/websocket.mdx new file mode 100644 index 0000000..990d8d3 --- /dev/null +++ b/fern/products/waves/pages/v3.0.1/api-references/websocket.mdx @@ -0,0 +1,106 @@ +--- +title: WebSocket Support for TTS API +sidebarTitle: WebSocket +description: Learn about WebSocket support for our Text-to-Speech (TTS) API, how it works, and when to use it. +icon: arrow-down-wide-short +--- + +Our Text-to-Speech (TTS) API supports WebSocket communication, providing a real-time, low-latency streaming experience for applications that require instant speech synthesis. 
WebSockets allow continuous data exchange, making them ideal for use cases that demand uninterrupted audio generation. + +--- + +## When to Use WebSockets + +### 1. **Real-Time Streaming** + +WebSockets are perfect for applications that need real-time speech synthesis, eliminating the delays associated with traditional HTTP requests. + +### 2. **Interactive Applications** + +For voice assistants, chatbots, and live transcription services, WebSockets ensure smooth, uninterrupted audio playback and response times. + +### 3. **Reduced Latency** + +A persistent WebSocket connection reduces the need for repeated request-response cycles, significantly improving performance for applications requiring rapid audio generation. + +--- + +## How It Works + +1. **Establish a Connection**: The client opens a WebSocket connection to our TTS API. +2. **Send Text Data**: The client sends the text payload to be synthesized. +3. **Process in Chunks**: The API breaks the text into chunks and processes them individually. +4. **Receive Audio Stream**: As each chunk is processed, it is sent back to the client as a base64-encoded audio buffer. +5. **Completion**: Once all chunks are processed, a complete message is sent to indicate the end of the stream. + +--- + +## Timeout Behavior + +By default, the WebSocket connection enforces a **20-second inactivity timeout**. This means that if the client does not send any data within 20 seconds, the server will automatically close the connection to free up resources. + +To support longer sessions for use cases where clients need more time (e.g., long pauses between messages), the timeout can be extended up to **60 seconds**. + +### To extend the timeout: + +You can include the `timeout` parameter in the WebSocket URL like so: + +```link +wss://api.smallest.ai/waves/v1/lightning-v2/get_speech/stream?timeout=60 +``` + + +This sets the inactivity timeout to 60 seconds. Valid values range from **20 (default)** to **60 seconds**. 
+ +--- + +## Implementation Details + +The WebSocket TTS API is optimized to handle real-time text-to-speech conversions efficiently. Key aspects include: + +- **Input Validation**: Ensures the provided text and voice ID are valid before processing. +- **Chunk Processing**: Long texts are split into smaller chunks (e.g., 240 characters) to optimize processing. +- **Voice Caching**: The API fetches and caches voice configurations to reduce redundant database queries. +- **Task Queue System**: Tasks are pushed to a Redis-based queue for efficient processing and real-time audio generation. +- **Error Handling**: If any chunk fails, an error message is logged and sent to the client. + +--- + +## Example Request Flow + +1. The client sends a WebSocket message: + + ```json + { + "text": "Hello, world!", + "voice_id": "12345", + "speed": 1.0, + "sample_rate": 24000 + } + +2. The API validates the request and retrieves the voice settings. + +3. The text is split into chunks and processed in the background. + +4. The client receives responses like: +```json +{ + "request_id": "047c9091-b770-41d8-b96b-907d1c8406c0", + "status": "chunk", + "data": { + "audio": "" + } +} +``` + +5. Once all chunks are sent, a final message is returned: +```json +{ + "request_id": "047c9091-b770-41d8-b96b-907d1c8406c0", + "status": "complete", + "message": "All chunks sent", + "done": true +} +``` + +For implementation details, check our [WebSocket API documentation](/v3.0.1/content/api-references/lightning-v2-ws). diff --git a/fern/products/waves/pages/v3.0.1/best-practices/pvc-best-practices.mdx b/fern/products/waves/pages/v3.0.1/best-practices/pvc-best-practices.mdx new file mode 100644 index 0000000..8a1e183 --- /dev/null +++ b/fern/products/waves/pages/v3.0.1/best-practices/pvc-best-practices.mdx @@ -0,0 +1,38 @@ +--- +title: "Professional Voice Cloning - Best Practices" +description: "Best practices for recording high-quality reference audio." 
+icon: "user-tie" +--- + +To get the most accurate and natural voice clone, it's essential to provide **high-quality reference audio**. The best practices for recording remain the same as those for **Instant Voice Cloning**, which you can find here: + +🔗 **[Instant Voice Cloning - Best Practices](/v3.0.1/content/best-practices/vc-best-practices)** + +However, **Professional Voice Cloning (PVC) significantly improves upon Instant Voice Cloning** in the following ways: + +## 🎙️ How PVC Enhances Voice Cloning + +### 1. **Handles Background Noise More Effectively** + - PVC can filter out mild background noise without affecting voice quality. + - Unlike Instant Cloning, **PVC adapts better to real-world recording conditions**. + +### 2. **Captures a More Natural Speaking Style** + - Supports a **wider range of tones and vocal inflections**. + - Preserves the natural rhythm and personality of speech. + +### 3. **Understands Extreme Emotions & Variability** + - PVC models can **learn from expressive speech**, making them ideal for voices with dynamic emotions (anger, excitement, sadness). + - Instant Cloning may struggle with highly expressive tones. + +### 4. **Improves Inconsistent Speaking Patterns** + - Can learn from **pauses, breath sounds, and fluctuations in speaking speed**. + - Works well even if the reference recordings contain slight variations. + +### 5. **More Robust for Long-Form Content** + - Best suited for audiobook narration, dubbing, and professional voice applications. + - Produces high-quality results even in long recordings. + +If you have any questions or run into any issues, our community is here to help! + +- Join our [Discord server](https://discord.gg/ywShEyXHBW) to connect with other developers and get real-time support. +- Reach out to our team via email: [support@smallest.ai](mailto:support@smallest.ai). 
\ No newline at end of file diff --git a/fern/products/waves/pages/v3.0.1/best-practices/tts-best-practices.mdx b/fern/products/waves/pages/v3.0.1/best-practices/tts-best-practices.mdx new file mode 100644 index 0000000..e09c166 --- /dev/null +++ b/fern/products/waves/pages/v3.0.1/best-practices/tts-best-practices.mdx @@ -0,0 +1,333 @@ +--- +title: "Text to Speech - Best Practices" +description: "Learn best practices for text formatting for optimal Audio Generation." +icon: "comment-dots" +--- + +This comprehensive guide outlines the recommended practices for formatting text to ensure optimal processing, accuracy, and consistent output across different use cases. Following these guidelines will help improve the quality of generated audio and reduce potential errors. + +## Language and Script Guidelines + +### Mixed Language Formatting + +When working with mixed language content, particularly English and Hindi, proper script selection is crucial for accurate processing: + +- English text must be written in Latin script +- Hindi text must be written in Devanagari script +- Avoid transliteration of Hindi words into Latin script + +**Examples:** +``` +✅ Correct: I want to eat खाना +❌ Incorrect: I want to eat khana + +✅ Correct: मैं school जाता हूं +❌ Incorrect: main school jata hun +``` + +### Proper Nouns Handling + +For Indian proper nouns, maintain cultural and linguistic accuracy by following these rules: + +1. **City Names:** + - Use Devanagari script for Indian city names + - Maintain Latin script for non-Indian city names + +2. **Personal Names:** + - Use Devanagari script for Indian personal names + - Maintain original script for non-Indian names + +**Examples:** +``` +✅ Correct: I live in मुंबई near अंधेरी station +❌ Incorrect: I live in Mumbai near Andheri station + +✅ Correct: Hello! अमित and रोहित are my friends from New York +❌ Incorrect: Hello! Amit and Rohit are my friends from New York + +✅ Correct: Hello! 
मैं दिल्ली में रहता हूं। My name is John and my friend's name is श्याम। +❌ Incorrect: Hello! Mai Delhi me rehta hun. My name is John and my friend's name is Shyam. +``` + +## Text Chunking + +### Character Limit Guidelines + +To optimize real-time processing and reduce latency, implement these chunking practices: + +1. **Size Constraints:** + - Maximum chunk size: 250 characters + - Break at natural punctuation points + - Maintain sentence coherence when possible + +2. **Breaking Points Priority:** + - First priority: Sentence-ending punctuation (., !, ?) + - Second priority: Other punctuation (;, :) + - Third priority: Natural word breaks + +### Chunking Implementation + +Use the following Python code for implementing text chunking: +- For `lightning-large` model, set `max_chunk_size=140`. +- For `lightning` model, set `max_chunk_size=250`. + + +```python python +def chunk_text(text, max_chunk_size=250): + """ + Chunks text with a maximum size of 250 characters, preferring to break at punctuation marks. + + - For `lightning-large` model, set `max_chunk_size=140`. + - For `lightning` model, set `max_chunk_size=250`. + + Args: + text (str): Input text to be chunked + max_chunk_size (int): Maximum size of each chunk (default: 250) + + Returns: + list: List of text chunks + """ + chunks = [] + while text: + if len(text) <= max_chunk_size: + chunks.append(text) + break + + # Look for punctuation within the last 50 characters of the max chunk size + chunk_end = max_chunk_size + punctuation_marks = '.,:;।!?' 
+ + # Search backward from max_chunk_size for punctuation + found_punct = False + for i in range(chunk_end, max(chunk_end - 50, 0), -1): + if i < len(text) and text[i] in punctuation_marks: + chunk_end = i + 1 # Include the punctuation mark + found_punct = True + break + + # If no punctuation found, look for space + if not found_punct: + for i in range(chunk_end, max(chunk_end - 50, 0), -1): + if i < len(text) and text[i].isspace(): + chunk_end = i + break + # If no space found, force break at max_chunk_size + if not found_punct and chunk_end == max_chunk_size: + chunk_end = max_chunk_size + + # Add chunk and remove it from original text + chunks.append(text[:chunk_end].strip()) + text = text[chunk_end:].strip() + + return chunks +``` + + +## Handling numbers + + +### Order IDs and Large Numbers + +When handling order IDs or large numbers: +- Send them as separate requests +- Split the text around the number + +**Example:** +``` +Original: "Your order id is 123456789012345" +Split into: +1. "Your order id is" +2. "123456789012345" +``` + +### Phone Numbers + +#### Default Grouping +- Numbers are automatically grouped in 3-4-3 format +- Example: "9876543210" is read as "987-6543-210" + +#### Custom Formatting +For specific reading patterns: +- Format numbers explicitly in text +- Write out the exact pronunciation desired + +**Example:** +``` +✅ Correct: "double nine triple eight double seven double six" (for 9988877766) +❌ Incorrect: "9988877766" (if you want it read as "double nine...") +``` + +## Date and Time Formatting Guidelines + +### Date Formats +You may use any of the following formats when writing dates: + +1. DD/MM/YYYY → `12/02/2025` → "twelve, two, twenty twenty-five" +2. DD-MM-YYYY → `12-02-2025` → "twelve, two, twenty twenty-five" +3. DD Month YYYY → `12 February 2025` → "twelve February twenty twenty five" +4. Month DD YYYY → `February 12th 2025` → "February, twelfth, twenty twenty-five" +5. DD-MM-YY → `12-02-25` → "twelve, two, twenty-five" +6. 
DD/MM/YY → `12/02/25` → "twelve, two, twenty-five" + +> Note: Ordinal suffixes (st, nd, rd, th) can be used in dates. + +``` +✅ My birthday is on 31/12/2002. +✅ The event is scheduled for 05th March 2024. +✅ We will launch the project on June 15 2023. +✅ The deadline is 30-06-24. + +❌ 21st of June, 2003. (Will be read as twenty-first of June, two thousand and three) +❌ 12.02.2025. (Will be read as twelve two two thousand and twenty-five) +``` + +### Time Formats +You may use the following formats when specifying time: + +1. HH:MM:SS → `14:30:15` → "fourteen thirty fifteen" +2. HH:MM → `14:30` → "fourteen thirty" + +``` +✅ Let's meet at 12:32 PM on 12/02/2025. +✅ The meeting starts at 09:45 AM. +✅ The match will begin at 18:00. +✅ The alarm is set for 07:15:30. + +❌ 14.30 (Will be read as fourteen [long pause] thirty) +❌ 7'5 AM (Will be read as seven five) +``` + +## Mathematical Expressions + +Express mathematical operations in words for clarity. For complex mathematical expressions, break down into simpler components: + +``` +✅ Correct: two plus three equals five +✅ Correct: 2 plus 3 equals 5 +❌ Incorrect: 2+3=5 + +✅ Correct: ten minus three equals seven +✅ Correct: 10 minus 3 equals 7 +❌ Incorrect: 10-3=7 + +✅ Correct: five multiplied by three equals fifteen +✅ Correct: 5 multiplied by 3 equals 15 +❌ Incorrect: 5x3=15, 5*3=15 + +✅ Correct: ten divided by two equals five +✅ Correct: 10 divided by 2 equals 5 +❌ Incorrect: 10/2=5, 10÷2=5 + +✅ Correct: open parentheses five plus three close parentheses multiplied by two equals sixteen +✅ Correct: open parentheses 5 plus 3 close parentheses multiplied by 2 equals 16 +❌ Incorrect: (5+3)*2=16 + +✅ Correct: square root of sixteen equals four +✅ Correct: square root of 16 equals 4 +❌ Incorrect: √16=4 +``` + +## Approximate Values + +When expressing approximate values: +- Write out the full words +- Avoid using symbols for approximation +- Be explicit about the approximation + +**Examples:** +``` +✅ Correct: Your 
delivery will arrive in approximately twenty minutes +✅ Correct: Your delivery will arrive in approximately 20 minutes +❌ Incorrect: Your delivery will arrive in ~20 mins + +✅ Correct: around five hundred people attended +✅ Correct: around 500 people attended +❌ Incorrect: ~500 people attended +``` + +## Units and Measurements + + +When expressing measurements, write out the units in full words to ensure clear understanding: + +``` +✅ Correct: five kilometers, 5 kilometers +❌ Incorrect: 5km, 5 kms + +✅ Correct: twenty kilograms of rice, 20 kilograms of rice +❌ Incorrect: 20kg rice, 20kgs rice + +✅ Correct: thirty degrees Celsius, 30 degrees Celsius +❌ Incorrect: 30°C, 30 C + +✅ Correct: two liters of water, 2 liters of water +❌ Incorrect: 2L water, 2l water + +✅ Correct: five feet six inches tall, 5 feet 6 inches tall +❌ Incorrect: 5'6" tall, 5ft 6in tall +``` + +## Symbols and Special Characters + +### Basic Symbols +Spell out special characters and symbols in all contexts: +``` +. → "dot" +@ → "at" +_ → "underscore" +- → "dash" +/ → "forward slash" +# → "hashtag" +& → "and" +``` + +### Digital Content Formatting + +**1. URLs:** +``` +✅ Correct: visit docs dot example dot com forward slash guide +❌ Incorrect: visit docs.example.com/guide + +✅ Correct: my dash website dot com forward slash about +❌ Incorrect: my-website.com/about +``` + +**2. Email Addresses:** +``` +✅ Correct: support dot company at gmail dot com +❌ Incorrect: support.company@gmail.com + +✅ Correct: info underscore help at company dot com +❌ Incorrect: info_help@company.com +``` + +**3. 
Social Media:** +``` +✅ Correct: at company underscore name +❌ Incorrect: @company_name + +✅ Correct: hashtag trending now +❌ Incorrect: #TrendingNow + +✅ Correct: follow us at tech underscore company hashtag latest news +❌ Incorrect: follow us @tech_company #LatestNews +``` + +### Range and Interval Notation +Always write out ranges and relationships explicitly to avoid ambiguity: +``` +✅ Correct: five to eight days +❌ Incorrect: 5-8 days + +✅ Correct: between ten and fifteen minutes +❌ Incorrect: 10-15 minutes + +✅ Correct: temperatures from twenty to thirty degrees +❌ Incorrect: temperatures 20-30° +``` + +Note: +- Consistency is key - use the same format throughout your content +- When in doubt, write out the full words +- For complex URLs or handles, break them into smaller, manageable chunks +- Avoid using symbols that could have multiple interpretations diff --git a/fern/products/waves/pages/v3.0.1/best-practices/vc-best-practices.mdx b/fern/products/waves/pages/v3.0.1/best-practices/vc-best-practices.mdx new file mode 100644 index 0000000..749e283 --- /dev/null +++ b/fern/products/waves/pages/v3.0.1/best-practices/vc-best-practices.mdx @@ -0,0 +1,118 @@ +--- +title: "Voice Cloning - Best Practices" +description: "Best Practices for Recording Reference Audio" +icon: "clone" +--- + +To achieve the best results when cloning your voice, it's essential to provide high-quality reference audio. Below are some best practices, dos and don'ts, and examples to guide you. + +Ready to Clone Your Voice? Try it out on our platform [platform](https://app.smallest.ai/waves/studio/create) + +--- + +## 🎙️ How to Record Reference Audio + +1. **Environment** + - Record in a quiet room with minimal background noise. + - Use a good quality microphone. While dedicated mics are ideal, MacBook and Mobile microphones work well for this purpose. 
+ - Mobile and Laptop recordings can work well too, as long as the device is placed at an adequate distance—not too far or too close—to ensure clear, natural sound without distortion. + - Make sure the recording environment doesn’t introduce echo or distortion (e.g., avoid large empty rooms or outdoor spaces). + - After uploading the audio, listen to it to ensure it is clear and free of interruptions, background noise, or distortion. + +2. **Speaking Style** + - Speak naturally and avoid excessive emotion unless a specific tone is required. + - Maintain a consistent pace and tone throughout the recording. Be mindful of long pauses, as they can impact the quality of the cloned voice. + +3. **Length of Audio** + - Provide at least 5 seconds to 15 seconds of clean audio. + +--- + +## 🎧 Examples of Good and Bad Reference Audio + +> **_NOTE:_** Currently, there is no direct support for adding audio to Mintlify. As a workaround, we have embedded a video to include the necessary audio content. + +### Good Reference Audio +- High-quality, clear, and consistent tone. + + + + +### Bad Reference Audio +1. **With Background Noise** + + +2. **Inconsistent Speaking Style** + + +3. **Overlapping Voices** + + +--- + +# 🎭 Creating Expressive Voice Clones + +Our platform supports emotional reference audio, meaning the emotions, pitch or tone in the reference audio will influence the output. This is ideal for creating expressive clones that match your intended tone. + +## 😄 Emotional Control +- The emotions in the reference audio (e.g., angry, happy, sad) directly impact the tone of the generated voice. +- For example, if the reference audio conveys happiness, the output will replicate that cheerful tone. + +## ⚡ Speed Control +- The pace of your reference audio determines the speed of the output. +- A fast-paced reference will generate a similarly fast delivery, while a slower reference will produce a more measured response. 
+ +## 🔊 Loudness Control +- The loudness or volume in your reference audio is reflected in the output. +- For instance, a soft-spoken input will result in a quieter clone, while a louder, more energetic recording will produce a bolder output. + +--- + +## 🎧 Emotional Reference Audio Examples + +### Angry Tone +- **Reference Audio Sample**: + + +- **Output Audio Example**: + + +### Silent Tone +- **Reference Audio Sample**: + + +- **Output Audio Example**: + + + +### Fast-Paced Tone +- **Reference Audio Sample**: + + +- **Output Audio Example**: + + + +--- + +By following these guidelines and leveraging emotional reference audio, you can achieve highly accurate and expressive voice clones tailored to your needs. diff --git a/fern/products/waves/pages/v3.0.1/best-practices/video/angry_gen_t.mp4 b/fern/products/waves/pages/v3.0.1/best-practices/video/angry_gen_t.mp4 new file mode 100644 index 0000000..0b3628e Binary files /dev/null and b/fern/products/waves/pages/v3.0.1/best-practices/video/angry_gen_t.mp4 differ diff --git a/fern/products/waves/pages/v3.0.1/best-practices/video/angry_ref_t.mp4 b/fern/products/waves/pages/v3.0.1/best-practices/video/angry_ref_t.mp4 new file mode 100644 index 0000000..aca4571 Binary files /dev/null and b/fern/products/waves/pages/v3.0.1/best-practices/video/angry_ref_t.mp4 differ diff --git a/fern/products/waves/pages/v3.0.1/best-practices/video/bg_ref_t.mp4 b/fern/products/waves/pages/v3.0.1/best-practices/video/bg_ref_t.mp4 new file mode 100644 index 0000000..7d1d723 Binary files /dev/null and b/fern/products/waves/pages/v3.0.1/best-practices/video/bg_ref_t.mp4 differ diff --git a/fern/products/waves/pages/v3.0.1/best-practices/video/fast_gen_t.mp4 b/fern/products/waves/pages/v3.0.1/best-practices/video/fast_gen_t.mp4 new file mode 100644 index 0000000..9c66deb Binary files /dev/null and b/fern/products/waves/pages/v3.0.1/best-practices/video/fast_gen_t.mp4 differ diff --git 
a/fern/products/waves/pages/v3.0.1/best-practices/video/fast_ref_t.mp4 b/fern/products/waves/pages/v3.0.1/best-practices/video/fast_ref_t.mp4 new file mode 100644 index 0000000..1be0d9c Binary files /dev/null and b/fern/products/waves/pages/v3.0.1/best-practices/video/fast_ref_t.mp4 differ diff --git a/fern/products/waves/pages/v3.0.1/best-practices/video/good_ref_t.mp4 b/fern/products/waves/pages/v3.0.1/best-practices/video/good_ref_t.mp4 new file mode 100644 index 0000000..8b16dec Binary files /dev/null and b/fern/products/waves/pages/v3.0.1/best-practices/video/good_ref_t.mp4 differ diff --git a/fern/products/waves/pages/v3.0.1/best-practices/video/inconsistent_ref_t.mp4 b/fern/products/waves/pages/v3.0.1/best-practices/video/inconsistent_ref_t.mp4 new file mode 100644 index 0000000..dec6109 Binary files /dev/null and b/fern/products/waves/pages/v3.0.1/best-practices/video/inconsistent_ref_t.mp4 differ diff --git a/fern/products/waves/pages/v3.0.1/best-practices/video/overlap_ref_t.mp4 b/fern/products/waves/pages/v3.0.1/best-practices/video/overlap_ref_t.mp4 new file mode 100644 index 0000000..ebc3285 Binary files /dev/null and b/fern/products/waves/pages/v3.0.1/best-practices/video/overlap_ref_t.mp4 differ diff --git a/fern/products/waves/pages/v3.0.1/best-practices/video/whisper_gen_t.mp4 b/fern/products/waves/pages/v3.0.1/best-practices/video/whisper_gen_t.mp4 new file mode 100644 index 0000000..2c6188a Binary files /dev/null and b/fern/products/waves/pages/v3.0.1/best-practices/video/whisper_gen_t.mp4 differ diff --git a/fern/products/waves/pages/v3.0.1/best-practices/video/whisper_ref_t.mp4 b/fern/products/waves/pages/v3.0.1/best-practices/video/whisper_ref_t.mp4 new file mode 100644 index 0000000..6a2edeb Binary files /dev/null and b/fern/products/waves/pages/v3.0.1/best-practices/video/whisper_ref_t.mp4 differ diff --git a/fern/products/waves/pages/v3.0.1/changelog/announcements.mdx b/fern/products/waves/pages/v3.0.1/changelog/announcements.mdx new file 
mode 100644 index 0000000..9fa5634 --- /dev/null +++ b/fern/products/waves/pages/v3.0.1/changelog/announcements.mdx @@ -0,0 +1,60 @@ +--- +title: "Announcements" +description: "New updates and improvements from Smallest AI." +mode: "center" +--- + + + ## Introducing Lightning v2 + + We are thrilled to announce the release of our Lightning v2 model. This model supports 16 languages, providing high-quality speech synthesis across multiple languages. Key features include: + + - **Multilingual Support**: High-quality speech synthesis in 16 languages with voice cloning. + - **100ms TTFB**: Superfast and scalable to support your realtime applications. + - **0.05 per 10K characters**: 3x cheaper than other providers. + + Experience the new capabilities of Lightning v2 on our [Platform](https://app.smallest.ai/waves/studio/create). + + + + ## Introducing Lightning Multilingual - Now in Beta + + We are thrilled to announce the beta release of our Lightning Multilingual model. This model supports 30 languages, providing high-quality speech synthesis across multiple languages. Key features include: + + - **Multilingual Support**: High-quality speech synthesis in 30 languages. + - **Versatile Applications**: Ideal for global applications requiring diverse language support. + - **Beta Stage**: Currently in beta, with ongoing improvements and updates. + + Experience the new capabilities of Lightning Multilingual on our [Platform](https://app.smallest.ai/waves/studio/create). + + + + ## Introducing Lightning Large - Enhanced Expressiveness and Quality + + We are excited to announce the latest enhancements to our Lightning Large model. With improved emotional depth and expressiveness, Lightning Large now delivers even more natural and engaging speech synthesis. This update includes: + - **Enhanced Voice Cloning**: More accurate and lifelike voice clones. + - **Best-in-Market Latency**: Instant voice cloning with a latency of under 300ms. 
+ + Experience the new capabilities of Lightning Large via our [Platform](https://app.smallest.ai/waves/studio/create) or the [API](/v3.0.1/content/api-references/lightning). + + + + ## Introducing Waves + + [Waves](https://app.smallest.ai/waves/studio/create) is our new unified platform for speech synthesis. It supports various models designed for real-time applications such as voicebots. + However, waves will also be supporting various features such as voice cloning, accent cloning, style transfer, speech to speech etc in the near future, making it a ubiquitous platform + for anyone who needs AI generated speech. + + The voices available on waves platform are available via the [Waves API](/v3.0.1/content/api-references/lightning) + + + + ## Introducing Lightning - World's Fastest Text to Speech + Lightning is the world's fastest text to speech model, generating around 10 seconds of hyper-realistic audio in just 100ms, all at once, no streaming. + + Read more about lightning in our release post [here](https://smallest.ai/blog/lightning-fast-text-to-speech). + + You can access lightning via the [Waves API](/v3.0.1/content/api-references/lightning) + + +*A lot more coming up, very soon* \ No newline at end of file diff --git a/fern/products/waves/pages/v3.0.1/client-libraries/overview.mdx b/fern/products/waves/pages/v3.0.1/client-libraries/overview.mdx new file mode 100644 index 0000000..9920dae --- /dev/null +++ b/fern/products/waves/pages/v3.0.1/client-libraries/overview.mdx @@ -0,0 +1,10 @@ +--- +title: "Client Libraries" +description: "Access Client Libraries by Smallest AI." +mode: "center" +--- + +Welcome to the Smallest AI Client Libraries documentation. Our client libraries provide easy-to-use interfaces for integrating Smallest AI's powerful text-to-speech and voice cloning capabilities into your applications. 
+ +- [Python](https://github.com/smallest-inc/smallest-python-sdk) +- [Node](https://github.com/smallest-inc/smallest-node-sdk) \ No newline at end of file diff --git a/fern/products/waves/pages/v3.0.1/getting-started/authentication.mdx b/fern/products/waves/pages/v3.0.1/getting-started/authentication.mdx new file mode 100644 index 0000000..5c635c5 --- /dev/null +++ b/fern/products/waves/pages/v3.0.1/getting-started/authentication.mdx @@ -0,0 +1,44 @@ +--- +title: "Authentication" +description: "Learn how to authenticate your API requests and manage access keys securely." +icon: "lock" +--- + +## API keys +Smallest AI uses API keys for authentication. You must include your key with every API request to authenticate your identity and track usage against your quota. + +Each API key can be scoped to the following: + +- **Rate Limits**: Controls the number of requests allowed within a specified time period. +- **Credit Quota**: Defines the maximum usage credits available for your account. + +Your API key is confidential and should be kept secure. Never share it with others or expose it in client-side code (browsers, mobile apps, github repositories, etc.). 
+ +## Making Requests +All API requests require your key in the `Authorization` header using the Bearer key format: + +``` +Authorization: Bearer YOUR_API_KEY +``` + +## Example API Request +Test the API with this curl command by replacing `YOUR_API_KEY` with your actual key: + +```bash +curl 'https://api.smallest.ai/waves/v1/lightning/get_voices' \ + -H 'Content-Type: application/json' \ + -H 'Authorization: Bearer YOUR_API_KEY' +``` + +## Example with the Smallest Python Package + +```python python +from smallestai.waves import WavesClient + +# Initialize the client with your API key +client = WavesClient(api_key="YOUR_API_KEY") + +# Retrieve available voices +print(f"Available Voices: {client.get_voices(model='lightning')}") +``` + \ No newline at end of file diff --git a/fern/products/waves/pages/v3.0.1/getting-started/http-stream.mdx b/fern/products/waves/pages/v3.0.1/getting-started/http-stream.mdx new file mode 100644 index 0000000..d977145 --- /dev/null +++ b/fern/products/waves/pages/v3.0.1/getting-started/http-stream.mdx @@ -0,0 +1,69 @@ +--- +title: "HTTP vs HTTP Streaming vs Websockets" +description: "What should you use?" +icon: "handshake-angle" +--- + +--- + +### Choosing the Right Protocol for Your TTS Application: HTTP, HTTP Streaming, or WebSocket? + +If you’re integrating Waves TTS into your application, one important decision is how to connect to the TTS engine. We support three protocols: HTTP, HTTP Streaming, and WebSocket, each tailored to different use cases. In this post, we’ll break down the strengths of each and help you choose the best fit for your needs. + +## HTTP: Best for Simplicity and Short Requests +**What it is**: +A classic REST-style interaction. You send a complete request (e.g., the full text to be converted to speech), and receive the synthesized audio as a downloadable response. + +**When to use it**: +- You have short or moderate-length texts. 
+- You want a simple integration, such as from a browser, mobile app, or backend job. +- You don’t need real-time feedback or streaming audio. + +**Pros and Cons**: + +| Pros | Cons | +|-----------------------------------------------|--------------------------------------------------------| +| Simple to integrate with standard HTTP tools | Full audio is returned only after complete synthesis | +| Easy to debug and monitor | Not suitable for real-time or long-form audio | +| Stateless; good for serverless environments | Reconnect needed for each request | +| Works well with caching and CDNs | Higher latency compared to streaming methods | + + +## HTTP Streaming: Best for Faster Playback Without Complexity + +**What it is**: +An enhancement of standard HTTP. The client sends a complete request, but the server streams back the audio as it's being generated, no need to wait for the full file. + +**When to use it**: +- You want faster playback with lower perceived latency. +- You send full input text but need audio to start as soon as possible. +- You want low-latency audio delivery without handling connection persistence. + +**Pros and Cons**: + +| Pros | Cons | +|------------------------------------------------|--------------------------------------------------------| +| Lower latency than regular HTTP | Only one-way communication (client → server) | +| Compatible with standard HTTP infrastructure | Full input must still be sent before synthesis starts | +| Audio starts playing as it's generated | No partial or live input updates | +| Easy to adopt with minimal changes | Slightly more complex than basic HTTP | + + +## WebSocket: Best for Real-Time, Interactive Applications + +**What it is**: +A full-duplex, persistent connection that allows two-way communication between the client and server. You can send text dynamically and receive streaming audio back continuously. + +**When to use it**: +- You need real-time, interactive TTS responses. 
+- Input is dynamic or arrives in chunks (e.g., live typing, conversation). +- You want persistent connections with minimal overhead per message. + +**Pros and Cons**: + +| Pros | Cons | +|----------------------------------------------------|---------------------------------------------------------| +| Ultra low latency | More complex to implement and manage | +| Supports real-time, chunked input and responses | Requires persistent connection management | +| Bi-directional communication | Not ideal for simple or infrequent tasks | +| Great for chatbots, live agents, or dictation apps | May require additional libraries or WebSocket support | diff --git a/fern/products/waves/pages/v3.0.1/getting-started/models.mdx b/fern/products/waves/pages/v3.0.1/getting-started/models.mdx new file mode 100644 index 0000000..deeeb0a --- /dev/null +++ b/fern/products/waves/pages/v3.0.1/getting-started/models.mdx @@ -0,0 +1,42 @@ +--- +title: "Models" +description: "Find detailed description of each model along with their capabilities and supported languages." +icon: "cube" +--- + + + + Our fastest model, optimized for low-latency applications. It can generate 10 seconds of audio in just 100 milliseconds, making it ideal for real-time applications such as voicebots and interactive systems. + + + An upgrade from the Lightning Large model, offering improved performance and quality. It supports 16 languages, making it suitable for a wider range of applications requiring expressive and high-quality speech synthesis. + + + Offers more emotional depth and expressiveness compared to the Lightning model. It supports voice cloning and has a latency of just under 300 milliseconds, making it suitable for applications requiring high-quality, expressive speech. + + + +## Geo-location Based Routing +Waves intelligently routes every request to the nearest server cluster to ensure the lowest possible latency for your applications. 
We currently operate server clusters in: +- 🇮🇳 India (Mumbai) +- 🇺🇸 USA (Oregon) + +Our routing system automatically detects the client's geographical location and connects them to the optimal server based on network proximity and latency. This process is fully automated, no manual configuration is required on your side. + + +## Model Overview + +| Model ID | Description | Languages Supported | +|-----------------------|-----------------------------------------------------------------------------|---------------------| +| **lightning** | Fastest model with an RTF of 0.01, generating 10 seconds of audio in 100 ms. | `English`
`Hindi` | +| **lightning-large** | More emotional depth and expressiveness, supports voice cloning, latency under 300 ms. | `English`
`Hindi` | +| **lightning-v2** | 100ms TTFB, Supports 16 languages with voice cloning. | `English`
`Hindi` <br> `Tamil` <br> `Kannada` <br> `Gujarati` <br> `Bengali` <br> `Marathi` <br> `German` <br> `French` <br> `Spanish` <br> `Italian` <br> `Polish` <br> `Dutch` <br> `Russian` <br> `Arabic`
`Hebrew` | + + + +Note: The API uses [ISO 639-1 language codes - Set 1](https://en.wikipedia.org/wiki/List_of_ISO_639_language_codes) (2-letter codes) to specify supported languages. + + +## Pricing + +Our pricing model is designed to be flexible and scalable, catering to different usage needs. For detailed pricing information, please visit our [pricing page](https://smallest.ai/text-to-speech) or contact our sales team at [support@smallest.ai](mailto:support@smallest.ai). \ No newline at end of file diff --git a/fern/products/waves/versions/v4.0.0/getting-started/quickstart.mdx b/fern/products/waves/pages/v3.0.1/getting-started/quickstart.mdx similarity index 93% rename from fern/products/waves/versions/v4.0.0/getting-started/quickstart.mdx rename to fern/products/waves/pages/v3.0.1/getting-started/quickstart.mdx index f87dda2..0bd6363 100644 --- a/fern/products/waves/versions/v4.0.0/getting-started/quickstart.mdx +++ b/fern/products/waves/pages/v3.0.1/getting-started/quickstart.mdx @@ -6,7 +6,7 @@ icon: "rocket" ## Step 1: Sign Up & get the API Key -1. Visit [waves.smallest.ai](https://waves.smallest.ai/) and sign up for an account or log in if you already have an account. +1. Visit [platform](https://app.smallest.ai/waves/studio/create) and sign up for an account or log in if you already have an account. 2. Navigate to `API Key` tab in your account dashboard. 3. Create a new API Key and copy it. 4. Export the API Key in your environment with the name `SMALLEST_API_KEY`, ensuring that your application can access it securely for authentication. diff --git a/fern/products/waves/pages/v3.0.1/integrations/livekit.mdx b/fern/products/waves/pages/v3.0.1/integrations/livekit.mdx new file mode 100644 index 0000000..f51537d --- /dev/null +++ b/fern/products/waves/pages/v3.0.1/integrations/livekit.mdx @@ -0,0 +1,113 @@ +--- +title: "LiveKit" +description: "Build LiveKit voice agents using Smallest AI TTS plugin." 
+icon: "voicemail" +--- + +This example provides scripts and tools to perform standalone audio generation and build Livekit voice assistants using the Smallest AI TTS plugin. Follow the steps below to set up and run the experiments. + +## Code Examples Repository + +You can find the code examples for this setup in the following GitHub repository: + +[Livekit Example Code Repository](https://github.com/smallest-inc/waves-examples/tree/main/lightning/livekit_example) + +## Common Steps + +### 1. Create a Virtual Environment + +To ensure your Python environment is isolated, create a virtual environment: + +```bash +python3 -m venv venv +``` + +Activate the virtual environment: + +- On Linux/Mac: + ```bash + source venv/bin/activate + ``` + +- On Windows: + ```bash + venv\Scripts\activate + ``` + +### 2. Install Requirements + +Once the virtual environment is activated, install the required dependencies: + +```bash +pip install -r requirements.txt +``` + +### 3. Sign in and Create a New Project on Livekit + +Sign in here: [Livekit Cloud](https://cloud.livekit.io) + +After signing in, create a new project and copy the following tokens: + +```bash +LIVEKIT_API_KEY +LIVEKIT_API_SECRET +LIVEKIT_URL +``` + +### 4. Create a `.env` File + +Create a `.env` file in the project root directory. This file should contain the following keys with appropriate values: + +```bash +LIVEKIT_API_KEY=... +LIVEKIT_API_SECRET=... +LIVEKIT_URL=... +OPENAI_API_KEY=... +DEEPGRAM_API_KEY=... +SMALLEST_API_KEY=... +``` + +### 5. Install the Plugin + +To set up the Livekit plugin for [smallest.ai](https://smallest.ai), run the following commands: + +```bash +chmod +x install_plugin.sh +./install_plugin.sh +``` + +--- + +## Usage + +### 1. Running `generate_audio.py` + +To generate audio using the Smallest AI plugin as a WAV file, run the following command: + +```bash +python3 generate_audio.py +``` + +You can change the parameters in the script and try out different voices, languages, and texts. 
+ +### 2. Running `minimal_assistant.py` + +To build a minimal Livekit voice assistant using the Smallest model, run the following command: + +```bash +python3 minimal_assistant.py dev +``` + +### 3. Connect to the Agent Here + +You can connect to the agent by visiting the following link: + +[Livekit Agent Playground](https://agents-playground.livekit.io) + +--- + +## Notes + +- Ensure that you have added the correct API keys and other credentials in the `.env` file before running the scripts. +- For any issues or questions, feel free to open an issue in the repository or contact us on [Discord](https://discord.gg/Ub25S48hSf). + diff --git a/fern/products/waves/pages/v3.0.1/integrations/plivo.mdx b/fern/products/waves/pages/v3.0.1/integrations/plivo.mdx new file mode 100644 index 0000000..ce2d638 --- /dev/null +++ b/fern/products/waves/pages/v3.0.1/integrations/plivo.mdx @@ -0,0 +1,108 @@ +--- +title: "Telephony: Plivo" +description: "Learn how to integrate Smallest AI TTS in Plivo for telephony solutions." +icon: "phone-volume" +--- + +This guide demonstrates how to use the **Smallest AI API** with telephony call functionality. It sets up a local FastAPI server to stream audio data and uses **ngrok** to expose the server to the public for testing. + +You can access the code for this example on GitHub [here ](https://github.com/smallest-inc/waves-examples/tree/main/lightning/telephone_example/plivo_example). + +## Prerequisites + +Before you begin, ensure you have the following: + +- **Python 3.8+** installed +- **ngrok** for tunneling local servers to the internet. You can download ngrok from [here](https://ngrok.com/download). +- **Plivo** account to handle calls and text-to-speech. + +## Setup Steps + +### 1. Configure Environment Variables + +Start by creating a `.env` file in your project directory with the necessary API keys. This configuration is essential for interacting with **Smallest.ai** and **Plivo**. + +```bash +SMALLEST_API_KEY=... +PLIVO_AUTH_ID=... 
+PLIVO_AUTH_TOKEN=... +``` + +### 2. Set Up Ngrok + +**Ngrok** is used to expose your local FastAPI server to the public internet. Follow these steps: + +1. Install **ngrok** (if not already installed). +2. Expose your FastAPI server for Plivo by running: + +```bash +ngrok http 5000 +``` + +3. Once ngrok is running, it will generate a public URL (e.g., `https://abcd-1234-5678.ngrok.io`). + +### 3. Install Dependencies + +To run the example code, you’ll need to install the required dependencies. Run the following command: + +```bash +pip install -r requirements.txt +``` + +### 4. Run the FastAPI Application + +Now, you can start the FastAPI server, which will handle the streaming of audio. + +**Note**: Make sure to update the ngrok URL and paths in the script before running. + +Run the Plivo server: +```bash +python plivo_example/plivo_app.py +``` + +### 5. Run the Phonetic Call Client + +Now that the FastAPI server is up and running, it's time to make a phonetic call. This client connects to the server, and you can test the audio stream with the desired telephony platform. + + +Similarly, update the phone numbers and ngrok URL in the script, then run: + +```bash +python plivo_example/plivo_make_call.py +``` + +### 6. Testing the Call + +- Once the client script is executed, a call will be placed to the provided number with phonetic audio. +- Plivo will handle the audio and make the call based on the provided parameters. + +### 7. Accessing the Public URL + +If you’ve successfully run ngrok, your application will be accessible via a public URL provided by ngrok, such as `https://abcd-1234-5678.ngrok.io`. + +This public URL will allow external services like Plivo to interact with your server. + +--- + +## Notes + +- **Public URL Requirement:** If you already have a public URL (e.g., from deploying the app), you don't need to use ngrok. +- **API Keys:** Ensure your `.env` file contains all required keys for **Plivo** to ensure successful API calls. 
+- **Phone Numbers:** Update the client scripts with valid phone numbers to receive the call. +- **Testing:** Use ngrok for easy testing in a local environment before deploying the application. + +--- + +## Troubleshooting + +If you face any issues during setup or while making calls, ensure the following: + +- **Correct API keys**: Double-check the credentials in your `.env` file. +- **Ngrok URL**: Ensure that ngrok is running and the correct URL is used in the scripts. +- **Dependencies**: Ensure all dependencies are installed correctly via `pip install -r requirements.txt`. + +If issues persist, you can reach out to the [Smallest.ai support team](https://discord.gg/Ub25S48hSf) or raise an issue on the [GitHub repository](https://github.com/smallest-inc/waves-examples). + +--- + +This setup provides seamless testing of phonetic calls using the **Smallest AI API** and **Plivo** for real-time voice interaction. \ No newline at end of file diff --git a/fern/products/waves/pages/v3.0.1/integrations/vonage.mdx b/fern/products/waves/pages/v3.0.1/integrations/vonage.mdx new file mode 100644 index 0000000..ed17a42 --- /dev/null +++ b/fern/products/waves/pages/v3.0.1/integrations/vonage.mdx @@ -0,0 +1,109 @@ +--- +title: "Telephony: Vonage" +description: "Learn how to integrate Smallest AI TTS in Vonage for telephony solutions." +icon: "tty" +--- + +This guide demonstrates how to use the **Smallest AI API** with telephony call functionality. It sets up a local FastAPI server to stream audio data and uses **ngrok** to expose the server to the public for testing. + +You can access the code for this example on GitHub [here ](https://github.com/smallest-inc/waves-examples/tree/main/lightning/telephone_example/vonage_example). + + +## Prerequisites + +Before you begin, ensure you have the following: + +- **Python 3.8+** installed +- **ngrok** for tunneling local servers to the internet. You can download ngrok from [here](https://ngrok.com/download). 
+- **Vonage** account to handle calls and text-to-speech. + +## Setup Steps + +### 1. Configure Environment Variables + +Start by creating a `.env` file in your project directory with the necessary API keys. This configuration is essential for interacting with **Smallest.ai** and **Vonage**. + +```bash +SMALLEST_API_KEY=... +VONAGE_APPLICATION_ID=... +``` + +Additionally, ensure that the private key is added to `secrets/private.key`. + +### 2. Set Up Ngrok + +**Ngrok** is used to expose your local FastAPI server to the public internet. Follow these steps: + +1. Install **ngrok** (if not already installed). +2. Expose your FastAPI server for Vonage by running: + +- For Vonage: +```bash +ngrok http 8000 +``` + +3. Once ngrok is running, it will generate a public URL (e.g., `https://abcd-1234-5678.ngrok.io`). + +### 3. Install Dependencies + +To run the example code, you’ll need to install the required dependencies. Run the following command: + +```bash +pip install -r requirements.txt +``` + +### 4. Run the FastAPI Application + +Now, you can start the FastAPI server, which will handle the streaming of audio. + +Run the Vonage server: + +```bash +python vonage_example/vonage_app.py +``` + +### 5. Run the Phonetic Call Client + +Now that the FastAPI server is up and running, it's time to make a phonetic call. This client connects to the server, and you can test the audio stream with the desired telephony platform. + +Update the phone numbers and ngrok URL in the script, then run: + +```bash +python vonage_example/vonage_make_call.py +``` + +### 6. Testing the Call + +- Once the client script is executed, a call will be placed to the provided number with phonetic audio. +- Vonage will handle the audio and make the call based on the provided parameters. + +### 7. Accessing the Public URL + +If you’ve successfully run ngrok, your application will be accessible via a public URL provided by ngrok, such as `https://abcd-1234-5678.ngrok.io`. 
+ +This public URL will allow external services like Vonage to interact with your server. + +--- + +## Notes + +- **Public URL Requirement:** If you already have a public URL (e.g., from deploying the app), you don't need to use ngrok. +- **API Keys:** Ensure your `.env` file contains all required keys for **Vonage** to ensure successful API calls. +- **Phone Numbers:** Update the client scripts with valid phone numbers to receive the call. +- **Testing:** Use ngrok for easy testing in a local environment before deploying the application. + +--- + +## Troubleshooting + +If you face any issues during setup or while making calls, ensure the following: + +- **Correct API keys**: Double-check the credentials in your `.env` file. +- **Ngrok URL**: Ensure that ngrok is running and the correct URL is used in the scripts. +- **Dependencies**: Ensure all dependencies are installed correctly via `pip install -r requirements.txt`. + +If issues persist, you can reach out to the [Smallest AI support team](https://discord.gg/Ub25S48hSf) or raise an issue on the [GitHub repository](https://github.com/smallest-inc/waves-examples). + +--- + +This setup provides seamless testing of phonetic calls using the **Smallest AI API** and **Vonage** for real-time voice interaction. \ No newline at end of file diff --git a/fern/products/waves/versions/v4.0.0/introduction/introduction.mdx b/fern/products/waves/pages/v3.0.1/introduction/introduction.mdx similarity index 82% rename from fern/products/waves/versions/v4.0.0/introduction/introduction.mdx rename to fern/products/waves/pages/v3.0.1/introduction/introduction.mdx index d42681b..6c43829 100644 --- a/fern/products/waves/versions/v4.0.0/introduction/introduction.mdx +++ b/fern/products/waves/pages/v3.0.1/introduction/introduction.mdx @@ -6,7 +6,7 @@ icon: "globe" ## About Waves -Welcome to [Waves](https://waves.smallest.ai/), the cutting-edge platform designed to deliver real-time, hyper-realistic text-to-speech solutions. 
Our platform leverages advanced AI models to provide seamless and natural-sounding speech synthesis, catering to a wide range of applications including voicebots, interactive systems, and more. +Welcome to [Waves](https://app.smallest.ai/waves/studio/create), the cutting-edge platform designed to deliver real-time, hyper-realistic text-to-speech solutions. Our platform leverages advanced AI models to provide seamless and natural-sounding speech synthesis, catering to a wide range of applications including voicebots, interactive systems, and more. ### Key Features diff --git a/fern/products/waves/pages/v3.0.1/product/projects.mdx b/fern/products/waves/pages/v3.0.1/product/projects.mdx new file mode 100644 index 0000000..752a469 --- /dev/null +++ b/fern/products/waves/pages/v3.0.1/product/projects.mdx @@ -0,0 +1,151 @@ +--- +title: 'Projects' +description: 'Create and manage your projects in Waves.' +icon: 'sheet-plastic' +--- + +Projects + +## Introduction + +Welcome to the official documentation for our text-to-speech (TTS) project. Our platform is a state-of-the-art audio synthesis tool designed to convert written text into high-quality, natural-sounding speech. It is particularly useful for content creators, authors, educators, and businesses looking to create voice-driven experiences efficiently. + +## Key Features + + + + Multiple Voices + Access a diverse selection of AI-generated voices tailored for different use cases. Choose from various genders, age groups, and accents to find the perfect match for your project. + +
    +
- Click on a voice avatar to preview it.
- Click the + icon to add it to your project.
+ +
+ + Drag-and-Drop Content Management + Easily organize your content with an intuitive block-based editing system. Simply click and drag to rearrange content blocks for a seamless editing experience. + + + Easily transform text into speech with flexible conversion options. Generate audio for the entire text or select specific blocks as needed. +
    +
- Click on the play button to preview the generated audio.
- Click on the Generate Selected button to convert the selected text to speech.
- Click on the Generate Till End button to convert the entire text to speech.
+
+ + Organize your content into chapters for better management and navigation. + + + Easily integrate cloned voices into your projects. Simply add the cloned voice to your project and start using it in your content seamlessly. + + + Fine-tune your voice output with advanced settings. Use the gear icon to adjust speed, consistency, and enhancement options for a more customized experience. + + +{' '} + + +

+ Protect finalized content from unintended modifications by locking + blocks. Ensure important sections remain unchanged. +

+
+ + +

+ Easily download individual voice outputs with a single click. + Streamline your workflow with quick export options. +

+
+ +
+ +## Use Cases + +### Content Creation + +- Transform blog posts, articles, and scripts into engaging audio content. +- Enhance storytelling with dynamic voice narration. + +### Education and Accessibility + +- Convert textbooks and educational materials into audio formats. +- Improve accessibility for visually impaired users. + +### Business and Marketing + +- Create audio advertisements and voiceovers for promotional content. +- Generate automated voice responses for customer support systems. + +## Getting Started + +### Installation & Setup + +1. Register for an account and + log into the platform. +2. Create a new project or open an existing one. +3. Add or paste your text content to the project. +4. Select a voice, adjust settings, and generate speech. +5. Use the drag-and-drop editor to organize your content. +6. Export the final output in your preferred format. + +### Best Practices + +- Use chapters to organize your content. +- Lock finalized blocks to prevent accidental edits. +- Experiment with different voice settings for the best results. +- Use the preview to check the generated speech before exporting. +- Use the clone feature to create a new voice with your own style. +- Use the gear icon to adjust speed, consistency, and enhancement options for a more customized experience. + +### Get in Touch + + + + Drop a mail at support@smallest.ai to talk to sales if you are + looking for enterprise support. + + + +Join our community and stay connected with the latest developments: + +- **Support**: Reach out to our support team at [support@smallest.ai](mailto:support@smallest.ai) for any queries or assistance. +- **Community**: Join our [Discord server](https://discord.gg/5evETqguJs) to connect with other developers and get real-time support. +- **Blog**: Follow our [blog](https://smallest.ai/blog) for insights, tutorials, and updates. + +Thank you for choosing Waves. We look forward to helping you create amazing voice experiences! 
diff --git a/fern/products/waves/pages/text-to-speech/get-voice-models-langs.mdx b/fern/products/waves/pages/v3.0.1/text-to-speech/get-voice-models-langs.mdx similarity index 94% rename from fern/products/waves/pages/text-to-speech/get-voice-models-langs.mdx rename to fern/products/waves/pages/v3.0.1/text-to-speech/get-voice-models-langs.mdx index 8fa85aa..c8dba02 100644 --- a/fern/products/waves/pages/text-to-speech/get-voice-models-langs.mdx +++ b/fern/products/waves/pages/v3.0.1/text-to-speech/get-voice-models-langs.mdx @@ -11,7 +11,7 @@ In this tutorial, you will learn how to use the Smallest AI platform to fetch th Before you begin, ensure you have the following: - Python (3.9 or higher) installed on your machine. -- An API key from the Smallest AI platform (sign up [here](https://waves.smallest.ai)). +- An API key from the Smallest AI platform (sign up [here](https://app.smallest.ai/waves/studio/create)). - The Smallest AI Python SDK installed. If you haven't installed it yet, follow the instructions below: ### Install the SDK @@ -30,10 +30,10 @@ The Smallest AI SDK allows you to query the available languages, voices, and mod ```python python -from smallest import Smallest +from smallestai.waves import WavesClient def main(): - client = Smallest(api_key="YOUR_API_KEY") + client = WavesClient(api_key="YOUR_API_KEY") # Get available languages languages = client.get_languages() diff --git a/fern/products/waves/pages/v3.0.1/text-to-speech/how-to-tts.mdx b/fern/products/waves/pages/v3.0.1/text-to-speech/how-to-tts.mdx new file mode 100644 index 0000000..0dd3ece --- /dev/null +++ b/fern/products/waves/pages/v3.0.1/text-to-speech/how-to-tts.mdx @@ -0,0 +1,113 @@ +--- +title: "How to use Text to Speech" +description: "Learn how to synthesize your text using the Smallest AI API." +icon: "wave-square" +--- + +In this tutorial, you will learn how to use the Smallest AI platform to synthesize text to speech both synchronously and asynchronously. 
By the end of this tutorial, you will be able to convert text into speech using our API. + +You can access the source code for the Python SDK on our [GitHub repository](https://github.com/smallest-inc/smallest-python-sdk). + +## Requirements + +Before you begin, ensure you have the following: + +- Python (3.9 or higher) installed on your machine. +- An API key from the Smallest AI platform (sign up [here](https://app.smallest.ai/waves/studio/create)). + +## Setup + +### Install our SDK +```bash +pip install smallestai +``` + +### Set your API key as an environment variable +```bash +export SMALLEST_API_KEY=YOUR_API_KEY +``` + +## Synchronous Text to Speech + +Here is an example of how to synthesize text to speech synchronously: + +If you are using a `voice_id` corresponding to a voice clone, you should explicitly set the `model` parameter to `"lightning-large"` in the `Smallest` client or payload. + + +```python python +from smallestai.waves import WavesClient + +def main(): + client = WavesClient(api_key="SMALLEST_API_KEY") + client.synthesize( + "Hello, this is a test for sync synthesis function.", + save_as="sync_synthesize.wav" + ) + +if __name__ == "__main__": + main() +``` + + +## Asynchronous Text to Speech + +Here is an example of how to synthesize text to speech asynchronously: + +If you are using a `voice_id` corresponding to a voice clone, you should explicitly set the `model` parameter to `"lightning-large"` in the `Smallest` client or payload. + + +```python python +import asyncio +import aiofiles +from smallestai.waves import AsyncWavesClient + +async def main(): + client = AsyncWavesClient(api_key="SMALLEST_API_KEY") + async with client as tts: + audio_bytes = await tts.synthesize("Hello, this is a test of the async synthesis function.") + async with aiofiles.open("async_synthesize.wav", "wb") as f: + await f.write(audio_bytes) # alternatively you can use the `save_as` parameter. 
+ +if __name__ == "__main__": + asyncio.run(main()) +``` + + +## Parameters + +- `api_key` (str): Your API key (can be set via SMALLEST_API_KEY environment variable) +- `model` (str): TTS model to use (default: `lightning`, available: `lightning`, `lightning-large`) +- `sample_rate` (int): Audio sample rate (default: 24000) +- `voice_id` (str): Voice ID (default: "emily") +- `speed` (float): Speech speed multiplier (default: 1.0) +- `consistency` (float): Controls word repetition and skipping. Decrease it to prevent skipped words, and increase it to prevent repetition. Only supported in `lightning-large` model. (default: 0.5) +- `similarity` (float): Controls the similarity between the synthesized audio and the reference audio. Increase it to make the speech more similar to the reference audio. Only supported in `lightning-large` model. (default: 0) +- `enhancement` (boolean): Enhances speech quality at the cost of increased latency. Only supported in `lightning-large` model. (default: False) +- `add_wav_header` (boolean): Whether to add a WAV header to the output audio. (default: False) + + +These parameters are part of the `WavesClient` and `AsyncWavesClient` instance. They can be set when creating the instance (as shown above). However, the `synthesize` function also accepts `kwargs`, allowing you to override any of these parameters on a per-request basis. 
+ +For example, you can modify the speech speed and sample rate just for a particular synthesis request: + +```python python +audio_bytes = client.synthesize( + "Modern problems don't always require modern solutions.", + speed=1.5, # Overrides default speed + sample_rate=16000 # Overrides default sample rate +) +``` + + +## Conclusion + +The Smallest AI Text-to-Speech SDK offers both synchronous and asynchronous options, catering to a variety of use cases: + +- **Synchronous TTS**: Ideal for applications where immediate responses are needed, such as real-time voice assistants, chatbot integrations, or interactive voice systems. It ensures that the audio is generated and available instantly for use within the same execution flow. + +- **Asynchronous TTS**: Designed for scenarios that involve handling multiple requests or large-scale processing. For example, if you need to convert multiple text inputs into speech concurrently, such as creating audio files for an audiobook or processing a batch of text-based announcements, asynchronous TTS allows you to execute these tasks efficiently without blocking other operations. This approach ensures scalability and optimal resource utilization, particularly in environments where time and performance are critical. + +By understanding these modes and tailoring their usage to specific requirements, you can build highly responsive, scalable, and efficient solutions using the Smallest AI platform. + + +If you have any questions or suggestions, please create an issue on the [smallest-python-sdk GitHub ](https://github.com/smallest-inc/smallest-python-sdk). 
diff --git a/fern/products/waves/pages/v3.0.1/text-to-speech/llm-to-tts.mdx b/fern/products/waves/pages/v3.0.1/text-to-speech/llm-to-tts.mdx new file mode 100644 index 0000000..68cc00b --- /dev/null +++ b/fern/products/waves/pages/v3.0.1/text-to-speech/llm-to-tts.mdx @@ -0,0 +1,144 @@ +--- +title: "How to stream LLM to TTS in Realtime" +description: "Learn how to convert streaming Text to Speech in Realtime." +icon: "bars-staggered" +--- + +## Synthesize streaming Text to Speech + +The `TextToAudioStream` class provides real-time text-to-speech (TTS) conversion by streaming text directly into audio output. This feature is particularly useful in applications that require instant feedback, such as voice assistants, live captioning systems, or interactive chatbots, where text is continuously generated and needs to be converted into speech on-the-fly. + +This example demonstrates how to stream text from a large language model (LLM) and process it into speech, utilizing the `TextToAudioStream` class with both synchronous and asynchronous TTS engines. + +### Example Overview + +In this example, text is generated using an LLM (Groq in this case, you can use any LLM), and the generated text is then passed to a TTS system (Smallest API) for real-time audio synthesis. The audio is saved as a `.wav` file. This entire process happens asynchronously to ensure smooth performance, especially when dealing with large or continuous streams of text. + +### Code Walkthrough +#### Stream through a WebSocket + +If you are using a `voice_id` corresponding to a voice clone, you should explicitly set the `model` parameter to `"lightning-large"` in the `Smallest` client or payload. 
+ + +```python python +import asyncio +import websockets +from groq import Groq +from smallestai.waves import WavesClient, TextToAudioStream + +# Initialize Groq (LLM) and Smallest (TTS) instances +llm = Groq(api_key="GROQ_API_KEY") +tts = WavesClient(api_key="SMALLEST_API_KEY") +WEBSOCKET_URL = "wss://echo.websocket.events" # Mock WebSocket server + +# Async function to stream text generation from LLM +async def generate_text(prompt): + completion = llm.chat.completions.create( + messages=[{"role": "user", "content": prompt}], + model="llama3-8b-8192", + stream=True, + ) + + # Yield text as it is generated + for chunk in completion: + text = chunk.choices[0].delta.content + if text: + yield text + +# Main function to run the process +async def main(): + # Initialize the TTS processor + processor = TextToAudioStream(tts_instance=tts) + + # Generate text from LLM + llm_output = generate_text("Explain text to speech like I am five in 5 sentences.") + + # Stream the generated speech through a websocket + async with websockets.connect(WEBSOCKET_URL) as ws: + print("Connected to WebSocket server.") + + # Stream the generated speech + async for audio_chunk in processor.process(llm_output): + await ws.send(audio_chunk) # Send audio chunk + echoed_data = await ws.recv() # Receive the echoed message + print("Received from server:", echoed_data[:20], "...") # Print first 20 bytes + + print("WebSocket connection closed.") + +if __name__ == "__main__": + asyncio.run(main()) +``` + + +#### Saving to a file + +If you are using a `voice_id` corresponding to a voice clone, you should explicitly set the `model` parameter to `"lightning-large"` in the `Smallest` client or payload. 
+ + +```python python +import wave +import asyncio +from groq import Groq +from smallestai.waves import WavesClient, TextToAudioStream + +# Initialize Groq (LLM) and Smallest (TTS) instances +llm = Groq(api_key="GROQ_API_KEY") +tts = WavesClient(api_key="SMALLEST_API_KEY") + +# Async function to stream text generation from LLM +async def generate_text(prompt): + completion = llm.chat.completions.create( + messages=[{"role": "user", "content": prompt}], + model="llama3-8b-8192", + stream=True, + ) + + # Yield text as it is generated + for chunk in completion: + text = chunk.choices[0].delta.content + if text: + yield text + +# Async function to save generated audio as a WAV file +async def save_audio_to_wav(file_path, processor, llm_output): + with wave.open(file_path, "wb") as wav_file: + wav_file.setnchannels(1) # Mono audio + wav_file.setsampwidth(2) # 16-bit samples + wav_file.setframerate(24000) # 24 kHz sample rate + + # Process audio chunks and write them to the WAV file + async for audio_chunk in processor.process(llm_output): + wav_file.writeframes(audio_chunk) + +# Main asynchronous function to run the process +async def main(): + # Initialize the TTS processor + processor = TextToAudioStream(tts_instance=tts) + + # Generate text asynchronously + llm_output = generate_text("Explain text to speech like I am five in 5 sentences.") + + # Save the generated speech to a WAV file + await save_audio_to_wav("llm_to_speech.wav", processor, llm_output) + +if __name__ == "__main__": + asyncio.run(main()) +``` + + +### Parameters + +- `tts_instance`: The instance of the TTS engine (either `Smallest` or `AsyncSmallest`) used to generate speech from the text. +- `queue_timeout`: The wait time (in seconds) for new text to be received before attempting to generate speech. Default is 5.0 seconds. +- `max_retries`: The maximum number of retries for failed synthesis attempts. Default is 3. 
+ +### Output Format + +The `TextToAudioStream` processor streams raw audio data without WAV headers for better streaming efficiency. These raw audio chunks can be: + +- Played directly through an audio device for real-time feedback. +- Saved to a file (e.g., `.wav` or `.mp3`) for later use. +- Streamed over a network to a client device or service. +- Further processed for additional applications, such as speech analytics or audio effects. + +This approach allows you to handle continuous streams of text and convert them into real-time speech, making it ideal for interactive applications where immediate audio feedback is crucial. diff --git a/fern/products/waves/pages/v3.0.1/voice-cloning/how-to-delete-vc.mdx b/fern/products/waves/pages/v3.0.1/voice-cloning/how-to-delete-vc.mdx new file mode 100644 index 0000000..0d414ad --- /dev/null +++ b/fern/products/waves/pages/v3.0.1/voice-cloning/how-to-delete-vc.mdx @@ -0,0 +1,73 @@ +--- +title: "How to delete your Voice Clone using Python SDK" +icon: "compact-disc" +--- + +In this tutorial, you will learn how to use the Smallest AI platform to delete your voice clone. By the end of this tutorial, you will be able to delete your voice clone using our API. + +You can access the source code for the Python SDK on our [GitHub repository](https://github.com/smallest-inc/smallest-python-sdk). + +## Requirements + +Before you begin, ensure you have the following: + +- Python (3.9 or higher) installed on your machine. +- An API key from the Smallest AI platform (sign up [here](https://app.smallest.ai/waves/studio/create)). + +## Setup + +### Install our SDK +```bash +pip install smallestai +``` + +Set your API key as an environment variable. +```bash +export SMALLEST_API_KEY=YOUR_API_KEY +``` + +## Delete your Voice +The Smallest AI SDK allows you to delete your cloned voice. This feature is available both synchronously and asynchronously, making it flexible for different use cases. Below are examples of how to use this functionality. 
+ +### Synchronously + + +```python python +from smallestai.waves import WavesClient + +def main(): + client = WavesClient(api_key="SMALLEST_API_KEY") + res = client.delete_voice(voice_id="voice_id") + print(res) + +if __name__ == "__main__": + main() +``` + + +### Asynchronously + +```python python +import asyncio +from smallestai.waves import AsyncWavesClient + +async def main(): + client = AsyncWavesClient(api_key="SMALLEST_API_KEY") + res = await client.delete_voice(voice_id="voice_id") + print(res) + +if __name__ == "__main__": + asyncio.run(main()) +``` + + +## Parameters + +- `api_key`: Your API key (can be set via SMALLEST_API_KEY environment variable). +- `voice_id`: Unique Voice ID of the voice to be deleted. + + +If you have any questions or run into any issues, our community is here to help! + +- Join our [Discord server](https://discord.gg/ywShEyXHBW) to connect with other developers and get real-time support. +- Reach out to our team via email: [support@smallest.ai](mailto:support@smallest.ai). diff --git a/fern/products/waves/pages/v3.0.1/voice-cloning/how-to-pvc.mdx b/fern/products/waves/pages/v3.0.1/voice-cloning/how-to-pvc.mdx new file mode 100644 index 0000000..87108b2 --- /dev/null +++ b/fern/products/waves/pages/v3.0.1/voice-cloning/how-to-pvc.mdx @@ -0,0 +1,31 @@ +--- +title: "How to Create a Professional Voice Clone" +description: "Train our model on your voice and generate a high-quality professional voice clone." +icon: "compact-disc" +--- + +In this tutorial, you will learn how to use the Smallest AI platform to create a professional voice clone by uploading an audio file. + +# Creating a Professional Voice Clone + +1. **Go to the Smallest AI Platform** + Navigate to [smallest.ai](https://app.smallest.ai/waves/voice-cloning) and click on **Create New**. In the modal that appears, select **Professional Voice Clone**. This will direct you to the setup page: + + ![Voice Clone Setup](../../images/pvc_page.png) + +2. 
**Upload Your Audio File** + Follow the instructions provided on the page to upload your audio file. Ensure that the recording is clear for the best results. + +3. **Enable Denoise (Optional)** + If your audio contains background noise, toggle **Denoise** on to improve quality. + +4. **Wait for Model to get trained** + The voice cloning process typically takes **3 to 6 hours**, but may take longer depending on demand. The Voice Clone will be available to Use on platform and you will also get mail for that. + +### **Note:** +**Creation of Professional Voice Clones (PVC) is not available via the SDK** due to the requirement of larger audio files. Please use the Smallest AI platform for this process. + +If you have any questions or run into any issues, our community is here to help! + +- Join our [Discord server](https://discord.gg/ywShEyXHBW) to connect with other developers and get real-time support. +- Reach out to our team via email: [support@smallest.ai](mailto:support@smallest.ai). \ No newline at end of file diff --git a/fern/products/waves/pages/v3.0.1/voice-cloning/how-to-vc-ui.mdx b/fern/products/waves/pages/v3.0.1/voice-cloning/how-to-vc-ui.mdx new file mode 100644 index 0000000..1970114 --- /dev/null +++ b/fern/products/waves/pages/v3.0.1/voice-cloning/how-to-vc-ui.mdx @@ -0,0 +1,38 @@ +--- +title: "How to Create an Instant Voice Clone" +icon: "compact-disc" +--- + +In this tutorial, you will learn how to use the **Smallest AI** platform to create an **Instant Voice Clone** using a short audio sample. This process is quick and efficient, allowing you to generate high-quality voice clones in minutes. + +# Creating an Instant Voice Clone + + 1️. **Go to the Smallest AI Platform** + - Navigate to **[smallest.ai](https://app.smallest.ai/waves/voice-cloning)** and click on **Create New**. + - In the modal that appears, select **Instant Voice Clone**. + + ![Voice Clone Setup](../../images/ivc-image-1.png) + + 2️. 
**Upload Your Clean Reference Audio** + - Select a **short, high-quality** audio clip (5-15 seconds). + - Ensure the recording is **clear and noise-free** for the best results. + - Follow the recommended **[best practices](/v3.0.1/content/best-practices/vc-best-practices)** to maximize quality. + + ![Upload your clean reference audio](../../images/ivc-image-2.png) + + 3️. **Review Generated Testing Examples** + - The platform will process your reference audio and generate **sample outputs**. + - Listen to the test clips to verify the voice match. + + ![Testing Examples](../../images/ivc-image-3.png) + + 4️. **Customize & Save Your Voice Clone** + - Fill in details like **Name, Tags, and Description** for your voice. + - Click **Generate** to store your cloned voice. + + ![Create Voice](../../images/ivc-image-4.png) + +If you have any questions or run into any issues, our community is here to help! + +- Join our [Discord server](https://discord.gg/ywShEyXHBW) to connect with other developers and get real-time support. +- Reach out to our team via email: [support@smallest.ai](mailto:support@smallest.ai). diff --git a/fern/products/waves/pages/v3.0.1/voice-cloning/how-to-vc.mdx b/fern/products/waves/pages/v3.0.1/voice-cloning/how-to-vc.mdx new file mode 100644 index 0000000..2f8d162 --- /dev/null +++ b/fern/products/waves/pages/v3.0.1/voice-cloning/how-to-vc.mdx @@ -0,0 +1,88 @@ +--- +title: "How to create an Instant Voice Clone using Python SDK" +icon: "compact-disc" +--- + +In this tutorial, you will learn how to use the Smallest AI platform to create a voice by uploading an audio file. By the end of this tutorial, you will be able to clone your voice using our API. + +You can access the source code for the Python SDK on our [GitHub repository](https://github.com/smallest-inc/smallest-python-sdk). + +## Requirements + +Before you begin, ensure you have the following: + +- Python (3.9 or higher) installed on your machine. 
+- An API key from the Smallest AI platform (sign up [here](https://app.smallest.ai/waves/studio/create)). + +## Setup + +### Install our SDK +```bash +pip install smallestai +``` + +Set your API key as an environment variable. +```bash +export SMALLEST_API_KEY=YOUR_API_KEY +``` + +## Add your Voice +The Smallest AI SDK allows you to clone your voice by uploading an audio file. This feature is available both synchronously and asynchronously, making it flexible for different use cases. Below are examples of how to use this functionality. + +### Synchronously + + +```python python +from smallestai.waves import WavesClient + +def main(): + client = WavesClient(api_key="YOUR_API_KEY") + res = client.add_voice(display_name="My Voice", file_path="my_voice.wav") + print(res) + +if __name__ == "__main__": + main() +``` + + +### Asynchronously + +```python python +import asyncio +from smallestai.waves import AsyncWavesClient + +async def main(): + client = AsyncWavesClient(api_key="YOUR_API_KEY") + res = await client.add_voice(display_name="My Voice", file_path="my_voice.wav") + print(res) + +if __name__ == "__main__": + asyncio.run(main()) +``` + + +## Parameters + +- `api_key`: Your API key (can be set via SMALLEST_API_KEY environment variable). +- `display_name`: Name of the voice to be created. +- `file_path`: Path to the audio file to be cloned. + +These parameters are part of the add_voice function. They can be set when calling the function as shown above. + +## Get All Cloned Voices + +Once you have cloned your voices, you can retrieve a list of all cloned voices associated with your account using the following code: + +```python python +from smallestai.waves import WavesClient + +client = WavesClient(api_key="YOUR_API_KEY") +print(f"Available Voices: {client.get_cloned_voices()}") +``` + + + +If you have any questions or run into any issues, our community is here to help! 
+ +- Join our [Discord server](https://discord.gg/ywShEyXHBW) to connect with other developers and get real-time support. +- Reach out to our team via email: [support@smallest.ai](mailto:support@smallest.ai). diff --git a/fern/products/waves/pages/v3.0.1/voice-cloning/images/agent-dashboard-conversions.png b/fern/products/waves/pages/v3.0.1/voice-cloning/images/agent-dashboard-conversions.png new file mode 100644 index 0000000..edf540d Binary files /dev/null and b/fern/products/waves/pages/v3.0.1/voice-cloning/images/agent-dashboard-conversions.png differ diff --git a/fern/products/waves/pages/v3.0.1/voice-cloning/images/agent-dashboard.png b/fern/products/waves/pages/v3.0.1/voice-cloning/images/agent-dashboard.png new file mode 100644 index 0000000..888291a Binary files /dev/null and b/fern/products/waves/pages/v3.0.1/voice-cloning/images/agent-dashboard.png differ diff --git a/fern/products/waves/pages/v3.0.1/voice-cloning/images/checks-passed.png b/fern/products/waves/pages/v3.0.1/voice-cloning/images/checks-passed.png new file mode 100644 index 0000000..3303c77 Binary files /dev/null and b/fern/products/waves/pages/v3.0.1/voice-cloning/images/checks-passed.png differ diff --git a/fern/products/waves/pages/v3.0.1/voice-cloning/images/conversions-list.png b/fern/products/waves/pages/v3.0.1/voice-cloning/images/conversions-list.png new file mode 100644 index 0000000..aa46a94 Binary files /dev/null and b/fern/products/waves/pages/v3.0.1/voice-cloning/images/conversions-list.png differ diff --git a/fern/products/waves/pages/v3.0.1/voice-cloning/images/create-audience.png b/fern/products/waves/pages/v3.0.1/voice-cloning/images/create-audience.png new file mode 100644 index 0000000..e1e062d Binary files /dev/null and b/fern/products/waves/pages/v3.0.1/voice-cloning/images/create-audience.png differ diff --git a/fern/products/waves/pages/v3.0.1/voice-cloning/images/create-campaign.png b/fern/products/waves/pages/v3.0.1/voice-cloning/images/create-campaign.png new file 
mode 100644 index 0000000..a7efb9d Binary files /dev/null and b/fern/products/waves/pages/v3.0.1/voice-cloning/images/create-campaign.png differ diff --git a/fern/products/waves/pages/v3.0.1/voice-cloning/images/create-conversion.png b/fern/products/waves/pages/v3.0.1/voice-cloning/images/create-conversion.png new file mode 100644 index 0000000..2e95316 Binary files /dev/null and b/fern/products/waves/pages/v3.0.1/voice-cloning/images/create-conversion.png differ diff --git a/fern/products/waves/pages/v3.0.1/voice-cloning/images/download.svg b/fern/products/waves/pages/v3.0.1/voice-cloning/images/download.svg new file mode 100644 index 0000000..f41f94e --- /dev/null +++ b/fern/products/waves/pages/v3.0.1/voice-cloning/images/download.svg @@ -0,0 +1,3 @@ + + + diff --git a/fern/products/waves/pages/v3.0.1/voice-cloning/images/hero-dark.svg b/fern/products/waves/pages/v3.0.1/voice-cloning/images/hero-dark.svg new file mode 100644 index 0000000..c6a30e8 --- /dev/null +++ b/fern/products/waves/pages/v3.0.1/voice-cloning/images/hero-dark.svg @@ -0,0 +1,161 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/fern/products/waves/pages/v3.0.1/voice-cloning/images/hero-light.svg b/fern/products/waves/pages/v3.0.1/voice-cloning/images/hero-light.svg new file mode 100644 index 0000000..297d68f --- /dev/null +++ b/fern/products/waves/pages/v3.0.1/voice-cloning/images/hero-light.svg @@ -0,0 +1,155 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + diff --git a/fern/products/waves/pages/v3.0.1/voice-cloning/images/ivc-image-1.png b/fern/products/waves/pages/v3.0.1/voice-cloning/images/ivc-image-1.png new file mode 100644 index 0000000..5c18503 Binary files /dev/null and b/fern/products/waves/pages/v3.0.1/voice-cloning/images/ivc-image-1.png differ diff --git a/fern/products/waves/pages/v3.0.1/voice-cloning/images/ivc-image-2.png b/fern/products/waves/pages/v3.0.1/voice-cloning/images/ivc-image-2.png new file mode 100644 index 0000000..b4d3f32 Binary files /dev/null and b/fern/products/waves/pages/v3.0.1/voice-cloning/images/ivc-image-2.png differ diff --git a/fern/products/waves/pages/v3.0.1/voice-cloning/images/ivc-image-3.png b/fern/products/waves/pages/v3.0.1/voice-cloning/images/ivc-image-3.png new file mode 100644 index 0000000..0cf238e Binary files /dev/null and b/fern/products/waves/pages/v3.0.1/voice-cloning/images/ivc-image-3.png differ diff --git a/fern/products/waves/pages/v3.0.1/voice-cloning/images/ivc-image-4.png b/fern/products/waves/pages/v3.0.1/voice-cloning/images/ivc-image-4.png new file mode 100644 index 0000000..5ecf5d6 Binary files /dev/null and b/fern/products/waves/pages/v3.0.1/voice-cloning/images/ivc-image-4.png differ diff --git a/fern/products/waves/pages/v3.0.1/voice-cloning/images/lightning_cover.png b/fern/products/waves/pages/v3.0.1/voice-cloning/images/lightning_cover.png new file mode 100644 index 0000000..acaebb8 Binary files /dev/null and b/fern/products/waves/pages/v3.0.1/voice-cloning/images/lightning_cover.png differ diff --git a/fern/products/waves/pages/v3.0.1/voice-cloning/images/pvc_page.png b/fern/products/waves/pages/v3.0.1/voice-cloning/images/pvc_page.png new file mode 100644 index 0000000..8bf1c5e Binary files /dev/null and b/fern/products/waves/pages/v3.0.1/voice-cloning/images/pvc_page.png differ diff --git a/fern/products/waves/pages/v3.0.1/voice-cloning/images/save-campaign.png b/fern/products/waves/pages/v3.0.1/voice-cloning/images/save-campaign.png 
new file mode 100644 index 0000000..972b0df Binary files /dev/null and b/fern/products/waves/pages/v3.0.1/voice-cloning/images/save-campaign.png differ diff --git a/fern/products/waves/pages/v3.0.1/voice-cloning/images/smallest_cover.jpeg b/fern/products/waves/pages/v3.0.1/voice-cloning/images/smallest_cover.jpeg new file mode 100644 index 0000000..2f584f4 Binary files /dev/null and b/fern/products/waves/pages/v3.0.1/voice-cloning/images/smallest_cover.jpeg differ diff --git a/fern/products/waves/pages/v3.0.1/voice-cloning/images/started-campaign.png b/fern/products/waves/pages/v3.0.1/voice-cloning/images/started-campaign.png new file mode 100644 index 0000000..0c1857a Binary files /dev/null and b/fern/products/waves/pages/v3.0.1/voice-cloning/images/started-campaign.png differ diff --git a/fern/products/waves/pages/v3.0.1/voice-cloning/images/test-agent.png b/fern/products/waves/pages/v3.0.1/voice-cloning/images/test-agent.png new file mode 100644 index 0000000..f90fbec Binary files /dev/null and b/fern/products/waves/pages/v3.0.1/voice-cloning/images/test-agent.png differ diff --git a/fern/products/waves/pages/v3.0.1/voice-cloning/images/thunder.png b/fern/products/waves/pages/v3.0.1/voice-cloning/images/thunder.png new file mode 100644 index 0000000..2a64ee9 Binary files /dev/null and b/fern/products/waves/pages/v3.0.1/voice-cloning/images/thunder.png differ diff --git a/fern/products/waves/pages/v3.0.1/voice-cloning/images/thunder.svg b/fern/products/waves/pages/v3.0.1/voice-cloning/images/thunder.svg new file mode 100644 index 0000000..35f6a0e --- /dev/null +++ b/fern/products/waves/pages/v3.0.1/voice-cloning/images/thunder.svg @@ -0,0 +1,26 @@ + + + + + + + + + + image/svg+xml + + + + + Openclipart + + + + + + + + + + + diff --git a/fern/products/waves/pages/v3.0.1/voice-cloning/types-of-clone.mdx b/fern/products/waves/pages/v3.0.1/voice-cloning/types-of-clone.mdx new file mode 100644 index 0000000..515f3fa --- /dev/null +++ 
b/fern/products/waves/pages/v3.0.1/voice-cloning/types-of-clone.mdx @@ -0,0 +1,28 @@ +--- +title: "Types of Cloning: Instant vs Professional" +description: "Train our model on your voice and generate a high-quality professional voice clone." +icon: "compact-disc" +--- + +## Instant vs Professional Cloning + +Voice cloning technology offers two primary methods: **Instant Cloning** and **Professional Voice Cloning (PVC)**. Each method varies in terms of time, accuracy, and overall quality. + +### **Instant Cloning** +- Requires **5 to 15 seconds** of audio. +- Provides a **quick and accessible** voice clone. +- Captures **basic voice characteristics** but lacks deep refinement. +- Best suited for casual applications where **speed matters more than accuracy**. + +### **Professional Voice Cloning (PVC)** +- Requires **at least 45 minutes** of high-quality recorded audio. +- Uses extensive training and **fine-tuning** for a near-perfect voice match. +- Captures **intonation, accent, emotions, and vocal nuances** accurately. +- Ideal for professional applications like **content creation, dubbing, and virtual assistants**. + +While **Instant Cloning** offers a fast and convenient solution, **Professional Voice Cloning** ensures a **high-fidelity, natural, and expressive** result tailored for professional use. + +If you have any questions or run into any issues, our community is here to help! + +- Join our [Discord server](https://discord.gg/ywShEyXHBW) to connect with other developers and get real-time support. +- Reach out to our team via email: [support@smallest.ai](mailto:support@smallest.ai). diff --git a/fern/products/waves/pages/v4.0.0/api-references/add-voice-api.mdx b/fern/products/waves/pages/v4.0.0/api-references/add-voice-api.mdx new file mode 100644 index 0000000..a31d8f8 --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/api-references/add-voice-api.mdx @@ -0,0 +1,37 @@ +--- +title: "Add your Voice" +description: "Add your voice using the Smallest AI API." 
+openapi: "POST /waves/v1/lightning-large/add_voice" +hideApiMarker: False +--- + +> **Note:** The Mintlify web UI currently does not correctly upload files in the API request. Below, we have provided code examples in Python and curl to help you test the API. + +## Sample cURL Example +```bash +curl -X POST https://api.smallest.ai/waves/v1/lightning-large/add_voice \ + -H "Authorization: Bearer YOUR_API_KEY" \ + -F "displayName=my voice" \ + -F "file=@my_voice.wav;type=audio/wav" +``` +## Sample Code Example + +Here is a Python example using the `requests` library: + +```python python +import requests +url = "https://api.smallest.ai/waves/v1/lightning-large/add_voice" +payload = {'displayName': 'my voice'} +files=[ + ('file', ('my_voice.wav', open('my_voice.wav','rb'), 'audio/wav')) +] +headers = { + 'Authorization': 'Bearer YOUR_API_KEY' +} +response = requests.request("POST", url, headers=headers, data=payload, files=files) +print(response.text) +``` + + +Replace `YOUR_API_KEY` with your actual API key and `example.wav` with the path to your audio file. + diff --git a/fern/products/waves/pages/v4.0.0/api-references/authentication.mdx b/fern/products/waves/pages/v4.0.0/api-references/authentication.mdx new file mode 100644 index 0000000..f69f4a7 --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/api-references/authentication.mdx @@ -0,0 +1,30 @@ +--- +title: Authentication +description: Learn how to authenticate requests using API keys. +icon: key +--- + +## Obtaining Your API Key + +To access our API, you need an API key. You can generate your API key by visiting the [Smallest AI Console](https://app.smallest.ai/dashboard/settings/apikeys?utm_source=documentation&utm_medium=api-references). + +## Using API Keys + +Once you have an API key, you must include it in the `Authorization` header of each request using the **Bearer Token** scheme. 
+ +### Example Request + +```http +GET /v1/some-endpoint HTTP/1.1 +Host: api.smallest.ai +Authorization: Bearer YOUR_API_KEY_HERE +``` + +## Security Best Practices + +- **Keep your API key private**: Do not expose your key in client-side applications. +- **Rotate keys periodically**: Regularly regenerate your API keys to enhance security. +- **Use environment variables**: Store API keys securely instead of hardcoding them in your source code. +- **Monitor API usage**: Keep track of your API calls to detect any unauthorized access. + +For more details, visit our [API Documentation](/waves/documentation/getting-started/authentication#model-overview). diff --git a/fern/products/waves/pages/v4.0.0/api-references/concurrency-and-limits.mdx b/fern/products/waves/pages/v4.0.0/api-references/concurrency-and-limits.mdx new file mode 100644 index 0000000..90df32b --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/api-references/concurrency-and-limits.mdx @@ -0,0 +1,66 @@ +--- +title: "Concurrency and Limits" +description: "Understanding API concurrency limits and rate limiting" +icon: "bolt" +--- + +## Overview + +Smallest AI API implements concurrency limits to ensure fair usage and optimal performance across all users. Understanding these limits is crucial for building robust applications that integrate with our services. + +## What is Concurrency? + +**Concurrency** refers to the number of simultaneous requests that can be processed at any given moment. 
In the context of Smallest AI API: + +- **1 TTS request concurrency**: Only 1 Text-to-Speech request can be actively processed at a time per account +- This applies to all TTS endpoints including Lightning v2, Lightning v3.1, and streaming variants + +## How Concurrency Works + +### HTTP API Requests + +- Each HTTP API call (POST request) counts as **1 concurrency unit** while being processed +- Once the request completes and returns a response, the concurrency slot is freed +- If you attempt to make a second HTTP request while one is already being processed, you'll receive a `429 Too Many Requests` error + +### WebSocket Connections + +- You can establish up to **5 WebSocket connections** simultaneously (5 × concurrency limit) +- However, only **1 concurrent request** can be processed across all WebSocket connections +- Additional requests sent through any WebSocket while one is being processed will be rejected with an error + +## Monitoring Your Usage + +### Dashboard Monitoring + +Check your usage patterns in the Waves dashboard to: + +- Monitor request patterns +- Identify peak usage times +- Plan capacity requirements + +Link to dashboard: https://app.smallest.ai/waves/developers/usage?utm_source=documentation&utm_medium=api-references + +## Parallel Conversational Bots + +For conversational applications, you can potentially support approximately **4x your concurrency limit** in parallel conversations. This is based on the typical speaking patterns where users don't speak continuously. + +### How It Works + +- **Concurrency limit**: 1 active TTS request +- **Potential parallel conversations**: ~4 conversations simultaneously +- **Reasoning**: In natural conversation, users speak intermittently with pauses between responses + + This is a **rough estimate** and may fail when multiple conversations + simultaneously request TTS generation. Your application must handle 429 + errors gracefully when the actual concurrency limit is reached. 
+ + +## Upgrading Limits + +If your application requires higher concurrency limits, please contact our support team to discuss enterprise plans with increased limits. + + + Concurrency limits are applied on a per-account basis. If you are using multiple models, all + models share the same concurrency limit. + diff --git a/fern/products/waves/pages/v4.0.0/api-references/delete-cloned-voice.mdx b/fern/products/waves/pages/v4.0.0/api-references/delete-cloned-voice.mdx new file mode 100644 index 0000000..d45948b --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/api-references/delete-cloned-voice.mdx @@ -0,0 +1,6 @@ +--- +title: 'Delete Cloned Voice' +description: 'Delete a cloned voice using the Smallest AI API.' +openapi: 'DELETE /waves/v1/lightning-large' +hideApiMarker: False +--- diff --git a/fern/products/waves/pages/v4.0.0/api-references/get-cloned-voices-api.mdx b/fern/products/waves/pages/v4.0.0/api-references/get-cloned-voices-api.mdx new file mode 100644 index 0000000..397899e --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/api-references/get-cloned-voices-api.mdx @@ -0,0 +1,6 @@ +--- +title: "Get your cloned Voices" +description: "Retrieve your cloned voices." +openapi: "GET /waves/v1/lightning-large/get_cloned_voices" +hideApiMarker: False +--- \ No newline at end of file diff --git a/fern/products/waves/pages/v4.0.0/api-references/get-voices-api.mdx b/fern/products/waves/pages/v4.0.0/api-references/get-voices-api.mdx new file mode 100644 index 0000000..d19f72e --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/api-references/get-voices-api.mdx @@ -0,0 +1,6 @@ +--- +title: "Get Voices" +description: "Get voices supported for a given model using the Smallest AI API." 
+openapi: "GET /waves/v1/{model}/get_voices" +hideApiMarker: False +--- \ No newline at end of file diff --git a/fern/products/waves/pages/v4.0.0/api-references/lightning-asr-ws.mdx b/fern/products/waves/pages/v4.0.0/api-references/lightning-asr-ws.mdx new file mode 100644 index 0000000..586c440 --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/api-references/lightning-asr-ws.mdx @@ -0,0 +1,269 @@ +--- +title: "Lightning (Realtime)" +description: The Lightning STT WebSocket API provides real-time speech-to-text transcription capabilities with streaming audio input. This API uses WebSocket to deliver transcription results as audio is processed, enabling low-latency transcription without waiting for the entire audio file to upload. Perfect for live transcription, voice assistants, and real-time communication systems that require immediate speech recognition. Supports multiple languages, word-level timestamps, sentence-level timestamps (utterances), PII and PCI redaction, cumulative transcripts, and more advanced features. +asyncapi: "/asyncapi-spec/lightning-asr-ws.json /waves/v1/lightning/get_text" +--- + +## Query Parameters + +The WebSocket connection accepts the following query parameters: + +### Audio Configuration + +| Parameter | Type | Default | Description | +| ------------- | ------ | ---------- | ------------------------------------------------------------------------------------- | +| `encoding` | string | `linear16` | Audio encoding format. Options: `linear16`, `linear32`, `alaw`, `mulaw`, `opus`, `ogg_opus` | +| `sample_rate` | string | `16000` | Audio sample rate in Hz. 
Options: `8000`, `16000`, `22050`, `24000`, `44100`, `48000` | + +### Language & Detection + +| Parameter | Type | Default | Description | +| ---------- | ------ | ------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `language` | string | `en` | Language code for transcription. Use `multi` for automatic language detection. Supported: `it`, `es`, `en`, `pt`, `hi`, `de`, `fr`, `uk`, `ru`, `kn`, `ml`, `pl`, `mr`, `gu`, `cs`, `sk`, `te`, `or`, `nl`, `bn`, `lv`, `et`, `ro`, `pa`, `fi`, `sv`, `bg`, `ta`, `hu`, `da`, `lt`, `mt`, `multi` | + +### Feature Flags + +| Parameter | Type | Default | Description | +| --------------------- | ------ | ------- | ------------------------------------------------------------------------ | +| `word_timestamps` | string | `true` | Include word-level timestamps in transcription. Options: `true`, `false` | +| `full_transcript` | string | `false` | Include cumulative transcript received till now in responses where `is_final` is `true`. Options: `true`, `false` | +| `sentence_timestamps` | string | `false` | Include sentence-level timestamps (utterances) in transcription. Options: `true`, `false` | +| `redact_pii` | string | `false` | Redact personally identifiable information (name, surname, address). Options: `true`, `false` | +| `redact_pci` | string | `false` | Redact payment card information (credit card, CVV, zip, account number). Options: `true`, `false` | +| `numerals` | string | `auto` | "Convert spoken numerals into digit form (e.g., 'twenty five' to '25') and `auto` enables automatic detection based on context. Options: `true`, `false`, `auto` | +| `diarize` | string | `false` | Enable speaker diarization to identify and label different speakers in the audio. 
When enabled, each word in the transcription includes `speaker` (integer ID) and `speaker_confidence` (float 0-1) fields. Options: `true`, `false` | + +### Webhook Configuration + +## Connection Flow + +### Example Connection URL + +```javascript +const url = new URL("wss://api.smallest.ai/waves/v1/lightning/get_text"); +url.searchParams.append("language", "en"); +url.searchParams.append("encoding", "linear16"); +url.searchParams.append("sample_rate", "16000"); +url.searchParams.append("word_timestamps", "true"); +url.searchParams.append("full_transcript", "true"); +url.searchParams.append("sentence_timestamps", "true"); +url.searchParams.append("redact_pii", "true"); +url.searchParams.append("redact_pci", "true"); +url.searchParams.append("numerals", "true"); +url.searchParams.append("diarize", "true"); + +const ws = new WebSocket(url.toString(), { + headers: { + Authorization: `Bearer ${API_KEY}`, + }, +}); +``` + +## Input Messages + +### Audio Data (Binary) + +Send raw audio bytes as binary WebSocket messages: + +```javascript +const audioChunk = new Uint8Array(4096); +ws.send(audioChunk); +``` + +### End Signal (JSON) + +Signal the end of audio stream. 
This is used to flush the transcription and receive the final response with `is_last=true`: + +```json +{ + "type": "finalize" +} +``` + +## Response Format + +The server responds with JSON messages containing transcription results: + +```json +{ + "session_id": "sess_12345abcde", + "transcript": "Hello, how are you?", + "is_final": true, + "is_last": false, + "language": "en" +} +``` + +### Response Fields + +| Field | Type | Description | +| ----------------- | ------- | -------------------------------------------------------------------- | +| `session_id` | string | Unique identifier for the transcription session | +| `transcript` | string | Partial or complete transcription text for the current segment | +| `is_final` | boolean | Indicates if this is the final transcription for the current segment | +| `is_last` | boolean | Indicates if this is the last transcription in the session | +| `language` | string | Detected primary language code, returns only when `is_final=True` | +| `languages` | array | List of languages detected in the audio included in Responses where `is_final` is `true` | + +### Optional Response Fields (Based on Query Parameters) + +| Field | Type | When Included | Description | +| ----------------- | ------ | ------------------------ | ---------------------------------------------------------------------- | +| `full_transcript` | string | `full_transcript=true` AND `is_final=true` | Complete transcription text accumulated till now. Only present in responses when `full_transcript=true` query parameter is set AND `is_final=true` | +| `words` | array | `word_timestamps=true` | Word-level timestamps with `word`, `start`, `end`, and `confidence` fields. 
When `diarize=true`, also includes `speaker` and `speaker_confidence` fields | +| `utterances` | array | `sentence_timestamps=true` | Sentence-level timestamps with `text`, `start`, and `end` fields | +| `redacted_entities`| array | `redact_pii=true` or `redact_pci=true` | List of redacted entity placeholders (e.g., `[FIRSTNAME_1]`, `[CREDITCARDCVV_1]`) | + +### Example Response with All Features + +```json +{ + "session_id": "sess_12345abcde", + "transcript": "[CREDITCARDCVV_1] and expiry [TIME_2].", + "is_final": true, + "is_last": true, + "full_transcript": "Hi, my name is [FIRSTNAME_1] [FIRSTNAME_2] You can reach me at [PHONENUMBER_1] and I paid using my Visa card [ZIPCODE_1] [ACCOUNTNUMBER_1] with [CREDITCARDCVV_1] and expiry [TIME_1].", + "language": "en", + "languages": ["en"], + "words": [ + { + "word": "[creditcardcvv_1]", + "start": 15.44, + "end": 17.36, + "confidence": 0.97, + "speaker": 0, + "speaker_confidence": 0.67 + }, + { + "word": "and", + "start": 18.0, + "end": 18.32, + "confidence": 0.94, + "speaker": 0, + "speaker_confidence": 0.76 + }, + { + "word": "expiry", + "start": 18.32, + "end": 19.2, + "confidence": 1.0, + "speaker": 0, + "speaker_confidence": 0.91 + }, + { + "word": "[time_2]", + "start": 19.2, + "end": 19.92, + "confidence": 0.91, + "speaker": 0, + "speaker_confidence": 0.82 + }, + ], + "utterances": [ + { + "text": "Hi, my name is Hans Miller.", + "start": 0.0, + "end": 2.64, + "speaker": 0 + }, + { + "text": "You can reach me at [PHONENUMBER_1], and I paid using my Visa card 4242 42424242 with CVV123 and expiry [TIME_1].", + "start": 2.64, + "end": 21.04, + "speaker": 0 + } + ], + "redacted_entities": [ + "[CREDITCARDCVV_1]", + "[TIME_2]" + ] +} +``` + +## Code Examples + + +```python python +import asyncio +import json +import argparse +import numpy as np +import websockets +import librosa +from urllib.parse import urlencode + +BASE_WS_URL = "wss://api.smallest.ai/waves/v1/lightning/get_text" + +async def 
stream_audio(audio_file, api_key, language="en", encoding="linear16", sample_rate=16000, word_timestamps="true", full_transcript="false", sentence_timestamps="false", redact_pii="false", redact_pci="false", numerals="auto", diarize="false"): + params = { + "language": language, + "encoding": encoding, + "sample_rate": sample_rate, + "word_timestamps": word_timestamps, + "full_transcript": full_transcript, + "sentence_timestamps": sentence_timestamps, + "redact_pii": redact_pii, + "redact_pci": redact_pci, + "numerals": numerals, + "diarize": diarize + } + ws_url = f"{BASE_WS_URL}?{urlencode(params)}" + + async with websockets.connect(ws_url, additional_headers={"Authorization": f"Bearer {api_key}"}) as ws: + print(f"Connected: {ws_url}") + + async def send(): + audio, _ = librosa.load(audio_file, sr=sample_rate, mono=True) + chunk_size = int(0.160 * sample_rate) + + for i in range(0, len(audio), chunk_size): + chunk = audio[i:i + chunk_size] + await ws.send((chunk * 32768.0).astype(np.int16).tobytes()) + await asyncio.sleep(len(chunk) / sample_rate) + + await ws.send(json.dumps({"type": "finalize"})) + + sender = asyncio.create_task(send()) + + async for message in ws: + data = json.loads(message) + print("Received:", json.dumps(data, indent=2)) + if data.get("is_last"): + break + + await sender + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("audio_file", nargs="?", default="path/to/audio.wav") + parser.add_argument("--api-key", "-k", default="your_api_key_here") + parser.add_argument("--language", "-l", default="en") + parser.add_argument("--encoding", "-e", default="linear16") + parser.add_argument("--sample-rate", "-sr", type=int, default=16000) + parser.add_argument("--word-timestamps", "-wt", default="true") + parser.add_argument("--full-transcript", "-ft", default="false") + parser.add_argument("--sentence-timestamps", "-st", default="false") + parser.add_argument("--redact-pii", default="false") + 
parser.add_argument("--redact-pci", default="false") + parser.add_argument("--numerals", default="auto") + parser.add_argument("--diarize", default="false") + + args = parser.parse_args() + asyncio.run(stream_audio( + args.audio_file, + args.api_key, + args.language, + args.encoding, + args.sample_rate, + args.word_timestamps, + args.full_transcript, + args.sentence_timestamps, + args.redact_pii, + args.redact_pci, + args.numerals, + args.diarize + )) +``` + + + diff --git a/fern/products/waves/pages/v4.0.0/api-references/lightning-asr.mdx b/fern/products/waves/pages/v4.0.0/api-references/lightning-asr.mdx new file mode 100644 index 0000000..be3d5a1 --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/api-references/lightning-asr.mdx @@ -0,0 +1,184 @@ +--- +title: "Lightning (Pre-Recorded)" +description: "Convert speech to text using file upload with the Lightning STT POST API" +openapi: "POST /waves/v1/lightning/get_text" +--- + +The STT POST API allows you to convert speech to text using two different input methods: + +1. **Raw Audio Bytes** (`application/octet-stream`) - Send raw audio data with all parameters as query parameters +2. **Audio URL** (`application/json`) - Provide only a URL to an audio file in the JSON body, with all other parameters as query parameters + +Both methods use our Lightning STT model with automatic language detection across 30+ languages. 
+ +## Authentication + +This endpoint requires authentication using a Bearer token in the Authorization header: + +```bash +Authorization: Bearer YOUR_API_KEY +``` + +## Input Methods + +Choose the input method that best fits your use case: + +| Method | Content Type | Use Case | Parameters | +| ------------- | -------------------------- | ------------------------------------------ | ---------------- | +| **Raw Bytes** | `application/octet-stream` | Streaming audio data, real-time processing | Query parameters | +| **Audio URL** | `application/json` | Remote audio files, webhook processing | Query parameters | + +## Code Examples + +### Method 1: Raw Audio Bytes (application/octet-stream) + + + +```bash cURL - Raw Bytes +curl --request POST \ + --url "https://api.smallest.ai/waves/v1/lightning/get_text?language=en&word_timestamps=true&diarize=true&age_detection=true&gender_detection=true&emotion_detection=true" \ + --header 'Authorization: Bearer ' \ + --header 'Content-Type: audio/wav' \ + --data-binary '@/path/to/your/audio.wav' +``` + +```python Python - Raw Bytes +import requests + +url = "https://api.smallest.ai/waves/v1/lightning/get_text" +headers = { + "Authorization": "Bearer YOUR_API_KEY", + "Content-Type": "audio/wav" +} +params = { + "language": "en", + "word_timestamps": "true", + "diarize": "true", + "age_detection": "true", + "gender_detection": "true", + "emotion_detection": "true" +} + +with open("path/to/your/audio.wav", "rb") as audio_file: + audio_data = audio_file.read() + +response = requests.post(url, headers=headers, params=params, data=audio_data) +result = response.json() +print(f"Transcription: {result['transcription']}") +``` + +```javascript JavaScript - Raw Bytes +const audioFile = await fetch("/path/to/audio.wav"); +const audioBuffer = await audioFile.arrayBuffer(); + +const params = new URLSearchParams({ + language: "en", + word_timestamps: "true", + diarize: "true", + age_detection: "true", + gender_detection: "true", + 
emotion_detection: "true", +}); + +const response = await fetch( + `https://api.smallest.ai/waves/v1/lightning/get_text?${params}`, + { + method: "POST", + headers: { + Authorization: "Bearer YOUR_API_KEY", + "Content-Type": "audio/wav", + }, + body: audioBuffer, + } +); + +const result = await response.json(); +console.log("Transcription:", result.transcription); +``` + + + +### Method 2: Audio URL (application/json) + + + +```bash cURL - Audio URL +curl --request POST \ + --url "https://api.smallest.ai/waves/v1/lightning/get_text?language=en&word_timestamps=true&diarize=true&age_detection=true&gender_detection=true&emotion_detection=true" \ + --header 'Authorization: Bearer ' \ + --header 'Content-Type: application/json' \ + --data '{ + "url": "https://example.com/audio.mp3" + }' +``` + +```python Python - Audio URL +import requests +import json + +url = "https://api.smallest.ai/waves/v1/lightning/get_text" +headers = { + "Authorization": "Bearer YOUR_API_KEY", + "Content-Type": "application/json" +} +params = { + "language": "en", + "word_timestamps": "true", + "diarize": "true", + "age_detection": "true", + "gender_detection": "true", + "emotion_detection": "true" +} +payload = { + "url": "https://example.com/audio.mp3" +} + +response = requests.post(url, headers=headers, params=params, data=json.dumps(payload)) +result = response.json() +print(f"Transcription: {result['transcription']}") +``` + +```javascript JavaScript - Audio URL +const params = new URLSearchParams({ + language: "en", + word_timestamps: "true", + diarize: "true", + age_detection: "true", + gender_detection: "true", + emotion_detection: "true", +}); + +const payload = { + url: "https://example.com/audio.mp3", +}; + +const response = await fetch( + `https://api.smallest.ai/waves/v1/lightning/get_text?${params}`, + { + method: "POST", + headers: { + Authorization: "Bearer YOUR_API_KEY", + "Content-Type": "application/json", + }, + body: JSON.stringify(payload), + } +); + +const result = await 
response.json(); +console.log("Transcription:", result.transcription); +``` + + + +## Supported Languages + +The Lightning STT model supports **automatic language detection** and transcription across **30+ languages**. + +For the full list of supported languages, please check [**STT Supported Languages**](/waves/documentation/getting-started/models#model-overview-stt). + + + Specify the **language** of the input audio using its [ISO + 639-1](https://en.wikipedia.org/wiki/ISO_639-1) code. Use **`multi`** to + enable automatic language detection from the supported list. The default is + **`en`** (English). + diff --git a/fern/products/waves/pages/v4.0.0/api-references/lightning-large-stream.mdx b/fern/products/waves/pages/v4.0.0/api-references/lightning-large-stream.mdx new file mode 100644 index 0000000..e314d6e --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/api-references/lightning-large-stream.mdx @@ -0,0 +1,25 @@ +--- +title: 'Text to Speech' +sidebarTitle: 'Text to Speech (SSE)' +description: 'Stream speech for given text using the Lightning-Large SSE API' +openapi: 'POST /waves/v1/lightning-large/stream' +--- + +## Overview + +The Lightning-Large SSE API provides real-time text-to-speech streaming capabilities with high-quality voice synthesis. This API uses Server-Sent Events (SSE) to deliver audio chunks as they're generated, enabling low-latency audio playback without waiting for the entire audio file to process. 
+For an end-to-end example of how to use the Lightning-Large SSE API, check out [Text to Speech (SSE) Example](https://github.com/smallest-inc/waves-examples/blob/main/lightning_large/http_streaming/http_streaming_api.py) + +## When to Use + +- **Interactive Applications**: Perfect for chatbots, virtual assistants, and other applications requiring immediate voice responses +- **Long-Form Content**: Efficiently stream audio for articles, stories, or other long-form content without buffering delays +- **Voice User Interfaces**: Create natural-sounding voice interfaces with minimal perceived latency +- **Accessibility Solutions**: Provide real-time audio versions of written content for users with visual impairments + +## How It Works + +1. **Make a POST Request**: Send your text and voice settings to the API endpoint +2. **Receive Audio Chunks**: The API processes your text and streams audio back as base64-encoded 1024-byte chunks +3. **Process the Stream**: Handle the SSE events to decode and play audio chunks sequentially +4. **End of Stream**: The API sends a completion event when all audio has been delivered diff --git a/fern/products/waves/pages/v4.0.0/api-references/lightning-large-ws.mdx b/fern/products/waves/pages/v4.0.0/api-references/lightning-large-ws.mdx new file mode 100644 index 0000000..9660c2b --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/api-references/lightning-large-ws.mdx @@ -0,0 +1,203 @@ +--- +title: 'Text to Speech' +sidebarTitle: 'Text to Speech (WSS)' +description: 'Stream speech for given text using the Lightning-Large WebSocket API' +hideApiMarker: True +--- + +## Lightning-Large WebSocket API + +The Lightning-Large WebSocket API allows you to stream high-quality text-to-speech audio in real-time. This is particularly useful for applications requiring low-latency audio generation with superior voice quality. 
+ +### Connection + +Connect to the WebSocket endpoint: + +```javascript +const socket = new WebSocket( + 'wss://api.smallest.ai/waves/v1/lightning-large/stream' +); +``` + +### Authentication + +Authentication is required. Include your API key in the connection headers: + +```javascript +const socket = new WebSocket( + 'wss://api.smallest.ai/waves/v1/lightning-large/stream', + { + headers: { + Authorization: 'Bearer YOUR_API_KEY' + } + } +); +``` + +### Request Format + +Send a JSON message with the following structure: + +```json +{ + "text": "Your text to be converted to speech", + "voice_id": "voice_id_here", + "speed": 1, + "sample_rate": 24000 +} +``` + +#### Parameters + +| Parameter | Type | Required | Description | +| ------------- | ------- | -------- | ------------------------------------------------------------------------- | +| `text` | string | Yes | The text to convert to speech (max 1000 characters) | +| `voice_id` | string | Yes | ID of the voice to use | +| `speed` | number | No | Speech speed multiplier (default: 1) | +| `sample_rate` | number | No | Audio sample rate in Hz (default: 24000) | +| `consistency` | number | No | Consistency of the speech (default: 0.5) | +| `similarity` | number | No | Similarity to the reference audio (default: 0) | +| `enhancement` | boolean | No | Enhances speech quality at the cost of increased latency (default: false) | + +### Response Format + +The WebSocket will stream responses in the following formats: + +#### Chunk Response + +```json +{ + "request_id": "047c9091-b770-41d8-b96b-907d1c8406c0", + "status": "chunk", + "data": { + "audio": "base64_encoded_audio_data" + } +} +``` + +The audio data is base64-encoded and can be decoded and played in the browser. 
+ +#### Complete Response + +When all chunks have been sent: + +```json +{ + "request_id": "047c9091-b770-41d8-b96b-907d1c8406c0", + "status": "complete", + "message": "All chunks sent", + "done": true +} +``` + +#### Error Response + +If an error occurs: + +```json +{ + "status": "error", + "message": "Error message", + "errors": [ + error information + ] +} +``` + +### Javascript Example + +```javascript +const WebSocket = require('ws'); + +const ws = new WebSocket( + 'wss://api.smallest.ai/waves/v1/lightning-large/get_speech/stream', + { + headers: { + Authorization: 'Bearer YOUR_API_KEY' + } + } +); + +ws.onopen = () => { + const request = { + text: 'Hello world! This is a test of the Lightning TTS WebSocket API.', + voice_id: 'blofeld', + speed: 1, + sample_rate: 24000 + }; + + ws.send(JSON.stringify(request)); +}; + +// Handle incoming audio chunks +ws.onmessage = (event) => { + const response = JSON.parse(event.data); + + if (response.status === 'chunk') { + // Decode and play audio + const audioData = Buffer.from(response.data.audio, 'base64'); + // Process audio data... + } else if (response.status === 'complete' && response.done) { + console.log('All audio chunks received'); + ws.close(); + } else if (response.status === 'error') { + console.error('Error:', response.message); + ws.close(); + } +}; + +ws.onerror = (error) => { + console.error('WebSocket error:', error); +}; + +ws.onclose = () => { + console.log('WebSocket connection closed'); +}; +``` + +### Python Example + +```python +import asyncio +import websockets +import json +import base64 + +API_KEY = "YOUR_API_KEY" +WS_URL = "wss://api.smallest.ai/waves/v1/lightning-large/get_speech/stream" + +async def text_to_speech(): + async with websockets.connect(WS_URL, extra_headers={"Authorization": f"Bearer {API_KEY}"}) as ws: + request = { + "text": "Hello world! 
This is a test of the Lightning-Large TTS WebSocket API.", + "voice_id": "erica", + "speed": 1, + "sample_rate": 24000 + } + + await ws.send(json.dumps(request)) + + while True: + response = await ws.recv() + response_data = json.loads(response) + + if response_data["status"] == "chunk": + audio_data = base64.b64decode(response_data["data"]["audio"]) + print("Received audio chunk") + elif response_data["status"] == "complete" and response_data.get("done", False): + print("All audio chunks received") + break + elif response_data["status"] == "error": + print("Error:", response_data["message"]) + break + +asyncio.run(text_to_speech()) +``` + +### Notes + +- The Lightning-Large model provides higher quality audio than the standard Lightning model +- The API automatically chunks long text and streams each chunk separately +- Credits are deducted based on the length of the input text +- The WebSocket connection will remain open until all chunks are sent or an error occurs +- For optimal performance, keep individual requests under 1000 characters diff --git a/fern/products/waves/pages/v4.0.0/api-references/lightning-large.mdx b/fern/products/waves/pages/v4.0.0/api-references/lightning-large.mdx new file mode 100644 index 0000000..5e6310e --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/api-references/lightning-large.mdx @@ -0,0 +1,6 @@ +--- +title: 'Text to Speech' +description: 'Get speech for given text using the Smallest AI API' +openapi: 'POST /waves/v1/lightning-large/get_speech' +hideApiMarker: False +--- diff --git a/fern/products/waves/pages/v4.0.0/api-references/lightning-tts-ws.mdx b/fern/products/waves/pages/v4.0.0/api-references/lightning-tts-ws.mdx new file mode 100644 index 0000000..bfb2538 --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/api-references/lightning-tts-ws.mdx @@ -0,0 +1,129 @@ +--- +title: 'Server-Sent Events API' +sidebarTitle: 'Text to Speech (SSE)' +description: 'Stream speech for given text using the Lightning SSE API' 
+hideApiMarker: false +--- + +## Lightning SSE API + +The Lightning SSE API allows you to stream text-to-speech audio in real-time. This is particularly useful for applications requiring low-latency audio generation. + +### Connection + +Connect to the SSE endpoint: + +```javascript +const eventSource = new EventSource( + 'https://api.smallest.ai/waves/v1/lightning/stream', + { + headers: { + Authorization: 'Bearer YOUR_API_KEY' + } + } +); +``` + +### Request Format + +Send a POST request with the following JSON structure: + +```json +{ + "text": "Your text to be converted to speech", + "voice_id": "voice_id_here", + "speed": 1, + "sample_rate": 24000 +} +``` + +#### Parameters + +| Parameter | Type | Required | Description | +| ------------- | ------ | -------- | --------------------------------------------------- | +| `text` | string | Yes | The text to convert to speech (max 1000 characters) | +| `voice_id` | string | Yes | ID of the voice to use | +| `speed` | number | No | Speech speed multiplier (default: 1) | +| `sample_rate` | number | No | Audio sample rate in Hz (default: 24000) | + +### Example Usage + +```javascript +const eventSource = new EventSource( + 'https://api.smallest.ai/waves/v1/lightning/stream', + { + headers: { + Authorization: 'Bearer YOUR_API_KEY' + } + } +); + +// Handle incoming audio chunks +eventSource.onmessage = (event) => { + const response = JSON.parse(event.data); + + if (response.status === 'chunk') { + // Decode and play audio + const audioData = atob(response.data.audio); + // Process audio data... 
+ } else if (response.status === 'complete' && response.done) { + console.log('All audio chunks received'); + eventSource.close(); + } else if (response.status === 'error') { + console.error('Error:', response.message); + eventSource.close(); + } +}; + +eventSource.onerror = (error) => { + console.error('SSE error:', error); + eventSource.close(); +}; +``` + +### Response Events + +The server will send events with the following formats: + +#### Chunk Event + +```json +{ + "request_id": "047c9091-b770-41d8-b96b-907d1c8406c0", + "status": "chunk", + "data": { + "audio": "base64_encoded_audio_data" + } +} +``` + +#### Complete Event + +```json +{ + "request_id": "047c9091-b770-41d8-b96b-907d1c8406c0", + "status": "complete", + "message": "All chunks sent", + "done": true +} +``` + +#### Error Event + +```json +{ + "request_id": "047c9091-b770-41d8-b96b-907d1c8406c0", + "status": "error", + "message": "Error message", + "errors": [ + /* detailed error information */ + ] +} +``` + +### Notes + +- The API automatically chunks long text and streams each chunk separately +- Credits are deducted based on the length of the input text +- The SSE connection will remain open until all chunks are sent or an error occurs +- For optimal performance, keep individual requests under 1000 characters diff --git a/fern/products/waves/pages/v4.0.0/api-references/lightning-v2-stream.mdx b/fern/products/waves/pages/v4.0.0/api-references/lightning-v2-stream.mdx new file mode 100644 index 0000000..0285393 --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/api-references/lightning-v2-stream.mdx @@ -0,0 +1,25 @@ +--- +title: "Lightning v2 SSE" +sidebarTitle: "Lightning v2 SSE" +description: "Stream speech for given text using the Lightning v2 SSE API" +openapi: "POST /waves/v1/lightning-v2/stream" +--- + +## Overview + +The Lightning v2 SSE API provides real-time text-to-speech streaming capabilities with high-quality voice synthesis. 
This API uses Server-Sent Events (SSE) to deliver audio chunks as they're generated, enabling low-latency audio playback without waiting for the entire audio file to process. +For an end-to-end example of how to use the Lightning v2 SSE API, check out [Text to Speech (SSE) Example](https://github.com/smallest-inc/waves-examples/blob/main/lightning_v2/http_streaming/http_streaming_api.py) + +## When to Use + +- **Interactive Applications**: Perfect for chatbots, virtual assistants, and other applications requiring immediate voice responses +- **Long-Form Content**: Efficiently stream audio for articles, stories, or other long-form content without buffering delays +- **Voice User Interfaces**: Create natural-sounding voice interfaces with minimal perceived latency +- **Accessibility Solutions**: Provide real-time audio versions of written content for users with visual impairments + +## How It Works + +1. **Make a POST Request**: Send your text and voice settings to the API endpoint +2. **Receive Audio Chunks**: The API processes your text and streams audio back as base64-encoded chunks with 1024 byte size +3. **Process the Stream**: Handle the SSE events to decode and play audio chunks sequentially +4. **End of Stream**: The API sends a completion event when all audio has been delivered diff --git a/fern/products/waves/pages/v4.0.0/api-references/lightning-v2-ws.mdx b/fern/products/waves/pages/v4.0.0/api-references/lightning-v2-ws.mdx new file mode 100644 index 0000000..7604095 --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/api-references/lightning-v2-ws.mdx @@ -0,0 +1,52 @@ +--- +title: "Lightning v2 WebSocket" +description: The Lightning v2 WebSocket API provides real-time text-to-speech streaming capabilities with high-quality voice synthesis. This API uses WebSocket to deliver audio chunks as they're generated, enabling low-latency audio playback without waiting for the entire audio file to process. 
For an end-to-end example of how to use the Lightning v2 WebSocket API, check out [Text to Speech (WS) Example](https://github.com/smallest-inc/waves-examples/tree/main/lightning_v2/ws_streaming) +asyncapi: "/asyncapi-spec/lightning-v2-ws.json /waves/v1/lightning-v2/get_speech/stream" +--- + +Deprecated Lightning v2 is a legacy model. For new projects, we recommend Lightning v3.1. + +## Overview + +The Lightning v2 WebSocket API delivers state-of-the-art text-to-speech synthesis through a persistent WebSocket connection, providing ultra-low latency audio streaming for applications that demand real-time voice generation. Unlike traditional HTTP-based TTS APIs, this WebSocket implementation streams audio chunks as they're processed, significantly reducing perceived latency and enabling seamless user experiences. + +## Key Benefits + +- **Ultra-Low Latency**: Audio chunks are delivered as soon as they're generated, enabling near-instantaneous playback +- **Real-Time Streaming**: Continuous audio delivery without waiting for complete text processing +- **High-Quality Synthesis**: Advanced neural voice models with natural-sounding speech output +- **Persistent Connection**: Maintains connection for multiple requests, reducing connection overhead +- **Interactive Applications**: Perfect for chatbots, voice assistants, and live communication systems + +## Use Cases + +- **Voice Assistants**: Real-time response generation for conversational AI +- **Interactive Chatbots**: Immediate audio feedback for user interactions +- **Live Streaming**: Real-time narration and commentary +- **Accessibility Tools**: Screen readers and text-to-speech applications +- **Gaming**: Dynamic voice generation for characters and narration +- **Customer Service**: Automated voice responses with natural speech patterns + +## Concurrency and Rate Limits + +This WebSocket API is subject to concurrency limits to ensure optimal performance for all users. 
Here's how it works: + +- **1 Concurrency Unit** = 1 active TTS request that can be processed at any given time +- **5 WebSocket Connections** can be established per concurrency unit +- **Total Connections** = Your concurrency limit × 5 + +**Examples:** + +- **1 concurrency** = Up to 5 WebSocket connections, but only 1 active request +- **3 concurrency** = Up to 15 WebSocket connections, but only 3 active requests simultaneously +- **5 concurrency** = Up to 25 WebSocket connections, but only 5 active requests simultaneously + +While you can maintain multiple WebSocket connections, only your concurrency limit number of requests can be actively processed at once. Additional requests sent through any connection while at the concurrency limit will be rejected with an error. + +For detailed information about concurrency limits, rate limiting, and best practices for handling these constraints, see our [Concurrency and Limits](concurrency-and-limits) documentation. + + + When multiple requests are sent simultaneously beyond your concurrency limit, + additional requests will be rejected with an error. Implement proper error + handling and request queuing to manage concurrency effectively. + diff --git a/fern/products/waves/pages/v4.0.0/api-references/lightning-v2.mdx b/fern/products/waves/pages/v4.0.0/api-references/lightning-v2.mdx new file mode 100644 index 0000000..eb36181 --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/api-references/lightning-v2.mdx @@ -0,0 +1,8 @@ +--- +title: "Lightning v2" +description: "Get speech for given text using the Smallest AI API" +openapi: "POST /waves/v1/lightning-v2/get_speech" +hideApiMarker: False +--- + +Deprecated Lightning v2 is a legacy model. For new projects, we recommend Lightning v3.1. 
diff --git a/fern/products/waves/pages/v4.0.0/api-references/lightning-v3.1-stream.mdx b/fern/products/waves/pages/v4.0.0/api-references/lightning-v3.1-stream.mdx new file mode 100644 index 0000000..eac93ac --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/api-references/lightning-v3.1-stream.mdx @@ -0,0 +1,26 @@ +--- +title: "Lightning v3.1 SSE" +sidebarTitle: "Lightning v3.1 SSE" +description: "Stream speech for given text using the Lightning v3.1 SSE API" +openapi: "POST /waves/v1/lightning-v3.1/stream" +--- + +## Overview + +The Lightning v3.1 SSE API provides real-time text-to-speech streaming capabilities with natural, expressive voice synthesis. This API uses Server-Sent Events (SSE) to deliver audio chunks as they're generated, enabling low-latency audio playback without waiting for the entire audio file to process. + +Lightning v3.1 is a 44 kHz model that produces natural, expressive, and realistic speech, with support for voice cloning. + +## When to Use + +- **Interactive Applications**: Perfect for chatbots, virtual assistants, and other applications requiring immediate voice responses +- **Long-Form Content**: Efficiently stream audio for articles, stories, or other long-form content without buffering delays +- **Voice User Interfaces**: Create natural-sounding voice interfaces with minimal perceived latency +- **Accessibility Solutions**: Provide real-time audio versions of written content for users with visual impairments + +## How It Works + +1. **Make a POST Request**: Send your text and voice settings to the API endpoint +2. **Receive Audio Chunks**: The API processes your text and streams audio back as base64-encoded chunks with 1024 byte size +3. **Process the Stream**: Handle the SSE events to decode and play audio chunks sequentially +4. 
**End of Stream**: The API sends a completion event when all audio has been delivered diff --git a/fern/products/waves/pages/v4.0.0/api-references/lightning-v3.1-ws.mdx b/fern/products/waves/pages/v4.0.0/api-references/lightning-v3.1-ws.mdx new file mode 100644 index 0000000..b8e7503 --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/api-references/lightning-v3.1-ws.mdx @@ -0,0 +1,53 @@ +--- +title: "Lightning v3.1 WebSocket" +description: The Lightning v3.1 WebSocket API provides real-time text-to-speech streaming capabilities with natural, expressive voice synthesis. This API uses WebSocket to deliver audio chunks as they're generated, enabling low-latency audio playback without waiting for the entire audio file to process. +asyncapi: "/asyncapi-spec/lightning-v3.1-ws.json /waves/v1/lightning-v3.1/get_speech/stream" +--- + +## Overview + +The Lightning v3.1 WebSocket API delivers state-of-the-art text-to-speech synthesis through a persistent WebSocket connection, providing ultra-low latency audio streaming for applications that demand real-time voice generation. Unlike traditional HTTP-based TTS APIs, this WebSocket implementation streams audio chunks as they're processed, significantly reducing perceived latency and enabling seamless user experiences. + +Lightning v3.1 is a 44 kHz model that produces natural, expressive, and realistic speech, with support for voice cloning. 
+ +## Key Benefits + +- **Ultra-Low Latency**: Audio chunks delivered as soon as they're generated +- **Real-Time Streaming**: Continuous audio delivery without waiting for complete text processing +- **Natural Speech**: Expressive synthesis that sounds realistic +- **Voice Cloning Support**: Compatible with cloned voices +- **Persistent Connection**: Maintains connection for multiple requests, reducing connection overhead +- **Interactive Applications**: Perfect for chatbots, voice assistants, and live communication systems + +## Use Cases + +- **Voice Assistants**: Real-time response generation for conversational AI +- **Interactive Chatbots**: Immediate audio feedback for user interactions +- **Live Streaming**: Real-time narration and commentary +- **Accessibility Tools**: Screen readers and text-to-speech applications +- **Gaming**: Dynamic voice generation for characters and narration +- **Customer Service**: Automated voice responses with natural speech patterns + +## Concurrency and Rate Limits + +This WebSocket API is subject to concurrency limits to ensure optimal performance for all users. Here's how it works: + +- **1 Concurrency Unit** = 1 active TTS request that can be processed at any given time +- **5 WebSocket Connections** can be established per concurrency unit +- **Total Connections** = Your concurrency limit × 5 + +**Examples:** + +- **1 concurrency** = Up to 5 WebSocket connections, but only 1 active request +- **3 concurrency** = Up to 15 WebSocket connections, but only 3 active requests simultaneously +- **5 concurrency** = Up to 25 WebSocket connections, but only 5 active requests simultaneously + +While you can maintain multiple WebSocket connections, only your concurrency limit number of requests can be actively processed at once. Additional requests sent through any connection while at the concurrency limit will be rejected with an error. 
+ +For detailed information about concurrency limits, rate limiting, and best practices for handling these constraints, see our [Concurrency and Limits](concurrency-and-limits) documentation. + + + When multiple requests are sent simultaneously beyond your concurrency limit, + additional requests will be rejected with an error. Implement proper error + handling and request queuing to manage concurrency effectively. + diff --git a/fern/products/waves/pages/v4.0.0/api-references/lightning-v3.1.mdx b/fern/products/waves/pages/v4.0.0/api-references/lightning-v3.1.mdx new file mode 100644 index 0000000..69d72ef --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/api-references/lightning-v3.1.mdx @@ -0,0 +1,19 @@ +--- +title: "Lightning v3.1" +description: "Get speech for given text using the Lightning v3.1 model" +openapi: "POST /waves/v1/lightning-v3.1/get_speech" +hideApiMarker: False +--- + +## Overview + +Lightning v3.1 is a 44 kHz text-to-speech model that delivers natural, expressive, and realistic speech synthesis. 
+ +## Key Features + +- **Voice Cloning Support**: Compatible with cloned voices +- **Ultra-Low Latency**: Optimized for real-time applications +- **Multi-Language**: Supports English (en) and Hindi (hi) +- **Multiple Output Formats**: PCM, MP3, WAV, and mulaw +- **Flexible Sample Rates**: 8000 Hz to 44100 Hz +- **Speed Control**: Adjustable from 0.5x to 2x speed diff --git a/fern/products/waves/pages/v4.0.0/api-references/lightning.mdx b/fern/products/waves/pages/v4.0.0/api-references/lightning.mdx new file mode 100644 index 0000000..6d52ba8 --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/api-references/lightning.mdx @@ -0,0 +1,7 @@ +--- +title: 'Text to speech' +sidebarTitle: 'Text to speech' +description: 'Get speech for given text using the Smallest AI API' +openapi: 'POST /waves/v1/lightning/get_speech' +hideApiMarker: False +--- diff --git a/fern/products/waves/pages/v4.0.0/api-references/pronunciations-dict/create.mdx b/fern/products/waves/pages/v4.0.0/api-references/pronunciations-dict/create.mdx new file mode 100644 index 0000000..c7c4e9d --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/api-references/pronunciations-dict/create.mdx @@ -0,0 +1,7 @@ +--- +title: 'Create Pronunciations Dict' +sidebarTitle: 'Create' +description: 'Create pronunciations dicts using the Smallest AI API' +openapi: 'POST /waves/v1/pronunciation-dicts' +hideApiMarker: False +--- diff --git a/fern/products/waves/pages/v4.0.0/api-references/pronunciations-dict/delete.mdx b/fern/products/waves/pages/v4.0.0/api-references/pronunciations-dict/delete.mdx new file mode 100644 index 0000000..cf0f411 --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/api-references/pronunciations-dict/delete.mdx @@ -0,0 +1,7 @@ +--- +title: 'Delete Pronunciations Dict' +sidebarTitle: 'Delete' +description: 'Delete pronunciations dicts using the Smallest AI API' +openapi: 'DELETE /waves/v1/pronunciation-dicts' +hideApiMarker: False +--- diff --git 
a/fern/products/waves/pages/v4.0.0/api-references/pronunciations-dict/list.mdx b/fern/products/waves/pages/v4.0.0/api-references/pronunciations-dict/list.mdx new file mode 100644 index 0000000..8e4b063 --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/api-references/pronunciations-dict/list.mdx @@ -0,0 +1,7 @@ +--- +title: 'List Pronunciations Dicts' +sidebarTitle: 'List' +description: 'Get pronunciations dicts using the Smallest AI API' +openapi: 'GET /waves/v1/pronunciation-dicts' +hideApiMarker: False +--- diff --git a/fern/products/waves/pages/v4.0.0/api-references/pronunciations-dict/update.mdx b/fern/products/waves/pages/v4.0.0/api-references/pronunciations-dict/update.mdx new file mode 100644 index 0000000..bbbda6b --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/api-references/pronunciations-dict/update.mdx @@ -0,0 +1,7 @@ +--- +title: 'Update Pronunciations Dict' +sidebarTitle: 'Update' +description: 'Update pronunciations dicts using the Smallest AI API' +openapi: 'PUT /waves/v1/pronunciation-dicts' +hideApiMarker: False +--- diff --git a/fern/products/waves/pages/v4.0.0/api-references/pulse-stt-ws.mdx b/fern/products/waves/pages/v4.0.0/api-references/pulse-stt-ws.mdx new file mode 100644 index 0000000..da741b6 --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/api-references/pulse-stt-ws.mdx @@ -0,0 +1,269 @@ +--- +title: "Pulse (Realtime)" +description: The Pulse STT WebSocket API provides real-time speech-to-text transcription capabilities with streaming audio input. This API uses WebSocket to deliver transcription results as audio is processed, enabling low-latency transcription without waiting for the entire audio file to upload. Perfect for live transcription, voice assistants, and real-time communication systems that require immediate speech recognition. Supports multiple languages, word-level timestamps, sentence-level timestamps (utterances), PII and PCI redaction, cumulative transcripts, and more advanced features. 
+asyncapi: "/asyncapi-spec/pulse-stt-ws.json /waves/v1/pulse/get_text" +--- + +## Query Parameters + +The WebSocket connection accepts the following query parameters: + +### Audio Configuration + +| Parameter | Type | Default | Description | +| ------------- | ------ | ---------- | ------------------------------------------------------------------------------------- | +| `encoding` | string | `linear16` | Audio encoding format. Options: `linear16`, `linear32`, `alaw`, `mulaw`, `opus`, `ogg_opus` | +| `sample_rate` | string | `16000` | Audio sample rate in Hz. Options: `8000`, `16000`, `22050`, `24000`, `44100`, `48000` | + +### Language & Detection + +| Parameter | Type | Default | Description | +| ---------- | ------ | ------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `language` | string | `en` | Language code for transcription. Use `multi` for automatic language detection. Supported: `it`, `es`, `en`, `pt`, `hi`, `de`, `fr`, `uk`, `ru`, `kn`, `ml`, `pl`, `mr`, `gu`, `cs`, `sk`, `te`, `or`, `nl`, `bn`, `lv`, `et`, `ro`, `pa`, `fi`, `sv`, `bg`, `ta`, `hu`, `da`, `lt`, `mt`, `multi` | + +### Feature Flags + +| Parameter | Type | Default | Description | +| --------------------- | ------ | ------- | ------------------------------------------------------------------------ | +| `word_timestamps` | string | `true` | Include word-level timestamps in transcription. Options: `true`, `false` | +| `full_transcript` | string | `false` | Include cumulative transcript received till now in responses where `is_final` is `true`. Options: `true`, `false` | +| `sentence_timestamps` | string | `false` | Include sentence-level timestamps (utterances) in transcription. 
Options: `true`, `false` |
+| `redact_pii` | string | `false` | Redact personally identifiable information (name, surname, address). Options: `true`, `false` |
+| `redact_pci` | string | `false` | Redact payment card information (credit card, CVV, zip, account number). Options: `true`, `false` |
+| `numerals` | string | `auto` | Convert spoken numerals into digit form (e.g., 'twenty five' to '25'); `auto` enables automatic detection based on context. Options: `true`, `false`, `auto` |
+| `diarize` | string | `false` | Enable speaker diarization to identify and label different speakers in the audio. When enabled, each word in the transcription includes `speaker` (integer ID) and `speaker_confidence` (float 0-1) fields. Options: `true`, `false` |
+| `keywords` | string | — | Comma-separated list of words/phrases to boost, each optionally followed by `:INTENSIFIER` (e.g. `NVIDIA:5,Jensen`). Intensifier defaults to `1.0` if omitted. Max 100 keywords per session. See [Keyword Boosting](/waves/documentation/speech-to-text-pulse/features/keyword-boosting) |
+
+### Webhook Configuration
+
+## Connection Flow
+
+### Example Connection URL
+
+```javascript
+const url = new URL("wss://api.smallest.ai/waves/v1/pulse/get_text");
+url.searchParams.append("language", "en");
+url.searchParams.append("encoding", "linear16");
+url.searchParams.append("sample_rate", "16000");
+url.searchParams.append("word_timestamps", "true");
+url.searchParams.append("full_transcript", "true");
+url.searchParams.append("sentence_timestamps", "true");
+url.searchParams.append("redact_pii", "true");
+url.searchParams.append("redact_pci", "true");
+url.searchParams.append("numerals", "true");
+url.searchParams.append("diarize", "true");
+url.searchParams.append("keywords", "NVIDIA:5,Jensen:4");
+
+const ws = new WebSocket(url.toString(), {
+  headers: {
+    Authorization: `Bearer ${API_KEY}`,
+  },
+});
+```
+
+## Input Messages
+
+### Audio Data (Binary)
+
+Send raw audio bytes as binary WebSocket 
messages: + +```javascript +const audioChunk = new Uint8Array(4096); +ws.send(audioChunk); +``` + +### End Signal (JSON) + +Signal the end of audio stream. This is used to flush the transcription and receive the final response with `is_last=true`: + +```json +{ + "type": "finalize" +} +``` + +## Response Format + +The server responds with JSON messages containing transcription results: + +```json +{ + "session_id": "sess_12345abcde", + "transcript": "Hello, how are you?", + "is_final": true, + "is_last": false, + "language": "en" +} +``` + +### Response Fields + +| Field | Type | Description | +| ----------------- | ------- | -------------------------------------------------------------------- | +| `session_id` | string | Unique identifier for the transcription session | +| `transcript` | string | Partial or complete transcription text for the current segment | +| `is_final` | boolean | Indicates if this is the final transcription for the current segment | +| `is_last` | boolean | Indicates if this is the last transcription in the session | +| `language` | string | Detected primary language code, returns only when `is_final=True` | +| `languages` | array | List of languages detected in the audio included in Responses where `is_final` is `true` | + +### Optional Response Fields (Based on Query Parameters) + +| Field | Type | When Included | Description | +| ----------------- | ------ | ------------------------ | ---------------------------------------------------------------------- | +| `full_transcript` | string | `full_transcript=true` AND `is_final=true` | Complete transcription text accumulated till now. Only present in responses when `full_transcript=true` query parameter is set AND `is_final=true` | +| `words` | array | `word_timestamps=true` | Word-level timestamps with `word`, `start`, `end`, and `confidence` fields. 
When `diarize=true`, also includes `speaker` and `speaker_confidence` fields | +| `utterances` | array | `sentence_timestamps=true` | Sentence-level timestamps with `text`, `start`, and `end` fields | +| `redacted_entities`| array | `redact_pii=true` or `redact_pci=true` | List of redacted entity placeholders (e.g., `[FIRSTNAME_1]`, `[CREDITCARDCVV_1]`) | + +### Example Response with All Features + +```json +{ + "session_id": "sess_12345abcde", + "transcript": "[CREDITCARDCVV_1] and expiry [TIME_2].", + "is_final": true, + "is_last": true, + "full_transcript": "Hi, my name is [FIRSTNAME_1] [FIRSTNAME_2] You can reach me at [PHONENUMBER_1] and I paid using my Visa card [ZIPCODE_1] [ACCOUNTNUMBER_1] with [CREDITCARDCVV_1] and expiry [TIME_1].", + "language": "en", + "languages": ["en"], + "words": [ + { + "word": "[creditcardcvv_1]", + "start": 15.44, + "end": 17.36, + "confidence": 0.97, + "speaker": 0, + "speaker_confidence": 0.67 + }, + { + "word": "and", + "start": 18.0, + "end": 18.32, + "confidence": 0.94, + "speaker": 0, + "speaker_confidence": 0.76 + }, + { + "word": "expiry", + "start": 18.32, + "end": 19.2, + "confidence": 1.0, + "speaker": 0, + "speaker_confidence": 0.91 + }, + { + "word": "[time_2]", + "start": 19.2, + "end": 19.92, + "confidence": 0.91, + "speaker": 0, + "speaker_confidence": 0.82 + }, + ], + "utterances": [ + { + "text": "Hi, my name is Hans Miller.", + "start": 0.0, + "end": 2.64, + "speaker": 0 + }, + { + "text": "You can reach me at [PHONENUMBER_1], and I paid using my Visa card 4242 42424242 with CVV123 and expiry [TIME_1].", + "start": 2.64, + "end": 21.04, + "speaker": 0 + } + ], + "redacted_entities": [ + "[CREDITCARDCVV_1]", + "[TIME_2]" + ] +} +``` + +## Code Examples + + +```python python +import asyncio +import json +import argparse +import numpy as np +import websockets +import librosa +from urllib.parse import urlencode + +BASE_WS_URL = "wss://api.smallest.ai/waves/v1/pulse/get_text" + +async def stream_audio(audio_file, 
api_key, language="en", encoding="linear16", sample_rate=16000, word_timestamps="true", full_transcript="false", sentence_timestamps="false", redact_pii="false", redact_pci="false", numerals="auto", diarize="false"): + params = { + "language": language, + "encoding": encoding, + "sample_rate": sample_rate, + "word_timestamps": word_timestamps, + "full_transcript": full_transcript, + "sentence_timestamps": sentence_timestamps, + "redact_pii": redact_pii, + "redact_pci": redact_pci, + "numerals": numerals, + "diarize": diarize + } + ws_url = f"{BASE_WS_URL}?{urlencode(params)}" + + async with websockets.connect(ws_url, additional_headers={"Authorization": f"Bearer {api_key}"}) as ws: + print(f"Connected: {ws_url}") + + async def send(): + audio, _ = librosa.load(audio_file, sr=sample_rate, mono=True) + chunk_size = int(0.160 * sample_rate) + + for i in range(0, len(audio), chunk_size): + chunk = audio[i:i + chunk_size] + await ws.send((chunk * 32768.0).astype(np.int16).tobytes()) + await asyncio.sleep(len(chunk) / sample_rate) + + await ws.send(json.dumps({"type": "finalize"})) + + sender = asyncio.create_task(send()) + + async for message in ws: + data = json.loads(message) + print("Received:", json.dumps(data, indent=2)) + if data.get("is_last"): + break + + await sender + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("audio_file", nargs="?", default="path/to/audio.wav") + parser.add_argument("--api-key", "-k", default="your_api_key_here") + parser.add_argument("--language", "-l", default="en") + parser.add_argument("--encoding", "-e", default="linear16") + parser.add_argument("--sample-rate", "-sr", type=int, default=16000) + parser.add_argument("--word-timestamps", "-wt", default="true") + parser.add_argument("--full-transcript", "-ft", default="false") + parser.add_argument("--sentence-timestamps", "-st", default="false") + parser.add_argument("--redact-pii", default="false") + parser.add_argument("--redact-pci", 
default="false") + parser.add_argument("--numerals", default="auto") + parser.add_argument("--diarize", default="false") + + args = parser.parse_args() + asyncio.run(stream_audio( + args.audio_file, + args.api_key, + args.language, + args.encoding, + args.sample_rate, + args.word_timestamps, + args.full_transcript, + args.sentence_timestamps, + args.redact_pii, + args.redact_pci, + args.numerals, + args.diarize + )) +``` + diff --git a/fern/products/waves/pages/v4.0.0/api-references/pulse-stt.mdx b/fern/products/waves/pages/v4.0.0/api-references/pulse-stt.mdx new file mode 100644 index 0000000..c9775e6 --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/api-references/pulse-stt.mdx @@ -0,0 +1,184 @@ +--- +title: "Pulse (Pre-Recorded)" +description: "Convert speech to text using file upload with the Pulse STT POST API" +openapi: "POST /waves/v1/pulse/get_text" +--- + +The STT POST API allows you to convert speech to text using two different input methods: + +1. **Raw Audio Bytes** (`application/octet-stream`) - Send raw audio data with all parameters as query parameters +2. **Audio URL** (`application/json`) - Provide only a URL to an audio file in the JSON body, with all other parameters as query parameters + +Both methods use our Pulse STT model with automatic language detection across 30+ languages. 
+ +## Authentication + +This endpoint requires authentication using a Bearer token in the Authorization header: + +```bash +Authorization: Bearer YOUR_API_KEY +``` + +## Input Methods + +Choose the input method that best fits your use case: + +| Method | Content Type | Use Case | Parameters | +| ------------- | -------------------------- | ------------------------------------------ | ---------------- | +| **Raw Bytes** | `application/octet-stream` | Streaming audio data, real-time processing | Query parameters | +| **Audio URL** | `application/json` | Remote audio files, webhook processing | Query parameters | + +## Code Examples + +### Method 1: Raw Audio Bytes (application/octet-stream) + + + +```bash cURL - Raw Bytes +curl --request POST \ + --url "https://api.smallest.ai/waves/v1/pulse/get_text?language=en&word_timestamps=true&diarize=true&age_detection=true&gender_detection=true&emotion_detection=true" \ + --header 'Authorization: Bearer ' \ + --header 'Content-Type: audio/wav' \ + --data-binary '@/path/to/your/audio.wav' +``` + +```python Python - Raw Bytes +import requests + +url = "https://api.smallest.ai/waves/v1/pulse/get_text" +headers = { + "Authorization": "Bearer YOUR_API_KEY", + "Content-Type": "audio/wav" +} +params = { + "language": "en", + "word_timestamps": "true", + "diarize": "true", + "age_detection": "true", + "gender_detection": "true", + "emotion_detection": "true" +} + +with open("path/to/your/audio.wav", "rb") as audio_file: + audio_data = audio_file.read() + +response = requests.post(url, headers=headers, params=params, data=audio_data) +result = response.json() +print(f"Transcription: {result['transcription']}") +``` + +```javascript JavaScript - Raw Bytes +const audioFile = await fetch("/path/to/audio.wav"); +const audioBuffer = await audioFile.arrayBuffer(); + +const params = new URLSearchParams({ + language: "en", + word_timestamps: "true", + diarize: "true", + age_detection: "true", + gender_detection: "true", + emotion_detection: 
"true", +}); + +const response = await fetch( + `https://api.smallest.ai/waves/v1/pulse/get_text?${params}`, + { + method: "POST", + headers: { + Authorization: "Bearer YOUR_API_KEY", + "Content-Type": "audio/wav", + }, + body: audioBuffer, + } +); + +const result = await response.json(); +console.log("Transcription:", result.transcription); +``` + + + +### Method 2: Audio URL (application/json) + + + +```bash cURL - Audio URL +curl --request POST \ + --url "https://api.smallest.ai/waves/v1/pulse/get_text?language=en&word_timestamps=true&diarize=true&age_detection=true&gender_detection=true&emotion_detection=true" \ + --header 'Authorization: Bearer ' \ + --header 'Content-Type: application/json' \ + --data '{ + "url": "https://example.com/audio.mp3" + }' +``` + +```python Python - Audio URL +import requests +import json + +url = "https://api.smallest.ai/waves/v1/pulse/get_text" +headers = { + "Authorization": "Bearer YOUR_API_KEY", + "Content-Type": "application/json" +} +params = { + "language": "en", + "word_timestamps": "true", + "diarize": "true", + "age_detection": "true", + "gender_detection": "true", + "emotion_detection": "true" +} +payload = { + "url": "https://example.com/audio.mp3" +} + +response = requests.post(url, headers=headers, params=params, data=json.dumps(payload)) +result = response.json() +print(f"Transcription: {result['transcription']}") +``` + +```javascript JavaScript - Audio URL +const params = new URLSearchParams({ + language: "en", + word_timestamps: "true", + diarize: "true", + age_detection: "true", + gender_detection: "true", + emotion_detection: "true", +}); + +const payload = { + url: "https://example.com/audio.mp3", +}; + +const response = await fetch( + `https://api.smallest.ai/waves/v1/pulse/get_text?${params}`, + { + method: "POST", + headers: { + Authorization: "Bearer YOUR_API_KEY", + "Content-Type": "application/json", + }, + body: JSON.stringify(payload), + } +); + +const result = await response.json(); 
+console.log("Transcription:", result.transcription); +``` + + + +## Supported Languages + +The Pulse STT model supports **automatic language detection** and transcription across **30+ languages**. + +For the full list of supported languages, please check [**STT Supported Languages**](/waves/documentation/getting-started/models#model-overview-stt). + + + Specify the **language** of the input audio using its [ISO + 639-1](https://en.wikipedia.org/wiki/ISO_639-1) code. Use **`multi`** to + enable automatic language detection from the supported list. The default is + **`en`** (English). + diff --git a/fern/products/waves/pages/v4.0.0/api-references/websocket.mdx b/fern/products/waves/pages/v4.0.0/api-references/websocket.mdx new file mode 100644 index 0000000..bfb415e --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/api-references/websocket.mdx @@ -0,0 +1,106 @@ +--- +title: WebSocket Support for Text to Speech (TTS) API +sidebarTitle: WebSocket +description: Learn about WebSocket support for our Text-to-Speech (TTS) API, how it works, and when to use it. +icon: arrow-down-wide-short +--- + +Our Text to Speech (TTS) API supports WebSocket communication, providing a real-time, low-latency streaming experience for applications that require instant speech synthesis. WebSockets allow continuous data exchange, making them ideal for use cases that demand uninterrupted audio generation. + +--- + +## When to Use WebSockets + +### 1. **Real-Time Streaming** + +WebSockets are perfect for applications that need real-time speech synthesis, eliminating the delays associated with traditional HTTP requests. + +### 2. **Interactive Applications** + +For voice assistants, chatbots, and live transcription services, WebSockets ensure smooth, uninterrupted audio playback and response times. + +### 3. 
**Reduced Latency** + +A persistent WebSocket connection reduces the need for repeated request-response cycles, significantly improving performance for applications requiring rapid audio generation. + +--- + +## How It Works + +1. **Establish a Connection**: The client opens a WebSocket connection to our TTS API. +2. **Send Text Data**: The client sends the text payload to be synthesized. +3. **Process in Chunks**: The API breaks the text into chunks and processes them individually. +4. **Receive Audio Stream**: As each chunk is processed, it is sent back to the client as a base64-encoded audio buffer. +5. **Completion**: Once all chunks are processed, a complete message is sent to indicate the end of the stream. + +--- + +## Timeout Behavior + +By default, the WebSocket connection enforces a **20-second inactivity timeout**. This means that if the client does not send any data within 20 seconds, the server will automatically close the connection to free up resources. + +To support longer sessions for use cases where clients need more time (e.g., long pauses between messages), the timeout can be extended up to **60 seconds**. + +### To extend the timeout: + +You can include the `timeout` parameter in the WebSocket URL like so: + +```link +wss://api.smallest.ai/waves/v1/lightning-v3.1/get_speech/stream?timeout=60 +``` + + +This sets the inactivity timeout to 60 seconds. Valid values range from **20 (default)** to **60 seconds**. + +--- + +## Implementation Details + +The WebSocket TTS API is optimized to handle real-time text-to-speech conversions efficiently. Key aspects include: + +- **Input Validation**: Ensures the provided text and voice ID are valid before processing. +- **Chunk Processing**: Long texts are split into smaller chunks (e.g., 240 characters) to optimize processing. +- **Voice Caching**: The API fetches and caches voice configurations to reduce redundant database queries. 
+- **Task Queue System**: Tasks are pushed to a Redis-based queue for efficient processing and real-time audio generation.
+- **Error Handling**: If any chunk fails, an error message is logged and sent to the client.
+
+---
+
+## Example Request Flow
+
+1. The client sends a WebSocket message:
+
+   ```json
+   {
+     "text": "Hello, world!",
+     "voice_id": "12345",
+     "speed": 1.0,
+     "sample_rate": 24000
+   }
+   ```
+
+2. The API validates the request and retrieves the voice settings.
+
+3. The text is split into chunks and processed in the background.
+
+4. The client receives responses like:
+```json
+{
+  "request_id": "047c9091-b770-41d8-b96b-907d1c8406c0",
+  "status": "chunk",
+  "data": {
+    "audio": "<base64-encoded audio chunk>"
+  }
+}
+```
+
+5. Once all chunks are sent, a final message is returned:
+```json
+{
+  "request_id": "047c9091-b770-41d8-b96b-907d1c8406c0",
+  "status": "complete",
+  "message": "All chunks sent",
+  "done": true
+}
+```
+
+For implementation details, check our [WebSocket API documentation](/waves/documentation/api-references/lightning-v3.1-ws).
diff --git a/fern/products/waves/pages/v4.0.0/audio/stt-sample-audio.wav b/fern/products/waves/pages/v4.0.0/audio/stt-sample-audio.wav new file mode 100644 index 0000000..5f9e2f4 Binary files /dev/null and b/fern/products/waves/pages/v4.0.0/audio/stt-sample-audio.wav differ diff --git a/fern/products/waves/pages/v4.0.0/audio/tts-sample-hello.wav b/fern/products/waves/pages/v4.0.0/audio/tts-sample-hello.wav new file mode 100644 index 0000000..ade03e7 Binary files /dev/null and b/fern/products/waves/pages/v4.0.0/audio/tts-sample-hello.wav differ diff --git a/fern/products/waves/pages/v4.0.0/best-practices/pvc-best-practices.mdx b/fern/products/waves/pages/v4.0.0/best-practices/pvc-best-practices.mdx new file mode 100644 index 0000000..8432644 --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/best-practices/pvc-best-practices.mdx @@ -0,0 +1,38 @@ +--- +title: "Professional Voice Cloning - Best Practices" +description: "Best practices for recording high-quality reference audio." +icon: "user-tie" +--- + +To get the most accurate and natural voice clone, it's essential to provide **high-quality reference audio**. The best practices for recording remain the same as those for **Instant Voice Cloning**, which you can find here: + +🔗 **[Instant Voice Cloning - Best Practices](/waves/documentation/best-practices/vc-best-practices)** + +However, **Professional Voice Cloning (PVC) significantly improves upon Instant Voice Cloning** in the following ways: + +## 🎙️ How PVC Enhances Voice Cloning + +### 1. **Handles Background Noise More Effectively** + - PVC can filter out mild background noise without affecting voice quality. + - Unlike Instant Cloning, **PVC adapts better to real-world recording conditions**. + +### 2. **Captures a More Natural Speaking Style** + - Supports a **wider range of tones and vocal inflections**. + - Preserves the natural rhythm and personality of speech. + +### 3. 
**Understands Extreme Emotions & Variability** + - PVC models can **learn from expressive speech**, making them ideal for voices with dynamic emotions (anger, excitement, sadness). + - Instant Cloning may struggle with highly expressive tones. + +### 4. **Improves Inconsistent Speaking Patterns** + - Can learn from **pauses, breath sounds, and fluctuations in speaking speed**. + - Works well even if the reference recordings contain slight variations. + +### 5. **More Robust for Long-Form Content** + - Best suited for audiobook narration, dubbing, and professional voice applications. + - Produces high-quality results even in long recordings. + +If you have any questions or run into any issues, our community is here to help! + +- Join our [Discord server](https://discord.gg/ywShEyXHBW) to connect with other developers and get real-time support. +- Reach out to our team via email: [support@smallest.ai](mailto:support@smallest.ai). \ No newline at end of file diff --git a/fern/products/waves/pages/v4.0.0/best-practices/tts-best-practices.mdx b/fern/products/waves/pages/v4.0.0/best-practices/tts-best-practices.mdx new file mode 100644 index 0000000..2b051cc --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/best-practices/tts-best-practices.mdx @@ -0,0 +1,349 @@ +--- +title: "Text to Speech (TTS) - Best Practices" +description: "Learn best practices for text formatting for optimal Audio Generation." +icon: "comment-dots" +--- + +This comprehensive guide outlines the recommended practices for formatting text to ensure optimal processing, accuracy, and consistent output across different use cases. Following these guidelines will help improve the quality of generated audio and reduce potential errors. 
+ +## Language and Script Guidelines + +### Mixed Language Formatting + +When working with mixed language content, particularly English and Hindi, proper script selection is crucial for accurate processing: + +- English text must be written in Latin script +- Hindi text must be written in Devanagari script +- Avoid transliteration of Hindi words into Latin script + +**Examples:** +``` +✅ Correct: I want to eat खाना +❌ Incorrect: I want to eat khana + +✅ Correct: मैं school जाता हूं +❌ Incorrect: main school jata hun +``` + +### Proper Nouns Handling + +For Indian proper nouns, maintain cultural and linguistic accuracy by following these rules: + +1. **City Names:** + - Use Devanagari script for Indian city names + - Maintain Latin script for non-Indian city names + +2. **Personal Names:** + - Use Devanagari script for Indian personal names + - Maintain original script for non-Indian names + +**Examples:** +``` +✅ Correct: I live in मुंबई near अंधेरी station +❌ Incorrect: I live in Mumbai near Andheri station + +✅ Correct: Hello! अमित and रोहित are my friends from New York +❌ Incorrect: Hello! Amit and Rohit are my friends from New York + +✅ Correct: Hello! मैं दिल्ली में रहता हूं। My name is John and my friend's name is श्याम। +❌ Incorrect: Hello! Mai Delhi me rehta hun. My name is John and my friend's name is Shyam. +``` + +## Text Chunking + +### Character Limit Guidelines + +To optimize real-time processing and reduce latency, implement these chunking practices: + +1. **Size Constraints:** + - Maximum chunk size: 250 characters + - Break at natural punctuation points + - Maintain sentence coherence when possible + +2. **Breaking Points Priority:** + - First priority: Sentence-ending punctuation (., !, ?) + - Second priority: Other punctuation (;, :) + - Third priority: Natural word breaks + +### Chunking Implementation + +Use the following Python code for implementing text chunking: +- For `lightning-large` model, set `max_chunk_size=140`. 
+- For `lightning` model, set `max_chunk_size=250`. + + +```python python +def chunk_text(text, max_chunk_size=250): + """ + Chunks text with a maximum size of 250 characters, preferring to break at punctuation marks. + + - For `lightning-large` model, set `max_chunk_size=140`. + - For `lightning` model, set `max_chunk_size=250`. + + Args: + text (str): Input text to be chunked + max_chunk_size (int): Maximum size of each chunk (default: 250) + + Returns: + list: List of text chunks + """ + chunks = [] + while text: + if len(text) <= max_chunk_size: + chunks.append(text) + break + + # Look for punctuation within the last 50 characters of the max chunk size + chunk_end = max_chunk_size + punctuation_marks = '.,:;।!?' + + # Search backward from max_chunk_size for punctuation + found_punct = False + for i in range(chunk_end, max(chunk_end - 50, 0), -1): + if i < len(text) and text[i] in punctuation_marks: + chunk_end = i + 1 # Include the punctuation mark + found_punct = True + break + + # If no punctuation found, look for space + if not found_punct: + for i in range(chunk_end, max(chunk_end - 50, 0), -1): + if i < len(text) and text[i].isspace(): + chunk_end = i + break + # If no space found, force break at max_chunk_size + if not found_punct and chunk_end == max_chunk_size: + chunk_end = max_chunk_size + + # Add chunk and remove it from original text + chunks.append(text[:chunk_end].strip()) + text = text[chunk_end:].strip() + + return chunks +``` + + +## Handling numbers + + +### Order IDs and Large Numbers + +When handling order IDs or large numbers: +- Send them as separate requests +- Split the text around the number + +**Example:** +``` +Original: "Your order id is 123456789012345" +Split into: +1. "Your order id is" +2. 
"123456789012345" +``` + +### Phone Numbers + +#### Default Grouping +- Numbers are automatically grouped in 3-4-3 format +- Example: "9876543210" is read as "987-6543-210" + +#### Custom Formatting +For specific reading patterns: +- Format numbers explicitly in text +- Write out the exact pronunciation desired + +**Example:** +``` +✅ Correct: "double nine triple eight double seven double six" (for 9988877766) +❌ Incorrect: "9988877766" (if you want it read as "double nine...") +``` + +## Date and Time Formatting Guidelines + +### Date Formats +You may use any of the following formats when writing dates: + +1. DD/MM/YYYY → `12/02/2025` → "twelve, two, twenty twenty-five" +2. DD-MM-YYYY → `12-02-2025` → "twelve, two, twenty twenty-five" +3. DD Month YYYY → `12 February 2025` → "twelve February twenty twenty five" +4. Month DD YYYY → `February 12th 2025` → "February, twelfth, twenty twenty-five" +5. DD-MM-YY → `12-02-25` → "twelve, two, twenty-five" +6. DD/MM/YY → `12/02/25` → "twelve, two, twenty-five" + +> Note: Ordinal suffixes (st, nd, rd, th) could be used in dates. + +``` +✅ My birthday is on 31/12/2002. +✅ The event is scheduled for 05th March 2024. +✅ We will launch the project on June 15 2023. +✅ The deadline is 30-06-24. + +❌ 21st of June, 2003. (Will be read as twenty-first of June, two thousand and three) +❌ 12.02.2025. (Will be read as twelve two two thousand and twenty-five) +``` + +### Time Formats +You may use the following formats when specifying time: + +1. HH:MM:SS → `14:30:15` → "fourteen thirty fifteen" +2. HH:MM → `14:30` → "fourteen thirty" + +``` +✅ Let's meet at 12:32 PM on 12/02/2025. +✅ The meeting starts at 09:45 AM. +✅ The match will begin at 18:00. +✅ The alarm is set for 07:15:30. + +❌ 14.30 (Will be read as fourteen [long pause] thirty) +❌ 7'5 AM (Will be read as seven five) +``` + +## Mathematical Expressions + +Express mathematical operations in words for clarity. 
For complex mathematical expressions, break down into simpler components:
+
+```
+✅ Correct: two plus three equals five
+✅ Correct: 2 plus 3 equals 5
+❌ Incorrect: 2+3=5
+
+✅ Correct: ten minus three equals seven
+✅ Correct: 10 minus 3 equals 7
+❌ Incorrect: 10-3=7
+
+✅ Correct: five multiplied by three equals fifteen
+✅ Correct: 5 multiplied by 3 equals 15
+❌ Incorrect: 5x3=15, 5*3=15
+
+✅ Correct: ten divided by two equals five
+✅ Correct: 10 divided by 2 equals 5
+❌ Incorrect: 10/2=5, 10÷2=5
+
+✅ Correct: open parentheses five plus three close parentheses multiplied by two equals sixteen
+✅ Correct: open parentheses 5 plus 3 close parentheses multiplied by 2 equals 16
+❌ Incorrect: (5+3)*2=16
+
+✅ Correct: square root of sixteen equals four
+✅ Correct: square root of 16 equals 4
+❌ Incorrect: √16=4
+```
+
+## Approximate Values
+
+When expressing approximate values:
+- Write out the full words
+- Avoid using symbols for approximation
+- Be explicit about the approximation
+
+**Examples:**
+```
+✅ Correct: Your delivery will arrive in approximately twenty minutes
+✅ Correct: Your delivery will arrive in approximately 20 minutes
+❌ Incorrect: Your delivery will arrive in ~20 mins
+
+✅ Correct: around five hundred people attended
+✅ Correct: around 500 people attended
+❌ Incorrect: ~500 people attended
+```
+
+## Units and Measurements
+
+
+When expressing measurements, write out the units in full words to ensure clear understanding:
+
+```
+✅ Correct: five kilometers, 5 kilometers
+❌ Incorrect: 5km, 5 kms
+
+✅ Correct: twenty kilograms of rice, 20 kilograms of rice
+❌ Incorrect: 20kg rice, 20kgs rice
+
+✅ Correct: thirty degrees Celsius, 30 degrees Celsius
+❌ Incorrect: 30°C, 30 C
+
+✅ Correct: two liters of water, 2 liters of water
+❌ Incorrect: 2L water, 2l water
+
+✅ Correct: five feet six inches tall, 5 feet 6 inches tall
+❌ Incorrect: 5'6" tall, 5ft 6in tall
+```
+
+## Symbols and Special Characters
+
+### Basic Symbols
+Spell out special characters and
symbols in all contexts: +``` +. → "dot" +@ → "at" +_ → "underscore" +- → "dash" +/ → "forward slash" +# → "hashtag" +& → "and" +``` + +### Digital Content Formatting + +**1. URLs:** +``` +✅ Correct: visit docs dot example dot com forward slash guide +❌ Incorrect: visit docs.example.com/guide + +✅ Correct: my dash website dot com forward slash about +❌ Incorrect: my-website.com/about +``` + +**2. Email Addresses:** +``` +✅ Correct: support dot company at gmail dot com +❌ Incorrect: support.company@gmail.com + +✅ Correct: info underscore help at company dot com +❌ Incorrect: info_help@company.com +``` + +**3. Social Media:** +``` +✅ Correct: at company underscore name +❌ Incorrect: @company_name + +✅ Correct: hashtag trending now +❌ Incorrect: #TrendingNow + +✅ Correct: follow us at tech underscore company hashtag latest news +❌ Incorrect: follow us @tech_company #LatestNews +``` + +### Range and Interval Notation +Always write out ranges and relationships explicitly to avoid ambiguity: +``` +✅ Correct: five to eight days +❌ Incorrect: 5-8 days + +✅ Correct: between ten and fifteen minutes +❌ Incorrect: 10-15 minutes + +✅ Correct: temperatures from twenty to thirty degrees +❌ Incorrect: temperatures 20-30° +``` + +Note: +- Consistency is key - use the same format throughout your content +- When in doubt, write out the full words +- For complex URLs or handles, break them into smaller, manageable chunks +- Avoid using symbols that could have multiple interpretations + +--- + +## Next Steps + + + + Explore all TTS API parameters and response formats. + + + Generate your first speech audio in under 60 seconds. + + + Clone any voice from just 5-15 seconds of audio. 
+ + diff --git a/fern/products/waves/pages/v4.0.0/best-practices/vc-best-practices.mdx b/fern/products/waves/pages/v4.0.0/best-practices/vc-best-practices.mdx new file mode 100644 index 0000000..1e66ff3 --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/best-practices/vc-best-practices.mdx @@ -0,0 +1,118 @@ +--- +title: "Voice Cloning - Best Practices" +description: "Best Practices for Recording Reference Audio" +icon: "clone" +--- + +To achieve the best results when cloning your voice, it's essential to provide high-quality reference audio. Below are some best practices, dos and don'ts, and examples to guide you. + +Ready to Clone Your Voice? Try it out on our [platform](https://app.smallest.ai/waves/voice-cloning?utm_source=documentation&utm_medium=best-practices) + +--- + +## 🎙️ How to Record Reference Audio + +1. **Environment** + - Record in a quiet room with minimal background noise. + - Use a good quality microphone. While dedicated mics are ideal, MacBook and Mobile microphones work well for this purpose. + - Mobile and Laptop recordings can work well too, as long as the device is placed at an adequate distance—not too far or too close—to ensure clear, natural sound without distortion. + - Make sure the recording environment doesn’t introduce echo or distortion (e.g., avoid large empty rooms or outdoor spaces). + - After uploading the audio, listen to it to ensure it is clear and free of interruptions, background noise, or distortion. + +2. **Speaking Style** + - Speak naturally and avoid excessive emotion unless a specific tone is required. + - Maintain a consistent pace and tone throughout the recording. Be mindful of long pauses, as they can impact the quality of the cloned voice. + +3. **Length of Audio** + - Provide at least 5 seconds to 15 seconds of clean audio. + +--- + +## 🎧 Examples of Good and Bad Reference Audio + +> **_NOTE:_** Currently, there is no direct support for adding audio to Mintlify. 
As a workaround, we have embedded a video to include the necessary audio content. + +### Good Reference Audio +- High-quality, clear, and consistent tone. + + + + +### Bad Reference Audio +1. **With Background Noise** + + +2. **Inconsistent Speaking Style** + + +3. **Overlapping Voices** + + +--- + +# 🎭 Creating Expressive Voice Clones + +Our platform supports emotional reference audio, meaning the emotions, pitch or tone in the reference audio will influence the output. This is ideal for creating expressive clones that match your intended tone. + +## 😄 Emotional Control +- The emotions in the reference audio (e.g., angry, happy, sad) directly impact the tone of the generated voice. +- For example, if the reference audio conveys happiness, the output will replicate that cheerful tone. + +## ⚡ Speed Control +- The pace of your reference audio determines the speed of the output. +- A fast-paced reference will generate a similarly fast delivery, while a slower reference will produce a more measured response. + +## 🔊 Loudness Control +- The loudness or volume in your reference audio is reflected in the output. +- For instance, a soft-spoken input will result in a quieter clone, while a louder, more energetic recording will produce a bolder output. + +--- + +## 🎧 Emotional Reference Audio Examples + +### Angry Tone +- **Reference Audio Sample**: + + +- **Output Audio Example**: + + +### Silent Tone +- **Reference Audio Sample**: + + +- **Output Audio Example**: + + + +### Fast-Paced Tone +- **Reference Audio Sample**: + + +- **Output Audio Example**: + + + +--- + +By following these guidelines and leveraging emotional reference audio, you can achieve highly accurate and expressive voice clones tailored to your needs. 
diff --git a/fern/products/waves/pages/v4.0.0/best-practices/video/angry_gen_t.mp4 b/fern/products/waves/pages/v4.0.0/best-practices/video/angry_gen_t.mp4 new file mode 100644 index 0000000..0b3628e Binary files /dev/null and b/fern/products/waves/pages/v4.0.0/best-practices/video/angry_gen_t.mp4 differ diff --git a/fern/products/waves/pages/v4.0.0/best-practices/video/angry_ref_t.mp4 b/fern/products/waves/pages/v4.0.0/best-practices/video/angry_ref_t.mp4 new file mode 100644 index 0000000..aca4571 Binary files /dev/null and b/fern/products/waves/pages/v4.0.0/best-practices/video/angry_ref_t.mp4 differ diff --git a/fern/products/waves/pages/v4.0.0/best-practices/video/bg_ref_t.mp4 b/fern/products/waves/pages/v4.0.0/best-practices/video/bg_ref_t.mp4 new file mode 100644 index 0000000..7d1d723 Binary files /dev/null and b/fern/products/waves/pages/v4.0.0/best-practices/video/bg_ref_t.mp4 differ diff --git a/fern/products/waves/pages/v4.0.0/best-practices/video/fast_gen_t.mp4 b/fern/products/waves/pages/v4.0.0/best-practices/video/fast_gen_t.mp4 new file mode 100644 index 0000000..9c66deb Binary files /dev/null and b/fern/products/waves/pages/v4.0.0/best-practices/video/fast_gen_t.mp4 differ diff --git a/fern/products/waves/pages/v4.0.0/best-practices/video/fast_ref_t.mp4 b/fern/products/waves/pages/v4.0.0/best-practices/video/fast_ref_t.mp4 new file mode 100644 index 0000000..1be0d9c Binary files /dev/null and b/fern/products/waves/pages/v4.0.0/best-practices/video/fast_ref_t.mp4 differ diff --git a/fern/products/waves/pages/v4.0.0/best-practices/video/good_ref_t.mp4 b/fern/products/waves/pages/v4.0.0/best-practices/video/good_ref_t.mp4 new file mode 100644 index 0000000..8b16dec Binary files /dev/null and b/fern/products/waves/pages/v4.0.0/best-practices/video/good_ref_t.mp4 differ diff --git a/fern/products/waves/pages/v4.0.0/best-practices/video/inconsistent_ref_t.mp4 b/fern/products/waves/pages/v4.0.0/best-practices/video/inconsistent_ref_t.mp4 new file mode 
100644 index 0000000..dec6109 Binary files /dev/null and b/fern/products/waves/pages/v4.0.0/best-practices/video/inconsistent_ref_t.mp4 differ diff --git a/fern/products/waves/pages/v4.0.0/best-practices/video/overlap_ref_t.mp4 b/fern/products/waves/pages/v4.0.0/best-practices/video/overlap_ref_t.mp4 new file mode 100644 index 0000000..ebc3285 Binary files /dev/null and b/fern/products/waves/pages/v4.0.0/best-practices/video/overlap_ref_t.mp4 differ diff --git a/fern/products/waves/pages/v4.0.0/best-practices/video/whisper_gen_t.mp4 b/fern/products/waves/pages/v4.0.0/best-practices/video/whisper_gen_t.mp4 new file mode 100644 index 0000000..2c6188a Binary files /dev/null and b/fern/products/waves/pages/v4.0.0/best-practices/video/whisper_gen_t.mp4 differ diff --git a/fern/products/waves/pages/v4.0.0/best-practices/video/whisper_ref_t.mp4 b/fern/products/waves/pages/v4.0.0/best-practices/video/whisper_ref_t.mp4 new file mode 100644 index 0000000..6a2edeb Binary files /dev/null and b/fern/products/waves/pages/v4.0.0/best-practices/video/whisper_ref_t.mp4 differ diff --git a/fern/products/waves/pages/v4.0.0/changelog/announcements.mdx b/fern/products/waves/pages/v4.0.0/changelog/announcements.mdx new file mode 100644 index 0000000..2851fc5 --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/changelog/announcements.mdx @@ -0,0 +1,74 @@ +--- +title: "Announcements" +description: "New updates and improvements from Smallest AI." +mode: "center" +--- + + + ## Introducing Lightning v3.1 + + We are excited to announce the release of Lightning v3.1, our most natural-sounding text-to-speech model yet. Key features include: + + - **Natural, Expressive Speech**: A 44 kHz model that sounds realistic. + - **Ultra-Low Latency**: Optimized for real-time applications. + - **Voice Cloning Support**: Full compatibility with cloned voices. + - **Multiple Output Formats**: PCM, MP3, WAV, and mulaw. + - **Language Support**: English, Hindi, Tamil, and Spanish. 
+ + Experience the new capabilities of Lightning v3.1 on our [platform](https://app.smallest.ai/waves/studio/create?utm_source=documentation&utm_medium=changelog) or via the [API](/waves/documentation/api-references/lightning-v3.1). + + + + ## Introducing Lightning v2 + + We are thrilled to announce the release of our Lightning v2 model. This model supports 16 languages, providing high-quality speech synthesis across multiple languages. Key features include: + + - **Multilingual Support**: High-quality speech synthesis in 16 languages with voice cloning. + - **100ms TTFB**: Superfast and scalable to support your realtime applications. + - **0.05 per 10K characters**: 3x cheaper than other providers. + + Experience the new capabilities of Lightning v2 on our [Platform](https://app.smallest.ai/waves/studio/create). + + + + ## Introducing Lightning Multilingual - Now in Beta + + We are thrilled to announce the beta release of our Lightning Multilingual model. This model supports 30 languages, providing high-quality speech synthesis across multiple languages. Key features include: + + - **Multilingual Support**: High-quality speech synthesis in 30 languages. + - **Versatile Applications**: Ideal for global applications requiring diverse language support. + - **Beta Stage**: Currently in beta, with ongoing improvements and updates. + + Experience the new capabilities of Lightning Multilingual on our [Platform](https://app.smallest.ai/waves/studio/create). + + + + ## Introducing Lightning Large - Enhanced Expressiveness and Quality + + We are excited to announce the latest enhancements to our Lightning Large model. With improved emotional depth and expressiveness, Lightning Large now delivers even more natural and engaging speech synthesis. This update includes: + - **Enhanced Voice Cloning**: More accurate and lifelike voice clones. + - **Best-in-Market Latency**: Instant voice cloning with a latency of under 300ms. 
+ + Experience the new capabilities of Lightning Large via our [Platform](https://app.smallest.ai/waves/studio/create) or the [API](/v3.0.1/content/api-references/lightning). + + + + ## Introducing Waves + + [Waves](https://app.smallest.ai/waves/studio/create) is our new unified platform for speech synthesis. It supports various models designed for real-time applications such as voicebots. + However, waves will also be supporting various features such as voice cloning, accent cloning, style transfer, speech to speech etc in the near future, making it a ubiquitous platform + for anyone who needs AI generated speech. + + The voices available on waves platform are available via the [Smallest AI API](/v3.0.1/content/api-references/lightning) + + + + ## Introducing Lightning - World's Fastest Text to Speech + Lightning is the world's fastest text to speech model, generating around 10 seconds of hyper-realistic audio in just 100ms, all at once, no streaming. + + Read more about lightning in our release post [here](https://smallest.ai/blog/lightning-fast-text-to-speech). + + You can access lightning via the [Smallest AI API](/v3.0.1/content/api-references/lightning) + + +*A lot more coming up, very soon* \ No newline at end of file diff --git a/fern/products/waves/pages/v4.0.0/client-libraries/overview.mdx b/fern/products/waves/pages/v4.0.0/client-libraries/overview.mdx new file mode 100644 index 0000000..9920dae --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/client-libraries/overview.mdx @@ -0,0 +1,10 @@ +--- +title: "Client Libraries" +description: "Access Client Libraries by Smallest AI." +mode: "center" +--- + +Welcome to the Smallest AI Client Libraries documentation. Our client libraries provide easy-to-use interfaces for integrating Smallest AI's powerful text-to-speech and voice cloning capabilities into your applications. 
+ +- [Python](https://github.com/smallest-inc/smallest-python-sdk) +- [Node](https://github.com/smallest-inc/smallest-node-sdk) \ No newline at end of file diff --git a/fern/products/waves/pages/v4.0.0/cookbooks/speech-to-text.mdx b/fern/products/waves/pages/v4.0.0/cookbooks/speech-to-text.mdx new file mode 100644 index 0000000..09e62dd --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/cookbooks/speech-to-text.mdx @@ -0,0 +1,25 @@ +--- +title: "Speech to Text Examples" +sidebarTitle: "Speech to Text" +description: "Production-ready code examples for Pulse STT - from real-time streaming to batch transcription." +icon: "code" +--- + +Explore complete, runnable examples from our cookbook repository. + + + + Stream audio from your microphone over WebSocket and get real-time transcriptions. + + + Automatically transcribe and take notes from online meetings with speaker identification. + + + Transcribe podcast episodes and generate concise summaries. + + + Generate SRT/VTT subtitle files from audio and video content. + + + +Browse all examples on our [GitHub repository](https://github.com/smallest-inc/cookbook/tree/main/speech-to-text). diff --git a/fern/products/waves/pages/v4.0.0/cookbooks/text-to-speech.mdx b/fern/products/waves/pages/v4.0.0/cookbooks/text-to-speech.mdx new file mode 100644 index 0000000..76df48c --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/cookbooks/text-to-speech.mdx @@ -0,0 +1,37 @@ +--- +title: "Text to Speech Examples" +sidebarTitle: "Text to Speech" +description: "Production-ready code examples for Lightning TTS - from basic synthesis to streaming, voice cloning, and full applications." +icon: "code" +--- + +Explore complete, runnable examples from our cookbook repository. + + + + Generate speech in 5 lines of code — the simplest way to start. + + + Real-time audio streaming with latency metrics and chunk-by-chunk playback. + + + List, filter, and preview 80+ voices by language, gender, and accent. 
+ + + Custom pronunciations for brand names, acronyms, and technical terms. + + + Give it a topic, get a two-host AI podcast with LLM-generated script. + + + Convert any text file into a narrated, chaptered audiobook. + + + Web app to browse and preview all voices — deploy to Vercel. + + + Translate text between 40+ languages with TTS and STT. + + + +Browse all examples on our [GitHub repository](https://github.com/smallest-inc/cookbook/tree/main/text-to-speech). diff --git a/fern/products/waves/pages/v4.0.0/getting-started/authentication.mdx b/fern/products/waves/pages/v4.0.0/getting-started/authentication.mdx new file mode 100644 index 0000000..d6eeaf1 --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/getting-started/authentication.mdx @@ -0,0 +1,104 @@ +--- +title: "Authentication" +description: "Create an API key and authenticate requests to the Smallest AI APIs." +icon: "lock" +--- + +Every API request requires an API key in the `Authorization` header. + +## Create Your API Key + + + + In the [Smallest AI Console](https://app.smallest.ai/dashboard/settings/apikeys?utm_source=documentation&utm_medium=authentication), click **API Keys** in the Settings sidebar. + + API Keys settings page with Create API Key button highlighted + + + Click **Create API Key**, give it a descriptive name (e.g., `my-tts-app`), and click **Create API Key**. + + Create New API Key dialog with name field and create button + + Copy the key immediately — it won't be shown again. + + + ```bash + export SMALLEST_API_KEY="your-api-key-here" + ``` + + Add this to your `.bashrc` or `.zshrc` to persist across sessions. 
+ + + +## Using Your API Key + +Include your key in the `Authorization` header with every request: + +``` +Authorization: Bearer YOUR_API_KEY +``` + + + +```bash cURL +curl -X POST "https://api.smallest.ai/waves/v1/lightning-v3.1/get_speech" \ + -H "Authorization: Bearer $SMALLEST_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{"text": "Authentication test", "voice_id": "magnus", "output_format": "wav"}' \ + --output test.wav +``` + +```python Python +import os +import requests + +response = requests.post( + "https://api.smallest.ai/waves/v1/lightning-v3.1/get_speech", + headers={ + "Authorization": f"Bearer {os.environ['SMALLEST_API_KEY']}", + "Content-Type": "application/json", + }, + json={"text": "Authentication test", "voice_id": "magnus", "output_format": "wav"}, +) +``` + +```javascript JavaScript +const response = await fetch( + "https://api.smallest.ai/waves/v1/lightning-v3.1/get_speech", + { + method: "POST", + headers: { + Authorization: `Bearer ${process.env.SMALLEST_API_KEY}`, + "Content-Type": "application/json", + }, + body: JSON.stringify({ + text: "Authentication test", + voice_id: "magnus", + output_format: "wav", + }), + } +); +``` + + + +## Security + + +Your API key is a secret. Never expose it in client-side code, public repositories, or browser applications. + + +- Store keys in environment variables, not in source code +- Use `.env` files locally (add `.env` to `.gitignore`) +- Rotate keys periodically via the console +- Each key tracks usage against your account quota + +## Error Responses + +| Status | Meaning | +|--------|---------| +| `401 Unauthorized` | Missing or invalid API key | +| `403 Forbidden` | Key doesn't have access to this resource | +| `429 Too Many Requests` | Rate limit exceeded — wait and retry | + +For rate limits and concurrency details, see [Concurrency and Limits](/waves/api-reference/api-references/concurrency-and-limits). 
diff --git a/fern/products/waves/pages/v4.0.0/getting-started/http-stream.mdx b/fern/products/waves/pages/v4.0.0/getting-started/http-stream.mdx new file mode 100644 index 0000000..f8cfa57 --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/getting-started/http-stream.mdx @@ -0,0 +1,69 @@ +--- +title: "HTTP vs HTTP Streaming vs WebSockets" +description: "What should you use?" +icon: "handshake-angle" +--- + +--- + +### Choosing the Right Protocol for Your TTS Application: HTTP, HTTP Streaming, or WebSocket? + +If you’re integrating Lightning TTS into your application, one important decision is how to connect to the TTS engine. We support three protocols: HTTP, HTTP Streaming, and WebSocket, each tailored to different use cases. In this guide, we’ll break down the strengths of each and help you choose the best fit for your needs. + +## HTTP: Best for Simplicity and Short Requests +**What it is**: +A classic REST-style interaction. You send a complete request (e.g., the full text to be converted to speech), and receive the synthesized audio as a downloadable response. + +**When to use it**: +- You have short or moderate-length texts. +- You want a simple integration, such as from a browser, mobile app, or backend job. +- You don’t need real-time feedback or streaming audio. + +**Pros and Cons**: + +| Pros                                          | Cons                                                   | +|-----------------------------------------------|--------------------------------------------------------| +| Simple to integrate with standard HTTP tools  | Full audio is returned only after complete synthesis   | +| Easy to debug and monitor                     | Not suitable for real-time or long-form audio          | +| Stateless; good for serverless environments   | Reconnect needed for each request                      | +| Works well with caching and CDNs              | Higher latency compared to streaming methods           | + + +## HTTP Streaming: Best for Faster Playback Without Complexity + +**What it is**: +An enhancement of standard HTTP. 
The client sends a complete request, but the server streams back the audio as it's being generated, no need to wait for the full file. + +**When to use it**: +- You want faster playback with lower perceived latency. +- You send full input text but need audio to start as soon as possible. +- You want low-latency audio delivery without handling connection persistence. + +**Pros and Cons**: + +| Pros | Cons | +|------------------------------------------------|--------------------------------------------------------| +| Lower latency than regular HTTP | Only one-way communication (client → server) | +| Compatible with standard HTTP infrastructure | Full input must still be sent before synthesis starts | +| Audio starts playing as it's generated | No partial or live input updates | +| Easy to adopt with minimal changes | Slightly more complex than basic HTTP | + + +## WebSocket: Best for Real-Time, Interactive Applications + +**What it is**: +A full-duplex, persistent connection that allows two-way communication between the client and server. You can send text dynamically and receive streaming audio back continuously. + +**When to use it**: +- You need real-time, interactive TTS responses. +- Input is dynamic or arrives in chunks (e.g., live typing, conversation). +- You want persistent connections with minimal overhead per message. 
+ +**Pros and Cons**: + +| Pros | Cons | +|----------------------------------------------------|---------------------------------------------------------| +| Ultra low latency | More complex to implement and manage | +| Supports real-time, chunked input and responses | Requires persistent connection management | +| Bi-directional communication | Not ideal for simple or infrequent tasks | +| Great for chatbots, live agents, or dictation apps | May require additional libraries or WebSocket support | diff --git a/fern/products/waves/pages/v4.0.0/getting-started/introduction.mdx b/fern/products/waves/pages/v4.0.0/getting-started/introduction.mdx new file mode 100644 index 0000000..148db02 --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/getting-started/introduction.mdx @@ -0,0 +1,122 @@ +--- +title: "Introduction" +description: "Speech AI APIs by Smallest AI — generate speech with Lightning TTS and transcribe audio with Pulse STT." +icon: "book-open" +--- + +[Smallest AI](https://smallest.ai?utm_source=documentation&utm_medium=getting-started) builds speech AI models and APIs. Generate natural speech, transcribe audio in real-time, and clone voices — all through simple API calls. + +## Models + + + + Generate speech with 80+ voices, 44.1 kHz audio, and ~200ms latency. English, Hindi, Spanish, Tamil. + + + Transcribe audio in real-time or from files. 32+ languages, speaker diarization, emotion detection. + + + +## Get Your API Key + + + + Go to [app.smallest.ai](https://app.smallest.ai?utm_source=documentation&utm_medium=getting-started) and sign up with email or Google. + + Smallest AI sign up page with email and Google authentication + + + Click the **platform switcher** (top-left dropdown) → **Settings**, then select **API Keys** in the sidebar. + + Platform dropdown showing Settings menu option + + API Keys settings page with Create API Key button highlighted + + + Click **Create API Key**, give it a name, and copy the key. 
+ + Create New API Key dialog with name field and create button + + Set it in your terminal: + + ```bash + export SMALLEST_API_KEY="your-api-key-here" + ``` + + + +## Try It Now + +### Generate speech (Lightning TTS) + +Paste this in your terminal — no install required: + +```bash +curl -X POST "https://api.smallest.ai/waves/v1/lightning-v3.1/get_speech" \ + -H "Authorization: Bearer $SMALLEST_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{"text": "Hello from Smallest AI!", "voice_id": "magnus", "sample_rate": 24000, "output_format": "wav"}' \ + --output hello.wav +``` + +Play `hello.wav` — you should hear natural, expressive speech. + + + +### Transcribe audio (Pulse STT) + +```python +# Use any WAV file you have, or download the sample: +# curl -o audio.wav https://api.smallest.ai/waves/v1/lightning-v3.1/get_speech \ +# -H "Authorization: Bearer $SMALLEST_API_KEY" \ +# -H "Content-Type: application/json" \ +# -d '{"text":"Hello world","voice_id":"magnus","output_format":"wav"}' + +with open("audio.wav", "rb") as f: + audio_data = f.read() +``` + +You'll get back: + +```json +{ + "transcription": "This is a sample audio file for testing speech to text transcription with the Pulse API." +} +``` + +## Next Steps + + + + Full guide with Python, JavaScript, and SDK examples. + + + Transcribe files and stream audio in real-time. + + + Benchmarks, specs, and capabilities. + + + Production-ready example projects. + + + See what developers have built with Smallest AI. + + + Open-source cookbook with 20+ examples. + + + +## Community & Support + + + + Ask questions, share projects, and connect with other developers. + + + Reach our team directly for technical assistance. 
+ + diff --git a/fern/products/waves/pages/v4.0.0/getting-started/models.mdx b/fern/products/waves/pages/v4.0.0/getting-started/models.mdx new file mode 100644 index 0000000..447085d --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/getting-started/models.mdx @@ -0,0 +1,61 @@ +--- +title: "Models" +description: "Find detailed description of each model along with their capabilities and supported languages." +icon: "cube" +--- + +## Text to Speech (TTS) Models + + + + Deprecated An upgrade from the Lightning Large model, offering improved performance and + quality. It supports 16 languages, making it suitable for a wider range of + applications requiring expressive and high-quality speech synthesis. + + + Latest A 44 kHz model delivering natural, expressive, and realistic speech. Supports voice cloning with ultra-low latency. Supports English, Hindi, Tamil, and Spanish. + + + +## Speech to Text (STT) Models + + + + Low-latency speech recognition for real-time and pre-recorded transcription. + Automatic language detection across 32+ languages. Supports keyword boosting for domain-specific vocabulary. + + + +Click on a model name to view its detailed model card. + +## Geo-location Based Routing + +Waves intelligently routes every request to the nearest server cluster to ensure the lowest possible latency for your applications. We currently operate server clusters in: + +- 🇮🇳 India (Mumbai) +- 🇺🇸 USA (Oregon) + +Our routing system automatically detects the client's geographical location and connects them to the optimal server based on network proximity and latency. This process is fully automated, no manual configuration is required on your side. 
+ +## Model Overview (TTS) + +| Model ID | Description | Languages Supported | +| ------------------- | -------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| **lightning-v2** Deprecated | 100ms TTFB, Supports 16 languages with voice cloning. | `English`
`Hindi`
`Tamil`
`Kannada`
`Malayalam`
`Telugu`
`Gujarati`
`Bengali`
`Marathi`
`German`
`French`
`Spanish`
`Italian`
`Polish`
`Dutch`
`Russian`
`Arabic`
`Hebrew`
`Swedish` | +| [**lightning-v3.1**](/waves/model-cards/text-to-speech/lightning-v-3-1) Latest | 44 kHz model, natural expressive speech, ultra-low latency, supports voice cloning. | `English`
`Hindi`
`Tamil`
`Spanish` | + +## Model Overview (STT) + +| Model ID | Description | Languages Supported | +| --------- | ----------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| [**pulse**](/waves/model-cards/speech-to-text/pulse) | Low-latency speech-to-text model supporting automatic language detection and real-time transcription. | `Italian`
`Spanish`
`English`
`Portuguese`
`Hindi`
`German`
`French`
`Ukrainian`
`Russian`
`Kannada`
`Malayalam`
`Polish`
`Marathi`
`Gujarati`
`Czech`
`Slovak`
`Telugu`
`Oriya (Odia)`
`Dutch`
`Bengali`
`Latvian`
`Estonian`
`Romanian`
`Punjabi`
`Finnish`
`Swedish`
`Bulgarian`
`Tamil`
`Hungarian`
`Danish`
`Lithuanian`
`Maltese` | + + + Note: The API uses [ISO 639-1 language codes - Set + 1](https://en.wikipedia.org/wiki/List_of_ISO_639_language_codes) (2-letter + codes) to specify supported languages. + + +## Pricing + +Our pricing model is designed to be flexible and scalable, catering to different usage needs. For detailed pricing information, please visit our [pricing page](https://smallest.ai/text-to-speech) or contact our sales team at [support@smallest.ai](mailto:support@smallest.ai). diff --git a/fern/products/waves/pages/v4.0.0/images/agent-dashboard-conversions.png b/fern/products/waves/pages/v4.0.0/images/agent-dashboard-conversions.png new file mode 100644 index 0000000..edf540d Binary files /dev/null and b/fern/products/waves/pages/v4.0.0/images/agent-dashboard-conversions.png differ diff --git a/fern/products/waves/pages/v4.0.0/images/agent-dashboard.png b/fern/products/waves/pages/v4.0.0/images/agent-dashboard.png new file mode 100644 index 0000000..888291a Binary files /dev/null and b/fern/products/waves/pages/v4.0.0/images/agent-dashboard.png differ diff --git a/fern/products/waves/pages/v4.0.0/images/api-keys-page-create-button.png b/fern/products/waves/pages/v4.0.0/images/api-keys-page-create-button.png new file mode 100644 index 0000000..d7ffbcb Binary files /dev/null and b/fern/products/waves/pages/v4.0.0/images/api-keys-page-create-button.png differ diff --git a/fern/products/waves/pages/v4.0.0/images/checks-passed.png b/fern/products/waves/pages/v4.0.0/images/checks-passed.png new file mode 100644 index 0000000..3303c77 Binary files /dev/null and b/fern/products/waves/pages/v4.0.0/images/checks-passed.png differ diff --git a/fern/products/waves/pages/v4.0.0/images/conversions-list.png b/fern/products/waves/pages/v4.0.0/images/conversions-list.png new file mode 100644 index 0000000..aa46a94 Binary files /dev/null and b/fern/products/waves/pages/v4.0.0/images/conversions-list.png differ diff --git 
a/fern/products/waves/pages/v4.0.0/images/create-api-key-modal.png b/fern/products/waves/pages/v4.0.0/images/create-api-key-modal.png new file mode 100644 index 0000000..b5f77c8 Binary files /dev/null and b/fern/products/waves/pages/v4.0.0/images/create-api-key-modal.png differ diff --git a/fern/products/waves/pages/v4.0.0/images/create-audience.png b/fern/products/waves/pages/v4.0.0/images/create-audience.png new file mode 100644 index 0000000..e1e062d Binary files /dev/null and b/fern/products/waves/pages/v4.0.0/images/create-audience.png differ diff --git a/fern/products/waves/pages/v4.0.0/images/create-campaign.png b/fern/products/waves/pages/v4.0.0/images/create-campaign.png new file mode 100644 index 0000000..a7efb9d Binary files /dev/null and b/fern/products/waves/pages/v4.0.0/images/create-campaign.png differ diff --git a/fern/products/waves/pages/v4.0.0/images/create-conversion.png b/fern/products/waves/pages/v4.0.0/images/create-conversion.png new file mode 100644 index 0000000..2e95316 Binary files /dev/null and b/fern/products/waves/pages/v4.0.0/images/create-conversion.png differ diff --git a/fern/products/waves/pages/v4.0.0/images/download.svg b/fern/products/waves/pages/v4.0.0/images/download.svg new file mode 100644 index 0000000..f41f94e --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/images/download.svg @@ -0,0 +1,3 @@ + + + diff --git a/fern/products/waves/pages/v4.0.0/images/hero-dark.svg b/fern/products/waves/pages/v4.0.0/images/hero-dark.svg new file mode 100644 index 0000000..c6a30e8 --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/images/hero-dark.svg @@ -0,0 +1,161 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git 
a/fern/products/waves/pages/v4.0.0/images/hero-light.svg b/fern/products/waves/pages/v4.0.0/images/hero-light.svg new file mode 100644 index 0000000..297d68f --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/images/hero-light.svg @@ -0,0 +1,155 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/fern/products/waves/pages/v4.0.0/images/ivc-image-1.png b/fern/products/waves/pages/v4.0.0/images/ivc-image-1.png new file mode 100644 index 0000000..5c18503 Binary files /dev/null and b/fern/products/waves/pages/v4.0.0/images/ivc-image-1.png differ diff --git a/fern/products/waves/pages/v4.0.0/images/ivc-image-2.png b/fern/products/waves/pages/v4.0.0/images/ivc-image-2.png new file mode 100644 index 0000000..b4d3f32 Binary files /dev/null and b/fern/products/waves/pages/v4.0.0/images/ivc-image-2.png differ diff --git a/fern/products/waves/pages/v4.0.0/images/ivc-image-3.png b/fern/products/waves/pages/v4.0.0/images/ivc-image-3.png new file mode 100644 index 0000000..0cf238e Binary files /dev/null and b/fern/products/waves/pages/v4.0.0/images/ivc-image-3.png differ diff --git a/fern/products/waves/pages/v4.0.0/images/ivc-image-4.png b/fern/products/waves/pages/v4.0.0/images/ivc-image-4.png new file mode 100644 index 0000000..5ecf5d6 Binary files /dev/null and b/fern/products/waves/pages/v4.0.0/images/ivc-image-4.png differ diff --git a/fern/products/waves/pages/v4.0.0/images/lightning_cover.png b/fern/products/waves/pages/v4.0.0/images/lightning_cover.png new file mode 100644 index 0000000..acaebb8 Binary files /dev/null and b/fern/products/waves/pages/v4.0.0/images/lightning_cover.png differ diff --git a/fern/products/waves/pages/v4.0.0/images/platform-switcher-settings.png 
b/fern/products/waves/pages/v4.0.0/images/platform-switcher-settings.png new file mode 100644 index 0000000..3ffcdc4 Binary files /dev/null and b/fern/products/waves/pages/v4.0.0/images/platform-switcher-settings.png differ diff --git a/fern/products/waves/pages/v4.0.0/images/pvc_page.png b/fern/products/waves/pages/v4.0.0/images/pvc_page.png new file mode 100644 index 0000000..8bf1c5e Binary files /dev/null and b/fern/products/waves/pages/v4.0.0/images/pvc_page.png differ diff --git a/fern/products/waves/pages/v4.0.0/images/save-campaign.png b/fern/products/waves/pages/v4.0.0/images/save-campaign.png new file mode 100644 index 0000000..972b0df Binary files /dev/null and b/fern/products/waves/pages/v4.0.0/images/save-campaign.png differ diff --git a/fern/products/waves/pages/v4.0.0/images/sign-up-page.png b/fern/products/waves/pages/v4.0.0/images/sign-up-page.png new file mode 100644 index 0000000..897301e Binary files /dev/null and b/fern/products/waves/pages/v4.0.0/images/sign-up-page.png differ diff --git a/fern/products/waves/pages/v4.0.0/images/smallest_cover.jpeg b/fern/products/waves/pages/v4.0.0/images/smallest_cover.jpeg new file mode 100644 index 0000000..2f584f4 Binary files /dev/null and b/fern/products/waves/pages/v4.0.0/images/smallest_cover.jpeg differ diff --git a/fern/products/waves/pages/v4.0.0/images/started-campaign.png b/fern/products/waves/pages/v4.0.0/images/started-campaign.png new file mode 100644 index 0000000..0c1857a Binary files /dev/null and b/fern/products/waves/pages/v4.0.0/images/started-campaign.png differ diff --git a/fern/products/waves/pages/v4.0.0/images/test-agent.png b/fern/products/waves/pages/v4.0.0/images/test-agent.png new file mode 100644 index 0000000..f90fbec Binary files /dev/null and b/fern/products/waves/pages/v4.0.0/images/test-agent.png differ diff --git a/fern/products/waves/pages/v4.0.0/images/thunder.png b/fern/products/waves/pages/v4.0.0/images/thunder.png new file mode 100644 index 0000000..2a64ee9 Binary 
files /dev/null and b/fern/products/waves/pages/v4.0.0/images/thunder.png differ diff --git a/fern/products/waves/pages/v4.0.0/images/thunder.svg b/fern/products/waves/pages/v4.0.0/images/thunder.svg new file mode 100644 index 0000000..35f6a0e --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/images/thunder.svg @@ -0,0 +1,26 @@ + + + + + + + + + + image/svg+xml + + + + + Openclipart + + + + + + + + + + + diff --git a/fern/products/waves/pages/v4.0.0/integrations/livekit.mdx b/fern/products/waves/pages/v4.0.0/integrations/livekit.mdx new file mode 100644 index 0000000..f51537d --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/integrations/livekit.mdx @@ -0,0 +1,113 @@ +--- +title: "LiveKit" +description: "Build LiveKit voice agents using Smallest AI TTS plugin." +icon: "voicemail" +--- + +This example provides scripts and tools to perform standalone audio generation and build Livekit voice assistants using the Smallest AI TTS plugin. Follow the steps below to set up and run the experiments. + +## Code Examples Repository + +You can find the code examples for this setup in the following GitHub repository: + +[Livekit Example Code Repository](https://github.com/smallest-inc/waves-examples/tree/main/lightning/livekit_example) + +## Common Steps + +### 1. Create a Virtual Environment + +To ensure your Python environment is isolated, create a virtual environment: + +```bash +python3 -m venv venv +``` + +Activate the virtual environment: + +- On Linux/Mac: + ```bash + source venv/bin/activate + ``` + +- On Windows: + ```bash + venv\Scripts\activate + ``` + +### 2. Install Requirements + +Once the virtual environment is activated, install the required dependencies: + +```bash +pip install -r requirements.txt +``` + +### 3. 
Sign in and Create a New Project on Livekit + +Sign in here: [Livekit Cloud](https://cloud.livekit.io) + +After signing in, create a new project and copy the following tokens: + +```bash +LIVEKIT_API_KEY +LIVEKIT_API_SECRET +LIVEKIT_URL +``` + +### 4. Create a `.env` File + +Create a `.env` file in the project root directory. This file should contain the following keys with appropriate values: + +```bash +LIVEKIT_API_KEY=... +LIVEKIT_API_SECRET=... +LIVEKIT_URL=... +OPENAI_API_KEY=... +DEEPGRAM_API_KEY=... +SMALLEST_API_KEY=... +``` + +### 5. Install the Plugin + +To set up the Livekit plugin for [smallest.ai](https://smallest.ai), run the following commands: + +```bash +chmod +x install_plugin.sh +./install_plugin.sh +``` + +--- + +## Usage + +### 1. Running `generate_audio.py` + +To generate audio using the Smallest AI plugin as a WAV file, run the following command: + +```bash +python3 generate_audio.py +``` + +You can change the parameters in the script and try out different voices, languages, and texts. + +### 2. Running `minimal_assistant.py` + +To build a minimal Livekit voice assistant using the Smallest model, run the following command: + +```bash +python3 minimal_assistant.py dev +``` + +### 3. Connect to the Agent Here + +You can connect to the agent by visiting the following link: + +[Livekit Agent Playground](https://agents-playground.livekit.io) + +--- + +## Notes + +- Ensure that you have added the correct API keys and other credentials in the `.env` file before running the scripts. +- For any issues or questions, feel free to open an issue in the repository or contact us on [Discord](https://discord.gg/Ub25S48hSf). 
+ diff --git a/fern/products/waves/pages/v4.0.0/integrations/openclaw.mdx b/fern/products/waves/pages/v4.0.0/integrations/openclaw.mdx new file mode 100644 index 0000000..591d75e --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/integrations/openclaw.mdx @@ -0,0 +1,96 @@ +--- +title: "OpenClaw" +description: "Add ultra-fast TTS and STT to your OpenClaw agent with the Smallest AI skill." +icon: "message-bot" +--- + +Add voice capabilities to your [OpenClaw](https://openclaw.ai) agent. Generate speech with sub-100ms latency and transcribe audio with the Smallest AI skill. + +## Installation + +```bash +# Via ClawHub (recommended) +clawhub install smallest-ai + +# Or manually +git clone https://github.com/smallest-inc/smallest-ai-openclaw.git +cp -r smallest-ai-openclaw ~/.openclaw/skills/smallest-ai +``` + +## Setup + +Set your API key: + +```bash +export SMALLEST_API_KEY="your_key_here" +``` + +Get a free key at [waves.smallest.ai](https://waves.smallest.ai). + +Restart the gateway: + +```bash +openclaw gateway stop && openclaw gateway start +``` + +## Usage + +The skill triggers automatically when you ask your agent to generate speech or transcribe audio. Just talk naturally: + +**Text-to-Speech:** +- "Say good morning in a male voice" +- "Read this aloud: The meeting is at 3pm" +- "Generate a voice note saying hello in Hindi" + +**Speech-to-Text:** +- "Transcribe this audio file" +- "What did they say in this recording?" 
+ +**Multilingual:** +- "Say 'namaste, kaise hain aap' in advika's voice" +- "Say 'hola buenos dias' using camilla" + +## Voices + +The skill auto-selects voices based on your request: + +| Voice | Gender | Accent | Best For | +|-------|--------|--------|----------| +| `sophia` | Female | American | General use (default) | +| `robert` | Male | American | Professional (default male) | +| `advika` | Female | Indian | Hindi, code-switching | +| `vivaan` | Male | Indian | Bilingual English/Hindi | +| `camilla` | Female | Mexican/Latin | Spanish | +| `zara` | Female | American | Conversational | +| `melody` | Female | American | Storytelling | +| `arjun` | Male | Indian | English/Hindi bilingual | +| `stella` | Female | American | Expressive, warm | + +80+ more voices available. The agent picks the right voice based on language and gender preference. + +## Features + +- Sub-100ms text-to-speech via Lightning v3.1 +- 64ms speech-to-text via Pulse +- Supports WAV, MP3, OGG, FLAC, M4A, and WebM audio formats (STT) +- 30+ languages with automatic language detection +- Speaker diarization and emotion detection (STT) +- Hindi-English code-switching +- Voice cloning — clone any voice with just 5 seconds of audio (Basic plan+) + +## Links + + + + Install from ClawHub + + + Source code + + + OpenClaw main site + + + Full API documentation + + diff --git a/fern/products/waves/pages/v4.0.0/integrations/plivo.mdx b/fern/products/waves/pages/v4.0.0/integrations/plivo.mdx new file mode 100644 index 0000000..ce2d638 --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/integrations/plivo.mdx @@ -0,0 +1,108 @@ +--- +title: "Telephony: Plivo" +description: "Learn how to integrate Smallest AI TTS in Plivo for telephony solutions." +icon: "phone-volume" +--- + +This guide demonstrates how to use the **Smallest AI API** with telephony call functionality. It sets up a local FastAPI server to stream audio data and uses **ngrok** to expose the server to the public for testing. 
+ +You can access the code for this example on GitHub [here](https://github.com/smallest-inc/waves-examples/tree/main/lightning/telephone_example/plivo_example). + +## Prerequisites + +Before you begin, ensure you have the following: + +- **Python 3.8+** installed +- **ngrok** for tunneling local servers to the internet. You can download ngrok from [here](https://ngrok.com/download). +- **Plivo** account to handle calls and text-to-speech. + +## Setup Steps + +### 1. Configure Environment Variables + +Start by creating a `.env` file in your project directory with the necessary API keys. This configuration is essential for interacting with **Smallest.ai** and **Plivo**. + +```bash +SMALLEST_API_KEY=... +PLIVO_AUTH_ID=... +PLIVO_AUTH_TOKEN=... +``` + +### 2. Set Up Ngrok + +**Ngrok** is used to expose your local FastAPI server to the public internet. Follow these steps: + +1. Install **ngrok** (if not already installed). +2. Expose your FastAPI server for Plivo by running: + +```bash +ngrok http 5000 +``` + +3. Once ngrok is running, it will generate a public URL (e.g., `https://abcd-1234-5678.ngrok.io`). + +### 3. Install Dependencies + +To run the example code, you’ll need to install the required dependencies. Run the following command: + +```bash +pip install -r requirements.txt +``` + +### 4. Run the FastAPI Application + +Now, you can start the FastAPI server, which will handle the streaming of audio. + +**Note**: Make sure to update the ngrok URL and paths in the script before running. + +Run the Plivo server: +```bash +python plivo_example/plivo_app.py +``` + +### 5. Run the Call Client + +Now that the FastAPI server is up and running, it's time to place a call. This client connects to the server, and you can test the audio stream with the desired telephony platform. + + +Similarly, update the phone numbers and ngrok URL in the script, then run: + +```bash +python plivo_example/plivo_make_call.py +``` + +### 6. 
Testing the Call + +- Once the client script is executed, a call will be placed to the provided number with synthesized audio. +- Plivo will handle the audio and make the call based on the provided parameters. + +### 7. Accessing the Public URL + +If you’ve successfully run ngrok, your application will be accessible via a public URL provided by ngrok, such as `https://abcd-1234-5678.ngrok.io`. + +This public URL will allow external services like Plivo to interact with your server. + +--- + +## Notes + +- **Public URL Requirement:** If you already have a public URL (e.g., from deploying the app), you don't need to use ngrok. +- **API Keys:** Ensure your `.env` file contains all required keys for **Plivo** so that API calls succeed. +- **Phone Numbers:** Update the client scripts with valid phone numbers to receive the call. +- **Testing:** Use ngrok for easy testing in a local environment before deploying the application. + +--- + +## Troubleshooting + +If you face any issues during setup or while making calls, ensure the following: + +- **Correct API keys**: Double-check the credentials in your `.env` file. +- **Ngrok URL**: Ensure that ngrok is running and the correct URL is used in the scripts. +- **Dependencies**: Ensure all dependencies are installed correctly via `pip install -r requirements.txt`. + +If issues persist, you can reach out to the [Smallest.ai support team](https://discord.gg/Ub25S48hSf) or raise an issue on the [GitHub repository](https://github.com/smallest-inc/waves-examples). + +--- + +This setup provides seamless testing of voice calls using the **Smallest AI API** and **Plivo** for real-time voice interaction. 
\ No newline at end of file diff --git a/fern/products/waves/pages/v4.0.0/integrations/vercel-ai-sdk.mdx b/fern/products/waves/pages/v4.0.0/integrations/vercel-ai-sdk.mdx new file mode 100644 index 0000000..e3bf141 --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/integrations/vercel-ai-sdk.mdx @@ -0,0 +1,169 @@ +--- +title: "Vercel AI SDK" +description: "Use Smallest AI TTS and STT with the Vercel AI SDK in Next.js and Node.js apps." +icon: "triangle" +--- + +Use Smallest AI as a speech and transcription provider in the [Vercel AI SDK](https://ai-sdk.dev). Generate speech and transcribe audio with a few lines of code. + +## Installation + +```bash +npm install smallestai-vercel-provider ai +``` + +## Setup + +Get your API key from [waves.smallest.ai](https://waves.smallest.ai) and set it as an environment variable: + +```bash +export SMALLEST_API_KEY="your_key_here" +``` + +## Text-to-Speech + +Supported models: `lightning-v3.1` (recommended — 44.1 kHz, natural expressive speech) and `lightning-v2` (16 languages, voice cloning). + +```typescript +import { experimental_generateSpeech as generateSpeech } from 'ai'; +import { smallestai } from 'smallestai-vercel-provider'; + +const { audio } = await generateSpeech({ + model: smallestai.speech('lightning-v3.1'), + text: 'Hello from Smallest AI!', + voice: 'sophia', + speed: 1.0, +}); + +// audio.uint8Array — raw WAV bytes +// audio.base64 — base64 encoded audio +``` + +## Speech-to-Text + +```typescript +import { experimental_transcribe as transcribe } from 'ai'; +import { smallestai } from 'smallestai-vercel-provider'; +import { readFileSync } from 'fs'; + +const { text, segments } = await transcribe({ + model: smallestai.transcription('pulse'), + audio: readFileSync('recording.wav'), + mediaType: 'audio/wav', +}); + +console.log(text); // "Hello from Smallest AI!" +console.log(segments); // [{ text: "Hello", startSecond: 0, endSecond: 0.5 }, ...] 
+``` + +## Next.js API Route Example + +Create a TTS endpoint in your Next.js app: + +```typescript +// app/api/speak/route.ts +import { experimental_generateSpeech as generateSpeech } from 'ai'; +import { smallestai } from 'smallestai-vercel-provider'; + +export async function POST(req: Request) { + const { text, voice } = await req.json(); + + const { audio } = await generateSpeech({ + model: smallestai.speech('lightning-v3.1'), + text, + voice: voice || 'sophia', + }); + + return new Response(Buffer.from(audio.uint8Array), { + headers: { 'Content-Type': 'audio/wav' }, + }); +} +``` + +Play it in the browser: + +```typescript +const res = await fetch('/api/speak', { + method: 'POST', + body: JSON.stringify({ text: 'Hello!', voice: 'sophia' }), +}); +const blob = await res.blob(); +new Audio(URL.createObjectURL(blob)).play(); +``` + +## Provider Options + +### TTS Options + +```typescript +const { audio } = await generateSpeech({ + model: smallestai.speech('lightning-v3.1'), + text: 'Hello!', + voice: 'robert', + providerOptions: { + smallestai: { + sampleRate: 48000, // 8000 | 16000 | 24000 | 44100 | 48000 + outputFormat: 'mp3', // pcm | mp3 | wav | mulaw + language: 'en', // ISO 639-1 code + consistency: 0.5, // voice consistency (0–1) + similarity: 0.5, // voice similarity (0–1) + enhancement: 1, // audio enhancement level (0–2) + }, + }, +}); +``` + +### STT Options + +```typescript +const result = await transcribe({ + model: smallestai.transcription('pulse'), + audio: audioBuffer, + mediaType: 'audio/wav', + providerOptions: { + smallestai: { + language: 'hi', // ISO 639-1 code + diarize: true, // speaker identification + emotionDetection: true, // detect emotions + ageDetection: true, // detect speaker age + genderDetection: true, // detect speaker gender + }, + }, +}); +``` + +## Available Voices + +80+ voices across multiple languages. 
Popular voices: + +| Voice | Gender | Accent | Best For | +|-------|--------|--------|----------| +| `sophia` | Female | American | General use (default) | +| `robert` | Male | American | Professional | +| `advika` | Female | Indian | Hindi, code-switching | +| `vivaan` | Male | Indian | Bilingual English/Hindi | +| `camilla` | Female | Mexican/Latin | Spanish | + +Fetch the full voice list programmatically: + +```bash +curl -s "https://api.smallest.ai/waves/v1/lightning-v3.1/get_voices" \ + -H "Authorization: Bearer $SMALLEST_API_KEY" +``` + +## Links + + + + Install from npm + + + Source code + + + AI SDK documentation + + + More examples + + diff --git a/fern/products/waves/pages/v4.0.0/integrations/vonage.mdx b/fern/products/waves/pages/v4.0.0/integrations/vonage.mdx new file mode 100644 index 0000000..ed17a42 --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/integrations/vonage.mdx @@ -0,0 +1,109 @@ +--- +title: "Telephony: Vonage" +description: "Learn how to integrate Smallest AI TTS in Vonage for telephony solutions." +icon: "tty" +--- + +This guide demonstrates how to use the **Smallest AI API** with telephony call functionality. It sets up a local FastAPI server to stream audio data and uses **ngrok** to expose the server to the public for testing. + +You can access the code for this example on GitHub [here ](https://github.com/smallest-inc/waves-examples/tree/main/lightning/telephone_example/vonage_example). + + +## Prerequisites + +Before you begin, ensure you have the following: + +- **Python 3.8+** installed +- **ngrok** for tunneling local servers to the internet. You can download ngrok from [here](https://ngrok.com/download). +- **Vonage** account to handle calls and text-to-speech. + +## Setup Steps + +### 1. Configure Environment Variables + +Start by creating a `.env` file in your project directory with the necessary API keys. This configuration is essential for interacting with **Smallest.ai** and **Vonage**. + +```bash +SMALLEST_API_KEY=... 
+VONAGE_APPLICATION_ID=... +``` + +Additionally, ensure that the private key is added to `secrets/private.key`. + +### 2. Set Up Ngrok + +**Ngrok** is used to expose your local FastAPI server to the public internet. Follow these steps: + +1. Install **ngrok** (if not already installed). +2. Expose your FastAPI server for Vonage by running: + +- For Vonage: +```bash +ngrok http 8000 +``` + +3. Once ngrok is running, it will generate a public URL (e.g., `https://abcd-1234-5678.ngrok.io`). + +### 3. Install Dependencies + +To run the example code, you’ll need to install the required dependencies. Run the following command: + +```bash +pip install -r requirements.txt +``` + +### 4. Run the FastAPI Application + +Now, you can start the FastAPI server, which will handle the streaming of audio. + +Run the Vonage server: + +```bash +python vonage_example/vonage_app.py +``` + +### 5. Run the Phonetic Call Client + +Now that the FastAPI server is up and running, it's time to make a phonetic call. This client connects to the server, and you can test the audio stream with the desired telephony platform. + +Update the phone numbers and ngrok URL in the script, then run: + +```bash +python vonage_example/vonage_make_call.py +``` + +### 6. Testing the Call + +- Once the client script is executed, a call will be placed to the provided number with phonetic audio. +- Vonage will handle the audio and make the call based on the provided parameters. + +### 7. Accessing the Public URL + +If you’ve successfully run ngrok, your application will be accessible via a public URL provided by ngrok, such as `https://abcd-1234-5678.ngrok.io`. + +This public URL will allow external services like Vonage to interact with your server. + +--- + +## Notes + +- **Public URL Requirement:** If you already have a public URL (e.g., from deploying the app), you don't need to use ngrok. +- **API Keys:** Ensure your `.env` file contains all required keys for **Vonage** to ensure successful API calls. 
+- **Phone Numbers:** Update the client scripts with valid phone numbers to receive the call. +- **Testing:** Use ngrok for easy testing in a local environment before deploying the application. + +--- + +## Troubleshooting + +If you face any issues during setup or while making calls, ensure the following: + +- **Correct API keys**: Double-check the credentials in your `.env` file. +- **Ngrok URL**: Ensure that ngrok is running and the correct URL is used in the scripts. +- **Dependencies**: Ensure all dependencies are installed correctly via `pip install -r requirements.txt`. + +If issues persist, you can reach out to the [Smallest AI support team](https://discord.gg/Ub25S48hSf) or raise an issue on the [GitHub repository](https://github.com/smallest-inc/waves-examples). + +--- + +This setup provides seamless testing of phonetic calls using the **Smallest AI API** and **Vonage** for real-time voice interaction. \ No newline at end of file diff --git a/fern/products/waves/pages/v4.0.0/on-prem/api-reference/authentication.mdx b/fern/products/waves/pages/v4.0.0/on-prem/api-reference/authentication.mdx new file mode 100644 index 0000000..174f2e6 --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/on-prem/api-reference/authentication.mdx @@ -0,0 +1,359 @@ +--- +title: Authentication +description: Authenticate API requests with your license key +--- + +## Overview + +All API requests to Smallest Self-Host require authentication using your license key. This ensures only authorized clients can access the speech-to-text service. + +## Authentication Method + +Smallest Self-Host uses **Bearer token authentication** with your license key. 
+ +### Authorization Header + +Include your license key in the `Authorization` header: + +```http +Authorization: Token YOUR_LICENSE_KEY +``` + +## Example Requests + + + + ```bash + curl -X POST http://localhost:7100/v1/listen \ + -H "Authorization: Token ${LICENSE_KEY}" \ + -H "Content-Type: application/json" \ + -d '{ + "url": "https://example.com/audio.wav" + }' + ``` + + + + ```python + import requests + + LICENSE_KEY = "your-license-key-here" + API_URL = "http://localhost:7100" + + headers = { + "Authorization": f"Token {LICENSE_KEY}", + "Content-Type": "application/json" + } + + response = requests.post( + f"{API_URL}/v1/listen", + headers=headers, + json={"url": "https://example.com/audio.wav"} + ) + + print(response.json()) + ``` + + + + ```javascript + const LICENSE_KEY = "your-license-key-here"; + const API_URL = "http://localhost:7100"; + + const response = await fetch(`${API_URL}/v1/listen`, { + method: "POST", + headers: { + "Authorization": `Token ${LICENSE_KEY}`, + "Content-Type": "application/json" + }, + body: JSON.stringify({ + url: "https://example.com/audio.wav" + }) + }); + + const result = await response.json(); + console.log(result); + ``` + + + + ```go + package main + + import ( + "bytes" + "encoding/json" + "fmt" + "net/http" + ) + + func main() { + licenseKey := "your-license-key-here" + apiURL := "http://localhost:7100/v1/listen" + + payload := map[string]string{ + "url": "https://example.com/audio.wav", + } + jsonData, _ := json.Marshal(payload) + + req, _ := http.NewRequest("POST", apiURL, bytes.NewBuffer(jsonData)) + req.Header.Set("Authorization", "Token "+licenseKey) + req.Header.Set("Content-Type", "application/json") + + client := &http.Client{} + resp, err := client.Do(req) + if err != nil { + panic(err) + } + defer resp.Body.Close() + + var result map[string]interface{} + json.NewDecoder(resp.Body).Decode(&result) + fmt.Println(result) + } + ``` + + + +## Response Codes + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + +
| Code | Status | Description |
|------|--------|-------------|
| 200 | OK | Request successful |
| 400 | Bad Request | Invalid request parameters |
| 401 | Unauthorized | Invalid or missing license key |
| 403 | Forbidden | License expired or quota exceeded |
| 429 | Too Many Requests | Rate limit exceeded |
| 500 | Internal Server Error | Server error |
| 503 | Service Unavailable | Service temporarily unavailable |
+ +## Error Responses + +### 401 Unauthorized + +```json +{ + "error": "Invalid license key", + "code": "INVALID_LICENSE" +} +``` + +**Solutions**: +- Verify license key is correct +- Check Authorization header format +- Ensure license hasn't expired + +### 403 Forbidden + +```json +{ + "error": "License expired", + "code": "LICENSE_EXPIRED", + "expires_at": "2024-12-31T23:59:59Z" +} +``` + +**Solutions**: +- Renew license with Smallest.ai +- Contact support@smallest.ai + +### 429 Rate Limited + +```json +{ + "error": "Rate limit exceeded", + "code": "RATE_LIMIT_EXCEEDED", + "retry_after": 60 +} +``` + +**Solutions**: +- Wait and retry after specified seconds +- Implement exponential backoff +- Contact support for higher limits + +## Security Best Practices + + + + Never hardcode license keys in source code. + + **Use environment variables**: + ```bash + export LICENSE_KEY="your-license-key-here" + ``` + + **Or secret managers**: + - AWS Secrets Manager + - HashiCorp Vault + - Kubernetes Secrets + + + + Always use HTTPS for API requests in production: + + ```javascript + const API_URL = "https://api.example.com"; + ``` + + Configure TLS: + ```yaml + apiServer: + tls: + enabled: true + certSecretName: "api-server-tls" + ``` + + + + Implement key rotation policy: + + - Rotate keys every 90 days + - Use different keys for dev/staging/prod + - Revoke compromised keys immediately + + + + Track API usage to detect anomalies: + + - Unusual traffic patterns + - Failed authentication attempts + - Quota approaching limits + + + + Add client-side rate limiting: + + ```python + from ratelimit import limits, sleep_and_retry + + @sleep_and_retry + @limits(calls=100, period=60) + def call_api(): + response = requests.post(...) 
+ return response + ``` + + + +## SDK Integration + +### Python SDK + +```bash +pip install smallest-client +``` + +```python +from smallest import Client + +client = Client( + api_url="http://localhost:7100", + license_key="your-license-key-here" +) + +result = client.transcribe_url("https://example.com/audio.wav") +print(result.text) +``` + +### JavaScript SDK + +```bash +npm install @smallest/client +``` + +```javascript +import { SmallestClient } from '@smallest/client'; + +const client = new SmallestClient({ + apiUrl: 'http://localhost:7100', + licenseKey: 'your-license-key-here' +}); + +const result = await client.transcribeUrl('https://example.com/audio.wav'); +console.log(result.text); +``` + + +SDKs automatically handle authentication, retries, and error handling. + + +## Testing Authentication + +### Health Check (No Auth Required) + +```bash +curl http://localhost:7100/health +``` + +Expected response: +```json +{ + "status": "healthy" +} +``` + +### Verify License Key + +```bash +curl -X POST http://localhost:7100/v1/listen \ + -H "Authorization: Token ${LICENSE_KEY}" \ + -H "Content-Type: application/json" \ + -d '{"url": "https://example.com/test.wav"}' +``` + +Successful authentication returns transcription results. + +## What's Next? + + + + Learn about the transcription API + + + + Monitor service health + + + + See complete integration examples + + + diff --git a/fern/products/waves/pages/v4.0.0/on-prem/api-reference/endpoints/health-check.mdx b/fern/products/waves/pages/v4.0.0/on-prem/api-reference/endpoints/health-check.mdx new file mode 100644 index 0000000..5f2f355 --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/on-prem/api-reference/endpoints/health-check.mdx @@ -0,0 +1,396 @@ +--- +title: Health Check +description: Monitor service health and availability +--- + +## Overview + +The health check endpoint provides a simple way to verify that the API server is running and responsive. 
Use this for monitoring, load balancer health checks, and readiness probes. + +## Endpoint + +``` +GET /health +``` + +## Authentication + +**No authentication required** - This endpoint is publicly accessible. + +## Request + +Simple GET request with no parameters: + +```bash +curl http://localhost:7100/health +``` + +## Response + +### Healthy Response + +HTTP Status: `200 OK` + +```json +{ + "status": "healthy" +} +``` + +### Unhealthy Response + +HTTP Status: `503 Service Unavailable` + +```json +{ + "status": "unhealthy", + "reason": "No ASR workers available" +} +``` + +## Use Cases + +### Load Balancer Health Checks + +Configure your load balancer to use the health endpoint: + + + + ```yaml + apiServer: + service: + type: LoadBalancer + healthCheckPath: /health + healthCheckInterval: 30 + healthCheckTimeout: 5 + healthyThreshold: 2 + unhealthyThreshold: 3 + ``` + + + + ```yaml + apiVersion: v1 + kind: Service + metadata: + name: api-server + annotations: + service.beta.kubernetes.io/aws-load-balancer-healthcheck-path: "/health" + spec: + type: LoadBalancer + ``` + + + +### Kubernetes Liveness Probe + +Monitor pod health in Kubernetes: + +```yaml +livenessProbe: + httpGet: + path: /health + port: 7100 + initialDelaySeconds: 30 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 3 +``` + +### Kubernetes Readiness Probe + +Determine when pod is ready to receive traffic: + +```yaml +readinessProbe: + httpGet: + path: /health + port: 7100 + initialDelaySeconds: 10 + periodSeconds: 5 + timeoutSeconds: 3 + failureThreshold: 3 +``` + +### Monitoring and Alerting + +Monitor service availability: + + + + ```yaml + - job_name: 'api-server-health' + metrics_path: '/health' + scrape_interval: 30s + static_configs: + - targets: ['api-server:7100'] + ``` + + + + ```python + import requests + import time + + def check_health(): + try: + response = requests.get( + "http://localhost:7100/health", + timeout=5 + ) + return response.status_code == 200 + except Exception 
as e: + print(f"Health check failed: {e}") + return False + + while True: + if not check_health(): + print("Service unhealthy!") + time.sleep(30) + ``` + + + + ```bash + #!/bin/bash + + while true; do + STATUS=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:7100/health) + + if [ "$STATUS" -ne 200 ]; then + echo "Health check failed: HTTP $STATUS" + fi + + sleep 30 + done + ``` + + + +### Uptime Monitoring + +Integration with uptime monitoring services: + + + + - **Monitor Type**: HTTP(s) + - **URL**: `https://api.example.com/health` + - **Keyword**: `healthy` + - **Interval**: 5 minutes + + + + - **Check Type**: HTTP + - **URL**: `https://api.example.com/health` + - **Expected Status**: 200 + - **Check Interval**: 1 minute + + + + ```yaml + init_config: + + instances: + - url: http://api-server:7100/health + name: smallest-api + timeout: 5 + http_response_status_code: 200 + ``` + + + +## Advanced Health Checks + +### Detailed Health Status + +For more detailed health information, add query parameter: + +```bash +curl http://localhost:7100/health?detailed=true +``` + +Response: +```json +{ + "status": "healthy", + "components": { + "api_server": "healthy", + "lightning_asr": "healthy", + "license_proxy": "healthy", + "redis": "healthy" + }, + "uptime_seconds": 86400, + "version": "1.0.0" +} +``` + +### Component-Specific Checks + +Check individual components: + +```bash +curl http://localhost:7100/health/asr +curl http://localhost:7100/health/license +curl http://localhost:7100/health/redis +``` + +## Integration Examples + +### Docker Compose Healthcheck + +```yaml docker-compose.yml +services: + api-server: + image: quay.io/smallestinc/self-hosted-api-server:latest + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:7100/health"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 40s +``` + +### Kubernetes Deployment + +```yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: api-server +spec: + template: + spec: + 
containers: + - name: api-server + image: quay.io/smallestinc/self-hosted-api-server:latest + livenessProbe: + httpGet: + path: /health + port: 7100 + initialDelaySeconds: 30 + periodSeconds: 10 + readinessProbe: + httpGet: + path: /health + port: 7100 + initialDelaySeconds: 10 + periodSeconds: 5 +``` + +### Automated Testing + +Include health checks in CI/CD: + +```yaml .github/workflows/deploy.yml +- name: Wait for deployment + run: | + for i in {1..30}; do + if curl -f http://api.example.com/health; then + echo "Service is healthy" + exit 0 + fi + sleep 10 + done + echo "Service failed to become healthy" + exit 1 +``` + +## Best Practices + + + + Configure reasonable timeouts: + + - **Timeout**: 5 seconds max + - **Interval**: 10-30 seconds + - **Retries**: 3-5 attempts + + ```yaml + healthcheck: + timeout: 5s + interval: 30s + retries: 3 + ``` + + + + Always configure health checks in load balancers: + + - Prevents traffic to unhealthy instances + - Enables automatic failover + - Reduces user-facing errors + + + + Set up continuous monitoring: + + - External uptime monitoring + - Internal health checks + - Alerting on failures + + + + Regularly test health check behavior: + + ```bash + kubectl delete pod api-server-xxx + ``` + + Verify: + - Health check fails + - Load balancer stops routing + - New pod becomes ready + - Health check succeeds + + + +## Troubleshooting + +### Health Check Failing + +**Check API server logs**: + +```bash +kubectl logs -l app=api-server -n smallest +``` + +**Common causes**: +- Lightning ASR not available +- License proxy down +- Redis connection failed + +**Solutions**: +- Verify all components running +- Check service connectivity +- Review component logs + +### False Positives + +**Symptoms**: Health returns 200 but requests fail + +**Solutions**: +- Use detailed health checks +- Test actual transcription endpoint +- Monitor error rates + +### Timeout Issues + +**Symptoms**: Health checks timing out + +**Solutions**: +- Increase 
timeout values +- Check network latency +- Verify no network policies blocking + +## What's Next? + + + + Learn about the transcription endpoint + + + + See complete integration examples + + + diff --git a/fern/products/waves/pages/v4.0.0/on-prem/api-reference/endpoints/transcription.mdx b/fern/products/waves/pages/v4.0.0/on-prem/api-reference/endpoints/transcription.mdx new file mode 100644 index 0000000..c3e4155 --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/on-prem/api-reference/endpoints/transcription.mdx @@ -0,0 +1,522 @@ +--- +title: Transcription +description: Convert speech to text with the /v1/listen endpoint +--- + +## Overview + +The transcription endpoint converts audio files to text using Lightning ASR. Supports both batch processing and streaming. + +## Endpoint + +``` +POST /v1/listen +``` + +## Authentication + +Requires Bearer token authentication with your license key. + +```http +Authorization: Token YOUR_LICENSE_KEY +``` + +See [Authentication](/waves/self-host/api-reference/authentication) for details. 
+ +## Request + +### From URL + +Transcribe audio from a publicly accessible URL: + +```json +{ + "url": "https://example.com/audio.wav" +} +``` + +### From File Upload + +Upload audio directly: + +```bash +curl -X POST http://localhost:7100/v1/listen \ + -H "Authorization: Token ${LICENSE_KEY}" \ + -F "audio=@/path/to/audio.wav" +``` + +### Parameters + + + URL to audio file (mutually exclusive with file upload) + + Supported protocols: `http://`, `https://`, `s3://` + + + + Audio file upload (mutually exclusive with URL) + + Supported formats: WAV, MP3, FLAC, OGG, M4A + + + + Language code (ISO 639-1) + + Examples: `en`, `es`, `fr`, `de`, `zh` + + + + Add punctuation to transcript + + + + Enable speaker diarization (identify different speakers) + + + + Expected number of speakers (for diarization) + + If not specified, automatically detected + + + + Include word-level timestamps + + + + Webhook URL for async results delivery + + If provided, returns immediately with job ID + + +## Response + +### Successful Response + +```json +{ + "request_id": "req_abc123", + "text": "Hello, this is a sample transcription.", + "confidence": 0.95, + "duration": 3.2, + "language": "en", + "words": [ + { + "word": "Hello", + "start": 0.0, + "end": 0.5, + "confidence": 0.98 + }, + { + "word": "this", + "start": 0.6, + "end": 0.8, + "confidence": 0.97 + } + ] +} +``` + +### Response Fields + + + Unique identifier for this transcription request + + + + Complete transcription text + + + + Overall confidence score (0.0 to 1.0) + + + + Audio duration in seconds + + + + Detected or specified language + + + + Word-level details (if `timestamps: true`) + + Each word object contains: + - `word`: The word text + - `start`: Start time in seconds + - `end`: End time in seconds + - `confidence`: Word confidence score + + +## Examples + +### Basic Transcription + + + + ```bash + curl -X POST http://localhost:7100/v1/listen \ + -H "Authorization: Token ${LICENSE_KEY}" \ + -H "Content-Type: 
application/json" \ + -d '{ + "url": "https://example.com/audio.wav" + }' + ``` + + + + ```python + import requests + + response = requests.post( + "http://localhost:7100/v1/listen", + headers={ + "Authorization": f"Token {LICENSE_KEY}", + "Content-Type": "application/json" + }, + json={ + "url": "https://example.com/audio.wav" + } + ) + + result = response.json() + print(result['text']) + ``` + + + + ```javascript + const response = await fetch('http://localhost:7100/v1/listen', { + method: 'POST', + headers: { + 'Authorization': `Token ${LICENSE_KEY}`, + 'Content-Type': 'application/json' + }, + body: JSON.stringify({ + url: 'https://example.com/audio.wav' + }) + }); + + const result = await response.json(); + console.log(result.text); + ``` + + + +### With Punctuation and Timestamps + +```json +{ + "url": "https://example.com/audio.wav", + "punctuate": true, + "timestamps": true +} +``` + +Response: +```json +{ + "request_id": "req_abc123", + "text": "Hello, this is a sample transcription.", + "confidence": 0.95, + "duration": 3.2, + "words": [ + {"word": "Hello", "start": 0.0, "end": 0.5, "confidence": 0.98}, + {"word": ",", "start": 0.5, "end": 0.5, "confidence": 1.0}, + {"word": "this", "start": 0.6, "end": 0.8, "confidence": 0.97} + ] +} +``` + +### With Speaker Diarization + +```json +{ + "url": "https://example.com/conversation.wav", + "diarize": true, + "num_speakers": 2 +} +``` + +Response: +```json +{ + "request_id": "req_abc123", + "text": "Hello. 
Hi there!", + "speakers": [ + { + "speaker": "SPEAKER_00", + "text": "Hello.", + "start": 0.0, + "end": 0.8 + }, + { + "speaker": "SPEAKER_01", + "text": "Hi there!", + "start": 1.0, + "end": 1.8 + } + ] +} +``` + +### File Upload + +```bash +curl -X POST http://localhost:7100/v1/listen \ + -H "Authorization: Token ${LICENSE_KEY}" \ + -F "audio=@recording.wav" \ + -F "punctuate=true" \ + -F "language=en" +``` + +### Async with Callback + +```json +{ + "url": "https://example.com/long-audio.wav", + "callback_url": "https://myapp.com/webhook/transcription" +} +``` + +Immediate response: +```json +{ + "job_id": "job_xyz789", + "status": "processing" +} +``` + +Later, webhook receives: +```json +{ + "job_id": "job_xyz789", + "status": "completed", + "result": { + "text": "...", + "confidence": 0.95 + } +} +``` + +## Error Responses + +### 400 Bad Request + +```json +{ + "error": "Missing required parameter: url or audio file", + "code": "MISSING_PARAMETER" +} +``` + +### 415 Unsupported Media Type + +```json +{ + "error": "Unsupported audio format", + "code": "UNSUPPORTED_FORMAT", + "supported_formats": ["wav", "mp3", "flac", "ogg", "m4a"] +} +``` + +### 422 Unprocessable Entity + +```json +{ + "error": "Audio file too large", + "code": "FILE_TOO_LARGE", + "max_size_mb": 100 +} +``` + +### 503 Service Unavailable + +```json +{ + "error": "No ASR workers available", + "code": "SERVICE_UNAVAILABLE", + "retry_after": 30 +} +``` + +## Audio Format Requirements + +### Supported Formats + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
| Format | Extension | Notes |
|--------|-----------|-------|
| WAV | `.wav` | Recommended for best quality |
| MP3 | `.mp3` | Widely supported |
| FLAC | `.flac` | Lossless compression |
| OGG | `.ogg` | Open format |
| M4A | `.m4a` | Apple format |
+ +### Recommended Specifications + +- **Sample Rate**: 16 kHz or higher (44.1 kHz recommended) +- **Bit Depth**: 16-bit or higher +- **Channels**: Mono or stereo +- **Max Duration**: 2 hours +- **Max File Size**: 100 MB + +### Audio Preprocessing + +For best results: +- Remove background noise +- Normalize audio levels +- Use mono audio when possible +- Encode at 16 kHz or 44.1 kHz + +## Rate Limits + +Default rate limits: + +- **Requests per minute**: 60 +- **Concurrent requests**: 10 +- **Audio hours per day**: 100 + + +Contact support@smallest.ai to increase limits for your license. + + +## Performance + +Typical performance metrics: + + + + + + + + + + + + + + + + + + + + + + + + + + +
| Metric | Value |
|--------|-------|
| Real-time Factor | 0.05-0.15x |
| Latency (1 min audio) | 3-9 seconds |
| Concurrent capacity | 100+ requests |
| Throughput | 100+ hours/hour |
+ + +Performance varies based on: +- Audio duration and complexity +- Number of speakers +- GPU instance type +- Current load + + +## Best Practices + + + + - Use lossless formats (WAV, FLAC) when possible + - Ensure clear audio with minimal background noise + - Use appropriate sample rate (16 kHz minimum) + + + + Implement retry logic with exponential backoff: + + ```python + import time + from requests.adapters import HTTPAdapter + from requests.packages.urllib3.util.retry import Retry + + session = requests.Session() + retry = Retry( + total=3, + backoff_factor=1, + status_forcelist=[429, 500, 502, 503, 504] + ) + adapter = HTTPAdapter(max_retries=retry) + session.mount('http://', adapter) + ``` + + + + For audio longer than 5 minutes, use callback URL: + + ```json + { + "url": "https://example.com/podcast.mp3", + "callback_url": "https://myapp.com/webhook" + } + ``` + + + + Cache transcription results to avoid duplicate processing: + + ```python + import hashlib + + def get_cache_key(audio_url): + return hashlib.md5(audio_url.encode()).hexdigest() + + cache_key = get_cache_key(audio_url) + if cache_key in cache: + return cache[cache_key] + + result = transcribe(audio_url) + cache[cache_key] = result + return result + ``` + + + +## What's Next? + + + + Monitor service availability + + + + Complete integration examples + + + diff --git a/fern/products/waves/pages/v4.0.0/on-prem/api-reference/examples.mdx b/fern/products/waves/pages/v4.0.0/on-prem/api-reference/examples.mdx new file mode 100644 index 0000000..f5189db --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/on-prem/api-reference/examples.mdx @@ -0,0 +1,540 @@ +--- +title: Integration Examples +description: Complete examples for integrating with Smallest Self-Host +--- + +## Overview + +This page provides complete, production-ready examples for integrating Smallest Self-Host into your applications. 
+ +## Python Examples + +### Basic Transcription + +```python +import requests +import os + +LICENSE_KEY = os.getenv("LICENSE_KEY") +API_URL = "http://localhost:7100" + +def transcribe_audio(audio_url): + response = requests.post( + f"{API_URL}/v1/listen", + headers={ + "Authorization": f"Token {LICENSE_KEY}", + "Content-Type": "application/json" + }, + json={"url": audio_url} + ) + + if response.status_code == 200: + return response.json() + else: + raise Exception(f"Transcription failed: {response.text}") + +result = transcribe_audio("https://example.com/audio.wav") +print(f"Transcription: {result['text']}") +print(f"Confidence: {result['confidence']}") +``` + +### With Retry Logic + +```python +import requests +from requests.adapters import HTTPAdapter +from urllib3.util.retry import Retry +import time + +class SmallestClient: + def __init__(self, api_url, license_key): + self.api_url = api_url + self.license_key = license_key + + self.session = requests.Session() + retry = Retry( + total=3, + backoff_factor=1, + status_forcelist=[429, 500, 502, 503, 504] + ) + adapter = HTTPAdapter(max_retries=retry) + self.session.mount('http://', adapter) + self.session.mount('https://', adapter) + + def transcribe(self, audio_url, **kwargs): + headers = { + "Authorization": f"Token {self.license_key}", + "Content-Type": "application/json" + } + + payload = {"url": audio_url, **kwargs} + + response = self.session.post( + f"{self.api_url}/v1/listen", + headers=headers, + json=payload, + timeout=300 + ) + + response.raise_for_status() + return response.json() + +client = SmallestClient( + api_url="http://localhost:7100", + license_key=os.getenv("LICENSE_KEY") +) + +result = client.transcribe( + "https://example.com/audio.wav", + punctuate=True, + timestamps=True +) +print(result['text']) +``` + +### Async Processing with Webhook + +```python +from flask import Flask, request, jsonify +import requests +import os + +app = Flask(__name__) + +LICENSE_KEY = os.getenv("LICENSE_KEY") 
+API_URL = "http://localhost:7100" + +@app.route('/webhook/transcription', methods=['POST']) +def transcription_webhook(): + data = request.json + job_id = data['job_id'] + status = data['status'] + + if status == 'completed': + result = data['result'] + print(f"Job {job_id} completed: {result['text']}") + elif status == 'failed': + print(f"Job {job_id} failed: {data['error']}") + + return jsonify({"received": True}) + +def submit_async_transcription(audio_url): + response = requests.post( + f"{API_URL}/v1/listen", + headers={ + "Authorization": f"Token {LICENSE_KEY}", + "Content-Type": "application/json" + }, + json={ + "url": audio_url, + "callback_url": "https://myapp.com/webhook/transcription" + } + ) + + return response.json() + +if __name__ == '__main__': + job = submit_async_transcription("https://example.com/long-audio.mp3") + print(f"Job submitted: {job['job_id']}") + + app.run(port=5000) +``` + +### Batch Processing + +```python +import concurrent.futures +import requests +import os + +LICENSE_KEY = os.getenv("LICENSE_KEY") +API_URL = "http://localhost:7100" + +def transcribe_single(audio_url): + try: + response = requests.post( + f"{API_URL}/v1/listen", + headers={ + "Authorization": f"Token {LICENSE_KEY}", + "Content-Type": "application/json" + }, + json={"url": audio_url}, + timeout=300 + ) + response.raise_for_status() + return { + "url": audio_url, + "success": True, + "result": response.json() + } + except Exception as e: + return { + "url": audio_url, + "success": False, + "error": str(e) + } + +audio_urls = [ + "https://example.com/audio1.wav", + "https://example.com/audio2.wav", + "https://example.com/audio3.wav", +] + +with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor: + results = list(executor.map(transcribe_single, audio_urls)) + +for result in results: + if result['success']: + print(f"{result['url']}: {result['result']['text']}") + else: + print(f"{result['url']}: ERROR - {result['error']}") +``` + +## JavaScript/Node.js 
Examples + +### Basic Transcription + +```javascript +const axios = require('axios'); + +const LICENSE_KEY = process.env.LICENSE_KEY; +const API_URL = 'http://localhost:7100'; + +async function transcribeAudio(audioUrl) { + try { + const response = await axios.post( + `${API_URL}/v1/listen`, + { url: audioUrl }, + { + headers: { + 'Authorization': `Token ${LICENSE_KEY}`, + 'Content-Type': 'application/json' + } + } + ); + + return response.data; + } catch (error) { + console.error('Transcription failed:', error.response?.data || error.message); + throw error; + } +} + +transcribeAudio('https://example.com/audio.wav') + .then(result => { + console.log('Transcription:', result.text); + console.log('Confidence:', result.confidence); + }); +``` + +### TypeScript Client Class + +```typescript +import axios, { AxiosInstance } from 'axios'; + +interface TranscriptionOptions { + url?: string; + file?: File; + language?: string; + punctuate?: boolean; + diarize?: boolean; + timestamps?: boolean; + callback_url?: string; +} + +interface TranscriptionResult { + request_id: string; + text: string; + confidence: number; + duration: number; + language: string; + words?: Array<{ + word: string; + start: number; + end: number; + confidence: number; + }>; +} + +class SmallestClient { + private client: AxiosInstance; + + constructor(apiUrl: string, licenseKey: string) { + this.client = axios.create({ + baseURL: apiUrl, + headers: { + 'Authorization': `Token ${licenseKey}`, + 'Content-Type': 'application/json' + }, + timeout: 300000 + }); + } + + async transcribe(options: TranscriptionOptions): Promise { + const response = await this.client.post('/v1/listen', options); + return response.data; + } + + async health(): Promise<{ status: string }> { + const response = await this.client.get('/health'); + return response.data; + } +} + +const client = new SmallestClient( + process.env.API_URL || 'http://localhost:7100', + process.env.LICENSE_KEY! 
+); + +async function main() { + const result = await client.transcribe({ + url: 'https://example.com/audio.wav', + punctuate: true, + timestamps: true + }); + + console.log(result.text); +} + +main(); +``` + +### Express.js API Integration + +```javascript +const express = require('express'); +const axios = require('axios'); +const multer = require('multer'); +const FormData = require('form-data'); + +const app = express(); +const upload = multer({ storage: multer.memoryStorage() }); + +const LICENSE_KEY = process.env.LICENSE_KEY; +const API_URL = 'http://localhost:7100'; + +app.post('/transcribe', upload.single('audio'), async (req, res) => { + try { + let result; + + if (req.file) { + const formData = new FormData(); + formData.append('audio', req.file.buffer, req.file.originalname); + + const response = await axios.post( + `${API_URL}/v1/listen`, + formData, + { + headers: { + 'Authorization': `Token ${LICENSE_KEY}`, + ...formData.getHeaders() + } + } + ); + result = response.data; + } else if (req.body.url) { + const response = await axios.post( + `${API_URL}/v1/listen`, + { url: req.body.url }, + { + headers: { + 'Authorization': `Token ${LICENSE_KEY}`, + 'Content-Type': 'application/json' + } + } + ); + result = response.data; + } else { + return res.status(400).json({ error: 'No audio file or URL provided' }); + } + + res.json(result); + } catch (error) { + console.error('Transcription error:', error.response?.data || error.message); + res.status(500).json({ error: 'Transcription failed' }); + } +}); + +app.listen(3000, () => { + console.log('Server running on port 3000'); +}); +``` + +## Go Examples + +### Basic Client + +```go +package main + +import ( + "bytes" + "encoding/json" + "fmt" + "io/ioutil" + "net/http" + "os" + "time" +) + +type TranscriptionRequest struct { + URL string `json:"url"` + Punctuate bool `json:"punctuate,omitempty"` + Language string `json:"language,omitempty"` +} + +type TranscriptionResult struct { + RequestID string 
`json:"request_id"` + Text string `json:"text"` + Confidence float64 `json:"confidence"` + Duration float64 `json:"duration"` +} + +type SmallestClient struct { + APIUrl string + LicenseKey string + HTTPClient *http.Client +} + +func NewClient(apiURL, licenseKey string) *SmallestClient { + return &SmallestClient{ + APIUrl: apiURL, + LicenseKey: licenseKey, + HTTPClient: &http.Client{Timeout: 5 * time.Minute}, + } +} + +func (c *SmallestClient) Transcribe(req TranscriptionRequest) (*TranscriptionResult, error) { + jsonData, err := json.Marshal(req) + if err != nil { + return nil, err + } + + httpReq, err := http.NewRequest("POST", c.APIUrl+"/v1/listen", bytes.NewBuffer(jsonData)) + if err != nil { + return nil, err + } + + httpReq.Header.Set("Authorization", "Token "+c.LicenseKey) + httpReq.Header.Set("Content-Type", "application/json") + + resp, err := c.HTTPClient.Do(httpReq) + if err != nil { + return nil, err + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + body, _ := ioutil.ReadAll(resp.Body) + return nil, fmt.Errorf("API error: %s", string(body)) + } + + var result TranscriptionResult + if err := json.NewDecoder(resp.Body).Decode(&result); err != nil { + return nil, err + } + + return &result, nil +} + +func main() { + client := NewClient( + "http://localhost:7100", + os.Getenv("LICENSE_KEY"), + ) + + result, err := client.Transcribe(TranscriptionRequest{ + URL: "https://example.com/audio.wav", + Punctuate: true, + Language: "en", + }) + + if err != nil { + fmt.Printf("Error: %v\n", err) + return + } + + fmt.Printf("Transcription: %s\n", result.Text) + fmt.Printf("Confidence: %.2f\n", result.Confidence) +} +``` + +## Best Practices + + + + Never hardcode credentials: + + ```bash + export LICENSE_KEY="your-license-key" + export API_URL="https://api.example.com" + ``` + + + + Always handle errors gracefully: + + ```python + try: + result = client.transcribe(audio_url) + except requests.exceptions.Timeout: + print("Request timed out") + 
except requests.exceptions.HTTPError as e: + print(f"HTTP error: {e.response.status_code}") + except Exception as e: + print(f"Unexpected error: {e}") + ``` + + + + Implement exponential backoff: + + ```python + from tenacity import retry, stop_after_attempt, wait_exponential + + @retry( + stop=stop_after_attempt(3), + wait=wait_exponential(multiplier=1, min=4, max=10) + ) + def transcribe_with_retry(audio_url): + return client.transcribe(audio_url) + ``` + + + + Reuse connections for better performance: + + ```python + session = requests.Session() + session.mount('http://', HTTPAdapter(pool_connections=10, pool_maxsize=10)) + ``` + + + + Track API usage and errors: + + ```python + import logging + + logging.basicConfig(level=logging.INFO) + logger = logging.getLogger(__name__) + + logger.info(f"Transcribing: {audio_url}") + result = client.transcribe(audio_url) + logger.info(f"Success: {result['request_id']}") + ``` + + + +## What's Next? + + + + Learn about API authentication + + + + Debug common integration issues + + + diff --git a/fern/products/waves/pages/v4.0.0/on-prem/docker/configuration.mdx b/fern/products/waves/pages/v4.0.0/on-prem/docker/configuration.mdx new file mode 100644 index 0000000..2cb856e --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/on-prem/docker/configuration.mdx @@ -0,0 +1,585 @@ +--- +title: Configuration +description: Advanced configuration options for Docker deployments +--- + +## Overview + +This guide covers advanced configuration options for customizing your Docker deployment. Learn how to optimize resources, configure external services, and tune performance. + +## Environment Variables + +All configuration is managed through environment variables in the `.env` file. 
+ +### Core Configuration + + + Your Smallest.ai license key for validation and usage reporting + + + + Download URL for the Lightning ASR model (provided by Smallest.ai) + + +### API Server Configuration + + + Port for the API server to listen on + + ```bash + PORT=7100 + ``` + + + + Internal URL for license proxy communication + + + + Internal URL for Lightning ASR communication + + +### Lightning ASR Configuration + + + Port for Lightning ASR to listen on + + ```bash + PORT=3369 + ``` + + + + Redis connection URL for caching and state management + + For external Redis: + ```bash + REDIS_URL=redis://external-redis.example.com:6379 + ``` + + With password: + ```bash + REDIS_URL=redis://:password@redis:6379 + ``` + + + + GPU device ID to use (for multi-GPU systems) + + ```bash + GPU_DEVICE_ID=0 + ``` + + +## Resource Configuration + +### GPU Allocation + +For systems with multiple GPUs, you can specify which GPU to use: + +```yaml docker-compose.yml +lightning-asr: + deploy: + resources: + reservations: + devices: + - driver: nvidia + device_ids: ['0'] + capabilities: [gpu] +``` + +For multiple GPUs per container: + +```yaml docker-compose.yml +lightning-asr: + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: 2 + capabilities: [gpu] +``` + +### Memory Limits + +Set memory limits for containers: + +```yaml docker-compose.yml +api-server: + deploy: + resources: + limits: + memory: 2G + reservations: + memory: 512M + +lightning-asr: + deploy: + resources: + limits: + memory: 16G + reservations: + memory: 12G +``` + +### CPU Allocation + +Reserve CPU cores for each service: + +```yaml docker-compose.yml +lightning-asr: + deploy: + resources: + limits: + cpus: '8' + reservations: + cpus: '4' +``` + +## Redis Configuration + +### Using External Redis + +To use an external Redis instance instead of the embedded one: + + + + Modify `.env` file: + ```bash + REDIS_URL=redis://your-redis-host:6379 + REDIS_PASSWORD=your-password + ``` + + + + Comment 
out or remove the Redis service: + ```yaml docker-compose.yml + # redis: + # image: redis:latest + # ... + ``` + + + + Remove Redis from depends_on: + ```yaml docker-compose.yml + api-server: + depends_on: + - lightning-asr + - license-proxy + # - redis # removed + ``` + + + +### Redis Persistence + +Enable data persistence for Redis: + +```yaml docker-compose.yml +redis: + image: redis:latest + command: redis-server --appendonly yes + volumes: + - redis-data:/data + networks: + - smallest-network + +volumes: + redis-data: + driver: local +``` + +### Redis with Authentication + +Add password protection: + +```yaml docker-compose.yml +redis: + image: redis:latest + command: redis-server --requirepass ${REDIS_PASSWORD} + environment: + - REDIS_PASSWORD=${REDIS_PASSWORD} +``` + +Update `.env`: +```bash +REDIS_PASSWORD=your-secure-password +REDIS_URL=redis://:your-secure-password@redis:6379 +``` + +## Scaling Configuration + +### Multiple ASR Workers + +Run multiple Lightning ASR containers for higher throughput: + +```yaml docker-compose.yml +services: + lightning-asr-1: + image: quay.io/smallestinc/lightning-asr:latest + ports: + - "2233:2233" + environment: + - MODEL_URL=${MODEL_URL} + - LICENSE_KEY=${LICENSE_KEY} + - REDIS_URL=redis://redis:6379 + - PORT=2233 + deploy: + resources: + reservations: + devices: + - driver: nvidia + device_ids: ['0'] + capabilities: [gpu] + networks: + - smallest-network + + lightning-asr-2: + image: quay.io/smallestinc/lightning-asr:latest + ports: + - "2234:2233" + environment: + - MODEL_URL=${MODEL_URL} + - LICENSE_KEY=${LICENSE_KEY} + - REDIS_URL=redis://redis:6379 + - PORT=2233 + deploy: + resources: + reservations: + devices: + - driver: nvidia + device_ids: ['1'] + capabilities: [gpu] + networks: + - smallest-network + + api-server: + environment: + - LIGHTNING_ASR_BASE_URL=http://lightning-asr-1:2233,http://lightning-asr-2:2233 +``` + + +This configuration requires multiple GPUs in your system and will distribute load across 
workers. + + +## Network Configuration + +### Custom Network Settings + +Configure custom network with specific subnet: + +```yaml docker-compose.yml +networks: + smallest-network: + driver: bridge + ipam: + config: + - subnet: 172.28.0.0/16 + gateway: 172.28.0.1 +``` + +### Expose on Specific Interface + +Bind to specific host IP: + +```yaml docker-compose.yml +api-server: + ports: + - "192.168.1.100:7100:7100" +``` + +### Use Host Network + +For maximum performance (loses network isolation): + +```yaml docker-compose.yml +api-server: + network_mode: host +``` + + +Host network mode bypasses Docker networking and directly uses host network stack. Use only if necessary. + + +## Logging Configuration + +### Custom Log Drivers + +Use JSON file logging with rotation: + +```yaml docker-compose.yml +services: + api-server: + logging: + driver: "json-file" + options: + max-size: "10m" + max-file: "3" +``` + +### Syslog Integration + +Send logs to syslog: + +```yaml docker-compose.yml +services: + api-server: + logging: + driver: "syslog" + options: + syslog-address: "tcp://192.168.1.100:514" + tag: "smallest-api-server" +``` + +### Centralized Logging + +Forward logs to external logging service: + +```yaml docker-compose.yml +services: + api-server: + logging: + driver: "fluentd" + options: + fluentd-address: "localhost:24224" + tag: "docker.{{.Name}}" +``` + +## Volume Configuration + +### Persistent Model Storage + +Avoid re-downloading models on container restart: + +```yaml docker-compose.yml +services: + lightning-asr: + volumes: + - model-cache:/app/models + +volumes: + model-cache: + driver: local +``` + +### Custom Model Location + +Use a specific host directory: + +```yaml docker-compose.yml +services: + lightning-asr: + volumes: + - /mnt/models:/app/models + environment: + - MODEL_CACHE_DIR=/app/models +``` + +## Health Checks + +### Custom Health Check Intervals + +Adjust health check timing: + +```yaml docker-compose.yml +redis: + healthcheck: + test: ["CMD", 
"redis-cli", "ping"] + interval: 10s + timeout: 5s + retries: 3 + start_period: 30s +``` + +### API Server Health Check + +Add health check for API server: + +```yaml docker-compose.yml +api-server: + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:7100/health"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 60s +``` + +## Security Configuration + +### Run as Non-Root User + +Add user specification: + +```yaml docker-compose.yml +api-server: + user: "1000:1000" +``` + +### Read-Only Filesystem + +Increase security with read-only root filesystem: + +```yaml docker-compose.yml +api-server: + read_only: true + tmpfs: + - /tmp + - /var/run +``` + +### Resource Limits + +Prevent resource exhaustion: + +```yaml docker-compose.yml +api-server: + deploy: + resources: + limits: + cpus: '2' + memory: 2G + pids: 100 +``` + +## Example: Production Configuration + +Here's a complete production-ready configuration: + +```yaml docker-compose.yml +version: "3.8" + +services: + lightning-asr: + image: quay.io/smallestinc/lightning-asr:latest + ports: + - "127.0.0.1:2233:2233" + environment: + - MODEL_URL=${MODEL_URL} + - LICENSE_KEY=${LICENSE_KEY} + - REDIS_URL=redis://:${REDIS_PASSWORD}@redis:6379 + - PORT=2233 + volumes: + - model-cache:/app/models + deploy: + resources: + limits: + memory: 16G + cpus: '8' + reservations: + memory: 12G + cpus: '4' + devices: + - driver: nvidia + count: 1 + capabilities: [gpu] + restart: unless-stopped + logging: + driver: "json-file" + options: + max-size: "10m" + max-file: "3" + networks: + - smallest-network + + api-server: + image: quay.io/smallestinc/self-hosted-api-server:latest + container_name: api-server + ports: + - "7100:7100" + environment: + - LICENSE_KEY=${LICENSE_KEY} + - LIGHTNING_ASR_BASE_URL=http://lightning-asr:2233 + - API_BASE_URL=http://license-proxy:6699 + deploy: + resources: + limits: + memory: 2G + cpus: '2' + reservations: + memory: 512M + cpus: '0.5' + restart: unless-stopped + logging: + 
driver: "json-file" + options: + max-size: "10m" + max-file: "3" + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:7100/health"] + interval: 30s + timeout: 10s + retries: 3 + networks: + - smallest-network + depends_on: + - lightning-asr + - license-proxy + - redis + + license-proxy: + image: quay.io/smallestinc/license-proxy:latest + container_name: license-proxy + environment: + - LICENSE_KEY=${LICENSE_KEY} + deploy: + resources: + limits: + memory: 512M + cpus: '1' + restart: unless-stopped + logging: + driver: "json-file" + options: + max-size: "10m" + max-file: "3" + networks: + - smallest-network + + redis: + image: redis:7-alpine + command: redis-server --requirepass ${REDIS_PASSWORD} --appendonly yes + ports: + - "127.0.0.1:6379:6379" + volumes: + - redis-data:/data + deploy: + resources: + limits: + memory: 1G + cpus: '1' + restart: unless-stopped + healthcheck: + test: ["CMD", "redis-cli", "--raw", "incr", "ping"] + interval: 10s + timeout: 3s + retries: 5 + networks: + - smallest-network + +networks: + smallest-network: + driver: bridge + name: smallest-network + +volumes: + model-cache: + driver: local + redis-data: + driver: local +``` + +## What's Next? 
+ + + + Learn about each service component in detail + + + + Debug common issues and optimize performance + + + diff --git a/fern/products/waves/pages/v4.0.0/on-prem/docker/docker-troubleshooting.mdx b/fern/products/waves/pages/v4.0.0/on-prem/docker/docker-troubleshooting.mdx new file mode 100644 index 0000000..53fb0b5 --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/on-prem/docker/docker-troubleshooting.mdx @@ -0,0 +1,655 @@ +--- +title: Docker Troubleshooting +description: Debug common issues and optimize your Docker deployment +--- + +## Common Issues + +### GPU Not Accessible + +**Symptoms:** +- Error: `could not select device driver "nvidia"` +- Error: `no NVIDIA GPU devices found` +- Lightning ASR fails to start + +**Diagnosis:** + +```bash +docker run --rm --gpus all nvidia/cuda:11.8.0-base-ubuntu22.04 nvidia-smi +``` + + + + ```bash + sudo systemctl restart docker + docker compose up -d + ``` + + + + ```bash + sudo apt-get remove nvidia-container-toolkit + sudo apt-get update + sudo apt-get install -y nvidia-container-toolkit + + sudo systemctl restart docker + ``` + + + + ```bash + nvidia-smi + ``` + + If driver version is below 470, update: + ```bash + sudo ubuntu-drivers autoinstall + sudo reboot + ``` + + + + Verify `/etc/docker/daemon.json` contains: + ```json + { + "runtimes": { + "nvidia": { + "path": "nvidia-container-runtime", + "runtimeArgs": [] + } + } + } + ``` + + Restart Docker after changes: + ```bash + sudo systemctl restart docker + ``` + + + +### License Validation Failed + +**Symptoms:** +- Error: `License validation failed` +- Error: `Invalid license key` +- Services fail to start + +**Diagnosis:** + +Check license-proxy logs: +```bash +docker compose logs license-proxy +``` + + + + Check `.env` file: + ```bash + cat .env | grep LICENSE_KEY + ``` + + Ensure there are no: + - Extra spaces + - Quotes around the key + - Line breaks + + Correct format: + ```bash + LICENSE_KEY=abc123def456 + ``` + + + + Test connection to license server: 
+ ```bash + curl -v https://api.smallest.ai + ``` + + If this fails, check: + - Firewall rules + - Proxy settings + - DNS resolution + + + + If the key appears correct and network is accessible, your license may be: + - Expired + - Revoked + - Invalid + + Contact **support@smallest.ai** with: + - Your license key + - License-proxy logs + - Error messages + + + +### Model Download Failed + +**Symptoms:** +- Lightning ASR stuck at startup +- Error: `Failed to download model` +- Error: `Connection timeout` + +**Diagnosis:** + +Check Lightning ASR logs: +```bash +docker compose logs lightning-asr +``` + + + + Check `.env` file: + ```bash + cat .env | grep MODEL_URL + ``` + + Test URL accessibility: + ```bash + curl -I "${MODEL_URL}" + ``` + + + + Models require ~20-30 GB: + ```bash + df -h + ``` + + Free up space if needed: + ```bash + docker system prune -a + ``` + + + + Download model manually and use volume mount: + + ```bash + mkdir -p ~/models + cd ~/models + wget "${MODEL_URL}" -O model.bin + ``` + + Update docker-compose.yml: + ```yaml + lightning-asr: + volumes: + - ~/models:/app/models + ``` + + + + For slow connections, increase download timeout: + + ```yaml + lightning-asr: + environment: + - DOWNLOAD_TIMEOUT=3600 + ``` + + + +### Port Already in Use + +**Symptoms:** +- Error: `port is already allocated` +- Error: `bind: address already in use` + +**Diagnosis:** + +Find what's using the port: +```bash +sudo lsof -i :7100 +sudo netstat -tulpn | grep 7100 +``` + + + + If another service is using the port: + ```bash + sudo systemctl stop [service-name] + ``` + + Or kill the process: + ```bash + sudo kill -9 [PID] + ``` + + + + Modify docker-compose.yml to use different port: + ```yaml + api-server: + ports: + - "8080:7100" + ``` + + Access API at http://localhost:8080 instead + + + + Old containers may still be bound: + ```bash + docker compose down + docker container prune -f + docker compose up -d + ``` + + + +### Out of Memory + +**Symptoms:** +- Container 
killed unexpectedly +- Error: `OOMKilled` +- System becomes unresponsive + +**Diagnosis:** + +Check container status: +```bash +docker compose ps +docker inspect [container-name] | grep OOMKilled +``` + + + + Lightning ASR requires minimum 16 GB RAM + + Check current memory: + ```bash + free -h + ``` + + + + Prevent one service from consuming all memory: + ```yaml + services: + lightning-asr: + deploy: + resources: + limits: + memory: 14G + reservations: + memory: 12G + ``` + + + + Add swap space (temporary solution): + ```bash + sudo fallocate -l 16G /swapfile + sudo chmod 600 /swapfile + sudo mkswap /swapfile + sudo swapon /swapfile + ``` + + + + Use smaller model or reduce batch size: + ```yaml + lightning-asr: + environment: + - BATCH_SIZE=1 + - MODEL_PRECISION=fp16 + ``` + + + +### Container Keeps Restarting + +**Symptoms:** +- Container status shows `Restarting` +- Logs show crash loop + +**Diagnosis:** + +View recent logs: +```bash +docker compose logs --tail=100 [service-name] +``` + + + + ```bash + docker inspect [container-name] --format='{{.State.ExitCode}}' + ``` + + Common exit codes: + - `137`: Out of memory (OOMKilled) + - `139`: Segmentation fault + - `1`: General error + + + + Temporarily disable restart to debug: + ```yaml + lightning-asr: + restart: "no" + ``` + + Start manually and watch logs: + ```bash + docker compose up lightning-asr + ``` + + + + Ensure required services are healthy: + ```bash + docker compose ps + ``` + + All should show `Up (healthy)` or `Up` + + + +### Slow Performance + +**Symptoms:** +- High latency (>500ms) +- Low throughput +- GPU underutilized + +**Diagnosis:** + +Monitor GPU usage: +```bash +watch -n 1 nvidia-smi +``` + +Check container resources: +```bash +docker stats +``` + + + + Ensure GPU is not throttling: + ```bash + nvidia-smi -q -d PERFORMANCE + ``` + + Enable persistence mode: + ```bash + sudo nvidia-smi -pm 1 + ``` + + + + ```yaml + lightning-asr: + deploy: + resources: + limits: + cpus: '8' + ``` + + + + 
For maximum performance (loses isolation): + ```yaml + api-server: + network_mode: host + ``` + + + + Use Redis with persistence disabled for speed: + ```yaml + redis: + command: redis-server --save "" + ``` + + + + Scale Lightning ASR workers: + ```bash + docker compose up -d --scale lightning-asr=2 + ``` + + + +## Performance Optimization + +### Best Practices + + + + Cache models to avoid re-downloading: + ```yaml + volumes: + - model-cache:/app/models + ``` + + + + Reduces GPU initialization time: + ```bash + sudo nvidia-smi -pm 1 + ``` + + + + Allocate appropriate CPU/memory: + ```yaml + deploy: + resources: + limits: + cpus: '8' + memory: 14G + ``` + + + + Use monitoring tools: + ```bash + docker stats + nvidia-smi dmon + ``` + + + +### Benchmark Your Deployment + +Test transcription performance: + +```bash +time curl -X POST http://localhost:7100/v1/listen \ + -H "Authorization: Token ${LICENSE_KEY}" \ + -H "Content-Type: application/json" \ + -d '{ + "url": "https://example.com/test-audio-60s.wav" + }' +``` + +Expected performance: +- **Cold start**: First request after container start (5-10 seconds) +- **Warm requests**: Subsequent requests (50-200ms) +- **Real-time factor**: 0.05-0.15x (60s audio in 3-9 seconds) + +## Debugging Tools + +### View All Logs + +```bash +docker compose logs -f +``` + +### Follow Specific Service + +```bash +docker compose logs -f lightning-asr +``` + +### Last N Lines + +```bash +docker compose logs --tail=100 api-server +``` + +### Save Logs to File + +```bash +docker compose logs > deployment-logs.txt +``` + +### Execute Commands in Container + +```bash +docker compose exec lightning-asr bash +``` + +### Check Container Configuration + +```bash +docker inspect lightning-asr-1 +``` + +### Network Debugging + +Test connectivity between containers: + +```bash +docker compose exec api-server ping lightning-asr +docker compose exec api-server curl http://lightning-asr:2233/health +``` + +## Health Checks + +### API Server + 
+```bash +curl http://localhost:7100/health +``` + +Expected: `{"status": "healthy"}` + +### Lightning ASR + +```bash +curl http://localhost:2233/health +``` + +Expected: `{"status": "ready", "gpu": "NVIDIA A10"}` + +### License Proxy + +```bash +docker compose exec license-proxy wget -q -O- http://localhost:6699/health +``` + +Expected: `{"status": "valid"}` + +### Redis + +```bash +docker compose exec redis redis-cli ping +``` + +Expected: `PONG` + +## Log Analysis + +### Common Log Patterns + + + + ```log + redis-1 | Ready to accept connections + license-proxy | License validated successfully + lightning-asr-1 | Model loaded successfully + lightning-asr-1 | GPU: NVIDIA A10 (24GB) + lightning-asr-1 | Server ready on port 2233 + api-server | Connected to Lightning ASR + api-server | API server listening on port 7100 + ``` + + + + ```log + license-proxy | ERROR: License validation failed + license-proxy | ERROR: Invalid license key + license-proxy | ERROR: Connection to license server failed + ``` + + + + ```log + lightning-asr-1 | ERROR: No CUDA-capable device detected + lightning-asr-1 | ERROR: CUDA out of memory + lightning-asr-1 | ERROR: GPU not accessible + ``` + + + + ```log + api-server | ERROR: Connection refused: lightning-asr:2233 + api-server | ERROR: Timeout connecting to license-proxy + ``` + + + +## Getting Help + +### Before Contacting Support + +Collect the following information: + + + + ```bash + docker version + docker compose version + nvidia-smi + uname -a + ``` + + + + ```bash + docker compose ps > status.txt + docker stats --no-stream > resources.txt + ``` + + + + ```bash + docker compose logs > all-logs.txt + ``` + + + + Sanitize and include: + - docker-compose.yml + - .env (remove license key) + + + +### Contact Support + +Email: **support@smallest.ai** + +Include: +- Description of the issue +- Steps to reproduce +- System information +- Logs and configuration +- License key (via secure channel) + +## What's Next? 
+ + + + Advanced configuration options + + + + Integrate with your applications + + + diff --git a/fern/products/waves/versions/v4.0.0/on-prem/docker/stt/prerequisites.mdx b/fern/products/waves/pages/v4.0.0/on-prem/docker/prerequisites.mdx similarity index 70% rename from fern/products/waves/versions/v4.0.0/on-prem/docker/stt/prerequisites.mdx rename to fern/products/waves/pages/v4.0.0/on-prem/docker/prerequisites.mdx index ff13e68..3a39900 100644 --- a/fern/products/waves/versions/v4.0.0/on-prem/docker/stt/prerequisites.mdx +++ b/fern/products/waves/pages/v4.0.0/on-prem/docker/prerequisites.mdx @@ -1,6 +1,6 @@ --- title: Prerequisites -description: System requirements and setup needed before deploying Speech-to-Text with Docker +description: System requirements and setup needed before deploying with Docker --- ## Overview @@ -12,27 +12,26 @@ Before deploying Smallest Self-Host with Docker, ensure your system meets the ha ### Minimum Specifications - + **4 cores** minimum 8+ cores recommended for production - + **16 GB** minimum 32+ GB recommended for production - + **NVIDIA GPU required** - - L4 or L40s (recommended) - - A10, A100, H100, T4 (supported) - - Minimum 16GB VRAM + - L40s or A10 (recommended) + - A10, A100, H100, T4, L4 (supported) - + **100 GB** minimum - 50 GB for models @@ -226,13 +225,13 @@ You should see your GPU information displayed. 
Before installation, obtain the following from Smallest.ai: - + Your unique license key for validation Contact: **support@smallest.ai** - + Credentials to pull Docker images: - Registry URL: `quay.io` - Username @@ -242,7 +241,7 @@ Before installation, obtain the following from Smallest.ai: Contact: **support@smallest.ai** - + Download URLs for ASR models Contact: **support@smallest.ai** @@ -253,16 +252,41 @@ Before installation, obtain the following from Smallest.ai: Ensure the following ports are available: -| Port | Service | Purpose | -|------|---------|---------| -| 7100 | API Server | Client API requests | -| 2233 | Lightning ASR | Internal ASR processing | -| 6699 | License Proxy | Internal license validation | -| 6379 | Redis | Internal caching | + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PortServicePurpose
7100API ServerClient API requests
2233Lightning ASRInternal ASR processing
6699License ProxyInternal license validation
6379RedisInternal caching
The License Proxy requires outbound HTTPS access to Smallest's license servers for validation. Ensure your firewall allows outbound connections to: -- `console-api.smallest.ai` (port 443) +- `api.smallest.ai` (port 443) ## Verification Checklist @@ -302,7 +326,7 @@ Before proceeding to installation, verify: Once all prerequisites are met, proceed to the quick start guide: - - Deploy Smallest Self-Host STT with Docker Compose + + Deploy Smallest Self-Host with Docker Compose diff --git a/fern/products/waves/pages/v4.0.0/on-prem/docker/quick-start.mdx b/fern/products/waves/pages/v4.0.0/on-prem/docker/quick-start.mdx new file mode 100644 index 0000000..99a3596 --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/on-prem/docker/quick-start.mdx @@ -0,0 +1,461 @@ +--- +title: Quick Start +description: Deploy Smallest Self-Host with Docker Compose in under 15 minutes +--- + +## Overview + +This guide walks you through deploying Smallest Self-Host using Docker Compose. You'll have a fully functional speech-to-text service running in under 15 minutes. + + + Ensure you've completed all [prerequisites](/waves/self-host/docker-setup/stt-deployment/prerequisites/hardware-requirements) before + starting this guide. + + +## Step 1: Create Project Directory + +Create a directory for your deployment: + +```bash +mkdir -p ~/smallest-self-host +cd ~/smallest-self-host +``` + +## Step 2: Login to Container Registry + +Authenticate with the Smallest container registry using credentials provided by support: + +```bash +docker login quay.io +``` + +Enter your username and password when prompted. + + + Save your credentials securely. You'll need them if you restart or redeploy + the containers. + + +## Step 3: Create Environment File + +Create a `.env` file with your license key: + +```bash +cat > .env << 'EOF' +LICENSE_KEY=your-license-key-here +EOF +``` + +Replace `your-license-key-here` with the actual license key provided by Smallest.ai. 
+ + + Never commit your `.env` file to version control. Add it to `.gitignore` if + using git. + + +## Step 4: Create Docker Compose File + +Choose the configuration that matches your model type: + + + + **Best for:** Fast inference, real-time applications + + Create a `docker-compose.yml` file: + + ```yaml docker-compose.yml + version: "3.8" + + services: + lightning-asr: + image: quay.io/smallestinc/lightning-asr:latest + ports: + - "2233:2233" + environment: + - MODEL_URL=${MODEL_URL} + - LICENSE_KEY=${LICENSE_KEY} + - REDIS_URL=redis://redis:6379 + - PORT=2233 + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: 1 + capabilities: [gpu] + restart: unless-stopped + networks: + - smallest-network + + api-server: + image: quay.io/smallestinc/self-hosted-api-server:latest + container_name: api-server + environment: + - LICENSE_KEY=${LICENSE_KEY} + - LIGHTNING_ASR_BASE_URL=http://lightning-asr:2233 + - API_BASE_URL=http://license-proxy:3369 + ports: + - "7100:7100" + networks: + - smallest-network + restart: unless-stopped + depends_on: + - lightning-asr + - license-proxy + + license-proxy: + image: quay.io/smallestinc/license-proxy:latest + container_name: license-proxy + environment: + - LICENSE_KEY=${LICENSE_KEY} + networks: + - smallest-network + restart: unless-stopped + + redis: + image: redis:7-alpine + ports: + - "6379:6379" + networks: + - smallest-network + restart: unless-stopped + command: redis-server --appendonly yes + healthcheck: + test: ["CMD", "redis-cli", "ping"] + interval: 5s + timeout: 3s + retries: 5 + + networks: + smallest-network: + driver: bridge + name: smallest-network + ``` + + + + + **Best for:** Latest model version with improved accuracy + + Create a `docker-compose.yml` file: + + ```yaml docker-compose.yml + version: "3.8" + + services: + lightning-v2: + image: quay.io/smallestinc/lightning-v2:latest + container_name: lightning-v2 + ports: + - "2269:2269" + environment: + - PORT=2269 + - 
LICENSE_KEY=${LICENSE_KEY} + - REDIS_HOST=redis + - REDIS_PORT=6379 + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: 1 + capabilities: [gpu] + restart: unless-stopped + networks: + - smallest-network + + api-server: + image: quay.io/smallestinc/self-hosted-api-server:latest + container_name: api-server + environment: + - PORT=7100 + - LICENSE_KEY=${LICENSE_KEY} + - LIGHTNING_V2_BASE_URL=http://lightning-v2:2269 + - API_BASE_URL=http://license-proxy:3369 + - REDIS_HOST=redis + - REDIS_PORT=6379 + ports: + - "7100:7100" + networks: + - smallest-network + restart: unless-stopped + depends_on: + - lightning-v2 + - license-proxy + + license-proxy: + image: quay.io/smallestinc/license-proxy:latest + container_name: license-proxy + environment: + - PORT=3369 + - LICENSE_KEY=${LICENSE_KEY} + networks: + - smallest-network + restart: unless-stopped + + redis: + image: redis:7-alpine + container_name: redis-server + ports: + - "6379:6379" + networks: + - smallest-network + restart: unless-stopped + command: redis-server --appendonly yes + volumes: + - redis-data:/data + + networks: + smallest-network: + driver: bridge + name: smallest-network + + volumes: + redis-data: + ``` + + + + +## Step 5: Additional Configuration for Lightning ASR + + + + Add the model URL to your `.env` file (required for Lightning ASR): + + ```bash + echo "MODEL_URL=your-model-url-here" >> .env + ``` + + The MODEL_URL is provided by Smallest.ai support. + + + + +## Step 6: Start Services + +Launch all services with Docker Compose: + +```bash +docker compose up -d +``` + + + + First startup will take 5-10 minutes as the system: + 1. Pulls container images (~5 GB) + 2. Downloads ASR models (~20 GB) + 3. Initializes GPU and loads models + + + + First startup will take 3-5 minutes as the system: + 1. Pulls container images (~15-25 GB, includes models) + 2. Initializes GPU and loads models + + Models are embedded in the container - no separate download needed. 
+ + + + After the first run, startup takes 30-60 seconds for all model types as images are cached. + + + +## Step 7: Monitor Startup + +Watch the logs to monitor startup progress: + +```bash +docker compose logs -f +``` + +Look for these success indicators: + + + + ``` + redis-1 | Ready to accept connections + ``` + + + + ``` + license-proxy | License validated successfully + license-proxy | Server listening on port 3369 + ``` + + + + **Lightning ASR:** + ``` + lightning-asr-1 | Model loaded successfully + lightning-asr-1 | Server ready on port 2233 + ``` + + **Lightning V2:** + ``` + lightning-v2 | Model loaded successfully + lightning-v2 | Server ready on port 2269 + ``` + + **Lightning Large:** + ``` + lightning-large | Model loaded successfully + lightning-large | Server ready on port 9989 + ``` + + + + ``` + api-server | Connected to Lightning ASR + api-server | API server listening on port 7100 + ``` + + + +Press `Ctrl+C` to stop following logs. + +## Step 8: Test API + +Test the API with a sample request: + +```bash +curl -X POST http://localhost:7100/v1/listen \ + -H "Authorization: Token ${LICENSE_KEY}" \ + -H "Content-Type: application/json" \ + -d '{ + "url": "https://example.com/sample-audio.wav" + }' +``` + + +Replace the URL with your own audio file, or use the health check endpoint first: + +```bash +curl http://localhost:7100/health +``` + +Expected response: `{"status": "healthy"}` + + + +## Common Startup Issues + + + + **Error:** `could not select device driver "nvidia"` + + **Solution:** + ```bash + sudo systemctl restart docker + docker run --rm --gpus all nvidia/cuda:11.8.0-base-ubuntu22.04 nvidia-smi + ``` + + If this fails, reinstall NVIDIA Container Toolkit. 
+ + + + **Error:** `License validation failed` + + **Solution:** + - Verify LICENSE_KEY in `.env` is correct + - Check internet connectivity + - Ensure firewall allows HTTPS to api.smallest.ai + + + + **Error:** `Failed to download model` + + **Solution:** + - Verify MODEL_URL in `.env` is correct + - Check disk space: `df -h` + - Check internet connectivity + + + + **Error:** `port is already allocated` + + **Solution:** + Check what's using the port: + ```bash + sudo lsof -i :7100 + ``` + + Either stop the conflicting service or change the port in docker-compose.yml + + + +## Managing Your Deployment + +### Stop Services + +```bash +docker compose stop +``` + +### Restart Services + +```bash +docker compose restart +``` + +### View Logs + +```bash +docker compose logs -f [service-name] +``` + +Examples: + +```bash +docker compose logs -f api-server +docker compose logs -f lightning-asr +``` + +### Update Images + +Pull latest images and restart: + +```bash +docker compose pull +docker compose up -d +``` + +### Remove Deployment + +Stop and remove all containers: + +```bash +docker compose down +``` + +Remove containers and volumes (including downloaded models): + +```bash +docker compose down -v +``` + + + Using `-v` flag will delete all data including downloaded models. They will + need to be re-downloaded on next startup. + + +## What's Next? 
+ + + + Customize your deployment with advanced configuration options + + + + Learn about each service component in detail + + + + Debug common issues and optimize performance + + + + Integrate with your applications using the API + + diff --git a/fern/products/waves/pages/v4.0.0/on-prem/docker/stt/configuration.mdx b/fern/products/waves/pages/v4.0.0/on-prem/docker/stt/configuration.mdx new file mode 100644 index 0000000..8d9b3bb --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/on-prem/docker/stt/configuration.mdx @@ -0,0 +1,585 @@ +--- +title: Configuration +description: Advanced configuration options for STT Docker deployments +--- + +## Overview + +This guide covers advanced configuration options for customizing your Docker deployment. Learn how to optimize resources, configure external services, and tune performance. + +## Environment Variables + +All configuration is managed through environment variables in the `.env` file. + +### Core Configuration + + + Your Smallest.ai license key for validation and usage reporting + + + + Download URL for the Lightning ASR model (provided by Smallest.ai) + + +### API Server Configuration + + + Port for the API server to listen on + + ```bash + API_SERVER_PORT=8080 + ``` + + + + Internal URL for license proxy communication + + + + Internal URL for Lightning ASR communication + + +### Lightning ASR Configuration + + + Port for Lightning ASR to listen on + + ```bash + ASR_PORT=2233 + ``` + + + + Redis connection URL for caching and state management + + For external Redis: + ```bash + REDIS_URL=redis://external-redis.example.com:6379 + ``` + + With password: + ```bash + REDIS_URL=redis://:password@redis:6379 + ``` + + + + GPU device ID to use (for multi-GPU systems) + + ```bash + GPU_DEVICE_ID=0 + ``` + + +## Resource Configuration + +### GPU Allocation + +For systems with multiple GPUs, you can specify which GPU to use: + +```yaml docker-compose.yml +lightning-asr: + deploy: + resources: + reservations: + devices: + - 
driver: nvidia + device_ids: ['0'] + capabilities: [gpu] +``` + +For multiple GPUs per container: + +```yaml docker-compose.yml +lightning-asr: + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: 2 + capabilities: [gpu] +``` + +### Memory Limits + +Set memory limits for containers: + +```yaml docker-compose.yml +api-server: + deploy: + resources: + limits: + memory: 2G + reservations: + memory: 512M + +lightning-asr: + deploy: + resources: + limits: + memory: 16G + reservations: + memory: 12G +``` + +### CPU Allocation + +Reserve CPU cores for each service: + +```yaml docker-compose.yml +lightning-asr: + deploy: + resources: + limits: + cpus: '8' + reservations: + cpus: '4' +``` + +## Redis Configuration + +### Using External Redis + +To use an external Redis instance instead of the embedded one: + + + + Modify `.env` file: + ```bash + REDIS_URL=redis://your-redis-host:6379 + REDIS_PASSWORD=your-password + ``` + + + + Comment out or remove the Redis service: + ```yaml docker-compose.yml + # redis: + # image: redis:latest + # ... 
+ ``` + + + + Remove Redis from depends_on: + ```yaml docker-compose.yml + api-server: + depends_on: + - lightning-asr + - license-proxy + # - redis # removed + ``` + + + +### Redis Persistence + +Enable data persistence for Redis: + +```yaml docker-compose.yml +redis: + image: redis:latest + command: redis-server --appendonly yes + volumes: + - redis-data:/data + networks: + - smallest-network + +volumes: + redis-data: + driver: local +``` + +### Redis with Authentication + +Add password protection: + +```yaml docker-compose.yml +redis: + image: redis:latest + command: redis-server --requirepass ${REDIS_PASSWORD} + environment: + - REDIS_PASSWORD=${REDIS_PASSWORD} +``` + +Update `.env`: +```bash +REDIS_PASSWORD=your-secure-password +REDIS_URL=redis://:your-secure-password@redis:6379 +``` + +## Scaling Configuration + +### Multiple ASR Workers + +Run multiple Lightning ASR containers for higher throughput: + +```yaml docker-compose.yml +services: + lightning-asr-1: + image: quay.io/smallestinc/lightning-asr:latest + ports: + - "2233:2233" + environment: + - MODEL_URL=${MODEL_URL} + - LICENSE_KEY=${LICENSE_KEY} + - REDIS_URL=redis://redis:6379 + - PORT=2233 + deploy: + resources: + reservations: + devices: + - driver: nvidia + device_ids: ['0'] + capabilities: [gpu] + networks: + - smallest-network + + lightning-asr-2: + image: quay.io/smallestinc/lightning-asr:latest + ports: + - "2234:2233" + environment: + - MODEL_URL=${MODEL_URL} + - LICENSE_KEY=${LICENSE_KEY} + - REDIS_URL=redis://redis:6379 + - PORT=2233 + deploy: + resources: + reservations: + devices: + - driver: nvidia + device_ids: ['1'] + capabilities: [gpu] + networks: + - smallest-network + + api-server: + environment: + - LIGHTNING_ASR_BASE_URL=http://lightning-asr-1:2233,http://lightning-asr-2:2233 +``` + + +This configuration requires multiple GPUs in your system and will distribute load across workers. 
+ + +## Network Configuration + +### Custom Network Settings + +Configure custom network with specific subnet: + +```yaml docker-compose.yml +networks: + smallest-network: + driver: bridge + ipam: + config: + - subnet: 172.28.0.0/16 + gateway: 172.28.0.1 +``` + +### Expose on Specific Interface + +Bind to specific host IP: + +```yaml docker-compose.yml +api-server: + ports: + - "192.168.1.100:7100:7100" +``` + +### Use Host Network + +For maximum performance (loses network isolation): + +```yaml docker-compose.yml +api-server: + network_mode: host +``` + + +Host network mode bypasses Docker networking and directly uses host network stack. Use only if necessary. + + +## Logging Configuration + +### Custom Log Drivers + +Use JSON file logging with rotation: + +```yaml docker-compose.yml +services: + api-server: + logging: + driver: "json-file" + options: + max-size: "10m" + max-file: "3" +``` + +### Syslog Integration + +Send logs to syslog: + +```yaml docker-compose.yml +services: + api-server: + logging: + driver: "syslog" + options: + syslog-address: "tcp://192.168.1.100:514" + tag: "smallest-api-server" +``` + +### Centralized Logging + +Forward logs to external logging service: + +```yaml docker-compose.yml +services: + api-server: + logging: + driver: "fluentd" + options: + fluentd-address: "localhost:24224" + tag: "docker.{{.Name}}" +``` + +## Volume Configuration + +### Persistent Model Storage + +Avoid re-downloading models on container restart: + +```yaml docker-compose.yml +services: + lightning-asr: + volumes: + - model-cache:/app/models + +volumes: + model-cache: + driver: local +``` + +### Custom Model Location + +Use a specific host directory: + +```yaml docker-compose.yml +services: + lightning-asr: + volumes: + - /mnt/models:/app/models + environment: + - MODEL_CACHE_DIR=/app/models +``` + +## Health Checks + +### Custom Health Check Intervals + +Adjust health check timing: + +```yaml docker-compose.yml +redis: + healthcheck: + test: ["CMD", 
"redis-cli", "ping"] + interval: 10s + timeout: 5s + retries: 3 + start_period: 30s +``` + +### API Server Health Check + +Add health check for API server: + +```yaml docker-compose.yml +api-server: + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:7100/health"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 60s +``` + +## Security Configuration + +### Run as Non-Root User + +Add user specification: + +```yaml docker-compose.yml +api-server: + user: "1000:1000" +``` + +### Read-Only Filesystem + +Increase security with read-only root filesystem: + +```yaml docker-compose.yml +api-server: + read_only: true + tmpfs: + - /tmp + - /var/run +``` + +### Resource Limits + +Prevent resource exhaustion: + +```yaml docker-compose.yml +api-server: + deploy: + resources: + limits: + cpus: '2' + memory: 2G + pids: 100 +``` + +## Example: Production Configuration + +Here's a complete production-ready configuration: + +```yaml docker-compose.yml +version: "3.8" + +services: + lightning-asr: + image: quay.io/smallestinc/lightning-asr:latest + ports: + - "127.0.0.1:2233:2233" + environment: + - MODEL_URL=${MODEL_URL} + - LICENSE_KEY=${LICENSE_KEY} + - REDIS_URL=redis://:${REDIS_PASSWORD}@redis:6379 + - PORT=2233 + volumes: + - model-cache:/app/models + deploy: + resources: + limits: + memory: 16G + cpus: '8' + reservations: + memory: 12G + cpus: '4' + devices: + - driver: nvidia + count: 1 + capabilities: [gpu] + restart: unless-stopped + logging: + driver: "json-file" + options: + max-size: "10m" + max-file: "3" + networks: + - smallest-network + + api-server: + image: quay.io/smallestinc/self-hosted-api-server:latest + container_name: api-server + ports: + - "7100:7100" + environment: + - LICENSE_KEY=${LICENSE_KEY} + - LIGHTNING_ASR_BASE_URL=http://lightning-asr:2233 + - API_BASE_URL=http://license-proxy:6699 + deploy: + resources: + limits: + memory: 2G + cpus: '2' + reservations: + memory: 512M + cpus: '0.5' + restart: unless-stopped + logging: + 
driver: "json-file" + options: + max-size: "10m" + max-file: "3" + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:7100/health"] + interval: 30s + timeout: 10s + retries: 3 + networks: + - smallest-network + depends_on: + - lightning-asr + - license-proxy + - redis + + license-proxy: + image: quay.io/smallestinc/license-proxy:latest + container_name: license-proxy + environment: + - LICENSE_KEY=${LICENSE_KEY} + deploy: + resources: + limits: + memory: 512M + cpus: '1' + restart: unless-stopped + logging: + driver: "json-file" + options: + max-size: "10m" + max-file: "3" + networks: + - smallest-network + + redis: + image: redis:7-alpine + command: redis-server --requirepass ${REDIS_PASSWORD} --appendonly yes + ports: + - "127.0.0.1:6379:6379" + volumes: + - redis-data:/data + deploy: + resources: + limits: + memory: 1G + cpus: '1' + restart: unless-stopped + healthcheck: + test: ["CMD", "redis-cli", "--raw", "incr", "ping"] + interval: 10s + timeout: 3s + retries: 5 + networks: + - smallest-network + +networks: + smallest-network: + driver: bridge + name: smallest-network + +volumes: + model-cache: + driver: local + redis-data: + driver: local +``` + +## What's Next? 
+ + + + Learn about each service component in detail + + + + Debug common issues and optimize performance + + + diff --git a/fern/products/waves/pages/v4.0.0/on-prem/docker/stt/docker-troubleshooting.mdx b/fern/products/waves/pages/v4.0.0/on-prem/docker/stt/docker-troubleshooting.mdx new file mode 100644 index 0000000..0059f81 --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/on-prem/docker/stt/docker-troubleshooting.mdx @@ -0,0 +1,655 @@ +--- +title: Docker Troubleshooting +description: Debug common issues and optimize your STT Docker deployment +--- + +## Common Issues + +### GPU Not Accessible + +**Symptoms:** +- Error: `could not select device driver "nvidia"` +- Error: `no NVIDIA GPU devices found` +- Lightning ASR fails to start + +**Diagnosis:** + +```bash +docker run --rm --gpus all nvidia/cuda:11.8.0-base-ubuntu22.04 nvidia-smi +``` + + + + ```bash + sudo systemctl restart docker + docker compose up -d + ``` + + + + ```bash + sudo apt-get remove nvidia-container-toolkit + sudo apt-get update + sudo apt-get install -y nvidia-container-toolkit + + sudo systemctl restart docker + ``` + + + + ```bash + nvidia-smi + ``` + + If driver version is below 470, update: + ```bash + sudo ubuntu-drivers autoinstall + sudo reboot + ``` + + + + Verify `/etc/docker/daemon.json` contains: + ```json + { + "runtimes": { + "nvidia": { + "path": "nvidia-container-runtime", + "runtimeArgs": [] + } + } + } + ``` + + Restart Docker after changes: + ```bash + sudo systemctl restart docker + ``` + + + +### License Validation Failed + +**Symptoms:** +- Error: `License validation failed` +- Error: `Invalid license key` +- Services fail to start + +**Diagnosis:** + +Check license-proxy logs: +```bash +docker compose logs license-proxy +``` + + + + Check `.env` file: + ```bash + cat .env | grep LICENSE_KEY + ``` + + Ensure there are no: + - Extra spaces + - Quotes around the key + - Line breaks + + Correct format: + ```bash + LICENSE_KEY=abc123def456 + ``` + + + + Test connection to 
license server: + ```bash + curl -v https://api.smallest.ai + ``` + + If this fails, check: + - Firewall rules + - Proxy settings + - DNS resolution + + + + If the key appears correct and network is accessible, your license may be: + - Expired + - Revoked + - Invalid + + Contact **support@smallest.ai** with: + - Your license key + - License-proxy logs + - Error messages + + + +### Model Download Failed + +**Symptoms:** +- Lightning ASR stuck at startup +- Error: `Failed to download model` +- Error: `Connection timeout` + +**Diagnosis:** + +Check Lightning ASR logs: +```bash +docker compose logs lightning-asr +``` + + + + Check `.env` file: + ```bash + cat .env | grep MODEL_URL + ``` + + Test URL accessibility: + ```bash + curl -I "${MODEL_URL}" + ``` + + + + Models require ~20-30 GB: + ```bash + df -h + ``` + + Free up space if needed: + ```bash + docker system prune -a + ``` + + + + Download model manually and use volume mount: + + ```bash + mkdir -p ~/models + cd ~/models + wget "${MODEL_URL}" -O model.bin + ``` + + Update docker-compose.yml: + ```yaml + lightning-asr: + volumes: + - ~/models:/app/models + ``` + + + + For slow connections, increase download timeout: + + ```yaml + lightning-asr: + environment: + - DOWNLOAD_TIMEOUT=3600 + ``` + + + +### Port Already in Use + +**Symptoms:** +- Error: `port is already allocated` +- Error: `bind: address already in use` + +**Diagnosis:** + +Find what's using the port: +```bash +sudo lsof -i :7100 +sudo netstat -tulpn | grep 7100 +``` + + + + If another service is using the port: + ```bash + sudo systemctl stop [service-name] + ``` + + Or kill the process: + ```bash + sudo kill -9 [PID] + ``` + + + + Modify docker-compose.yml to use different port: + ```yaml + api-server: + ports: + - "8080:7100" + ``` + + Access API at http://localhost:8080 instead + + + + Old containers may still be bound: + ```bash + docker compose down + docker container prune -f + docker compose up -d + ``` + + + +### Out of Memory + 
+**Symptoms:** +- Container killed unexpectedly +- Error: `OOMKilled` +- System becomes unresponsive + +**Diagnosis:** + +Check container status: +```bash +docker compose ps +docker inspect [container-name] | grep OOMKilled +``` + + + + Lightning ASR requires minimum 16 GB RAM + + Check current memory: + ```bash + free -h + ``` + + + + Prevent one service from consuming all memory: + ```yaml + services: + lightning-asr: + deploy: + resources: + limits: + memory: 14G + reservations: + memory: 12G + ``` + + + + Add swap space (temporary solution): + ```bash + sudo fallocate -l 16G /swapfile + sudo chmod 600 /swapfile + sudo mkswap /swapfile + sudo swapon /swapfile + ``` + + + + Use smaller model or reduce batch size: + ```yaml + lightning-asr: + environment: + - BATCH_SIZE=1 + - MODEL_PRECISION=fp16 + ``` + + + +### Container Keeps Restarting + +**Symptoms:** +- Container status shows `Restarting` +- Logs show crash loop + +**Diagnosis:** + +View recent logs: +```bash +docker compose logs --tail=100 [service-name] +``` + + + + ```bash + docker inspect [container-name] --format='{{.State.ExitCode}}' + ``` + + Common exit codes: + - `137`: Out of memory (OOMKilled) + - `139`: Segmentation fault + - `1`: General error + + + + Temporarily disable restart to debug: + ```yaml + lightning-asr: + restart: "no" + ``` + + Start manually and watch logs: + ```bash + docker compose up lightning-asr + ``` + + + + Ensure required services are healthy: + ```bash + docker compose ps + ``` + + All should show `Up (healthy)` or `Up` + + + +### Slow Performance + +**Symptoms:** +- High latency (>500ms) +- Low throughput +- GPU underutilized + +**Diagnosis:** + +Monitor GPU usage: +```bash +watch -n 1 nvidia-smi +``` + +Check container resources: +```bash +docker stats +``` + + + + Ensure GPU is not throttling: + ```bash + nvidia-smi -q -d PERFORMANCE + ``` + + Enable persistence mode: + ```bash + sudo nvidia-smi -pm 1 + ``` + + + + ```yaml + lightning-asr: + deploy: + resources: + 
limits: + cpus: '8' + ``` + + + + For maximum performance (loses isolation): + ```yaml + api-server: + network_mode: host + ``` + + + + Use Redis with persistence disabled for speed: + ```yaml + redis: + command: redis-server --save "" + ``` + + + + Scale Lightning ASR workers: + ```bash + docker compose up -d --scale lightning-asr=2 + ``` + + + +## Performance Optimization + +### Best Practices + + + + Cache models to avoid re-downloading: + ```yaml + volumes: + - model-cache:/app/models + ``` + + + + Reduces GPU initialization time: + ```bash + sudo nvidia-smi -pm 1 + ``` + + + + Allocate appropriate CPU/memory: + ```yaml + deploy: + resources: + limits: + cpus: '8' + memory: 14G + ``` + + + + Use monitoring tools: + ```bash + docker stats + nvidia-smi dmon + ``` + + + +### Benchmark Your Deployment + +Test transcription performance: + +```bash +time curl -X POST http://localhost:7100/v1/listen \ + -H "Authorization: Token ${LICENSE_KEY}" \ + -H "Content-Type: application/json" \ + -d '{ + "url": "https://example.com/test-audio-60s.wav" + }' +``` + +Expected performance: +- **Cold start**: First request after container start (5-10 seconds) +- **Warm requests**: Subsequent requests (50-200ms) +- **Real-time factor**: 0.05-0.15x (60s audio in 3-9 seconds) + +## Debugging Tools + +### View All Logs + +```bash +docker compose logs -f +``` + +### Follow Specific Service + +```bash +docker compose logs -f lightning-asr +``` + +### Last N Lines + +```bash +docker compose logs --tail=100 api-server +``` + +### Save Logs to File + +```bash +docker compose logs > deployment-logs.txt +``` + +### Execute Commands in Container + +```bash +docker compose exec lightning-asr bash +``` + +### Check Container Configuration + +```bash +docker inspect lightning-asr-1 +``` + +### Network Debugging + +Test connectivity between containers: + +```bash +docker compose exec api-server ping lightning-asr +docker compose exec api-server curl http://lightning-asr:2233/health +``` + +## 
Health Checks + +### API Server + +```bash +curl http://localhost:7100/health +``` + +Expected: `{"status": "healthy"}` + +### Lightning ASR + +```bash +curl http://localhost:2233/health +``` + +Expected: `{"status": "ready", "gpu": "NVIDIA A10"}` + +### License Proxy + +```bash +docker compose exec license-proxy wget -q -O- http://localhost:6699/health +``` + +Expected: `{"status": "valid"}` + +### Redis + +```bash +docker compose exec redis redis-cli ping +``` + +Expected: `PONG` + +## Log Analysis + +### Common Log Patterns + + + + ```log + redis-1 | Ready to accept connections + license-proxy | License validated successfully + lightning-asr-1 | Model loaded successfully + lightning-asr-1 | GPU: NVIDIA A10 (24GB) + lightning-asr-1 | Server ready on port 2233 + api-server | Connected to Lightning ASR + api-server | API server listening on port 7100 + ``` + + + + ```log + license-proxy | ERROR: License validation failed + license-proxy | ERROR: Invalid license key + license-proxy | ERROR: Connection to license server failed + ``` + + + + ```log + lightning-asr-1 | ERROR: No CUDA-capable device detected + lightning-asr-1 | ERROR: CUDA out of memory + lightning-asr-1 | ERROR: GPU not accessible + ``` + + + + ```log + api-server | ERROR: Connection refused: lightning-asr:2233 + api-server | ERROR: Timeout connecting to license-proxy + ``` + + + +## Getting Help + +### Before Contacting Support + +Collect the following information: + + + + ```bash + docker version + docker compose version + nvidia-smi + uname -a + ``` + + + + ```bash + docker compose ps > status.txt + docker stats --no-stream > resources.txt + ``` + + + + ```bash + docker compose logs > all-logs.txt + ``` + + + + Sanitize and include: + - docker-compose.yml + - .env (remove license key) + + + +### Contact Support + +Email: **support@smallest.ai** + +Include: +- Description of the issue +- Steps to reproduce +- System information +- Logs and configuration +- License key (via secure channel) + +## 
What's Next? + + + + Advanced configuration options + + + + Integrate with your applications + + + diff --git a/fern/products/waves/pages/v4.0.0/on-prem/docker/stt/prerequisites/credentials.mdx b/fern/products/waves/pages/v4.0.0/on-prem/docker/stt/prerequisites/credentials.mdx new file mode 100644 index 0000000..48118c0 --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/on-prem/docker/stt/prerequisites/credentials.mdx @@ -0,0 +1,52 @@ +--- +title: Credentials & Access +description: License keys and registry credentials for STT Docker deployment +--- + +## Required Credentials + +Before installation, obtain the following from Smallest.ai: + + + + Your unique license key for validation + + Contact: **support@smallest.ai** + + + + Credentials to pull Docker images: + - Registry URL: `quay.io` + - Username + - Password + - Email + + Contact: **support@smallest.ai** + + + + Download URLs for ASR models + + Contact: **support@smallest.ai** + + + +## Login to Container Registry + +Once you have your credentials, authenticate with the registry: + +```bash +docker login quay.io -u -p +``` + +## Environment Variables + +You'll need to set these in your deployment: + +```bash +export LICENSE_KEY="your-license-key" +export QUAY_USERNAME="your-username" +export QUAY_PASSWORD="your-password" +export MODEL_URL="your-model-url" +``` + diff --git a/fern/products/waves/pages/v4.0.0/on-prem/docker/stt/prerequisites/hardware-requirements.mdx b/fern/products/waves/pages/v4.0.0/on-prem/docker/stt/prerequisites/hardware-requirements.mdx new file mode 100644 index 0000000..14c7273 --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/on-prem/docker/stt/prerequisites/hardware-requirements.mdx @@ -0,0 +1,52 @@ +--- +title: Hardware Requirements +description: Hardware specifications for deploying Speech-to-Text with Docker +--- + +## Minimum Specifications + + + + **4 cores** minimum + + 8+ cores recommended for production + + + + **16 GB** minimum + + 32+ GB recommended for production + + 
+ + **NVIDIA GPU required** + + - L4 or L40s (recommended) + - A10, A100, H100, T4 (supported) + - Minimum 16GB VRAM + + + + **100 GB** minimum + + - 50 GB for models + - 50 GB for logs and data + + + +## Network Requirements + +Ensure the following ports are available: + +| Port | Service | Purpose | +|------|---------|---------| +| 7100 | API Server | Client API requests | +| 2233 | Lightning ASR | Internal ASR processing | +| 6699 | License Proxy | Internal license validation | +| 6379 | Redis | Internal caching | + + +The License Proxy requires outbound HTTPS access to Smallest's license servers for validation. Ensure your firewall allows outbound connections to: +- `api.smallest.ai` (port 443) + + diff --git a/fern/products/waves/versions/v4.0.0/on-prem/docker/tts/prerequisites.mdx b/fern/products/waves/pages/v4.0.0/on-prem/docker/stt/prerequisites/software-requirements.mdx similarity index 53% rename from fern/products/waves/versions/v4.0.0/on-prem/docker/tts/prerequisites.mdx rename to fern/products/waves/pages/v4.0.0/on-prem/docker/stt/prerequisites/software-requirements.mdx index 25398f8..cab73d5 100644 --- a/fern/products/waves/versions/v4.0.0/on-prem/docker/tts/prerequisites.mdx +++ b/fern/products/waves/pages/v4.0.0/on-prem/docker/stt/prerequisites/software-requirements.mdx @@ -1,50 +1,9 @@ --- -title: Prerequisites -description: System requirements and setup needed before deploying Text-to-Speech with Docker +title: Software Requirements +description: Software and dependencies for deploying Speech-to-Text with Docker --- -## Overview - -Before deploying Smallest Self-Host Text-to-Speech (TTS) with Docker, ensure your system meets the hardware and software requirements. This guide walks you through everything you need to prepare your environment. 
- -## Hardware Requirements - -### Minimum Specifications - - - - **4 cores** minimum - - 8+ cores recommended for production - - - - **16 GB** minimum - - 32+ GB recommended for production - - - - **NVIDIA GPU required** - - - L40s or A10 (recommended) - - A10, A100, H100, T4, L4 (supported) - - Minimum 16GB VRAM - - - - **100 GB** minimum - - - 50 GB for models - - 50 GB for logs and data - - - -## Software Requirements - -### Operating System - -Smallest Self-Host TTS supports the following operating systems: +## Operating System @@ -68,7 +27,7 @@ Smallest Self-Host TTS supports the following operating systems: -### Required Software +## Required Software @@ -112,9 +71,7 @@ Smallest Self-Host TTS supports the following operating systems: -## Installation Guides - -### Install Docker +## Install Docker @@ -160,7 +117,7 @@ Smallest Self-Host TTS supports the following operating systems: -### Install NVIDIA Driver +## Install NVIDIA Driver @@ -196,7 +153,7 @@ Smallest Self-Host TTS supports the following operating systems: -### Install NVIDIA Container Toolkit +## Install NVIDIA Container Toolkit ```bash distribution=$(. /etc/os-release;echo $ID$VERSION_ID) @@ -211,7 +168,7 @@ sudo apt-get install -y nvidia-container-toolkit sudo systemctl restart docker ``` -### Verify GPU Access +## Verify GPU Access Test that Docker can access the GPU: @@ -221,88 +178,3 @@ docker run --rm --gpus all nvidia/cuda:11.8.0-base-ubuntu22.04 nvidia-smi You should see your GPU information displayed. 
-## Credentials - -Before installation, obtain the following from Smallest.ai: - - - - Your unique license key for validation - - Contact: **support@smallest.ai** - - - - Credentials to pull Docker images: - - Registry URL: `quay.io` - - Username - - Password - - Email - - Contact: **support@smallest.ai** - - - - Download URLs for TTS models (if required) - - Contact: **support@smallest.ai** - - - -## Network Requirements - -Ensure the following ports are available: - -| Port | Service | Purpose | -|------|---------|---------| -| 7100 | API Server | Client API requests | -| 8876 | Lightning TTS | TTS service endpoint | -| 3369 | License Proxy | Internal license validation | -| 6379 | Redis | Internal caching | - - -The License Proxy requires outbound HTTPS access to Smallest's license servers for validation. Ensure your firewall allows outbound connections to: -- `console-api.smallest.ai` (port 443) - - -## Verification Checklist - -Before proceeding to installation, verify: - - - - ```bash - docker ps - ``` - Should execute without errors - - - - ```bash - docker run --rm --gpus all nvidia/cuda:11.8.0-base-ubuntu22.04 nvidia-smi - ``` - Should display GPU information - - - - - [ ] License key obtained - - [ ] Container registry username and password - - [ ] Model download URLs (if required) - - - - ```bash - sudo netstat -tuln | grep -E '(7100|8876|3369|6379)' - ``` - Should return no results (ports free) - - - -## What's Next? 
- -Once all prerequisites are met, proceed to the quick start guide: - - - Deploy Smallest Self-Host TTS with Docker Compose - - diff --git a/fern/products/waves/pages/v4.0.0/on-prem/docker/stt/prerequisites/verification.mdx b/fern/products/waves/pages/v4.0.0/on-prem/docker/stt/prerequisites/verification.mdx new file mode 100644 index 0000000..6360c0a --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/on-prem/docker/stt/prerequisites/verification.mdx @@ -0,0 +1,94 @@ +--- +title: Verification Checklist +description: Verify all prerequisites before deploying STT with Docker +--- + +## Pre-Deployment Checklist + +Before proceeding to installation, verify each item: + + + + ```bash + docker ps + ``` + Should execute without errors + + + + ```bash + docker compose version + ``` + Should show version 2.0 or higher + + + + ```bash + docker run --rm --gpus all nvidia/cuda:11.8.0-base-ubuntu22.04 nvidia-smi + ``` + Should display GPU information + + + + ```bash + docker login quay.io + ``` + Should show "Login Succeeded" + + + + - [ ] License key obtained + - [ ] Container registry username and password + - [ ] Model download URLs + + + + ```bash + sudo netstat -tuln | grep -E '(7100|2233|6699|6379)' + ``` + Should return no results (ports free) + + + + ```bash + df -h / + ``` + Should show at least 100 GB available + + + +## Quick Verification Script + +Run this script to check all prerequisites at once: + +```bash +#!/bin/bash +echo "=== Docker STT Prerequisites Check ===" + +echo -n "Docker: " +docker --version &>/dev/null && echo "OK" || echo "MISSING" + +echo -n "Docker Compose: " +docker compose version &>/dev/null && echo "OK" || echo "MISSING" + +echo -n "NVIDIA Driver: " +nvidia-smi &>/dev/null && echo "OK" || echo "MISSING" + +echo -n "NVIDIA Container Toolkit: " +docker run --rm --gpus all nvidia/cuda:11.8.0-base-ubuntu22.04 nvidia-smi &>/dev/null && echo "OK" || echo "MISSING" + +echo -n "Port 7100 (API Server): " +netstat -tuln 2>/dev/null | grep -q ':7100 ' 
&& echo "IN USE" || echo "FREE" + +echo -n "Port 2233 (Lightning ASR): " +netstat -tuln 2>/dev/null | grep -q ':2233 ' && echo "IN USE" || echo "FREE" + +echo -n "Port 6699 (License Proxy): " +netstat -tuln 2>/dev/null | grep -q ':6699 ' && echo "IN USE" || echo "FREE" + +echo -n "Port 6379 (Redis): " +netstat -tuln 2>/dev/null | grep -q ':6379 ' && echo "IN USE" || echo "FREE" + +echo "=== Check Complete ===" +``` + diff --git a/fern/products/waves/pages/v4.0.0/on-prem/docker/stt/quick-start.mdx b/fern/products/waves/pages/v4.0.0/on-prem/docker/stt/quick-start.mdx new file mode 100644 index 0000000..a38f227 --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/on-prem/docker/stt/quick-start.mdx @@ -0,0 +1,320 @@ +--- +title: Quick Start +description: Deploy Smallest Self-Host Speech-to-Text with Docker Compose in under 15 minutes +--- + +## Overview + +This guide walks you through deploying Smallest Self-Host using Docker Compose. You'll have a fully functional speech-to-text service running in under 15 minutes. + + + Ensure you've completed all [prerequisites](/waves/self-host/docker-setup/stt-deployment/prerequisites/hardware-requirements) before + starting this guide. + + + +## Step 1: Create Project Directory + +Create a directory for your deployment: + +```bash +mkdir -p ~/smallest-self-host +cd ~/smallest-self-host +``` + +## Step 2: Login to Container Registry + +Authenticate with the Smallest container registry using credentials provided by support: + +```bash +docker login quay.io +``` + +Enter your username and password when prompted. + + + Save your credentials securely. You'll need them if you restart or redeploy + the containers. + + +## Step 3: Create Environment File + +Create a `.env` file with your license key: + +```bash +cat > .env << 'EOF' +LICENSE_KEY=your-license-key-here +EOF +``` + +Replace `your-license-key-here` with the actual license key provided by Smallest.ai. + + + Never commit your `.env` file to version control. 
Add it to `.gitignore` if
+  using git.
+
+
+## Step 4: Create Docker Compose File
+
+
+
+    **Best for:** Fast inference, real-time applications
+
+    Create a `docker-compose.yml` file:
+
+    ```yaml docker-compose.yml
+    version: "3.8"
+
+    services:
+      lightning-asr:
+        image: quay.io/smallestinc/lightning-asr:latest
+        ports:
+          - "2233:2233"
+        environment:
+          - MODEL_URL=${MODEL_URL}
+          - LICENSE_KEY=${LICENSE_KEY}
+          - REDIS_URL=redis://redis:6379
+          - PORT=2233
+        deploy:
+          resources:
+            reservations:
+              devices:
+                - driver: nvidia
+                  count: 1
+                  capabilities: [gpu]
+        restart: unless-stopped
+        networks:
+          - smallest-network
+
+      api-server:
+        image: quay.io/smallestinc/self-hosted-api-server:latest
+        container_name: api-server
+        environment:
+          - LICENSE_KEY=${LICENSE_KEY}
+          - LIGHTNING_ASR_BASE_URL=http://lightning-asr:2233
+          - API_BASE_URL=http://license-proxy:6699
+        ports:
+          - "7100:7100"
+        networks:
+          - smallest-network
+        restart: unless-stopped
+        depends_on:
+          - lightning-asr
+          - license-proxy
+
+      license-proxy:
+        image: quay.io/smallestinc/license-proxy:latest
+        container_name: license-proxy
+        environment:
+          - LICENSE_KEY=${LICENSE_KEY}
+        networks:
+          - smallest-network
+        restart: unless-stopped
+
+      redis:
+        image: redis:7-alpine
+        ports:
+          - "6379:6379"
+        networks:
+          - smallest-network
+        restart: unless-stopped
+        command: redis-server --appendonly yes
+        healthcheck:
+          test: ["CMD", "redis-cli", "ping"]
+          interval: 5s
+          timeout: 3s
+          retries: 5
+
+    networks:
+      smallest-network:
+        driver: bridge
+        name: smallest-network
+    ```
+
+
+
+
+## Step 5: Additional Configuration for Lightning ASR
+
+
+
+    Add the model URL to your `.env` file (required for Lightning ASR):
+
+    ```bash
+    echo "MODEL_URL=your-model-url-here" >> .env
+    ```
+
+    The MODEL_URL is provided by Smallest.ai support.
+
+
+
+
+## Step 6: Start Services
+
+Launch all services with Docker Compose:
+
+```bash
+docker compose up -d
+```
+
+## Step 7: Monitor Startup
+
+Watch the logs to monitor startup progress:
+
+```bash
+docker compose logs -f
+```
+
+Look for these success indicators:
+
+
+
+    ```
+    redis-1  | Ready to accept connections
+    ```
+
+
+
+    ```
+    license-proxy  | License validated successfully
+    license-proxy  | Server listening on port 6699
+    ```
+
+
+
+    **Lightning ASR:**
+    ```
+    lightning-asr-1  | Model loaded successfully
+    lightning-asr-1  | Server ready on port 2233
+    ```
+
+
+
+    ```
+    api-server  | Connected to Lightning ASR
+    api-server  | API server listening on port 7100
+    ```
+
+
+
+
+## Common Startup Issues
+
+
+
+    **Error:** `could not select device driver "nvidia"`
+
+    **Solution:**
+    ```bash
+    sudo systemctl restart docker
+    docker run --rm --gpus all nvidia/cuda:11.8.0-base-ubuntu22.04 nvidia-smi
+    ```
+
+    If this fails, reinstall NVIDIA Container Toolkit.
+
+
+
+    **Error:** `License validation failed`
+
+    **Solution:**
+    - Verify LICENSE_KEY in `.env` is correct
+    - Check internet connectivity
+    - Ensure firewall allows HTTPS to api.smallest.ai
+
+
+
+    **Error:** `Failed to download model`
+
+    **Solution:**
+    - Verify MODEL_URL in `.env` is correct
+    - Check disk space: `df -h`
+    - Check internet connectivity
+
+
+
+    **Error:** `port is already allocated`
+
+    **Solution:**
+    Check what's using the port:
+    ```bash
+    sudo lsof -i :7100
+    ```
+
+    Either stop the conflicting service or change the port in docker-compose.yml
+
+
+
+## Managing Your Deployment
+
+### Stop Services
+
+```bash
+docker compose stop
+```
+
+### Restart Services
+
+```bash
+docker compose restart
+```
+
+### View Logs
+
+```bash
+docker compose logs -f [service-name]
+```
+
+Examples:
+
+```bash
+docker compose logs -f api-server
+docker compose logs -f lightning-asr
+```
+
+### Update Images
+
+Pull latest images and restart:
+
+```bash
+docker compose pull
+docker compose up -d
+```
+
+### Remove Deployment + +Stop and remove all containers: + +```bash +docker compose down +``` + +Remove containers and volumes (including downloaded models): + +```bash +docker compose down -v +``` + + + Using `-v` flag will delete all data including downloaded models. They will + need to be re-downloaded on next startup. + + +## What's Next? + + + + Customize your deployment with advanced configuration options + + + + Learn about each service component in detail + + + + Debug common issues and optimize performance + + + + Integrate with your applications using the API + + diff --git a/fern/products/waves/pages/v4.0.0/on-prem/docker/stt/services-overview.mdx b/fern/products/waves/pages/v4.0.0/on-prem/docker/stt/services-overview.mdx new file mode 100644 index 0000000..875dd1c --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/on-prem/docker/stt/services-overview.mdx @@ -0,0 +1,546 @@ +--- +title: Services Overview +description: Detailed breakdown of each service component in the STT Docker deployment +--- + +## Architecture + +The Docker deployment consists of four main services that work together: + +```mermaid +graph LR + Client[Client] -->|HTTP/WebSocket| API[API Server :7100] + API -->|gRPC| ASR[Lightning ASR :2233] + API -->|HTTP| License[License Proxy :6699] + ASR -->|HTTP| License + ASR -->|Cache| Redis[Redis :6379] + License -->|HTTPS| External[Smallest License Server] + + style API fill:#07C983 + style ASR fill:#0D9373 + style License fill:#1E90FF + style Redis fill:#DC382D +``` + +## API Server + +The API Server is the main entry point for all client requests. 
+ +### Purpose + +- Routes incoming API requests to Lightning ASR workers +- Manages WebSocket connections for streaming +- Handles request queuing and load balancing +- Provides unified API interface + +### Container Details + + + `quay.io/smallestinc/self-hosted-api-server:latest` + + + + `7100` - Main API endpoint + + + + - CPU: 0.5-2 cores + - Memory: 512 MB - 2 GB + - No GPU required + + +### Key Endpoints + + + + + + + + + + + + + + + + + + + + + + + + + + +
EndpointMethodPurpose
/healthGETHealth check
/v1/listenPOSTSynchronous transcription
/v1/listen/streamWebSocketStreaming transcription
+ +### Environment Variables + +```yaml +LICENSE_KEY: Your license key +LIGHTNING_ASR_BASE_URL: Internal URL to Lightning ASR +API_BASE_URL: Internal URL to License Proxy +``` + +### Logs + +Key log messages: + +``` +✓ Connected to Lightning ASR at http://lightning-asr:2233 +✓ License validation successful +✓ API server listening on port 7100 +``` + +### Dependencies + +- Requires Lightning ASR to be running +- Requires License Proxy for validation +- Optionally uses Redis for request coordination + +## Lightning ASR + +The core speech recognition engine powered by GPU acceleration. + +### Purpose + +- Performs audio-to-text transcription +- Processes both batch and streaming requests +- Manages GPU resources and model inference +- Handles audio preprocessing and postprocessing + +### Container Details + + + `quay.io/smallestinc/lightning-asr:latest` + + + + `2233` - ASR service endpoint + + + + - CPU: 4-8 cores + - Memory: 12-16 GB + - **GPU: 1x NVIDIA GPU (16+ GB VRAM)** + + +### GPU Requirements + +Lightning ASR requires NVIDIA GPU with CUDA support: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
GPU ModelVRAMPerformance
A10040-80 GBExcellent
A1024 GBExcellent
L424 GBVery Good
T416 GBGood
+ +### Environment Variables + +```yaml +MODEL_URL: Download URL for ASR model +LICENSE_KEY: Your license key +REDIS_URL: Redis connection string +PORT: Service port (default 2233) +GPU_DEVICE_ID: GPU to use (for multi-GPU) +``` + +### Model Loading + +On first startup, Lightning ASR: + +1. Downloads model from MODEL_URL (~20 GB) +2. Validates model integrity +3. Loads model into GPU memory +4. Performs warmup inference + + +Use persistent volumes to cache models and avoid re-downloading on container restart. + + +### Logs + +Key log messages: + +``` +✓ GPU detected: NVIDIA A10 (24GB) +✓ Downloading model from URL... +✓ Model loaded successfully (5.2GB) +✓ Warmup completed in 3.2s +✓ Server ready on port 2233 +``` + +### Performance + +Typical performance metrics: + + + + + + + + + + + + + + + + + + + + + + + + + + +
MetricValue
Real-time Factor0.05-0.15x
Cold Start30-60 seconds
Warm Inference50-200ms latency
Throughput100+ hours/hour (A10)
+ +### Dependencies + +- Requires License Proxy for validation +- Requires Redis for request coordination +- Requires NVIDIA GPU + +## License Proxy + +Validates license keys and reports usage to Smallest servers. + +### Purpose + +- Validates license keys on startup +- Reports usage metadata to Smallest +- Provides grace period for offline operation +- Acts as licensing gateway for all services + +### Container Details + + + `quay.io/smallestinc/license-proxy:latest` + + + + `6699` - License validation endpoint (internal) + + + + - CPU: 0.25-1 core + - Memory: 256-512 MB + - No GPU required + + +### Environment Variables + +```yaml +LICENSE_KEY: Your license key +``` + +### Network Requirements + + +License Proxy requires outbound HTTPS access to: +- `api.smallest.ai` on port 443 + +Ensure your firewall allows these connections. + + +### Validation Process + +1. On startup, validates license key with Smallest servers +2. Receives license terms and quotas +3. Caches validation (valid for grace period) +4. Periodically reports usage metadata + +### Usage Reporting + +License Proxy reports only metadata: + + + + + + + + + + + + + + + + + + + + + + + + + + +
Data ReportedExample
Audio duration3600 seconds
Request count150 requests
Features usedstreaming, punctuation
Response codes200, 400, 500
+ + +**No audio or transcript data is transmitted** to Smallest servers. + + +### Offline Mode + +If connection to license server fails: + +- Uses cached validation (24-hour grace period) +- Continues serving requests +- Logs warning messages +- Retries connection periodically + +### Logs + +Key log messages: + +``` +✓ License validated successfully +✓ License valid until: 2024-12-31 +✓ Server listening on port 6699 +⚠ Connection to license server failed, using cached validation +``` + +## Redis + +Provides caching and state management for the system. + +### Purpose + +- Request queuing and coordination +- Session state for streaming connections +- Caching of frequent requests +- Performance optimization + +### Container Details + + + `redis:latest` or `redis:7-alpine` + + + + `6379` - Redis protocol + + + + - CPU: 0.5-1 core + - Memory: 512 MB - 1 GB + - No GPU required + + +### Configuration Options + + + + Default configuration with minimal setup: + ```yaml + redis: + image: redis:latest + ports: + - "6379:6379" + ``` + + + + Enable data persistence: + ```yaml + redis: + image: redis:latest + command: redis-server --appendonly yes + volumes: + - redis-data:/data + ``` + + + + Add password protection: + ```yaml + redis: + image: redis:latest + command: redis-server --requirepass ${REDIS_PASSWORD} + ``` + + + + Use external Redis instance: + ```yaml + environment: + REDIS_URL: redis://external-host:6379 + ``` + Remove Redis service from docker-compose.yml + + + +### Data Stored + +Redis stores: + +- Request queue state +- WebSocket session data +- Temporary audio chunks (streaming) +- Worker status and health + + +Data in Redis is temporary and can be safely cleared. No persistent state is stored. 
+ + +### Health Check + +Built-in health check: + +```yaml +healthcheck: + test: ["CMD", "redis-cli", "ping"] + interval: 5s + timeout: 3s + retries: 5 +``` + +## Service Dependencies + +Startup order and dependencies: + +```mermaid +graph TD + Redis[Redis] --> ASR[Lightning ASR] + License[License Proxy] --> ASR + ASR --> API[API Server] + License --> API + + style Redis fill:#DC382D + style License fill:#1E90FF + style ASR fill:#0D9373 + style API fill:#07C983 +``` + +### Recommended Startup Sequence + +1. **Redis** - Starts immediately (5 seconds) +2. **License Proxy** - Validates license (10-15 seconds) +3. **Lightning ASR** - Downloads/loads model (30-600 seconds) +4. **API Server** - Connects to services (5-10 seconds) + +## Resource Planning + +### Minimum Configuration + +For development/testing: + +```yaml +Total Resources: + CPU: 6 cores + Memory: 16 GB + GPU: 1x T4 (16 GB VRAM) + Storage: 100 GB +``` + +### Production Configuration + +For production workloads: + +```yaml +Total Resources: + CPU: 12 cores + Memory: 32 GB + GPU: 1x A10 (24 GB VRAM) + Storage: 200 GB +``` + +### Multi-Worker Configuration + +For high-volume production: + +```yaml +Total Resources: + CPU: 24 cores + Memory: 64 GB + GPU: 2x A10 (24 GB VRAM each) + Storage: 300 GB +``` + +## Monitoring + +### Container Health + +Check container status: + +```bash +docker compose ps +``` + +### Resource Usage + +Monitor resource consumption: + +```bash +docker stats +``` + +### GPU Usage + +Monitor GPU utilization: + +```bash +watch -n 1 nvidia-smi +``` + +### Logs + +View service logs: + +```bash +docker compose logs -f [service-name] +``` + +## What's Next? 
+ + + + Customize service configuration and resource allocation + + + + Debug issues and optimize performance + + + diff --git a/fern/products/waves/pages/v4.0.0/on-prem/docker/tts/configuration.mdx b/fern/products/waves/pages/v4.0.0/on-prem/docker/tts/configuration.mdx new file mode 100644 index 0000000..056f4b5 --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/on-prem/docker/tts/configuration.mdx @@ -0,0 +1,314 @@ +--- +title: Configuration +description: Advanced configuration options for TTS Docker deployments +--- + +## Overview + +This guide covers advanced configuration options for customizing your TTS Docker deployment. Learn how to optimize resources, configure external services, and tune performance. + +## Environment Variables + +All configuration is managed through environment variables in the `.env` file. + +### Core Configuration + + + Your Smallest.ai license key for validation and usage reporting + + +### API Server Configuration + + + Port for the API server to listen on + + ```bash + API_SERVER_PORT=8080 + ``` + + + + Internal URL for license proxy communication + + + + Internal URL for Lightning TTS communication + + +### Lightning TTS Configuration + + + Port for Lightning TTS to listen on + + ```bash + TTS_PORT=8876 + ``` + + + + Redis connection URL for caching and state management + + For external Redis: + ```bash + REDIS_URL=redis://external-redis.example.com:6379 + ``` + + With password: + ```bash + REDIS_URL=redis://:password@redis:6379 + ``` + + + + GPU device ID to use (for multi-GPU systems) + + ```bash + GPU_DEVICE_ID=0 + ``` + + +## Resource Configuration + +### GPU Allocation + +For systems with multiple GPUs, you can specify which GPU to use: + +```yaml docker-compose.yml +lightning-tts: + deploy: + resources: + reservations: + devices: + - driver: nvidia + device_ids: ['0'] + capabilities: [gpu] +``` + +For multiple GPUs per container: + +```yaml docker-compose.yml +lightning-tts: + deploy: + resources: + reservations: + devices: 
+ - driver: nvidia + count: 2 + capabilities: [gpu] +``` + +### Memory Limits + +Set memory limits to prevent resource exhaustion: + +```yaml docker-compose.yml +services: + lightning-tts: + deploy: + resources: + limits: + memory: 16G + reservations: + memory: 12G +``` + +### CPU Limits + +Control CPU allocation: + +```yaml docker-compose.yml +services: + lightning-tts: + deploy: + resources: + limits: + cpus: '8' + reservations: + cpus: '4' +``` + +## External Services + +### External Redis + +Use an external Redis instance instead of the embedded one: + +```yaml docker-compose.yml +services: + api-server: + environment: + - REDIS_HOST=external-redis.example.com + - REDIS_PORT=6379 + - REDIS_PASSWORD=${REDIS_PASSWORD} + + lightning-tts: + environment: + - REDIS_HOST=external-redis.example.com + - REDIS_PORT=6379 + - REDIS_PASSWORD=${REDIS_PASSWORD} +``` + +Remove the Redis service from docker-compose.yml. + +### Custom Network + +Use a custom Docker network: + +```yaml docker-compose.yml +networks: + custom-network: + driver: bridge + name: my-custom-network + +services: + api-server: + networks: + - custom-network +``` + +## Performance Tuning + +### Voice Configuration + +Configure voice parameters: + +```yaml docker-compose.yml +lightning-tts: + environment: + - DEFAULT_VOICE=default + - VOICE_SPEED=1.0 + - VOICE_PITCH=1.0 +``` + +### Batch Processing + +Optimize for batch processing: + +```yaml docker-compose.yml +lightning-tts: + environment: + - BATCH_SIZE=8 + - MAX_QUEUE_SIZE=100 +``` + +### Model Precision + +Control model precision for performance: + +```yaml docker-compose.yml +lightning-tts: + environment: + - MODEL_PRECISION=fp16 +``` + +Options: `fp32`, `fp16`, `int8` + +## Volume Mounts + +### Persistent Model Cache + +Cache models to avoid re-downloading: + +```yaml docker-compose.yml +services: + lightning-tts: + volumes: + - tts-models:/app/models + +volumes: + tts-models: +``` + +### Log Persistence + +Persist logs for debugging: + +```yaml 
docker-compose.yml +services: + api-server: + volumes: + - ./logs/api-server:/app/logs + + lightning-tts: + volumes: + - ./logs/tts:/app/logs +``` + +## Health Checks + +Add health checks for better monitoring: + +```yaml docker-compose.yml +services: + lightning-tts: + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8876/health"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 60s + + api-server: + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:7100/health"] + interval: 30s + timeout: 10s + retries: 3 +``` + +## Security Configuration + +### Read-Only Root Filesystem + +Enhance security with read-only root filesystem: + +```yaml docker-compose.yml +services: + api-server: + read_only: true + tmpfs: + - /tmp + - /var/tmp +``` + +### Non-Root User + +Run containers as non-root: + +```yaml docker-compose.yml +services: + api-server: + user: "1000:1000" +``` + +## Environment File Example + +Complete `.env` file example: + +```bash .env +LICENSE_KEY=your-license-key-here + +API_SERVER_PORT=7100 +TTS_PORT=8876 + +REDIS_HOST=redis +REDIS_PORT=6379 + +GPU_DEVICE_ID=0 + +DEFAULT_VOICE=default +VOICE_SPEED=1.0 +``` + +## What's Next? 
+ + + + Learn about each TTS service component + + + + Debug configuration issues + + + diff --git a/fern/products/waves/pages/v4.0.0/on-prem/docker/tts/docker-troubleshooting.mdx b/fern/products/waves/pages/v4.0.0/on-prem/docker/tts/docker-troubleshooting.mdx new file mode 100644 index 0000000..431ed26 --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/on-prem/docker/tts/docker-troubleshooting.mdx @@ -0,0 +1,513 @@ +--- +title: Docker Troubleshooting +description: Debug common issues and optimize your TTS Docker deployment +--- + +## Common Issues + +### GPU Not Accessible + +**Symptoms:** +- Error: `could not select device driver "nvidia"` +- Error: `no NVIDIA GPU devices found` +- Lightning TTS fails to start + +**Diagnosis:** + +```bash +docker run --rm --gpus all nvidia/cuda:11.8.0-base-ubuntu22.04 nvidia-smi +``` + + + + ```bash + sudo systemctl restart docker + docker compose up -d + ``` + + + + ```bash + sudo apt-get remove nvidia-container-toolkit + sudo apt-get update + sudo apt-get install -y nvidia-container-toolkit + + sudo systemctl restart docker + ``` + + + + ```bash + nvidia-smi + ``` + + If driver version is below 470, update: + ```bash + sudo ubuntu-drivers autoinstall + sudo reboot + ``` + + + + Verify `/etc/docker/daemon.json` contains: + ```json + { + "runtimes": { + "nvidia": { + "path": "nvidia-container-runtime", + "runtimeArgs": [] + } + } + } + ``` + + Restart Docker after changes: + ```bash + sudo systemctl restart docker + ``` + + + +### License Validation Failed + +**Symptoms:** +- Error: `License validation failed` +- Error: `Invalid license key` +- Services fail to start + +**Diagnosis:** + +Check license-proxy logs: +```bash +docker compose logs license-proxy +``` + + + + Check `.env` file: + ```bash + cat .env | grep LICENSE_KEY + ``` + + Ensure there are no: + - Extra spaces + - Quotes around the key + - Line breaks + + Correct format: + ```bash + LICENSE_KEY=abc123def456 + ``` + + + + Test connection to license server: + 
```bash + curl -v https://api.smallest.ai + ``` + + If this fails, check: + - Firewall rules + - Proxy settings + - DNS resolution + + + + If the key appears correct and network is accessible, your license may be: + - Expired + - Revoked + - Invalid + + Contact **support@smallest.ai** with: + - Your license key + - License-proxy logs + - Error messages + + + +### Model Loading Failed + +**Symptoms:** +- Lightning TTS stuck at startup +- Error: `Failed to load model` +- Container keeps restarting + +**Diagnosis:** + +Check Lightning TTS logs: +```bash +docker compose logs lightning-tts +``` + + + + Verify GPU has enough VRAM: + ```bash + nvidia-smi + ``` + + Lightning TTS requires minimum 16GB VRAM. + + + + Models require space: + ```bash + df -h + ``` + + Free up space if needed: + ```bash + docker system prune -a + ``` + + + + Models may need more time to load: + ```yaml + lightning-tts: + healthcheck: + start_period: 120s + ``` + + + +### Port Already in Use + +**Symptoms:** +- Error: `port is already allocated` +- Error: `bind: address already in use` + +**Diagnosis:** + +Find what's using the port: +```bash +sudo lsof -i :7100 +sudo netstat -tulpn | grep 7100 +``` + + + + If another service is using the port: + ```bash + sudo systemctl stop [service-name] + ``` + + Or kill the process: + ```bash + sudo kill -9 [PID] + ``` + + + + Modify docker-compose.yml to use different port: + ```yaml + api-server: + ports: + - "8080:7100" + ``` + + Access API at http://localhost:8080 instead + + + + Old containers may still be bound: + ```bash + docker compose down + docker container prune -f + docker compose up -d + ``` + + + +### Out of Memory + +**Symptoms:** +- Container killed unexpectedly +- Error: `OOMKilled` +- System becomes unresponsive + +**Diagnosis:** + +Check container status: +```bash +docker compose ps +docker inspect [container-name] | grep OOMKilled +``` + + + + Lightning TTS requires minimum 16 GB RAM + + Check current memory: + ```bash + free -h + ``` + 
+ + + Prevent one service from consuming all memory: + ```yaml + services: + lightning-tts: + deploy: + resources: + limits: + memory: 14G + reservations: + memory: 12G + ``` + + + + Add swap space (temporary solution): + ```bash + sudo fallocate -l 16G /swapfile + sudo chmod 600 /swapfile + sudo mkswap /swapfile + sudo swapon /swapfile + ``` + + + +### Slow Performance + +**Symptoms:** +- High latency (>500ms) +- Low throughput +- GPU underutilized + +**Diagnosis:** + +Monitor GPU usage: +```bash +watch -n 1 nvidia-smi +``` + +Check container resources: +```bash +docker stats +``` + + + + Ensure GPU is not throttling: + ```bash + nvidia-smi -q -d PERFORMANCE + ``` + + Enable persistence mode: + ```bash + sudo nvidia-smi -pm 1 + ``` + + + + ```yaml + lightning-tts: + deploy: + resources: + limits: + cpus: '8' + ``` + + + + Use Redis with persistence disabled for speed: + ```yaml + redis: + command: redis-server --save "" + ``` + + + +## Performance Optimization + +### Best Practices + + + + Reduces GPU initialization time: + ```bash + sudo nvidia-smi -pm 1 + ``` + + + + Allocate appropriate CPU/memory: + ```yaml + deploy: + resources: + limits: + cpus: '8' + memory: 14G + ``` + + + + Use monitoring tools: + ```bash + docker stats + nvidia-smi dmon + ``` + + + +### Benchmark Your Deployment + +Test TTS performance: + +```bash +time curl -X POST http://localhost:7100/v1/speak \ + -H "Authorization: Token ${LICENSE_KEY}" \ + -H "Content-Type: application/json" \ + -d '{ + "text": "This is a test of the text-to-speech service.", + "voice": "default" + }' +``` + +Expected performance: +- **Cold start**: First request after container start (5-10 seconds) +- **Warm requests**: Subsequent requests (100-300ms) +- **Real-time factor**: 0.1-0.3x + +## Debugging Tools + +### View All Logs + +```bash +docker compose logs -f +``` + +### Follow Specific Service + +```bash +docker compose logs -f lightning-tts +``` + +### Last N Lines + +```bash +docker compose logs --tail=100 
api-server +``` + +### Save Logs to File + +```bash +docker compose logs > deployment-logs.txt +``` + +### Execute Commands in Container + +```bash +docker compose exec lightning-tts bash +``` + +### Check Container Configuration + +```bash +docker inspect lightning-tts +``` + +### Network Debugging + +Test connectivity between containers: + +```bash +docker compose exec api-server ping lightning-tts +docker compose exec api-server curl http://lightning-tts:8876/health +``` + +## Health Checks + +### API Server + +```bash +curl http://localhost:7100/health +``` + +Expected: `{"status": "healthy"}` + +### Lightning TTS + +```bash +curl http://localhost:8876/health +``` + +Expected: `{"status": "ready", "gpu": "NVIDIA A10"}` + +### License Proxy + +```bash +docker compose exec license-proxy wget -q -O- http://localhost:3369/health +``` + +Expected: `{"status": "valid"}` + +### Redis + +```bash +docker compose exec redis redis-cli ping +``` + +Expected: `PONG` + +## Getting Help + +### Before Contacting Support + +Collect the following information: + + + + ```bash + docker version + docker compose version + nvidia-smi + uname -a + ``` + + + + ```bash + docker compose ps > status.txt + docker stats --no-stream > resources.txt + ``` + + + + ```bash + docker compose logs > all-logs.txt + ``` + + + + Sanitize and include: + - docker-compose.yml + - .env (remove license key) + + + +### Contact Support + +Email: **support@smallest.ai** + +Include: +- Description of the issue +- Steps to reproduce +- System information +- Logs and configuration +- License key (via secure channel) + +## What's Next? 
+
+
+
+    Advanced configuration options
+
+
+
+    Integrate with your applications using the API
+
+
 diff --git a/fern/products/waves/pages/v4.0.0/on-prem/docker/tts/prerequisites/credentials.mdx b/fern/products/waves/pages/v4.0.0/on-prem/docker/tts/prerequisites/credentials.mdx new file mode 100644 index 0000000..26698df --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/on-prem/docker/tts/prerequisites/credentials.mdx @@ -0,0 +1,51 @@ +---
+title: Credentials & Access
+description: License keys and registry credentials for TTS Docker deployment
+---
+
+## Required Credentials
+
+Before installation, obtain the following from Smallest.ai:
+
+
+
+    Your unique license key for validation
+
+    Contact: **support@smallest.ai**
+
+
+
+    Credentials to pull Docker images:
+    - Registry URL: `quay.io`
+    - Username
+    - Password
+    - Email
+
+    Contact: **support@smallest.ai**
+
+
+
+    Download URLs for TTS models (if required)
+
+    Contact: **support@smallest.ai**
+
+
+
+## Login to Container Registry
+
+Once you have your credentials, authenticate with the registry:
+
+```bash
+docker login quay.io -u <your-username> -p <your-password>
+```
+
+## Environment Variables
+
+You'll need to set these in your deployment:
+
+```bash
+export LICENSE_KEY="your-license-key"
+export QUAY_USERNAME="your-username"
+export QUAY_PASSWORD="your-password"
+```
+
 diff --git a/fern/products/waves/pages/v4.0.0/on-prem/docker/tts/prerequisites/hardware-requirements.mdx b/fern/products/waves/pages/v4.0.0/on-prem/docker/tts/prerequisites/hardware-requirements.mdx new file mode 100644 index 0000000..6c29386 --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/on-prem/docker/tts/prerequisites/hardware-requirements.mdx @@ -0,0 +1,52 @@ +---
+title: Hardware Requirements
+description: Hardware specifications for deploying Text-to-Speech with Docker
+---
+
+## Minimum Specifications
+
+
+
+    **4 cores** minimum
+
+    8+ cores recommended for production
+
+
+
+    **16 GB** minimum
+
+    32+ GB recommended for production
+
+
+
+    **NVIDIA GPU required**
+
+    - 
L40s or A10 (recommended) + - A10, A100, H100, T4, L4 (supported) + - Minimum 16GB VRAM + + + + **100 GB** minimum + + - 50 GB for models + - 50 GB for logs and data + + + +## Network Requirements + +Ensure the following ports are available: + +| Port | Service | Purpose | +|------|---------|---------| +| 7100 | API Server | Client API requests | +| 8876 | Lightning TTS | TTS service endpoint | +| 3369 | License Proxy | Internal license validation | +| 6379 | Redis | Internal caching | + + +The License Proxy requires outbound HTTPS access to Smallest's license servers for validation. Ensure your firewall allows outbound connections to: +- `api.smallest.ai` (port 443) + + diff --git a/fern/products/waves/pages/v4.0.0/on-prem/docker/tts/prerequisites/software-requirements.mdx b/fern/products/waves/pages/v4.0.0/on-prem/docker/tts/prerequisites/software-requirements.mdx new file mode 100644 index 0000000..250c4b0 --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/on-prem/docker/tts/prerequisites/software-requirements.mdx @@ -0,0 +1,180 @@ +--- +title: Software Requirements +description: Software and dependencies for deploying Text-to-Speech with Docker +--- + +## Operating System + + + + ```bash + Ubuntu 20.04 LTS or later + Ubuntu 22.04 LTS (recommended) + Debian 11 or later + ``` + + + + ```bash + CentOS 8 or later + RHEL 8 or later + Rocky Linux 8 or later + ``` + + + + Most modern Linux distributions with kernel 4.15+ + + + +## Required Software + + + + Docker Engine 20.10 or later + + ```bash + docker --version + ``` + + Expected output: `Docker version 20.10.0 or higher` + + + + Docker Compose 2.0 or later + + ```bash + docker compose version + ``` + + Expected output: `Docker Compose version v2.0.0 or higher` + + + + NVIDIA Driver 525+ for newer GPUs (A10, A100, L4) + + NVIDIA Driver 470+ for older GPUs (T4, V100) + + ```bash + nvidia-smi + ``` + + Should display GPU information without errors + + + + Required for GPU access in containers + + ```bash + 
nvidia-container-cli --version + ``` + + + +## Install Docker + + + + ```bash + sudo apt-get update + sudo apt-get install -y ca-certificates curl gnupg + + sudo install -m 0755 -d /etc/apt/keyrings + curl -fsSL https://download.docker.com/linux/ubuntu/gpg | \ + sudo gpg --dearmor -o /etc/apt/keyrings/docker.gpg + sudo chmod a+r /etc/apt/keyrings/docker.gpg + + echo \ + "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] \ + https://download.docker.com/linux/ubuntu \ + $(. /etc/os-release && echo "$VERSION_CODENAME") stable" | \ + sudo tee /etc/apt/sources.list.d/docker.list > /dev/null + + sudo apt-get update + sudo apt-get install -y docker-ce docker-ce-cli containerd.io \ + docker-buildx-plugin docker-compose-plugin + + sudo usermod -aG docker $USER + newgrp docker + ``` + + + + ```bash + sudo yum install -y yum-utils + sudo yum-config-manager --add-repo \ + https://download.docker.com/linux/centos/docker-ce.repo + + sudo yum install -y docker-ce docker-ce-cli containerd.io \ + docker-buildx-plugin docker-compose-plugin + + sudo systemctl start docker + sudo systemctl enable docker + + sudo usermod -aG docker $USER + newgrp docker + ``` + + + +## Install NVIDIA Driver + + + + ```bash + sudo apt-get update + sudo apt-get install -y ubuntu-drivers-common + + sudo ubuntu-drivers autoinstall + + sudo reboot + ``` + + After reboot, verify: + ```bash + nvidia-smi + ``` + + + + ```bash + sudo yum install -y kernel-devel kernel-headers gcc make + + distribution=$(. /etc/os-release;echo $ID$VERSION_ID | sed -e 's/\.//g') + + sudo yum-config-manager --add-repo \ + http://developer.download.nvidia.com/compute/cuda/repos/$distribution/x86_64/cuda-$distribution.repo + + sudo yum clean all + sudo yum -y install nvidia-driver-latest-dkms + + sudo reboot + ``` + + + +## Install NVIDIA Container Toolkit + +```bash +distribution=$(. 
/etc/os-release;echo $ID$VERSION_ID) +curl -s -L https://nvidia.github.io/libnvidia-container/gpgkey | \ + sudo apt-key add - +curl -s -L https://nvidia.github.io/libnvidia-container/$distribution/libnvidia-container.list | \ + sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list + +sudo apt-get update +sudo apt-get install -y nvidia-container-toolkit + +sudo systemctl restart docker +``` + +## Verify GPU Access + +Test that Docker can access the GPU: + +```bash +docker run --rm --gpus all nvidia/cuda:11.8.0-base-ubuntu22.04 nvidia-smi +``` + +You should see your GPU information displayed. + diff --git a/fern/products/waves/pages/v4.0.0/on-prem/docker/tts/prerequisites/verification.mdx b/fern/products/waves/pages/v4.0.0/on-prem/docker/tts/prerequisites/verification.mdx new file mode 100644 index 0000000..da2665c --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/on-prem/docker/tts/prerequisites/verification.mdx @@ -0,0 +1,94 @@ +--- +title: Verification Checklist +description: Verify all prerequisites before deploying TTS with Docker +--- + +## Pre-Deployment Checklist + +Before proceeding to installation, verify each item: + + + + ```bash + docker ps + ``` + Should execute without errors + + + + ```bash + docker compose version + ``` + Should show version 2.0 or higher + + + + ```bash + docker run --rm --gpus all nvidia/cuda:11.8.0-base-ubuntu22.04 nvidia-smi + ``` + Should display GPU information + + + + ```bash + docker login quay.io + ``` + Should show "Login Succeeded" + + + + - [ ] License key obtained + - [ ] Container registry username and password + - [ ] Model download URLs (if required) + + + + ```bash + sudo netstat -tuln | grep -E '(7100|8876|3369|6379)' + ``` + Should return no results (ports free) + + + + ```bash + df -h / + ``` + Should show at least 100 GB available + + + +## Quick Verification Script + +Run this script to check all prerequisites at once: + +```bash +#!/bin/bash +echo "=== Docker TTS Prerequisites Check ===" + +echo -n 
"Docker: " +docker --version &>/dev/null && echo "OK" || echo "MISSING" + +echo -n "Docker Compose: " +docker compose version &>/dev/null && echo "OK" || echo "MISSING" + +echo -n "NVIDIA Driver: " +nvidia-smi &>/dev/null && echo "OK" || echo "MISSING" + +echo -n "NVIDIA Container Toolkit: " +docker run --rm --gpus all nvidia/cuda:11.8.0-base-ubuntu22.04 nvidia-smi &>/dev/null && echo "OK" || echo "MISSING" + +echo -n "Port 7100 (API Server): " +netstat -tuln 2>/dev/null | grep -q ':7100 ' && echo "IN USE" || echo "FREE" + +echo -n "Port 8876 (Lightning TTS): " +netstat -tuln 2>/dev/null | grep -q ':8876 ' && echo "IN USE" || echo "FREE" + +echo -n "Port 3369 (License Proxy): " +netstat -tuln 2>/dev/null | grep -q ':3369 ' && echo "IN USE" || echo "FREE" + +echo -n "Port 6379 (Redis): " +netstat -tuln 2>/dev/null | grep -q ':6379 ' && echo "IN USE" || echo "FREE" + +echo "=== Check Complete ===" +``` + diff --git a/fern/products/waves/pages/v4.0.0/on-prem/docker/tts/quick-start.mdx b/fern/products/waves/pages/v4.0.0/on-prem/docker/tts/quick-start.mdx new file mode 100644 index 0000000..1d3bdb1 --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/on-prem/docker/tts/quick-start.mdx @@ -0,0 +1,344 @@ +--- +title: Quick Start +description: Deploy Smallest Self-Host Text-to-Speech with Docker Compose in under 15 minutes +--- + +## Overview + +This guide walks you through deploying Smallest Self-Host Text-to-Speech (TTS) using Docker Compose. You'll have a fully functional text-to-speech service running in under 15 minutes. + + +Ensure you've completed all [prerequisites](/waves/self-host/docker-setup/tts-deployment/prerequisites/hardware-requirements) before starting this guide. 
+ + +## Step 1: Create Project Directory + +Create a directory for your deployment: + +```bash +mkdir -p ~/smallest-tts +cd ~/smallest-tts +``` + +## Step 2: Login to Container Registry + +Authenticate with the Smallest container registry using credentials provided by support: + +```bash +docker login quay.io +``` + +Enter your username and password when prompted. + + +Save your credentials securely. You'll need them if you restart or redeploy the containers. + + +## Step 3: Create Environment File + +Create a `.env` file with your license key: + +```bash +cat > .env << 'EOF' +LICENSE_KEY=your-license-key-here +EOF +``` + +Replace `your-license-key-here` with the actual license key provided by Smallest.ai. + + +Never commit your `.env` file to version control. Add it to `.gitignore` if using git. + + +## Step 4: Create Docker Compose File + +Create a `docker-compose.yml` file for TTS deployment: + +```yaml docker-compose.yml +version: "3.8" + +services: + lightning-tts: + image: quay.io/smallestinc/lightning-tts:latest + ports: + - "8876:8876" + environment: + - LICENSE_KEY=${LICENSE_KEY} + - REDIS_HOST=redis + - REDIS_PORT=6379 + - PORT=8876 + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: 1 + capabilities: [gpu] + restart: unless-stopped + networks: + - smallest-network + + api-server: + image: quay.io/smallestinc/self-hosted-api-server:latest + container_name: api-server + environment: + - LICENSE_KEY=${LICENSE_KEY} + - LIGHTNING_TTS_BASE_URL=http://lightning-tts:8876 + - API_BASE_URL=http://license-proxy:3369 + - REDIS_HOST=redis + - REDIS_PORT=6379 + ports: + - "7100:7100" + networks: + - smallest-network + restart: unless-stopped + depends_on: + - lightning-tts + - license-proxy + + license-proxy: + image: quay.io/smallestinc/license-proxy:latest + container_name: license-proxy + environment: + - LICENSE_KEY=${LICENSE_KEY} + - PORT=3369 + networks: + - smallest-network + restart: unless-stopped + + redis: + image: redis:7-alpine + 
container_name: redis-server + ports: + - "6379:6379" + networks: + - smallest-network + restart: unless-stopped + command: redis-server --appendonly yes + healthcheck: + test: ["CMD", "redis-cli", "ping"] + interval: 5s + timeout: 3s + retries: 5 + +networks: + smallest-network: + driver: bridge + name: smallest-network +``` + +## Step 5: Start Services + +Launch all services with Docker Compose: + +```bash +docker compose up -d +``` + + + + First startup will take 3-5 minutes as the system: + 1. Pulls container images (~15-25 GB, includes TTS models) + 2. Initializes GPU and loads models + + Models are embedded in the container - no separate download needed. + + + + After the first run, startup takes 30-60 seconds as images are cached. + + + +## Step 6: Monitor Startup + +Watch the logs to monitor startup progress: + +```bash +docker compose logs -f +``` + +Look for these success indicators: + + + + ``` + redis-server | Ready to accept connections + ``` + + + + ``` + license-proxy | License validated successfully + license-proxy | Server listening on port 3369 + ``` + + + + ``` + lightning-tts | Model loaded successfully + lightning-tts | Server ready on port 8876 + ``` + + + + ``` + api-server | Connected to Lightning TTS + api-server | API server listening on port 7100 + ``` + + + +Press `Ctrl+C` to stop following logs. 
+ +## Step 7: Verify Installation + +Check that all containers are running: + +```bash +docker compose ps +``` + +Expected output: + +``` +NAME IMAGE STATUS +api-server quay.io/smallestinc/self-hosted-api-server Up +license-proxy quay.io/smallestinc/license-proxy Up +lightning-tts quay.io/smallestinc/lightning-tts Up +redis-server redis:7-alpine Up (healthy) +``` + +## Step 8: Test API + +Test the API with a sample request: + +```bash +curl -X POST http://localhost:7100/v1/speak \ + -H "Authorization: Token ${LICENSE_KEY}" \ + -H "Content-Type: application/json" \ + -d '{ + "text": "Hello, this is a test of the text-to-speech service.", + "voice": "default" + }' +``` + + +Or use the health check endpoint first: + +```bash +curl http://localhost:7100/health +``` + +Expected response: `{"status": "healthy"}` + + +## Common Startup Issues + + + + **Error:** `could not select device driver "nvidia"` + + **Solution:** + ```bash + sudo systemctl restart docker + docker run --rm --gpus all nvidia/cuda:11.8.0-base-ubuntu22.04 nvidia-smi + ``` + + If this fails, reinstall NVIDIA Container Toolkit. 
+ + + + **Error:** `License validation failed` + + **Solution:** + - Verify LICENSE_KEY in `.env` is correct + - Check internet connectivity + - Ensure firewall allows HTTPS to api.smallest.ai + + + + **Error:** `port is already allocated` + + **Solution:** + Check what's using the port: + ```bash + sudo lsof -i :7100 + ``` + + Either stop the conflicting service or change the port in docker-compose.yml + + + +## Managing Your Deployment + +### Stop Services + +```bash +docker compose stop +``` + +### Restart Services + +```bash +docker compose restart +``` + +### View Logs + +```bash +docker compose logs -f [service-name] +``` + +Examples: +```bash +docker compose logs -f api-server +docker compose logs -f lightning-tts +``` + +### Update Images + +Pull latest images and restart: + +```bash +docker compose pull +docker compose up -d +``` + +### Remove Deployment + +Stop and remove all containers: + +```bash +docker compose down +``` + +Remove containers and volumes: + +```bash +docker compose down -v +``` + + +Using `-v` flag will delete all data. Models will need to be re-downloaded on next startup. + + +## What's Next? 
+ + + + Customize your TTS deployment with advanced configuration options + + + + Learn about each TTS service component in detail + + + + Debug common issues and optimize performance + + + + Integrate with your applications using the API + + + diff --git a/fern/products/waves/pages/v4.0.0/on-prem/docker/tts/services-overview.mdx b/fern/products/waves/pages/v4.0.0/on-prem/docker/tts/services-overview.mdx new file mode 100644 index 0000000..6861ce0 --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/on-prem/docker/tts/services-overview.mdx @@ -0,0 +1,532 @@ +--- +title: Services Overview +description: Detailed breakdown of each service component in the TTS Docker deployment +--- + +## Architecture + +The TTS Docker deployment consists of four main services that work together: + +```mermaid +graph LR + Client[Client] -->|HTTP/WebSocket| API[API Server :7100] + API -->|HTTP| TTS[Lightning TTS :8876] + API -->|HTTP| License[License Proxy :3369] + TTS -->|HTTP| License + TTS -->|Cache| Redis[Redis :6379] + License -->|HTTPS| External[Smallest License Server] + + style API fill:#07C983 + style TTS fill:#0D9373 + style License fill:#1E90FF + style Redis fill:#DC382D +``` + +## API Server + +The API Server is the main entry point for all client requests. + +### Purpose + +- Routes incoming API requests to Lightning TTS workers +- Manages WebSocket connections for streaming +- Handles request queuing and load balancing +- Provides unified API interface + +### Container Details + + + `quay.io/smallestinc/self-hosted-api-server:latest` + + + + `7100` - Main API endpoint + + + + - CPU: 0.5-2 cores + - Memory: 512 MB - 2 GB + - No GPU required + + +### Key Endpoints + + + + + + + + + + + + + + + + + + + + + + + + + + +
EndpointMethodPurpose
/healthGETHealth check
/v1/speakPOSTSynchronous text-to-speech
/v1/speak/streamWebSocketStreaming text-to-speech
+ +### Environment Variables + +```yaml +LICENSE_KEY: Your license key +LIGHTNING_TTS_BASE_URL: Internal URL to Lightning TTS +API_BASE_URL: Internal URL to License Proxy +``` + +### Logs + +Key log messages: + +``` +✓ Connected to Lightning TTS at http://lightning-tts:8876 +✓ License validation successful +✓ API server listening on port 7100 +``` + +### Dependencies + +- Requires Lightning TTS to be running +- Requires License Proxy for validation +- Optionally uses Redis for request coordination + +## Lightning TTS + +The core text-to-speech engine powered by GPU acceleration. + +### Purpose + +- Converts text to high-quality speech audio +- Processes both batch and streaming requests +- Manages GPU resources and model inference +- Handles voice synthesis and audio generation + +### Container Details + + + `quay.io/smallestinc/lightning-tts:latest` + + + + `8876` - TTS service endpoint + + + + - CPU: 4-8 cores + - Memory: 12-16 GB + - **GPU: 1x NVIDIA GPU (16+ GB VRAM)** + + +### GPU Requirements + +Lightning TTS requires NVIDIA GPU with CUDA support: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
GPU ModelVRAMPerformance
A10040-80 GBExcellent
A1024 GBExcellent
L424 GBVery Good
T416 GBGood
+ +### Environment Variables + +```yaml +LICENSE_KEY: Your license key +REDIS_URL: Redis connection string +PORT: Service port (default 8876) +GPU_DEVICE_ID: GPU to use (for multi-GPU) +``` + +### Model Loading + +On first startup, Lightning TTS: + +1. Loads TTS models from container (embedded) +2. Validates model integrity +3. Loads model into GPU memory +4. Performs warmup inference + + +Models are embedded in the container - no separate download needed. + + +### Logs + +Key log messages: + +``` +✓ GPU detected: NVIDIA A10 (24GB) +✓ Model loaded successfully +✓ Warmup completed in 3.2s +✓ Server ready on port 8876 +``` + +### Performance + +Typical performance metrics: + + + + + + + + + + + + + + + + + + + + + + + + + + +
MetricValue
Real-time Factor0.1-0.3x
Cold Start30-60 seconds
Warm Inference100-300ms latency
Throughput50+ hours/hour (A10)
+ +### Dependencies + +- Requires License Proxy for validation +- Requires Redis for request coordination +- Requires NVIDIA GPU + +## License Proxy + +Validates license keys and reports usage to Smallest servers. + +### Purpose + +- Validates license keys on startup +- Reports usage metadata to Smallest +- Provides grace period for offline operation +- Acts as licensing gateway for all services + +### Container Details + + + `quay.io/smallestinc/license-proxy:latest` + + + + `3369` - License validation endpoint (internal) + + + + - CPU: 0.25-1 core + - Memory: 256-512 MB + - No GPU required + + +### Environment Variables + +```yaml +LICENSE_KEY: Your license key +``` + +### Network Requirements + + +License Proxy requires outbound HTTPS access to: +- `api.smallest.ai` on port 443 + +Ensure your firewall allows these connections. + + +### Validation Process + +1. On startup, validates license key with Smallest servers +2. Receives license terms and quotas +3. Caches validation (valid for grace period) +4. Periodically reports usage metadata + +### Usage Reporting + +License Proxy reports only metadata: + + + + + + + + + + + + + + + + + + + + + + + + + + +
Data ReportedExample
Audio duration3600 seconds
Request count150 requests
Features usedstreaming, voice selection
Response codes200, 400, 500
+ + +**No audio or transcript data is transmitted** to Smallest servers. + + +### Offline Mode + +If connection to license server fails: + +- Uses cached validation (24-hour grace period) +- Continues serving requests +- Logs warning messages +- Retries connection periodically + +### Logs + +Key log messages: + +``` +✓ License validated successfully +✓ License valid until: 2024-12-31 +✓ Server listening on port 3369 +⚠ Connection to license server failed, using cached validation +``` + +## Redis + +Provides caching and state management for the system. + +### Purpose + +- Request queuing and coordination +- Session state for streaming connections +- Caching of frequent requests +- Performance optimization + +### Container Details + + + `redis:latest` or `redis:7-alpine` + + + + `6379` - Redis protocol + + + + - CPU: 0.5-1 core + - Memory: 512 MB - 1 GB + - No GPU required + + +### Configuration Options + + + + Default configuration with minimal setup: + ```yaml + redis: + image: redis:latest + ports: + - "6379:6379" + ``` + + + + Enable data persistence: + ```yaml + redis: + image: redis:latest + command: redis-server --appendonly yes + volumes: + - redis-data:/data + ``` + + + + Add password protection: + ```yaml + redis: + image: redis:latest + command: redis-server --requirepass ${REDIS_PASSWORD} + ``` + + + + Use external Redis instance: + ```yaml + environment: + REDIS_URL: redis://external-host:6379 + ``` + Remove Redis service from docker-compose.yml + + + +### Data Stored + +Redis stores: + +- Request queue state +- WebSocket session data +- Temporary audio chunks (streaming) +- Worker status and health + + +Data in Redis is temporary and can be safely cleared. No persistent state is stored. 
+ + +### Health Check + +Built-in health check: + +```yaml +healthcheck: + test: ["CMD", "redis-cli", "ping"] + interval: 5s + timeout: 3s + retries: 5 +``` + +## Service Dependencies + +Startup order and dependencies: + +```mermaid +graph TD + Redis[Redis] --> TTS[Lightning TTS] + License[License Proxy] --> TTS + TTS --> API[API Server] + License --> API + + style Redis fill:#DC382D + style License fill:#1E90FF + style TTS fill:#0D9373 + style API fill:#07C983 +``` + +### Recommended Startup Sequence + +1. **Redis** - Starts immediately (5 seconds) +2. **License Proxy** - Validates license (10-15 seconds) +3. **Lightning TTS** - Loads models (30-60 seconds) +4. **API Server** - Connects to services (5-10 seconds) + +## Resource Planning + +### Minimum Configuration + +For development/testing: + +```yaml +Total Resources: + CPU: 6 cores + Memory: 16 GB + GPU: 1x T4 (16 GB VRAM) + Storage: 100 GB +``` + +### Production Configuration + +For production workloads: + +```yaml +Total Resources: + CPU: 12 cores + Memory: 32 GB + GPU: 1x A10 (24 GB VRAM) + Storage: 200 GB +``` + +## Monitoring + +### Container Health + +Check container status: + +```bash +docker compose ps +``` + +### Resource Usage + +Monitor resource consumption: + +```bash +docker stats +``` + +### GPU Usage + +Monitor GPU utilization: + +```bash +watch -n 1 nvidia-smi +``` + +### Logs + +View service logs: + +```bash +docker compose logs -f [service-name] +``` + +## What's Next? 
+ + + + Customize service configuration and resource allocation + + + + Debug issues and optimize performance + + + diff --git a/fern/products/waves/pages/v4.0.0/on-prem/getting-started/architecture.mdx b/fern/products/waves/pages/v4.0.0/on-prem/getting-started/architecture.mdx new file mode 100644 index 0000000..7dfd9df --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/on-prem/getting-started/architecture.mdx @@ -0,0 +1,86 @@ +--- +title: Architecture Overview +description: Understanding the components and architecture of Smallest Self-Host deployments +--- + +## System Architecture + +```mermaid +graph TB + Client[Client Applications] -->|HTTP/WebSocket| API[API Server] + API -->|STT Requests| ASR[Lightning ASR] + API -->|TTS Requests| TTS[Lightning TTS] + API -->|Validate License| LP[License Proxy] + LP -->|Report Usage| LS[Smallest License Server] + + subgraph YourInfrastructure[Your Infrastructure] + API + ASR + TTS + LP + end + + subgraph SmallestCloud[Smallest Cloud] + LS + end + + style ASR fill:#0D9373 + style TTS fill:#0D9373 + style API fill:#07C983 + style LP fill:#1E90FF + style LS fill:#FF6B6B +``` + +## Components + + + + Routes requests to Lightning ASR/TTS workers, manages WebSocket connections, and provides a unified REST API interface. + + **Resources:** 0.5-2 CPU cores, 512 MB - 2 GB RAM, no GPU + + + + GPU-accelerated speech-to-text engine with 0.05-0.15x real-time factor. Supports real-time and batch transcription. + + **Resources:** 4-8 CPU cores, 12-16 GB RAM, 1x NVIDIA GPU (16+ GB VRAM) + + + + GPU-accelerated text-to-speech engine for natural voice synthesis. Supports streaming and batch generation. + + **Resources:** 4-8 CPU cores, 12-16 GB RAM, 1x NVIDIA GPU (16+ GB VRAM) + + + + Validates license keys and reports usage metadata. Supports offline grace periods. + + **Resources:** 0.25-1 CPU core, 256-512 MB RAM, no GPU + + + + Request queuing, session state, and caching. Can use embedded or external (ElastiCache). 
+ + **Resources:** 0.5-1 CPU core, 512 MB - 2 GB RAM, no GPU + + + +## Data Flow + +1. **Client Request** — Your application sends audio (STT) or text (TTS) via HTTP or WebSocket +2. **API Server** — Routes the request to the appropriate worker and validates the license +3. **Worker Processing** — Lightning ASR or TTS processes the request on GPU +4. **Response** — Results stream back through the API server to your application + +All processing happens within your infrastructure. Only license validation metadata is sent to Smallest Cloud. + +## What's Next? + + + + License key, credentials, and infrastructure requirements + + + Benefits of self-hosting for your use case + + diff --git a/fern/products/waves/pages/v4.0.0/on-prem/getting-started/introduction.mdx b/fern/products/waves/pages/v4.0.0/on-prem/getting-started/introduction.mdx new file mode 100644 index 0000000..bce6352 --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/on-prem/getting-started/introduction.mdx @@ -0,0 +1,43 @@ +--- +title: Introduction +description: Deploy high-performance speech-to-text and text-to-speech models in your own infrastructure +--- + +Smallest Self-Host enables you to get the same powerful TTS and STT capabilities as our cloud service while keeping your data under complete control. + +## Deployment Options + + + + Deploy speech-to-text with Docker. Best for development, testing, and small-scale production. + + + Deploy text-to-speech with Docker. Quick setup for voice synthesis workloads. + + + Production-grade STT with autoscaling and high availability on Kubernetes. + + + + + Kubernetes deployment is currently available for **STT only**. TTS Kubernetes support is coming soon. 
+ + +## Resources + + + + System components and data flow + + + Benefits of self-hosting + + + Requirements and credentials + + + +## Support + +- **Email**: [support@smallest.ai](mailto:support@smallest.ai) +- **Discord**: [Join our community](https://discord.gg/5evETqguJs) diff --git a/fern/products/waves/pages/v4.0.0/on-prem/getting-started/prerequisites.mdx b/fern/products/waves/pages/v4.0.0/on-prem/getting-started/prerequisites.mdx new file mode 100644 index 0000000..1a69168 --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/on-prem/getting-started/prerequisites.mdx @@ -0,0 +1,176 @@ +--- +title: Prerequisites +description: What you need before deploying Smallest Self-Host +--- + +## Overview + +Before deploying Smallest Self-Host, you'll need credentials from Smallest.ai and infrastructure with GPU support. + +## Credentials from Smallest.ai + +Contact **support@smallest.ai** to obtain the following: + + + + Your unique license key for validation. This is required for all deployments. + + You'll add this to your configuration: + ```yaml + global: + licenseKey: "your-license-key-here" + ``` + + Or as an environment variable: + ```bash + LICENSE_KEY=your-license-key-here + ``` + + + + Credentials to pull Docker images from `quay.io`: + - **Username** + - **Password** + - **Email** + + Login to the registry: + ```bash + docker login quay.io + ``` + + For Kubernetes, you'll add these to your `values.yaml`: + ```yaml + global: + imageCredentials: + create: true + registry: quay.io + username: "your-username" + password: "your-password" + email: "your-email@example.com" + ``` + + + + Download URLs for the AI models (STT and/or TTS). 
+ + For Docker deployments, add to your `.env`: + ```bash + MODEL_URL=your-model-url-here + ``` + + For Kubernetes, add to `values.yaml`: + ```yaml + models: + asrModelUrl: "your-asr-model-url" + ttsModelUrl: "your-tts-model-url" + ``` + + + +## Infrastructure Requirements + + + + - **NVIDIA GPU** with 16+ GB VRAM + - Recommended: A10, L4, L40s, T4, or A100 + - NVIDIA Driver 525+ (for A10, A100, L4) + - NVIDIA Driver 470+ (for T4, V100) + + + + - Docker 20.10+ or Podman 4.0+ + - NVIDIA Container Toolkit + - For Kubernetes: GPU Operator or Device Plugin + + + +### Minimum Resources + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ComponentCPUMemoryGPUStorage
Lightning ASR4-8 cores12-16 GB1x NVIDIA (16+ GB VRAM)50+ GB
Lightning TTS4-8 cores12-16 GB1x NVIDIA (16+ GB VRAM)20+ GB
API Server0.5-2 cores512 MB - 2 GBNone1 GB
License Proxy0.25-1 core256-512 MBNone100 MB
Redis0.5-1 core512 MB - 2 GBNone1 GB
+ +## Network Requirements + +The License Proxy requires outbound HTTPS access to validate licenses: + + + + + + + + + + + + + + + + +
EndpointPortPurpose
api.smallest.ai443License validation and usage reporting
+ + +Ensure your firewall and network policies allow outbound HTTPS traffic to `api.smallest.ai`. + + +## Next Steps + +Choose your deployment method and follow the specific prerequisites: + + + + Setup requirements for Docker deployments including NVIDIA Container Toolkit installation. + + + + Cluster requirements, GPU node setup, and Helm configuration for Kubernetes deployments. + + + diff --git a/fern/products/waves/pages/v4.0.0/on-prem/getting-started/why-self-host.mdx b/fern/products/waves/pages/v4.0.0/on-prem/getting-started/why-self-host.mdx new file mode 100644 index 0000000..862d978 --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/on-prem/getting-started/why-self-host.mdx @@ -0,0 +1,181 @@ +--- +title: Why Self-Host? +description: Understand when self-hosting our models makes sense for your organization +--- + +## Overview + +Using Smallest as a managed service has many benefits: it's fast to start developing with, requires no infrastructure setup, and eliminates all hardware, installation, configuration, backup, and maintenance-related costs. However, there are situations where a self-hosted deployment makes more sense. + +## Performance Requirements + +Certain use cases have very sensitive latency and load requirements. If you need ultra-low latency with voice AI services colocated with your other services, self-hosting can meet these requirements. 
+ + + + - **Real-time AI voicebots** requiring sub-100ms response times + - **Live transcription systems** for broadcasts or conferences + - **High-volume processing** with predictable costs + - **Edge deployments** with limited internet connectivity + + + + - Colocate speech services with your application infrastructure + - Scale independently based on your specific workload patterns + - Zero network latency to external APIs + - Consistent performance regardless of internet conditions + + + +### Zero Network Latency + +When you self-host, your speech services run within your own infrastructure—whether that's the same data center, VPC, or even the same machine as your application. This eliminates the round-trip time to external APIs entirely. + + + + + + + + + + + + + + + + + + + + + + + + + + +
ScenarioNetwork Latency
Self-hosted1-5ms
Same region20-50ms
Cross-region100-200ms
Edge/on-premises200-500ms+
+ +For real-time voice applications like AI agents, every millisecond matters. Self-hosting keeps your latency predictable and minimal, regardless of where your users are located or the state of the public internet. + +### Security & Data Privacy + +One of the most common use cases for self-hosting Smallest is to satisfy security or data privacy requirements. In a typical self-hosted deployment, no audio, transcripts, or other identifying markers of the request content are sent to Smallest servers. + + + + - **Healthcare applications** requiring HIPAA compliance + - **Financial services** with strict data governance + - **Government and defense** applications + - **Enterprise environments** with air-gapped networks + + + + - Your audio data never leaves your infrastructure + - Transcripts remain entirely within your control + - No data stored beyond the duration of the API request + - Self-hosted deployments do not persist request/response data + + + +### What Data is Reported? + + +In a typical self-hosted deployment, no audio or transcript data is sent to Smallest servers. Only usage metadata is reported to the license server for validation and billing purposes. + + +**Metadata reported:** +- Audio duration and character count +- Features requested (diarization, timestamps, etc.) +- Success/error response codes + +**Never reported:** +- Audio content +- Transcripts or synthesis output +- Personally identifiable information + +### Cost Optimization + +For high-volume or predictable workloads, self-hosting can be more cost-effective than per-request API pricing. + + + + + + + + + + + + + + + + + + + + + + +
| Benefit | Description |
| --- | --- |
| Predictable costs | Infrastructure-based pricing, not usage-based |
| Efficient utilization | Predictable autoscaling maximizes resource efficiency |
| Long-term savings | Significant cost reduction for sustained high volumes |
+ +### Reliability & Grace Periods + +Self-hosted deployments include built-in resilience against unforeseen network errors and temporary outages. The deployment won't suddenly stop working due to a transient network issue or external service disruption. + +This means: +- **Continuous operation** during network interruptions or license server maintenance +- **Protection against unforeseen errors** — your services keep running while issues are resolved +- **Time to recover** — grace periods provide a buffer to restore connectivity without impacting your users + + +The License Proxy supports **grace periods** that allow your deployment to continue operating even if connectivity to the Smallest license server is temporarily lost. + + +## Customization & Control + +Self-hosting provides complete control over your deployment: + + + + Optimize compute resources for your specific workload patterns. Allocate more GPU power during peak hours and scale down during off-peak times. + + + + Upgrade on your schedule. Test new versions in staging before production rollout. Roll back instantly if needed. + + + + Deploy in private networks, VPCs, or air-gapped environments. Full control over ingress and egress traffic. + + + + Direct integration with your monitoring, logging, and alerting infrastructure. Custom Prometheus metrics, Grafana dashboards, and alerting rules. + + + +## When to Use Managed Service Instead + +Self-hosting isn't always the right choice. Consider the managed Smallest API if: + +- You're building a prototype or MVP +- Your audio processing volume is low or unpredictable +- You don't have DevOps resources to manage infrastructure +- You need to get started quickly without infrastructure setup + +## Ready to Self-Host? 
+ + + + Return to the introduction for deployment options + + + + Deploy in 15 minutes with Docker + + + diff --git a/fern/products/waves/pages/v4.0.0/on-prem/kubernetes/autoscaling/cluster-autoscaler.mdx b/fern/products/waves/pages/v4.0.0/on-prem/kubernetes/autoscaling/cluster-autoscaler.mdx new file mode 100644 index 0000000..c063c6a --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/on-prem/kubernetes/autoscaling/cluster-autoscaler.mdx @@ -0,0 +1,548 @@ +--- +title: Cluster Autoscaler +description: Automatically scale EKS cluster nodes based on pod resource requirements +--- + +## Overview + +The Cluster Autoscaler automatically adjusts the number of nodes in your EKS cluster based on pending pods and resource utilization. When combined with HPA, it provides end-to-end autoscaling from application load to infrastructure capacity. + +## How It Works + +```mermaid +graph TD + HPA[HPA] -->|Scales Pods| Deployment[Deployment] + Deployment -->|Creates| Pods[New Pods] + Pods -->|Status: Pending| CA[Cluster Autoscaler] + CA -->|Checks| ASG[Auto Scaling Group] + CA -->|Adds Nodes| ASG + ASG -->|Provisions| Nodes[EC2 Instances] + Nodes -->|Registers| K8s[Kubernetes] + K8s -->|Schedules| Pods + + style CA fill:#0D9373 + style HPA fill:#07C983 +``` + +**Flow**: +1. HPA scales pods based on metrics +2. New pods enter "Pending" state (insufficient resources) +3. Cluster Autoscaler detects pending pods +4. Adds nodes to Auto Scaling Group +5. Pods scheduled on new nodes +6. 
After scale-down period, removes underutilized nodes + +## Prerequisites + + + + Create IAM role with autoscaling permissions (see [IAM & IRSA](/waves/self-host/kubernetes-setup/aws/iam-irsa)) + + + + Ensure node groups have proper tags: + + ``` + k8s.io/cluster-autoscaler/: owned + k8s.io/cluster-autoscaler/enabled: true + ``` + + + + IRSA-enabled service account for Cluster Autoscaler + + + +## Installation + +### Using Helm Chart + +The Smallest Self-Host chart includes Cluster Autoscaler as a dependency: + +```yaml values.yaml +cluster-autoscaler: + enabled: true + rbac: + serviceAccount: + name: cluster-autoscaler + annotations: + eks.amazonaws.com/role-arn: arn:aws:iam::YOUR_ACCOUNT_ID:role/cluster-autoscaler-role + autoDiscovery: + clusterName: smallest-cluster + awsRegion: us-east-1 + + extraArgs: + balance-similar-node-groups: true + skip-nodes-with-system-pods: false + scale-down-delay-after-add: 5m + scale-down-unneeded-time: 10m +``` + +Deploy: + +```bash +helm upgrade --install smallest-self-host smallest-self-host/smallest-self-host \ + -f values.yaml \ + --namespace smallest +``` + +### Standalone Installation + +Install Cluster Autoscaler separately: + +```bash +helm repo add autoscaler https://kubernetes.github.io/autoscaler +helm repo update + +helm install cluster-autoscaler autoscaler/cluster-autoscaler \ + --namespace kube-system \ + --set autoDiscovery.clusterName=smallest-cluster \ + --set awsRegion=us-east-1 \ + --set rbac.serviceAccount.annotations."eks\.amazonaws\.com/role-arn"=arn:aws:iam::ACCOUNT_ID:role/cluster-autoscaler-role +``` + +## Configuration + +### Auto-Discovery + +Auto-discover Auto Scaling Groups by cluster name: + +```yaml +autoDiscovery: + clusterName: smallest-cluster + tags: + - k8s.io/cluster-autoscaler/enabled + - k8s.io/cluster-autoscaler/smallest-cluster +``` + +### Manual Configuration + +Explicitly specify Auto Scaling Groups: + +```yaml +autoscalingGroups: + - name: eks-cpu-nodes + minSize: 1 + maxSize: 10 + - 
name: eks-gpu-nodes + minSize: 0 + maxSize: 20 +``` + +### Scale-Down Configuration + +Control when and how nodes are removed: + +```yaml +extraArgs: + scale-down-enabled: true + scale-down-delay-after-add: 10m + scale-down-unneeded-time: 10m + scale-down-utilization-threshold: 0.5 + max-graceful-termination-sec: 600 +``` + +**Parameters**: +- `scale-down-delay-after-add`: Wait time after adding node before considering scale-down +- `scale-down-unneeded-time`: How long node must be underutilized before removal +- `scale-down-utilization-threshold`: CPU/memory threshold (0.5 = 50%) +- `max-graceful-termination-sec`: Max time for pod eviction + +### Node Group Priorities + +Scale specific node groups first: + +```yaml +extraArgs: + expander: priority + +priorityConfigMapAnnotations: + cluster-autoscaler.kubernetes.io/expander-priorities: | + 10: + - .*-spot-.* + 50: + - .*-ondemand-.* +``` + +Priorities: +- Higher number = higher priority +- Regex patterns match node group names +- Useful for preferring spot instances + +## Verify Installation + +### Check Cluster Autoscaler Pod + +```bash +kubectl get pods -n kube-system -l app.kubernetes.io/name=aws-cluster-autoscaler +``` + +### Check Logs + +```bash +kubectl logs -n kube-system -l app.kubernetes.io/name=aws-cluster-autoscaler -f +``` + +Look for: +``` +Starting cluster autoscaler +Auto-discovery enabled +Discovered node groups: [eks-gpu-nodes, eks-cpu-nodes] +``` + +### Verify IAM Permissions + +```bash +kubectl logs -n kube-system -l app.kubernetes.io/name=aws-cluster-autoscaler | grep -i "error\|permission" +``` + +Should show no permission errors. 
+ +## Testing Cluster Autoscaler + +### Trigger Scale-Up + +Create pods that exceed cluster capacity: + +```bash +kubectl run test-scale-up-1 \ + --image=nginx \ + --requests='cpu=1,memory=1Gi' \ + --replicas=20 \ + --namespace=smallest +``` + +Watch nodes: + +```bash +watch -n 5 'kubectl get nodes' +``` + +Watch Cluster Autoscaler: + +```bash +kubectl logs -n kube-system -l app.kubernetes.io/name=aws-cluster-autoscaler -f +``` + +Expected behavior: +1. Pods enter "Pending" state +2. Cluster Autoscaler detects pending pods +3. Logs show: "Scale-up: setting group size to X" +4. New nodes appear in `kubectl get nodes` +5. Pods transition to "Running" + +### Trigger Scale-Down + +Delete test pods: + +```bash +kubectl delete deployment test-scale-up-1 -n smallest +``` + +After `scale-down-unneeded-time` (default 10 minutes): +1. Cluster Autoscaler marks underutilized nodes +2. Drains pods gracefully +3. Terminates EC2 instances +4. Node count decreases + +## GPU Node Scaling + +### Configure GPU Node Groups + +Tag GPU node groups for autoscaling: + +```yaml cluster-config.yaml +managedNodeGroups: + - name: gpu-nodes + instanceType: g5.xlarge + minSize: 0 + maxSize: 10 + desiredCapacity: 1 + tags: + k8s.io/cluster-autoscaler/smallest-cluster: "owned" + k8s.io/cluster-autoscaler/enabled: "true" + k8s.io/cluster-autoscaler/node-template/label/workload: "gpu" +``` + +### Prevent Cluster Autoscaler on GPU Nodes + +Run Cluster Autoscaler on CPU nodes to avoid wasting GPU: + +```yaml values.yaml +cluster-autoscaler: + nodeSelector: + workload: cpu + + tolerations: [] +``` + +### Scale to Zero + +Allow GPU nodes to scale to zero during off-hours: + +```yaml +managedNodeGroups: + - name: gpu-nodes + minSize: 0 + maxSize: 10 +``` + +Cluster Autoscaler will: +- Add GPU nodes when Lightning ASR pods are pending +- Remove GPU nodes when all GPU workloads complete + + +First startup after scale-to-zero takes longer (node provisioning + model download). 
+ + +## Spot Instance Integration + +### Mixed Instance Groups + +Use spot and on-demand instances: + +```yaml cluster-config.yaml +managedNodeGroups: + - name: gpu-nodes-mixed + minSize: 1 + maxSize: 10 + instancesDistribution: + onDemandBaseCapacity: 1 + onDemandPercentageAboveBaseCapacity: 20 + spotAllocationStrategy: capacity-optimized + instanceTypes: + - g5.xlarge + - g5.2xlarge + - g4dn.xlarge +``` + +**Configuration**: +- Base capacity: 1 on-demand node always +- Additional capacity: 20% on-demand, 80% spot +- Multiple instance types increase spot availability + +### Handle Spot Interruptions + +Configure Cluster Autoscaler for spot: + +```yaml +extraArgs: + balance-similar-node-groups: true + skip-nodes-with-local-storage: false + max-node-provision-time: 15m +``` + +Add AWS Node Termination Handler: + +```bash +helm repo add eks https://aws.github.io/eks-charts +helm install aws-node-termination-handler eks/aws-node-termination-handler \ + --namespace kube-system \ + --set enableSpotInterruptionDraining=true +``` + +## Advanced Configuration + +### Multiple Node Groups + +Scale different workloads independently: + +```yaml +cluster-autoscaler: + autoscalingGroups: + - name: cpu-small + minSize: 2 + maxSize: 10 + - name: cpu-large + minSize: 0 + maxSize: 5 + - name: gpu-a10 + minSize: 0 + maxSize: 10 + - name: gpu-t4 + minSize: 0 + maxSize: 5 +``` + +### Scale-Up Policies + +Control scale-up behavior: + +```yaml +extraArgs: + max-nodes-total: 50 + max-empty-bulk-delete: 10 + new-pod-scale-up-delay: 0s + scan-interval: 10s +``` + +### Resource Limits + +Prevent runaway scaling: + +```yaml +extraArgs: + cores-total: "0:512" + memory-total: "0:2048" + max-nodes-total: 100 +``` + +## Monitoring + +### CloudWatch Metrics + +View Auto Scaling Group metrics in CloudWatch: +- `GroupDesiredCapacity` +- `GroupInServiceInstances` +- `GroupPendingInstances` +- `GroupTerminatingInstances` + +### Kubernetes Events + +```bash +kubectl get events -n smallest 
--sort-by='.lastTimestamp' | grep -i scale +``` + +### Cluster Autoscaler Status + +```bash +kubectl get configmap cluster-autoscaler-status -n kube-system -o yaml +``` + +### Grafana Dashboard + +Import Cluster Autoscaler dashboard: + +Dashboard ID: 3831 + +See [Grafana Dashboards](/waves/self-host/kubernetes-setup/autoscaling/grafana-dashboards) + +## Troubleshooting + +### Nodes Not Scaling Up + +**Check pending pods**: + +```bash +kubectl get pods --all-namespaces --field-selector=status.phase=Pending +``` + +**Check Cluster Autoscaler logs**: + +```bash +kubectl logs -n kube-system -l app.kubernetes.io/name=aws-cluster-autoscaler --tail=100 +``` + +**Common issues**: +- Max nodes reached (`max-nodes-total`) +- IAM permission denied +- Auto Scaling Group at max capacity +- Node group not tagged properly + +### Nodes Not Scaling Down + +**Check node utilization**: + +```bash +kubectl top nodes +``` + +**Check for blocking conditions**: + +```bash +kubectl describe node | grep -i "scale-down disabled" +``` + +**Common causes**: +- Pods without PodDisruptionBudget +- Pods with local storage +- System pods (unless `skip-nodes-with-system-pods: false`) +- Nodes below utilization threshold + +### Permission Errors + +**Check service account**: + +```bash +kubectl describe sa cluster-autoscaler -n kube-system +``` + +**Verify IAM role**: + +```bash +kubectl logs -n kube-system -l app.kubernetes.io/name=aws-cluster-autoscaler | grep AccessDenied +``` + +Update IAM policy if needed (see [IAM & IRSA](/waves/self-host/kubernetes-setup/aws/iam-irsa)) + +## Best Practices + + + + Always tag Auto Scaling Groups: + + ``` + k8s.io/cluster-autoscaler/smallest-cluster: owned + k8s.io/cluster-autoscaler/enabled: true + ``` + + + + Configure appropriate min/max for each node group: + + ```yaml + gpu-nodes: + minSize: 0 # Save costs + maxSize: 10 # Prevent runaway + ``` + + + + Protect critical workloads during scale-down: + + ```yaml + apiVersion: policy/v1 + kind: 
PodDisruptionBudget + metadata: + name: lightning-asr-pdb + spec: + minAvailable: 1 + selector: + matchLabels: + app: lightning-asr + ``` + + + + Track scaling decisions in Grafana + + Set alerts for scale failures + + + + Periodically test scale-up and scale-down: + + ```bash + kubectl scale deployment lightning-asr --replicas=20 + ``` + + Watch for proper node addition/removal + + + +## What's Next? + + + + Configure pod-level autoscaling + + + + Set up Prometheus metrics + + + + Visualize autoscaling behavior + + + diff --git a/fern/products/waves/pages/v4.0.0/on-prem/kubernetes/autoscaling/grafana-dashboards.mdx b/fern/products/waves/pages/v4.0.0/on-prem/kubernetes/autoscaling/grafana-dashboards.mdx new file mode 100644 index 0000000..fe99d89 --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/on-prem/kubernetes/autoscaling/grafana-dashboards.mdx @@ -0,0 +1,535 @@ +--- +title: Grafana Dashboards +description: Visualize metrics, autoscaling behavior, and system performance +--- + +## Overview + +Grafana provides powerful visualization of Lightning ASR metrics, autoscaling behavior, and system performance. This guide covers accessing Grafana, importing dashboards, and creating custom visualizations. + +## Access Grafana + +### Enable Grafana + +Ensure Grafana is enabled in your Helm values: + +```yaml values.yaml +scaling: + auto: + enabled: true + +kube-prometheus-stack: + grafana: + enabled: true + adminPassword: "admin-password" +``` + +### Port Forward + +Access Grafana locally: + +```bash +kubectl port-forward -n default svc/smallest-prometheus-stack-grafana 3000:80 +``` + +Open http://localhost:3000 in your browser. 
+ +### Default Credentials + +- **Username**: `admin` +- **Password**: `prom-operator` (or custom password from `adminPassword`) + + +Change the default password immediately in production: + +```yaml +grafana: + adminPassword: "your-secure-password" +``` + + +### Expose Externally + +For permanent access, expose via LoadBalancer or Ingress: + + + + ```yaml values.yaml + kube-prometheus-stack: + grafana: + service: + type: LoadBalancer + ``` + + + + ```yaml + apiVersion: networking.k8s.io/v1 + kind: Ingress + metadata: + name: grafana + namespace: default + spec: + rules: + - host: grafana.example.com + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: smallest-prometheus-stack-grafana + port: + number: 80 + ``` + + + +## Import ASR Dashboard + +The Smallest Self-Host repository includes a pre-built ASR dashboard. + +### Import from File + + + + The dashboard is available at `grafana/dashboards/asr-dashboard.json` in the repository. + + + + Navigate to Grafana → Dashboards → Import + + + + - Click "Upload JSON file" + - Select `asr-dashboard.json` + - Click "Load" + + + + - Select Prometheus data source: `Prometheus` + - Click "Import" + + + +### Import via ConfigMap + +Automatically load dashboard on Grafana startup: + +```yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: asr-dashboard + namespace: default + labels: + grafana_dashboard: "1" +data: + asr-dashboard.json: | + { + "dashboard": ..., + "overwrite": true + } +``` + +Or enable via Helm: + +```yaml values.yaml +kube-prometheus-stack: + grafana: + dashboardProviders: + dashboardproviders.yaml: + apiVersion: 1 + providers: + - name: 'default' + folder: 'Smallest' + type: file + options: + path: /var/lib/grafana/dashboards/default + + dashboards: + default: + asr-dashboard: + file: dashboards/asr-dashboard.json +``` + +## ASR Dashboard Overview + +The pre-built dashboard includes the following panels: + +### Active Requests + +Shows current requests being processed: + +- 
**Metric**: `asr_active_requests` +- **Visualization**: Stat panel with thresholds +- **Colors**: + - Green: 0-5 requests + - Yellow: 5-10 requests + - Orange: 10-20 requests + - Red: 20+ requests + +### Request Rate + +Requests per second over time: + +- **Metric**: `rate(asr_total_requests[5m])` +- **Visualization**: Time series graph +- **Use**: Track traffic patterns + +### Error Rate + +Failed requests percentage: + +- **Metric**: `rate(asr_failed_requests[5m]) / rate(asr_total_requests[5m]) * 100` +- **Visualization**: Stat panel + time series +- **Alert**: Warning if > 5% + +### Response Time + +Request duration percentiles: + +- **Metrics**: + - P50: `histogram_quantile(0.50, asr_request_duration_seconds_bucket)` + - P95: `histogram_quantile(0.95, asr_request_duration_seconds_bucket)` + - P99: `histogram_quantile(0.99, asr_request_duration_seconds_bucket)` +- **Visualization**: Time series graph + +### Pod Count + +Number of Lightning ASR replicas: + +- **Metric**: `count(asr_active_requests)` +- **Visualization**: Stat panel +- **Use**: Monitor autoscaling + +### GPU Utilization + +GPU usage per pod: + +- **Metric**: `asr_gpu_utilization` +- **Visualization**: Time series graph +- **Use**: Ensure GPUs are utilized + +### GPU Memory + +GPU memory usage: + +- **Metric**: `asr_gpu_memory_used_bytes / 1024 / 1024 / 1024` +- **Visualization**: Gauge + time series +- **Use**: Monitor memory leaks + +## Create Custom Dashboards + +### Add New Dashboard + + + + Grafana → Dashboards → New Dashboard + + + + Click "Add panel" + + + + - Data source: Prometheus + - Metric: `asr_active_requests` + - Legend: `{{pod}}` + + + + - Choose visualization type (Time series, Stat, Gauge, etc.) 
+ - Configure thresholds + - Set units and decimals + + + + Click "Save dashboard" + + Enter name: "Custom ASR Dashboard" + + + +### Useful Queries + +#### Average Active Requests + +```promql +avg(asr_active_requests) +``` + +#### Total Throughput (requests/hour) + +```promql +sum(rate(asr_total_requests[1h])) * 3600 +``` + +#### Pod Resource Usage + +```promql +sum(container_memory_usage_bytes{pod=~"lightning-asr.*"}) by (pod) / 1024 / 1024 / 1024 +``` + +#### Autoscaling Events + +```promql +kube_deployment_status_replicas{deployment="lightning-asr"} +``` + +#### GPU Temperature + +```promql +asr_gpu_temperature_celsius +``` + +## Dashboard Variables + +Add variables for dynamic filtering: + +### Namespace Variable + + + + Click gear icon → Variables → Add variable + + + + - **Name**: `namespace` + - **Type**: Query + - **Data source**: Prometheus + - **Query**: `label_values(asr_active_requests, namespace)` + - **Multi-value**: Enabled + + + + Update panels to use variable: + ```promql + asr_active_requests{namespace="$namespace"} + ``` + + + +### Pod Variable + +``` +label_values(asr_active_requests{namespace="$namespace"}, pod) +``` + +### Time Range Variable + +``` +$__interval +``` + +Use in queries for dynamic aggregation. + +## Alerting + +### Configure Alert Rules + + + + Open panel → Alert tab + + + + - **Name**: High Active Requests + - **Evaluate every**: 1m + - **For**: 5m + + + + ``` + WHEN avg() OF query(A, 5m, now) IS ABOVE 20 + ``` + + + + - Choose notification channel + - Add message template + + + +### Alert Notification Channels + +Configure notifications: + + + + Grafana → Alerting → Notification channels → Add channel + + - **Type**: Email + - **Addresses**: ops@example.com + + + + - **Type**: Slack + - **Webhook URL**: https://hooks.slack.com/... 
+ - **Channel**: #alerts + + + + - **Type**: PagerDuty + - **Integration Key**: Your key + + + +## Pre-Built Dashboard Examples + +### System Overview Dashboard + +```json +{ + "title": "Smallest Self-Host Overview", + "panels": [ + { + "title": "Active Requests", + "targets": [{"expr": "sum(asr_active_requests)"}] + }, + { + "title": "Request Rate", + "targets": [{"expr": "sum(rate(asr_total_requests[5m]))"}] + }, + { + "title": "Pod Count", + "targets": [{"expr": "count(asr_active_requests)"}] + }, + { + "title": "Error Rate %", + "targets": [{"expr": "sum(rate(asr_failed_requests[5m])) / sum(rate(asr_total_requests[5m])) * 100"}] + } + ] +} +``` + +### Autoscaling Dashboard + +Track HPA behavior: + +```promql +kube_deployment_status_replicas{deployment="lightning-asr"} +kube_deployment_status_replicas_available{deployment="lightning-asr"} +kube_horizontalpodautoscaler_status_desired_replicas{horizontalpodautoscaler="lightning-asr"} +kube_horizontalpodautoscaler_status_current_replicas{horizontalpodautoscaler="lightning-asr"} +``` + +### Cost Dashboard + +Monitor resource costs: + +```promql +sum(kube_pod_container_resource_requests{pod=~"lightning-asr.*"}) by (resource) +count(kube_node_info{node=~".*gpu.*"}) * 1.00 +``` + +## Best Practices + + + + Organize dashboards by category: + + - **Smallest Overview**: High-level metrics + - **Lightning ASR**: Detailed ASR metrics + - **Infrastructure**: Node and cluster metrics + - **Autoscaling**: HPA and scaling behavior + + + + Default time ranges for different views: + + - **Real-time monitoring**: Last 15 minutes + - **Troubleshooting**: Last 1 hour + - **Analysis**: Last 24 hours + - **Trends**: Last 7 days + + + + Mark important events: + + - Deployments + - Scaling events + - Incidents + - Configuration changes + + + + Create template dashboards for: + + - Different environments (dev, staging, prod) + - Different namespaces + - Different models + + + + Save dashboard JSON to git: + + ```bash + kubectl get 
configmap asr-dashboard -o jsonpath='{.data.asr-dashboard\.json}' > asr-dashboard.json + git add asr-dashboard.json + git commit -m "Update ASR dashboard" + ``` + + + +## Troubleshooting + +### Grafana Not Showing Data + +**Check Prometheus data source**: + +Grafana → Configuration → Data Sources → Prometheus + +- **URL**: `http://smallest-prometheus-stack-prometheus:9090` +- **Access**: Server (default) + +Test connection with "Save & Test" button. + +**Check Prometheus is running**: + +```bash +kubectl get pods -l app.kubernetes.io/name=prometheus +``` + +### Queries Returning No Data + +**Verify metric exists in Prometheus**: + +```bash +kubectl port-forward svc/smallest-prometheus-stack-prometheus 9090:9090 +``` + +Open http://localhost:9090 and query the metric. + +**Check time range**: Ensure time range includes data. + +### Dashboard Not Loading + +**Check Grafana logs**: + +```bash +kubectl logs -l app.kubernetes.io/name=grafana +``` + +**Increase memory if needed**: + +```yaml +kube-prometheus-stack: + grafana: + resources: + limits: + memory: 512Mi +``` + +## What's Next? + + + + Use metrics for autoscaling + + + + Configure Prometheus metrics + + + diff --git a/fern/products/waves/pages/v4.0.0/on-prem/kubernetes/autoscaling/hpa-configuration.mdx b/fern/products/waves/pages/v4.0.0/on-prem/kubernetes/autoscaling/hpa-configuration.mdx new file mode 100644 index 0000000..b1173d5 --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/on-prem/kubernetes/autoscaling/hpa-configuration.mdx @@ -0,0 +1,499 @@ +--- +title: HPA Configuration +description: Configure Horizontal Pod Autoscaling based on custom metrics +--- + +## Overview + +Horizontal Pod Autoscaling (HPA) automatically adjusts the number of Lightning ASR and API Server pods based on workload demand. This guide covers configuring HPA using custom metrics like active request count. 
+ +## How HPA Works + +```mermaid +graph LR + Prometheus[Prometheus] -->|Scrapes| ASR[Lightning ASR Pods] + ASR -->|Metrics| Prometheus + Prometheus -->|Provides| Adapter[Prometheus Adapter] + Adapter -->|Custom Metrics| HPA[HPA Controller] + HPA -->|Scales| Deployment[Lightning ASR Deployment] + Deployment -->|Creates/Removes| ASR + + style HPA fill:#0D9373 + style ASR fill:#07C983 +``` + +Lightning ASR exports the `asr_active_requests` metric, which tracks the number of requests currently being processed. HPA uses this to scale pods up or down. + +## Prerequisites + + + + Install kube-prometheus-stack (included in Helm chart): + + ```yaml values.yaml + scaling: + auto: + enabled: true + + kube-prometheus-stack: + prometheus: + prometheusSpec: + serviceMonitorSelectorNilUsesHelmValues: false + prometheusOperator: + enabled: true + grafana: + enabled: true + ``` + + + + Install prometheus-adapter (included in Helm chart): + + ```yaml values.yaml + prometheus-adapter: + prometheus: + url: http://smallest-prometheus-stack-prometheus.default.svc + port: 9090 + ``` + + + + Enable ServiceMonitor for Lightning ASR: + + ```yaml values.yaml + scaling: + auto: + lightningAsr: + servicemonitor: + enabled: true + ``` + + + +## Enable HPA + +### Lightning ASR HPA + +Configure autoscaling for Lightning ASR based on active requests: + +```yaml values.yaml +scaling: + auto: + enabled: true + lightningAsr: + hpa: + enabled: true + minReplicas: 1 + maxReplicas: 10 + targetActiveRequests: 5 + scaleUpStabilizationWindowSeconds: 0 + scaleDownStabilizationWindowSeconds: 300 +``` + +**Parameters**: +- `minReplicas`: Minimum number of pods (never scales below) +- `maxReplicas`: Maximum number of pods (never scales above) +- `targetActiveRequests`: Target active requests per pod (scales when exceeded) +- `scaleUpStabilizationWindowSeconds`: Delay before scaling up (0 = immediate) +- `scaleDownStabilizationWindowSeconds`: Delay before scaling down (prevents flapping) + +### API Server HPA 
+ +Configure autoscaling for API Server based on Lightning ASR replicas: + +```yaml values.yaml +scaling: + auto: + enabled: true + apiServer: + hpa: + enabled: true + minReplicas: 1 + maxReplicas: 10 + lightningAsrToApiServerRatio: 2 + scaleUpStabilizationWindowSeconds: 30 + scaleDownStabilizationWindowSeconds: 60 +``` + +**Parameters**: +- `lightningAsrToApiServerRatio`: Ratio of Lightning ASR to API Server pods (2 = 2 ASR pods per 1 API pod) + +## Advanced Scaling Behavior + +### Custom Scaling Policies + +Fine-tune scaling behavior: + +```yaml values.yaml +scaling: + auto: + lightningAsr: + hpa: + enabled: true + minReplicas: 2 + maxReplicas: 20 + targetActiveRequests: 5 + behavior: + scaleUp: + stabilizationWindowSeconds: 5 + policies: + - type: Percent + value: 100 + periodSeconds: 15 + - type: Pods + value: 2 + periodSeconds: 15 + selectPolicy: Max + scaleDown: + stabilizationWindowSeconds: 300 + policies: + - type: Percent + value: 50 + periodSeconds: 60 + - type: Pods + value: 1 + periodSeconds: 60 + selectPolicy: Min +``` + +**Scale Up Policies**: +- Add up to 100% more pods every 15 seconds +- OR add up to 2 pods every 15 seconds +- Use whichever is higher (`selectPolicy: Max`) + +**Scale Down Policies**: +- Remove up to 50% of pods every 60 seconds +- OR remove up to 1 pod every 60 seconds +- Use whichever is lower (`selectPolicy: Min`) + +### Multi-Metric HPA + +Scale based on multiple metrics: + +```yaml +spec: + metrics: + - type: Pods + pods: + metric: + name: asr_active_requests + target: + type: AverageValue + averageValue: "5" + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: 70 + - type: Resource + resource: + name: memory + target: + type: Utilization + averageUtilization: 80 +``` + +## Verify HPA Configuration + +### Check HPA Status + +```bash +kubectl get hpa -n smallest +``` + +Expected output: +``` +NAME REFERENCE TARGETS MINPODS MAXPODS REPLICAS AGE +lightning-asr Deployment/lightning-asr 3/5 1 
10 2 5m +api-server Deployment/api-server 2/4 1 10 1 5m +``` + +### Describe HPA + +```bash +kubectl describe hpa lightning-asr -n smallest +``` + +Look for: +``` +Metrics: + "asr_active_requests" on pods: + Current: 3 + Target: 5 (average) +Events: + Normal SuccessfulRescale 1m horizontal-pod-autoscaler New size: 2; reason: pods metric asr_active_requests above target +``` + +### Check Custom Metrics + +Verify prometheus-adapter is providing metrics: + +```bash +kubectl get --raw "/apis/custom.metrics.k8s.io/v1beta1" | jq . +``` + +Should show `asr_active_requests` in the list. + +Query specific metric: + +```bash +kubectl get --raw "/apis/custom.metrics.k8s.io/v1beta1/namespaces/smallest/pods/*/asr_active_requests" | jq . +``` + +## Testing HPA + +### Load Testing + +Generate load to trigger scaling: + +```bash +for i in {1..100}; do + curl -X POST http://api-server.smallest.svc.cluster.local:7100/v1/listen \ + -H "Authorization: Token ${LICENSE_KEY}" \ + -H "Content-Type: application/json" \ + -d '{"url": "https://example.com/test-audio.wav"}' & +done +``` + +Watch scaling in action: + +```bash +kubectl get hpa -n smallest -w +``` + +### Monitor Pod Count + +In another terminal: + +```bash +watch -n 2 kubectl get pods -l app=lightning-asr -n smallest +``` + +You should see: +1. Active requests increase +2. HPA detects load above target +3. New pods created +4. Load distributed across pods +5. After load decreases, pods scale down (after stabilization window) + +## Scaling Scenarios + +### Scenario 1: Traffic Spike + +**Situation**: Sudden increase in requests + +**HPA Response**: +1. Detects `asr_active_requests` > 5 per pod +2. Immediately scales up (stabilization: 0s) +3. Adds pods based on policy (2 pods or 100%, whichever is higher) +4. 
Repeats every 15 seconds until load is distributed + +**Configuration**: +```yaml +scaleUpStabilizationWindowSeconds: 0 +behavior: + scaleUp: + policies: + - type: Percent + value: 100 + - type: Pods + value: 2 + selectPolicy: Max +``` + +### Scenario 2: Gradual Traffic Decline + +**Situation**: Traffic decreases after peak hours + +**HPA Response**: +1. Detects `asr_active_requests` < 5 per pod +2. Waits 300 seconds (5 minutes) before scaling down +3. Gradually removes pods (1 pod or 50%, whichever is lower) +4. Prevents premature scale-down + +**Configuration**: +```yaml +scaleDownStabilizationWindowSeconds: 300 +behavior: + scaleDown: + policies: + - type: Percent + value: 50 + - type: Pods + value: 1 + selectPolicy: Min +``` + +### Scenario 3: Off-Hours + +**Situation**: No traffic during night + +**HPA Response**: +1. Scales down to `minReplicas: 1` +2. Keeps one pod ready for incoming requests +3. Scales up immediately when traffic resumes + +**Configuration**: +```yaml +minReplicas: 1 +maxReplicas: 10 +``` + + +For complete cost savings during off-hours, use [Cluster Autoscaler](/waves/self-host/kubernetes-setup/autoscaling/cluster-autoscaler) to scale nodes to zero. + + +## Troubleshooting + +### HPA Shows "Unknown" + +**Symptom**: +``` +NAME TARGETS MINPODS MAXPODS +lightning-asr /5 1 10 +``` + +**Diagnosis**: + +Check prometheus-adapter logs: + +```bash +kubectl logs -n kube-system -l app.kubernetes.io/name=prometheus-adapter +``` + +Check ServiceMonitor: + +```bash +kubectl get servicemonitor -n smallest +kubectl describe servicemonitor lightning-asr -n smallest +``` + +Check Prometheus is scraping: + +```bash +kubectl port-forward -n default svc/smallest-prometheus-stack-prometheus 9090:9090 +``` + +Open http://localhost:9090 and query: `asr_active_requests` + +**Solutions**: + +1. Ensure ServiceMonitor is created +2. Verify Prometheus is scraping Lightning ASR pods +3. 
Check prometheus-adapter configuration + +### HPA Not Scaling + +**Symptom**: Metrics show high load but pods not increasing + +**Check**: + +```bash +kubectl describe hpa lightning-asr -n smallest +``` + +Look for events explaining why scaling didn't occur: + +``` +Events: + Warning FailedGetPodsMetric 1m horizontal-pod-autoscaler unable to get metric asr_active_requests +``` + +**Common causes**: +- Metrics not available (see above) +- Already at `maxReplicas` +- Insufficient cluster resources +- Stabilization window preventing scale-up + +### Pods Scaling Too Aggressively + +**Symptom**: Pods constantly scaling up and down + +**Solution**: Increase stabilization windows: + +```yaml +scaleUpStabilizationWindowSeconds: 30 +scaleDownStabilizationWindowSeconds: 600 +``` + +### Scale-Down Too Slow + +**Symptom**: Pods remain after traffic drops + +**Solution**: Reduce scale-down stabilization: + +```yaml +scaleDownStabilizationWindowSeconds: 120 +``` + +Be careful: too aggressive scale-down causes flapping. + +## Best Practices + + + + Choose `targetActiveRequests` based on your model performance: + + - Larger models (slower inference): Lower target (e.g., 3) + - Smaller models (faster inference): Higher target (e.g., 10) + + Test with load to find optimal value. + + + + Scale up quickly, scale down slowly: + + ```yaml + scaleUpStabilizationWindowSeconds: 0 + scaleDownStabilizationWindowSeconds: 300 + ``` + + Prevents request failures during traffic fluctuations. + + + + Consider cluster capacity when setting `maxReplicas`: + + ```yaml + maxReplicas: 10 # If cluster has 10 GPU nodes + ``` + + Don't set higher than available GPU resources. 
+ + + + Use Grafana to visualize: + - Current vs target metrics + - Pod count over time + - Scale-up/down events + + See [Grafana Dashboards](/waves/self-host/kubernetes-setup/autoscaling/grafana-dashboards) + + + + Regularly load test to verify HPA behavior: + + ```bash + kubectl run load-test --image=williamyeh/hey -it --rm -- \ + -z 5m -c 50 http://api-server:7100/health + ``` + + + +## What's Next? + + + + Scale cluster nodes automatically + + + + Configure Prometheus and custom metrics + + + + Visualize metrics and scaling behavior + + + diff --git a/fern/products/waves/pages/v4.0.0/on-prem/kubernetes/autoscaling/metrics-setup.mdx b/fern/products/waves/pages/v4.0.0/on-prem/kubernetes/autoscaling/metrics-setup.mdx new file mode 100644 index 0000000..56f4d54 --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/on-prem/kubernetes/autoscaling/metrics-setup.mdx @@ -0,0 +1,569 @@ +--- +title: Metrics Setup +description: Configure Prometheus, ServiceMonitor, and custom metrics for autoscaling +--- + +## Overview + +The metrics setup enables autoscaling by collecting Lightning ASR metrics with Prometheus and exposing them to Kubernetes HPA through the Prometheus Adapter. + +## Architecture + +```mermaid +graph LR + ASR[Lightning ASR] -->|Exports| Metrics + Metrics[/metrics Endpoint/] -->|Discovered By| SM[ServiceMonitor] + SM -->|Scraped By| Prom[Prometheus] + Prom -->|Queried By| Adapter[Prometheus Adapter] + Adapter -->|Supplies Metrics| HPA[HPA Controller] + HPA -->|Scales| ASR + + style Prom fill:#E6522C + style ASR fill:#0D9373 +``` +/* The original had a syntax error in Mermaid—edges must connect nodes, not labels. +"Metrics" is now a node, and edge directions/names are consistent. +*/ + +## Components + +### Prometheus + +Collects and stores metrics from Lightning ASR pods. 
+ +**Included in chart**: +```yaml values.yaml +scaling: + auto: + enabled: true + +kube-prometheus-stack: + prometheus: + prometheusSpec: + serviceMonitorSelectorNilUsesHelmValues: false + retention: 7d + resources: + requests: + memory: 2Gi +``` + +### ServiceMonitor + +CRD that tells Prometheus which services to scrape. + +**Enabled for Lightning ASR**: +```yaml values.yaml +scaling: + auto: + lightningAsr: + servicemonitor: + enabled: true +``` + +### Prometheus Adapter + +Converts Prometheus metrics to Kubernetes custom metrics API. + +**Configuration**: +```yaml values.yaml +prometheus-adapter: + prometheus: + url: http://smallest-prometheus-stack-prometheus.default.svc + port: 9090 + rules: + custom: + - seriesQuery: "asr_active_requests" + resources: + overrides: + namespace: {resource: "namespace"} + pod: {resource: "pod"} + name: + matches: "^(.*)$" + as: "${1}" + metricsQuery: "asr_active_requests{<<.LabelMatchers>>}" +``` + +## Available Metrics + +Lightning ASR exposes the following metrics: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
| Metric | Type | Description |
|---|---|---|
| `asr_active_requests` | Gauge | Current number of active transcription requests |
| `asr_total_requests` | Counter | Total requests processed |
| `asr_failed_requests` | Counter | Total failed requests |
| `asr_request_duration_seconds` | Histogram | Request processing time |
| `asr_model_load_time_seconds` | Gauge | Time to load model on startup |
| `asr_gpu_utilization` | Gauge | GPU utilization percentage |
| `asr_gpu_memory_used_bytes` | Gauge | GPU memory used |
+ +## Verify Metrics Setup + +### Check Prometheus + +Forward Prometheus port: + +```bash +kubectl port-forward -n default svc/smallest-prometheus-stack-prometheus 9090:9090 +``` + +Open http://localhost:9090 and verify: + +1. **Status → Targets**: Lightning ASR endpoints should be "UP" +2. **Graph**: Query `asr_active_requests` - should return data +3. **Status → Service Discovery**: Should show ServiceMonitor + +### Check ServiceMonitor + +```bash +kubectl get servicemonitor -n smallest +``` + +Expected output: +``` +NAME AGE +lightning-asr 5m +``` + +Describe ServiceMonitor: + +```bash +kubectl describe servicemonitor lightning-asr -n smallest +``` + +Should show: +```yaml +Spec: + Endpoints: + Port: metrics + Path: /metrics + Selector: + Match Labels: + app: lightning-asr +``` + +### Check Prometheus Adapter + +Verify custom metrics are available: + +```bash +kubectl get --raw "/apis/custom.metrics.k8s.io/v1beta1" | jq -r '.resources[].name' | grep asr +``` + +Expected output: +``` +pods/asr_active_requests +pods/asr_total_requests +pods/asr_failed_requests +``` + +Query specific metric: + +```bash +kubectl get --raw "/apis/custom.metrics.k8s.io/v1beta1/namespaces/smallest/pods/*/asr_active_requests" | jq . 
+``` + +## Custom Metric Configuration + +### Add New Custom Metrics + +To expose additional metrics to HPA: + +```yaml values.yaml +prometheus-adapter: + rules: + custom: + - seriesQuery: "asr_active_requests" + resources: + overrides: + namespace: {resource: "namespace"} + pod: {resource: "pod"} + name: + matches: "^(.*)$" + as: "${1}" + metricsQuery: "asr_active_requests{<<.LabelMatchers>>}" + + - seriesQuery: "asr_gpu_utilization" + resources: + overrides: + namespace: {resource: "namespace"} + pod: {resource: "pod"} + name: + as: "gpu_utilization" + metricsQuery: "avg_over_time(asr_gpu_utilization{<<.LabelMatchers>>}[2m])" +``` + +### External Metrics + +For cluster-wide metrics: + +```yaml values.yaml +prometheus-adapter: + rules: + external: + - seriesQuery: 'kube_deployment_status_replicas{deployment="lightning-asr"}' + metricsQuery: 'sum(kube_deployment_status_replicas{deployment="lightning-asr"})' + name: + as: "lightning_asr_replica_count" + resources: + overrides: + namespace: {resource: "namespace"} +``` + +Use in HPA: + +```yaml +spec: + metrics: + - type: External + external: + metric: + name: lightning_asr_replica_count + target: + type: Value + value: "5" +``` + +## Prometheus Configuration + +### Retention Policy + +Configure how long metrics are stored: + +```yaml values.yaml +kube-prometheus-stack: + prometheus: + prometheusSpec: + retention: 15d + retentionSize: "50GB" +``` + +### Storage + +Persist Prometheus data: + +```yaml values.yaml +kube-prometheus-stack: + prometheus: + prometheusSpec: + storageSpec: + volumeClaimTemplate: + spec: + storageClassName: gp3 + accessModes: ["ReadWriteOnce"] + resources: + requests: + storage: 100Gi +``` + +### Scrape Interval + +Adjust how frequently metrics are collected: + +```yaml values.yaml +kube-prometheus-stack: + prometheus: + prometheusSpec: + scrapeInterval: 30s + evaluationInterval: 30s +``` + + +Lower intervals (e.g., 15s) provide faster HPA response but increase storage. 
+ + +## Recording Rules + +Pre-compute expensive queries: + +```yaml +kube-prometheus-stack: + prometheus: + prometheusSpec: + additionalScrapeConfigs: + - job_name: 'lightning-asr-aggregated' + scrape_interval: 15s + static_configs: + - targets: ['lightning-asr:2269'] + + additionalPrometheusRulesMap: + asr-rules: + groups: + - name: asr_aggregations + interval: 30s + rules: + - record: asr:requests:rate5m + expr: rate(asr_total_requests[5m]) + + - record: asr:requests:active_avg + expr: avg(asr_active_requests) by (namespace) + + - record: asr:gpu:utilization_avg + expr: avg(asr_gpu_utilization) by (namespace) +``` + +Use recording rules in HPA for better performance. + +## Alerting Rules + +Create alerts for anomalies: + +```yaml +kube-prometheus-stack: + prometheus: + prometheusSpec: + additionalPrometheusRulesMap: + asr-alerts: + groups: + - name: asr_alerts + rules: + - alert: HighErrorRate + expr: rate(asr_failed_requests[5m]) > 0.1 + for: 5m + labels: + severity: warning + annotations: + summary: "High ASR error rate" + description: "Error rate is {{ $value }} errors/sec" + + - alert: HighQueueLength + expr: asr_active_requests > 50 + for: 2m + labels: + severity: warning + annotations: + summary: "ASR queue backing up" + description: "{{ $value }} requests queued" + + - alert: GPUMemoryHigh + expr: asr_gpu_memory_used_bytes / 24000000000 > 0.9 + for: 5m + labels: + severity: warning + annotations: + summary: "GPU memory usage high" + description: "GPU memory at {{ $value | humanizePercentage }}" +``` + +## Debugging Metrics + +### Check Metrics Endpoint + +Directly query Lightning ASR metrics: + +```bash +kubectl port-forward -n smallest svc/lightning-asr 2269:2269 +curl http://localhost:2269/metrics +``` + +Expected output: +``` +# HELP asr_active_requests Current active requests +# TYPE asr_active_requests gauge +asr_active_requests{pod="lightning-asr-xxx"} 3 + +# HELP asr_total_requests Total requests processed +# TYPE asr_total_requests counter 
+asr_total_requests{pod="lightning-asr-xxx"} 1523 + +... +``` + +### Test Prometheus Query + +Access Prometheus UI and test queries: + +```promql +asr_active_requests +rate(asr_total_requests[5m]) +histogram_quantile(0.95, asr_request_duration_seconds_bucket) +``` + +### Check Prometheus Targets + +```bash +kubectl port-forward -n default svc/smallest-prometheus-stack-prometheus 9090:9090 +``` + +Navigate to: http://localhost:9090/targets + +Verify Lightning ASR targets are "UP" + +### View Prometheus Logs + +```bash +kubectl logs -n default -l app.kubernetes.io/name=prometheus --tail=100 +``` + +Look for scrape errors. + +## Troubleshooting + +### Metrics Not Appearing + +**Check ServiceMonitor is created**: + +```bash +kubectl get servicemonitor -n smallest +``` + +**Check Prometheus is discovering**: + +```bash +kubectl logs -n default -l app.kubernetes.io/name=prometheus | grep lightning-asr +``` + +**Check service has metrics port**: + +```bash +kubectl get svc lightning-asr -n smallest -o yaml +``` + +Should show: +```yaml +ports: + - name: metrics + port: 2269 +``` + +### Custom Metrics Not Available + +**Check Prometheus Adapter logs**: + +```bash +kubectl logs -n kube-system -l app.kubernetes.io/name=prometheus-adapter +``` + +**Verify adapter configuration**: + +```bash +kubectl get configmap prometheus-adapter -n kube-system -o yaml +``` + +**Test API manually**: + +```bash +kubectl get --raw "/apis/custom.metrics.k8s.io/v1beta1" | jq . +``` + +### High Cardinality Issues + +If Prometheus is using too much memory: + +1. Reduce label cardinality +2. Increase retention limits +3. 
Use recording rules for complex queries + +```yaml +kube-prometheus-stack: + prometheus: + prometheusSpec: + resources: + requests: + memory: 4Gi + limits: + memory: 8Gi +``` + +## Best Practices + + + + Pre-compute expensive queries: + + ```yaml + - record: asr:requests:rate5m + expr: rate(asr_total_requests[5m]) + ``` + + Then use in HPA instead of raw query + + + + Balance responsiveness vs storage: + + - Fast autoscaling: 15s + - Normal: 30s + - Cost-optimized: 60s + + + + Always persist Prometheus data: + + ```yaml + storageSpec: + volumeClaimTemplate: + spec: + resources: + requests: + storage: 100Gi + ``` + + + + Track Prometheus performance: + + - Query duration + - Scrape duration + - Memory usage + - TSDB size + + + + Don't rely on Prometheus UI + + Use Grafana dashboards for ops + + See [Grafana Dashboards](/waves/self-host/kubernetes-setup/autoscaling/grafana-dashboards) + + + +## What's Next? + + + + Use metrics for autoscaling + + + + Visualize metrics + + + diff --git a/fern/products/waves/pages/v4.0.0/on-prem/kubernetes/aws/eks-setup.mdx b/fern/products/waves/pages/v4.0.0/on-prem/kubernetes/aws/eks-setup.mdx new file mode 100644 index 0000000..6168a12 --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/on-prem/kubernetes/aws/eks-setup.mdx @@ -0,0 +1,529 @@ +--- +title: AWS EKS Setup +description: Create and configure an EKS cluster for Smallest Self-Host with GPU support +--- + +## Overview + +This guide walks you through creating an Amazon EKS cluster optimized for running Smallest Self-Host with GPU acceleration. 
+ +## Prerequisites + + + + Install and configure AWS CLI: + ```bash + aws --version + aws configure + ``` + + + + Install eksctl (EKS cluster management tool): + ```bash + brew install eksctl + ``` + + Verify: + ```bash + eksctl version + ``` + + + + Install kubectl: + ```bash + brew install kubectl + ``` + + + + Ensure your AWS user/role has permissions to: + - Create EKS clusters + - Manage EC2 instances + - Create IAM roles + - Manage VPC resources + + + +## Cluster Configuration + +### Option 1: Quick Start with eksctl + +Create a cluster with GPU nodes using a single command: + +```bash +eksctl create cluster \ + --name smallest-cluster \ + --region us-east-1 \ + --version 1.28 \ + --nodegroup-name cpu-nodes \ + --node-type t3.large \ + --nodes 2 \ + --nodes-min 1 \ + --nodes-max 3 \ + --managed +``` + +Then add GPU node group: + +```bash +eksctl create nodegroup \ + --cluster smallest-cluster \ + --region us-east-1 \ + --name gpu-nodes \ + --node-type g5.xlarge \ + --nodes 1 \ + --nodes-min 0 \ + --nodes-max 5 \ + --managed \ + --node-labels "workload=gpu,nvidia.com/gpu=true" \ + --node-taints "nvidia.com/gpu=true:NoSchedule" +``` + + +This creates a cluster with separate CPU and GPU node groups, allowing for cost-effective scaling. 
+ + +### Option 2: Using Cluster Config File + +Create a cluster configuration file for more control: + +```yaml cluster-config.yaml +apiVersion: eksctl.io/v1alpha5 +kind: ClusterConfig + +metadata: + name: smallest-cluster + region: us-east-1 + version: "1.28" + +iam: + withOIDC: true + +managedNodeGroups: + - name: cpu-nodes + instanceType: t3.large + minSize: 1 + maxSize: 3 + desiredCapacity: 2 + volumeSize: 50 + ssh: + allow: false + labels: + workload: cpu + tags: + Environment: production + Application: smallest-self-host + + - name: gpu-nodes + instanceType: g5.xlarge + minSize: 0 + maxSize: 5 + desiredCapacity: 1 + volumeSize: 100 + ssh: + allow: false + labels: + workload: gpu + nvidia.com/gpu: "true" + node.kubernetes.io/instance-type: g5.xlarge + taints: + - key: nvidia.com/gpu + value: "true" + effect: NoSchedule + tags: + Environment: production + Application: smallest-self-host + NodeType: gpu + iam: + withAddonPolicies: + autoScaler: true + ebs: true + efs: true + +addons: + - name: vpc-cni + - name: coredns + - name: kube-proxy + - name: aws-ebs-csi-driver +``` + +Create the cluster: + +```bash +eksctl create cluster -f cluster-config.yaml +``` + + +Cluster creation takes 15-20 minutes. Monitor progress in the AWS CloudFormation console. + + +## GPU Instance Types + +Choose the right GPU instance type for your workload: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
| Instance Type | GPU | VRAM | vCPUs | RAM | $/hour* | Recommended For |
|---|---|---|---|---|---|---|
| g5.xlarge | 1x A10G | 24 GB | 4 | 16 GB | $1.00 | Development, testing |
| g5.2xlarge | 1x A10G | 24 GB | 8 | 32 GB | $1.21 | Small production |
| g5.4xlarge | 1x A10G | 24 GB | 16 | 64 GB | $1.63 | Medium production |
| g5.12xlarge | 4x A10G | 96 GB | 48 | 192 GB | $5.67 | High-volume production |
| p3.2xlarge | 1x V100 | 16 GB | 8 | 61 GB | $3.06 | Legacy workloads |
+ +* Approximate on-demand pricing in us-east-1, subject to change + + +**Recommendation**: Start with `g5.xlarge` for development and testing. Scale to `g5.2xlarge` or higher for production. + + +## Verify Cluster + +### Check Cluster Status + +```bash +eksctl get cluster --name smallest-cluster --region us-east-1 +``` + +### Verify Node Groups + +```bash +eksctl get nodegroup --cluster smallest-cluster --region us-east-1 +``` + +### Configure kubectl + +```bash +aws eks update-kubeconfig --name smallest-cluster --region us-east-1 +``` + +Verify access: + +```bash +kubectl get nodes +``` + +Expected output: +``` +NAME STATUS ROLES AGE VERSION +ip-xxx-cpu-1 Ready 5m v1.28.x +ip-xxx-cpu-2 Ready 5m v1.28.x +ip-xxx-gpu-1 Ready 5m v1.28.x +``` + +### Verify GPU Nodes + +Check GPU availability: + +```bash +kubectl get nodes -l workload=gpu -o json | \ + jq '.items[].status.capacity' +``` + +Look for `nvidia.com/gpu` in the output: +```json +{ + "cpu": "4", + "memory": "15944904Ki", + "nvidia.com/gpu": "1", + "pods": "29" +} +``` + +## Install NVIDIA Device Plugin + +The NVIDIA device plugin enables GPU scheduling in Kubernetes. + +### Using Helm (Recommended) + +The Smallest Self-Host chart includes the NVIDIA GPU Operator. Enable it in your values: + +```yaml values.yaml +gpu-operator: + enabled: true +``` + +### Manual Installation + +If installing separately: + +```bash +kubectl create -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v0.14.0/nvidia-device-plugin.yml +``` + +Verify: + +```bash +kubectl get pods -n kube-system | grep nvidia +``` + +## Install EBS CSI Driver + +Required for persistent volumes: + +### Using eksctl + +```bash +eksctl create addon \ + --name aws-ebs-csi-driver \ + --cluster smallest-cluster \ + --region us-east-1 +``` + +### Using AWS Console + +1. Navigate to EKS → Clusters → smallest-cluster → Add-ons +2. Click "Add new" +3. Select "Amazon EBS CSI Driver" +4. 
Click "Add" + +### Verify EBS CSI Driver + +```bash +kubectl get pods -n kube-system -l app=ebs-csi-controller +``` + +## Install EFS CSI Driver (Optional) + +Recommended for shared model storage across pods. + +### Create IAM Policy + +```bash +curl -o iam-policy.json https://raw.githubusercontent.com/kubernetes-sigs/aws-efs-csi-driver/master/docs/iam-policy-example.json + +aws iam create-policy \ + --policy-name AmazonEKS_EFS_CSI_Driver_Policy \ + --policy-document file://iam-policy.json +``` + +### Create IAM Service Account + +```bash +eksctl create iamserviceaccount \ + --cluster smallest-cluster \ + --region us-east-1 \ + --namespace kube-system \ + --name efs-csi-controller-sa \ + --attach-policy-arn arn:aws:iam::YOUR_ACCOUNT_ID:policy/AmazonEKS_EFS_CSI_Driver_Policy \ + --approve +``` + +Replace `YOUR_ACCOUNT_ID` with your AWS account ID. + +### Install EFS CSI Driver + +```bash +kubectl apply -k "github.com/kubernetes-sigs/aws-efs-csi-driver/deploy/kubernetes/overlays/stable/?ref=release-1.7" +``` + +Verify: + +```bash +kubectl get pods -n kube-system -l app=efs-csi-controller +``` + +## Enable Cluster Autoscaler + +See the [Cluster Autoscaler](/waves/self-host/kubernetes-setup/autoscaling/cluster-autoscaler) guide for detailed setup. 
+ +Quick setup: + +```bash +eksctl create iamserviceaccount \ + --cluster smallest-cluster \ + --region us-east-1 \ + --namespace kube-system \ + --name cluster-autoscaler \ + --attach-policy-arn arn:aws:iam::aws:policy/AutoScalingFullAccess \ + --approve \ + --override-existing-serviceaccounts +``` + +## Cost Optimization + +### Use Spot Instances for GPU Nodes + +Reduce costs by up to 70% with Spot instances: + +```yaml cluster-config.yaml +managedNodeGroups: + - name: gpu-nodes-spot + instanceType: g5.xlarge + minSize: 0 + maxSize: 5 + desiredCapacity: 1 + spot: true + instancesDistribution: + maxPrice: 0.50 + instanceTypes: ["g5.xlarge", "g5.2xlarge"] + onDemandBaseCapacity: 0 + onDemandPercentageAboveBaseCapacity: 0 + spotAllocationStrategy: capacity-optimized +``` + + +Spot instances can be interrupted with 2-minute warning. Ensure your application handles graceful shutdowns. + + +### Right-Size Node Groups + +Start small and scale based on metrics: + +```yaml +managedNodeGroups: + - name: gpu-nodes + minSize: 0 + maxSize: 10 + desiredCapacity: 1 +``` + +Set `minSize: 0` to scale down to zero during off-hours. + +### Enable Cluster Autoscaler + +Automatically adjust node count based on demand: + +```yaml values.yaml +cluster-autoscaler: + enabled: true + autoDiscovery: + clusterName: smallest-cluster + awsRegion: us-east-1 +``` + +## Security Best Practices + +### Enable Private Endpoint + +```bash +eksctl utils update-cluster-endpoint \ + --cluster smallest-cluster \ + --region us-east-1 \ + --private-access=true \ + --public-access=false +``` + +### Enable Logging + +```bash +eksctl utils update-cluster-logging \ + --cluster smallest-cluster \ + --region us-east-1 \ + --enable-types all \ + --approve +``` + +### Update Security Groups + +Restrict inbound access to API server: + +```bash +aws ec2 describe-security-groups \ + --filters "Name=tag:aws:eks:cluster-name,Values=smallest-cluster" +``` + +Update rules to allow only specific IPs. 
+ +## Troubleshooting + +### GPU Nodes Not Ready + +Check NVIDIA device plugin: + +```bash +kubectl get pods -n kube-system | grep nvidia +kubectl describe node +``` + +### Pods Stuck in Pending + +Check node capacity: + +```bash +kubectl describe pod +kubectl get nodes -o json | jq '.items[].status.allocatable' +``` + +### EBS Volumes Not Mounting + +Verify EBS CSI driver: + +```bash +kubectl get pods -n kube-system -l app=ebs-csi-controller +kubectl logs -n kube-system -l app=ebs-csi-controller +``` + +## What's Next? + + + + Configure IAM roles for service accounts + + + + Advanced GPU node configuration and optimization + + + + Set up shared file storage for models + + + + Enable automatic node scaling + + + diff --git a/fern/products/waves/pages/v4.0.0/on-prem/kubernetes/aws/gpu-nodes.mdx b/fern/products/waves/pages/v4.0.0/on-prem/kubernetes/aws/gpu-nodes.mdx new file mode 100644 index 0000000..be7daa8 --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/on-prem/kubernetes/aws/gpu-nodes.mdx @@ -0,0 +1,617 @@ +--- +title: GPU Nodes Configuration +description: Advanced GPU node setup and optimization for AWS EKS +--- + +## Overview + +This guide covers advanced configuration and optimization for GPU nodes in AWS EKS, including node taints, tolerations, labels, and performance tuning. + +## GPU Node Configuration + +### Node Labels + +Labels help Kubernetes schedule pods on the correct nodes. 
+ +#### Automatic Labels + +EKS automatically adds these labels to GPU nodes: + +```yaml +node.kubernetes.io/instance-type: g5.xlarge +beta.kubernetes.io/instance-type: g5.xlarge +topology.kubernetes.io/zone: us-east-1a +topology.kubernetes.io/region: us-east-1 +``` + +#### Custom Labels + +Add custom labels when creating node groups: + +```yaml cluster-config.yaml +managedNodeGroups: + - name: gpu-nodes + instanceType: g5.xlarge + labels: + workload: gpu + nvidia.com/gpu: "true" + gpu-type: a10 + cost-tier: spot +``` + +Or add labels to existing nodes: + +```bash +kubectl label nodes workload=gpu +kubectl label nodes gpu-type=a10 +``` + +### Node Taints + +Taints prevent non-GPU workloads from running on expensive GPU nodes. + +#### Add Taints During Node Group Creation + +```yaml cluster-config.yaml +managedNodeGroups: + - name: gpu-nodes + instanceType: g5.xlarge + taints: + - key: nvidia.com/gpu + value: "true" + effect: NoSchedule +``` + +#### Add Taints to Existing Nodes + +```bash +kubectl taint nodes nvidia.com/gpu=true:NoSchedule +``` + +### Tolerations in Pod Specs + +Pods must have matching tolerations to run on tainted nodes: + +```yaml values.yaml +lightningAsr: + tolerations: + - key: nvidia.com/gpu + operator: Exists + effect: NoSchedule + - key: nvidia.com/gpu + operator: Equal + value: "true" + effect: NoSchedule +``` + +## Node Selectors + +### Using Instance Type + +Most common approach for AWS: + +```yaml values.yaml +lightningAsr: + nodeSelector: + node.kubernetes.io/instance-type: g5.xlarge +``` + +### Using Custom Labels + +```yaml values.yaml +lightningAsr: + nodeSelector: + workload: gpu + gpu-type: a10 +``` + +### Multiple Selectors + +Combine multiple selectors for precise placement: + +```yaml values.yaml +lightningAsr: + nodeSelector: + node.kubernetes.io/instance-type: g5.xlarge + topology.kubernetes.io/zone: us-east-1a + cost-tier: on-demand +``` + +## NVIDIA Device Plugin + +The NVIDIA device plugin makes GPUs available to Kubernetes 
pods. + +### Installation via GPU Operator + +The recommended approach is using the NVIDIA GPU Operator (included in the Smallest Helm chart): + +```yaml values.yaml +gpu-operator: + enabled: true + driver: + enabled: true + toolkit: + enabled: true + devicePlugin: + enabled: true +``` + +### Manual Installation + +Alternatively, install the device plugin directly: + +```bash +kubectl create -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v0.14.0/nvidia-device-plugin.yml +``` + +### Verify Device Plugin + +```bash +kubectl get pods -n kube-system | grep nvidia-device-plugin +kubectl logs -n kube-system -l name=nvidia-device-plugin +``` + +### Check GPU Availability + +```bash +kubectl get nodes -o json | \ + jq -r '.items[] | select(.status.capacity."nvidia.com/gpu" != null) | + "\(.metadata.name)\t\(.status.capacity."nvidia.com/gpu")"' +``` + +## GPU Resource Limits + +### Request GPU in Pod Spec + +The Lightning ASR deployment automatically requests GPU: + +```yaml +resources: + limits: + nvidia.com/gpu: 1 + requests: + nvidia.com/gpu: 1 +``` + +### Multiple GPUs + +For pods that need multiple GPUs: + +```yaml +resources: + limits: + nvidia.com/gpu: 2 + requests: + nvidia.com/gpu: 2 +``` + + +Smallest Self-Host Lightning ASR is optimized for single GPU per pod. Use multiple pods for scaling rather than multiple GPUs per pod. 
+ + +## GPU Performance Optimization + +### Enable GPU Persistence Mode + +GPU persistence mode keeps the NVIDIA driver loaded, reducing initialization time: + +```yaml +gpu-operator: + enabled: true + driver: + enabled: true + env: + - name: NVIDIA_DRIVER_CAPABILITIES + value: "compute,utility" + - name: NVIDIA_REQUIRE_CUDA + value: "cuda>=11.8" + toolkit: + enabled: true + env: + - name: NVIDIA_MPS_ENABLED + value: "1" +``` + +### Use DaemonSet for GPU Configuration + +Create a DaemonSet to configure GPU settings on all GPU nodes: + +```yaml gpu-config-daemonset.yaml +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: gpu-config + namespace: kube-system +spec: + selector: + matchLabels: + name: gpu-config + template: + metadata: + labels: + name: gpu-config + spec: + hostPID: true + nodeSelector: + nvidia.com/gpu: "true" + tolerations: + - key: nvidia.com/gpu + operator: Exists + effect: NoSchedule + containers: + - name: gpu-config + image: nvidia/cuda:11.8.0-base-ubuntu22.04 + command: + - /bin/bash + - -c + - | + nvidia-smi -pm 1 + nvidia-smi --auto-boost-default=DISABLED + nvidia-smi -ac 1215,1410 + sleep infinity + securityContext: + privileged: true + volumeMounts: + - name: sys + mountPath: /sys + volumes: + - name: sys + hostPath: + path: /sys +``` + +Apply: + +```bash +kubectl apply -f gpu-config-daemonset.yaml +``` + +### Monitor GPU Utilization + +Deploy NVIDIA DCGM exporter for Prometheus metrics: + +```bash +helm repo add gpu-helm-charts https://nvidia.github.io/dcgm-exporter/helm-charts +helm repo update + +helm install dcgm-exporter gpu-helm-charts/dcgm-exporter \ + --namespace kube-system \ + --set serviceMonitor.enabled=true +``` + +## Multi-GPU Strategies + +### Strategy 1: One Pod per GPU (Recommended) + +Scale horizontally with one pod per GPU: + +```yaml values.yaml +scaling: + auto: + enabled: true + lightningAsr: + hpa: + enabled: true + minReplicas: 1 + maxReplicas: 10 + +lightningAsr: + resources: + limits: + nvidia.com/gpu: 1 +``` + 
+### Strategy 2: GPU Sharing (Time-Slicing) + +Allow multiple pods to share a single GPU (reduces isolation): + +```yaml +gpu-operator: + enabled: true + devicePlugin: + config: + name: time-slicing-config + default: any + sharing: + timeSlicing: + replicas: 4 +``` + + +GPU sharing reduces isolation and can impact performance. Use only if cost is more critical than performance. + + +### Strategy 3: Multi-Instance GPU (MIG) + +For A100 and A30 GPUs, use MIG to partition GPUs: + +```bash +nvidia-smi mig -cgi 9,9,9,9,9,9,9 -C +``` + +Configure pods to use MIG instances: + +```yaml +resources: + limits: + nvidia.com/mig-1g.5gb: 1 +``` + +## Node Auto-Scaling + +### Configure Auto-Scaling Groups + +When creating node groups, enable auto-scaling: + +```yaml cluster-config.yaml +managedNodeGroups: + - name: gpu-nodes + instanceType: g5.xlarge + minSize: 0 + maxSize: 10 + desiredCapacity: 1 + tags: + k8s.io/cluster-autoscaler/enabled: "true" + k8s.io/cluster-autoscaler/smallest-cluster: "owned" +``` + +### Install Cluster Autoscaler + +See [Cluster Autoscaler](/waves/self-host/kubernetes-setup/autoscaling/cluster-autoscaler) for full setup. + +Quick enable: + +```yaml values.yaml +cluster-autoscaler: + enabled: true + autoDiscovery: + clusterName: smallest-cluster + awsRegion: us-east-1 + nodeSelector: + workload: cpu +``` + + +Run Cluster Autoscaler on CPU nodes, not GPU nodes, to avoid wasting GPU resources. 
+ + +## Cost Optimization + +### Use Spot Instances + +Save up to 70% with Spot instances: + +```yaml cluster-config.yaml +managedNodeGroups: + - name: gpu-nodes-spot + instanceType: g5.xlarge + minSize: 0 + maxSize: 10 + desiredCapacity: 1 + spot: true + instancesDistribution: + maxPrice: 0.50 + instanceTypes: ["g5.xlarge", "g5.2xlarge"] + onDemandBaseCapacity: 0 + onDemandPercentageAboveBaseCapacity: 0 + spotAllocationStrategy: capacity-optimized + labels: + capacity-type: spot + taints: + - key: nvidia.com/gpu + value: "true" + effect: NoSchedule +``` + +### Handle Spot Interruptions + +Add pod disruption budget: + +```yaml +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + name: lightning-asr-pdb +spec: + minAvailable: 1 + selector: + matchLabels: + app: lightning-asr +``` + +Configure graceful shutdown: + +```yaml values.yaml +lightningAsr: + terminationGracePeriodSeconds: 120 +``` + +### Mixed On-Demand and Spot + +Combine both for reliability: + +```yaml cluster-config.yaml +managedNodeGroups: + - name: gpu-nodes-ondemand + instanceType: g5.xlarge + minSize: 1 + maxSize: 3 + labels: + capacity-type: on-demand + + - name: gpu-nodes-spot + instanceType: g5.xlarge + minSize: 0 + maxSize: 10 + spot: true + labels: + capacity-type: spot +``` + +Use pod affinity to prefer spot: + +```yaml values.yaml +lightningAsr: + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + preference: + matchExpressions: + - key: capacity-type + operator: In + values: + - spot +``` + +## Monitoring GPU Nodes + +### View GPU Node Status + +```bash +kubectl get nodes -l nvidia.com/gpu=true +``` + +### Check GPU Allocation + +```bash +kubectl describe nodes -l nvidia.com/gpu=true | grep -A 5 "Allocated resources" +``` + +### GPU Utilization + +Using NVIDIA SMI: + +```bash +kubectl run nvidia-smi --rm -it --restart=Never \ + --image=nvidia/cuda:11.8.0-base-ubuntu22.04 \ + 
--overrides='{"spec":{"nodeSelector":{"nvidia.com/gpu":"true"},"tolerations":[{"key":"nvidia.com/gpu","operator":"Exists"}]}}' \ + -- nvidia-smi +``` + +## Troubleshooting + +### GPU Not Detected + +**Check NVIDIA device plugin**: + +```bash +kubectl get pods -n kube-system | grep nvidia +kubectl logs -n kube-system -l name=nvidia-device-plugin +``` + +**Verify driver on node**: + +```bash +kubectl debug node/ -it --image=ubuntu +apt-get update && apt-get install -y nvidia-utils +nvidia-smi +``` + +### Pods Not Scheduling on GPU Nodes + +**Check tolerations**: + +```bash +kubectl describe pod | grep -A 5 Tolerations +``` + +**Check node selector**: + +```bash +kubectl get pod -o jsonpath='{.spec.nodeSelector}' +``` + +**Check node taints**: + +```bash +kubectl describe node | grep Taints +``` + +### GPU Out of Memory + +**Check pod resource limits**: + +```bash +kubectl describe pod | grep -A 5 Limits +``` + +**Monitor GPU memory**: + +```bash +kubectl exec -- nvidia-smi +``` + +## Best Practices + + + + Always use taints and tolerations to prevent non-GPU workloads from running on GPU nodes: + + ```yaml + taints: + - key: nvidia.com/gpu + value: "true" + effect: NoSchedule + ``` + + + + Always specify GPU resource requests and limits: + + ```yaml + resources: + limits: + nvidia.com/gpu: 1 + requests: + nvidia.com/gpu: 1 + ``` + + + + Configure auto-scaling to scale GPU nodes to zero during off-hours: + + ```yaml + minSize: 0 + maxSize: 10 + ``` + + + + Use DCGM exporter and Grafana to monitor GPU metrics: + + - GPU utilization + - Memory usage + - Temperature + - Power consumption + + + + Regularly test your application's response to spot interruptions: + + ```bash + kubectl drain --ignore-daemonsets --delete-emptydir-data + ``` + + + +## What's Next? 
+ + + + Set up shared storage for model caching + + + + Configure pod autoscaling based on metrics + + + + Enable automatic node scaling + + + + Set up Grafana dashboards + + + diff --git a/fern/products/waves/pages/v4.0.0/on-prem/kubernetes/aws/iam-irsa.mdx b/fern/products/waves/pages/v4.0.0/on-prem/kubernetes/aws/iam-irsa.mdx new file mode 100644 index 0000000..b4d3f8b --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/on-prem/kubernetes/aws/iam-irsa.mdx @@ -0,0 +1,567 @@ +--- +title: IAM & IRSA +description: Configure IAM Roles for Service Accounts in EKS +--- + +## Overview + +IAM Roles for Service Accounts (IRSA) allows Kubernetes service accounts to assume AWS IAM roles, enabling secure access to AWS services without storing credentials in the cluster. + +This guide covers setting up IRSA for: +- Cluster Autoscaler +- EFS CSI Driver +- EBS CSI Driver + +## Prerequisites + + + + Your EKS cluster must have an OIDC provider enabled + + Check if enabled: + ```bash + aws eks describe-cluster \ + --name smallest-cluster \ + --region us-east-1 \ + --query "cluster.identity.oidc.issuer" \ + --output text + ``` + + + + ```bash + eksctl utils associate-iam-oidc-provider \ + --cluster smallest-cluster \ + --region us-east-1 \ + --approve + ``` + + + +## Cluster Autoscaler IRSA + +The Cluster Autoscaler needs permissions to modify Auto Scaling Groups. 
+ +### Create IAM Policy + +Create a policy document: + +```json cluster-autoscaler-policy.json +{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "autoscaling:DescribeAutoScalingGroups", + "autoscaling:DescribeAutoScalingInstances", + "autoscaling:DescribeLaunchConfigurations", + "autoscaling:DescribeScalingActivities", + "autoscaling:DescribeTags", + "ec2:DescribeInstanceTypes", + "ec2:DescribeLaunchTemplateVersions" + ], + "Resource": ["*"] + }, + { + "Effect": "Allow", + "Action": [ + "autoscaling:SetDesiredCapacity", + "autoscaling:TerminateInstanceInAutoScalingGroup", + "ec2:DescribeImages", + "ec2:GetInstanceTypesFromInstanceRequirements", + "eks:DescribeNodegroup" + ], + "Resource": ["*"] + } + ] +} +``` + +Create the policy: + +```bash +aws iam create-policy \ + --policy-name AmazonEKSClusterAutoscalerPolicy \ + --policy-document file://cluster-autoscaler-policy.json +``` + +Note the policy ARN from the output. + +### Create Service Account with IAM Role + +Using eksctl: + +```bash +eksctl create iamserviceaccount \ + --cluster=smallest-cluster \ + --region=us-east-1 \ + --namespace=kube-system \ + --name=cluster-autoscaler \ + --attach-policy-arn=arn:aws:iam::YOUR_ACCOUNT_ID:policy/AmazonEKSClusterAutoscalerPolicy \ + --override-existing-serviceaccounts \ + --approve +``` + +Replace `YOUR_ACCOUNT_ID` with your AWS account ID. + +### Verify Service Account + +```bash +kubectl describe sa cluster-autoscaler -n kube-system +``` + +Look for the annotation: +``` +Annotations: eks.amazonaws.com/role-arn: arn:aws:iam::YOUR_ACCOUNT_ID:role/eksctl-smallest-cluster-addon-iamserviceaccount-... +``` + +### Update Helm Values + +Configure the Cluster Autoscaler to use this service account: + +```yaml values.yaml +cluster-autoscaler: + enabled: true + rbac: + serviceAccount: + name: cluster-autoscaler + annotations: + eks.amazonaws.com/role-arn: arn:aws:iam::YOUR_ACCOUNT_ID:role/eksctl-smallest-cluster-addon-iamserviceaccount-... 
+ autoDiscovery: + clusterName: smallest-cluster + awsRegion: us-east-1 +``` + +## EFS CSI Driver IRSA + +Required for shared file storage (model caching). + +### Create IAM Policy + +Download the policy: + +```bash +curl -o efs-iam-policy.json https://raw.githubusercontent.com/kubernetes-sigs/aws-efs-csi-driver/master/docs/iam-policy-example.json +``` + +Create the policy: + +```bash +aws iam create-policy \ + --policy-name AmazonEKS_EFS_CSI_Driver_Policy \ + --policy-document file://efs-iam-policy.json +``` + +### Create Service Account with IAM Role + +```bash +eksctl create iamserviceaccount \ + --cluster=smallest-cluster \ + --region=us-east-1 \ + --namespace=kube-system \ + --name=efs-csi-controller-sa \ + --attach-policy-arn=arn:aws:iam::YOUR_ACCOUNT_ID:policy/AmazonEKS_EFS_CSI_Driver_Policy \ + --override-existing-serviceaccounts \ + --approve +``` + +### Install EFS CSI Driver + +```bash +kubectl apply -k "github.com/kubernetes-sigs/aws-efs-csi-driver/deploy/kubernetes/overlays/stable/?ref=release-1.7" +``` + +Update the deployment to use the service account: + +```bash +kubectl patch deployment efs-csi-controller \ + -n kube-system \ + -p '{"spec":{"template":{"spec":{"serviceAccountName":"efs-csi-controller-sa"}}}}' +``` + +### Verify + +```bash +kubectl get pods -n kube-system -l app=efs-csi-controller +kubectl describe sa efs-csi-controller-sa -n kube-system +``` + +## EBS CSI Driver IRSA + +Required for block storage (PersistentVolumes). 
+ +### Create IAM Policy + +The policy is available from AWS: + +```bash +aws iam create-policy \ + --policy-name AmazonEKS_EBS_CSI_Driver_Policy \ + --policy-document '{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "ec2:CreateSnapshot", + "ec2:AttachVolume", + "ec2:DetachVolume", + "ec2:ModifyVolume", + "ec2:DescribeAvailabilityZones", + "ec2:DescribeInstances", + "ec2:DescribeSnapshots", + "ec2:DescribeTags", + "ec2:DescribeVolumes", + "ec2:DescribeVolumesModifications" + ], + "Resource": "*" + }, + { + "Effect": "Allow", + "Action": [ + "ec2:CreateTags" + ], + "Resource": [ + "arn:aws:ec2:*:*:volume/*", + "arn:aws:ec2:*:*:snapshot/*" + ], + "Condition": { + "StringEquals": { + "ec2:CreateAction": [ + "CreateVolume", + "CreateSnapshot" + ] + } + } + }, + { + "Effect": "Allow", + "Action": [ + "ec2:DeleteTags" + ], + "Resource": [ + "arn:aws:ec2:*:*:volume/*", + "arn:aws:ec2:*:*:snapshot/*" + ] + }, + { + "Effect": "Allow", + "Action": [ + "ec2:CreateVolume" + ], + "Resource": "*", + "Condition": { + "StringLike": { + "aws:RequestTag/ebs.csi.aws.com/cluster": "true" + } + } + }, + { + "Effect": "Allow", + "Action": [ + "ec2:CreateVolume" + ], + "Resource": "*", + "Condition": { + "StringLike": { + "aws:RequestTag/CSIVolumeName": "*" + } + } + }, + { + "Effect": "Allow", + "Action": [ + "ec2:DeleteVolume" + ], + "Resource": "*", + "Condition": { + "StringLike": { + "ec2:ResourceTag/ebs.csi.aws.com/cluster": "true" + } + } + }, + { + "Effect": "Allow", + "Action": [ + "ec2:DeleteVolume" + ], + "Resource": "*", + "Condition": { + "StringLike": { + "ec2:ResourceTag/CSIVolumeName": "*" + } + } + }, + { + "Effect": "Allow", + "Action": [ + "ec2:DeleteVolume" + ], + "Resource": "*", + "Condition": { + "StringLike": { + "ec2:ResourceTag/kubernetes.io/created-for/pvc/name": "*" + } + } + }, + { + "Effect": "Allow", + "Action": [ + "ec2:DeleteSnapshot" + ], + "Resource": "*", + "Condition": { + "StringLike": { + 
"ec2:ResourceTag/CSIVolumeSnapshotName": "*" + } + } + }, + { + "Effect": "Allow", + "Action": [ + "ec2:DeleteSnapshot" + ], + "Resource": "*", + "Condition": { + "StringLike": { + "ec2:ResourceTag/ebs.csi.aws.com/cluster": "true" + } + } + } + ] + }' +``` + +### Create Service Account with IAM Role + +```bash +eksctl create iamserviceaccount \ + --cluster=smallest-cluster \ + --region=us-east-1 \ + --namespace=kube-system \ + --name=ebs-csi-controller-sa \ + --attach-policy-arn=arn:aws:iam::YOUR_ACCOUNT_ID:policy/AmazonEKS_EBS_CSI_Driver_Policy \ + --override-existing-serviceaccounts \ + --approve +``` + +### Install EBS CSI Driver Addon + +```bash +eksctl create addon \ + --cluster smallest-cluster \ + --region us-east-1 \ + --name aws-ebs-csi-driver \ + --service-account-role-arn arn:aws:iam::YOUR_ACCOUNT_ID:role/eksctl-smallest-cluster-addon-iamserviceaccount-... +``` + +## Verify IRSA Configuration + +### Check Service Accounts + +List all service accounts with IAM role annotations: + +```bash +kubectl get sa -A -o jsonpath='{range .items[?(@.metadata.annotations.eks\.amazonaws\.com/role-arn)]}{.metadata.namespace}{"\t"}{.metadata.name}{"\t"}{.metadata.annotations.eks\.amazonaws\.com/role-arn}{"\n"}{end}' +``` + +### Test IAM Role Assumption + +Create a test pod: + +```yaml test-pod.yaml +apiVersion: v1 +kind: Pod +metadata: + name: test-irsa + namespace: kube-system +spec: + serviceAccountName: cluster-autoscaler + containers: + - name: aws-cli + image: amazon/aws-cli + command: ['sleep', '3600'] +``` + +Apply and exec: + +```bash +kubectl apply -f test-pod.yaml +kubectl exec -it test-irsa -n kube-system -- aws sts get-caller-identity +``` + +Should show the assumed role ARN. + +## Troubleshooting + +### Role Not Assumed + +**Check service account annotation**: + +```bash +kubectl describe sa -n +``` + +Should show: +``` +Annotations: eks.amazonaws.com/role-arn: arn:aws:iam::... 
+``` + +### Permission Denied + +**Verify IAM policy**: + +```bash +aws iam get-policy-version \ + --policy-arn arn:aws:iam::YOUR_ACCOUNT_ID:policy/PolicyName \ + --version-id v1 +``` + +Check trust relationship: + +```bash +aws iam get-role --role-name RoleName +``` + +Should include trust policy for OIDC provider. + +### OIDC Provider Issues + +**Verify OIDC provider exists**: + +```bash +aws iam list-open-id-connect-providers +``` + +**Re-associate if needed**: + +```bash +eksctl utils associate-iam-oidc-provider \ + --cluster smallest-cluster \ + --region us-east-1 \ + --approve +``` + +## Best Practices + + + + Grant only the minimum permissions required for each service account. + + Review and audit IAM policies regularly. + + + + Create separate IAM roles for each service account rather than sharing roles. + + This improves security and auditability. + + + + Monitor IAM role usage via CloudTrail: + + ```bash + aws cloudtrail lookup-events \ + --lookup-attributes AttributeKey=ResourceName,AttributeValue=role-name + ``` + + + + Tag IAM roles and policies for easier management: + + ```bash + aws iam tag-role \ + --role-name role-name \ + --tags Key=Environment,Value=production Key=Application,Value=smallest-self-host + ``` + + + +## Complete Example + +Here's a complete script to set up all IRSA roles: + +```bash setup-irsa.sh +#!/bin/bash + +CLUSTER_NAME="smallest-cluster" +REGION="us-east-1" +ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text) + +echo "Setting up IRSA for cluster: $CLUSTER_NAME" +echo "AWS Account: $ACCOUNT_ID" +echo "Region: $REGION" + +eksctl utils associate-iam-oidc-provider \ + --cluster $CLUSTER_NAME \ + --region $REGION \ + --approve + +echo "Creating Cluster Autoscaler policy..." +cat > cluster-autoscaler-policy.json < + + Optimize GPU node configuration + + + + Configure cluster autoscaling + +
+ diff --git a/fern/products/waves/pages/v4.0.0/on-prem/kubernetes/k8s-troubleshooting.mdx b/fern/products/waves/pages/v4.0.0/on-prem/kubernetes/k8s-troubleshooting.mdx new file mode 100644 index 0000000..81b7177 --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/on-prem/kubernetes/k8s-troubleshooting.mdx @@ -0,0 +1,508 @@ +--- +title: Kubernetes Troubleshooting +description: Debug common issues in Kubernetes deployments +--- + +## Overview + +This guide covers common issues encountered when deploying Smallest Self-Host on Kubernetes and how to resolve them. + +## Diagnostic Commands + +### Quick Status Check + +```bash +kubectl get all -n smallest +kubectl get pods -n smallest --show-labels +kubectl top pods -n smallest +kubectl top nodes +``` + +### Detailed Pod Information + +```bash +kubectl describe pod -n smallest +kubectl logs -n smallest +kubectl logs -n smallest --previous +kubectl logs -c -n smallest -f +``` + +### Events + +```bash +kubectl get events -n smallest --sort-by='.lastTimestamp' +kubectl get events -n smallest --field-selector type=Warning +``` + +## Common Issues + +### Pods Stuck in Pending + +**Symptoms**: +``` +NAME READY STATUS RESTARTS AGE +lightning-asr-xxx 0/1 Pending 0 5m +``` + +**Causes and Solutions**: + + + + **Check**: + ```bash + kubectl describe pod lightning-asr-xxx -n smallest + ``` + + Look for: `0/3 nodes are available: 3 Insufficient nvidia.com/gpu` + + **Solutions**: + - Add GPU nodes to cluster + - Check GPU nodes are ready: `kubectl get nodes -l nvidia.com/gpu=true` + - Verify GPU device plugin: `kubectl get pods -n kube-system -l name=nvidia-device-plugin` + - Reduce requested GPUs or add more nodes + + + + **Check**: + ```bash + kubectl get nodes --show-labels + kubectl describe pod lightning-asr-xxx -n smallest | grep "Node-Selectors" + ``` + + **Solutions**: + - Update nodeSelector in values.yaml to match actual node labels + - Remove nodeSelector if not needed + - Add labels to nodes: `kubectl label nodes 
workload=gpu` + + + + **Check**: + ```bash + kubectl describe pod lightning-asr-xxx -n smallest | grep -A5 "Tolerations" + kubectl describe node | grep "Taints" + ``` + + **Solutions**: + Update tolerations in values.yaml: + ```yaml + lightningAsr: + tolerations: + - key: nvidia.com/gpu + operator: Exists + effect: NoSchedule + ``` + + + + **Check**: + ```bash + kubectl get pvc -n smallest + ``` + + Look for: `STATUS: Pending` + + **Solutions**: + - Check storage class exists: `kubectl get storageclass` + - Verify sufficient storage: `kubectl describe pvc -n smallest` + - Check EFS/EBS CSI driver running: `kubectl get pods -n kube-system -l app=efs-csi-controller` + + + +### ImagePullBackOff + +**Symptoms**: +``` +NAME READY STATUS RESTARTS AGE +lightning-asr-xxx 0/1 ImagePullBackOff 0 2m +``` + +**Diagnosis**: + +```bash +kubectl describe pod lightning-asr-xxx -n smallest +``` + +Look for errors in Events section. + +**Solutions**: + + + + **Error**: `unauthorized: authentication required` + + **Solutions**: + - Verify imageCredentials in values.yaml + - Check secret created: `kubectl get secrets -n smallest | grep registry` + - Test credentials locally: `docker login quay.io` + - Recreate secret: + ```bash + kubectl delete secret -n smallest + helm upgrade smallest-self-host ... 
-f values.yaml + ``` + + + + **Error**: `manifest unknown` or `not found` + + **Solutions**: + - Verify image name in values.yaml + - Check image exists: `docker pull quay.io/smallestinc/lightning-asr:latest` + - Contact support@smallest.ai for access + + + + **Error**: `rate limit exceeded` + + **Solutions**: + - Wait and retry + - Use authenticated pulls (imageCredentials) + + + +### CrashLoopBackOff + +**Symptoms**: +``` +NAME READY STATUS RESTARTS AGE +lightning-asr-xxx 0/1 CrashLoopBackOff 5 5m +``` + +**Diagnosis**: + +```bash +kubectl logs lightning-asr-xxx -n smallest +kubectl logs lightning-asr-xxx -n smallest --previous +kubectl describe pod lightning-asr-xxx -n smallest +``` + +**Common Causes**: + + + + **Error**: `License validation failed` or `Invalid license key` + + **Solutions**: + - Check License Proxy is running: `kubectl get pods -l app=license-proxy -n smallest` + - Verify license key in values.yaml + - Check License Proxy logs: `kubectl logs -l app=license-proxy -n smallest` + - Test License Proxy: `kubectl exec -it -- curl http://license-proxy:3369/health` + + + + **Error**: `Failed to download model` or `Connection timeout` + + **Solutions**: + - Verify MODEL_URL in values.yaml + - Check network connectivity + - Check disk space: `kubectl exec -it -- df -h` + - Test URL: `kubectl run test --rm -it --image=curlimages/curl -- curl -I $MODEL_URL` + + + + **Error**: Pod killed, exit code 137 + + **Solutions**: + - Check memory limits: + ```bash + kubectl describe pod lightning-asr-xxx -n smallest | grep -A5 Limits + ``` + - Increase memory: + ```yaml + lightningAsr: + resources: + limits: + memory: 16Gi + ``` + - Check node capacity: `kubectl describe node ` + + + + **Error**: `No CUDA-capable device` or `GPU not found` + + **Solutions**: + - Verify GPU available on node: `kubectl describe node | grep nvidia.com/gpu` + - Check NVIDIA device plugin: `kubectl get pods -n kube-system -l name=nvidia-device-plugin` + - Restart device plugin: `kubectl 
delete pod -n kube-system -l name=nvidia-device-plugin` + - Verify GPU driver on node + + + +### Service Not Accessible + +**Symptoms**: +- Cannot connect to API server +- Connection refused errors +- Timeouts + +**Diagnosis**: + +```bash +kubectl get svc -n smallest +kubectl describe svc api-server -n smallest +kubectl get endpoints -n smallest +``` + +**Solutions**: + + + + **Issue**: Service has no endpoints + + **Check**: + ```bash + kubectl get endpoints api-server -n smallest + ``` + + **Solutions**: + - Verify pods are running: `kubectl get pods -l app=api-server -n smallest` + - Check pod labels match service selector + - Check pods are ready: `kubectl get pods -l app=api-server -o wide` + + + + **Solutions**: + - Verify service port: + ```bash + kubectl get svc api-server -n smallest -o yaml + ``` + - Use correct port in connections (7100 for API Server) + + + + **Check**: + ```bash + kubectl get networkpolicy -n smallest + ``` + + **Solutions**: + - Review network policies + - Temporarily disable to test: + ```bash + kubectl delete networkpolicy -n smallest + ``` + + + +### HPA Not Scaling + +**Symptoms**: +- HPA shows `` for metrics +- Pods not scaling despite high load + +**Diagnosis**: + +```bash +kubectl get hpa -n smallest +kubectl describe hpa lightning-asr -n smallest +kubectl get --raw "/apis/custom.metrics.k8s.io/v1beta1" | jq . 
+``` + +**Solutions**: + + + + **Check**: + ```bash + kubectl get servicemonitor -n smallest + kubectl logs -n kube-system -l app.kubernetes.io/name=prometheus-adapter + ``` + + **Solutions**: + - Enable ServiceMonitor: + ```yaml + scaling: + auto: + lightningAsr: + servicemonitor: + enabled: true + ``` + - Verify Prometheus is scraping: + ```bash + kubectl port-forward svc/smallest-prometheus-stack-prometheus 9090:9090 + ``` + Query: `asr_active_requests` + + + + **Check**: + ```bash + kubectl get hpa lightning-asr -n smallest + ``` + + **Solutions**: + - Increase maxReplicas: + ```yaml + scaling: + auto: + lightningAsr: + hpa: + maxReplicas: 20 + ``` + + + + **Solutions**: + - Add more nodes + - Enable Cluster Autoscaler + - Check pending pods: `kubectl get pods --field-selector=status.phase=Pending` + + + +### Persistent Volume Issues + +**Symptoms**: +- PVC stuck in Pending +- Mount failures +- Permission denied + +**Solutions**: + + + + **Check**: + ```bash + kubectl get storageclass + ``` + + **Solutions**: + - Install EBS CSI driver (AWS) + - Install EFS CSI driver (AWS) + - Create storage class + + + + **Check**: + ```bash + kubectl describe pod | grep -A10 "Events" + ``` + + **Solutions**: + - Verify EFS file system ID + - Check security group allows NFS (port 2049) + - Verify EFS CSI driver: `kubectl get pods -n kube-system -l app=efs-csi-controller` + + + + **Solutions**: + - Check volume permissions + - Add fsGroup to pod securityContext: + ```yaml + securityContext: + fsGroup: 1000 + ``` + + + +## Performance Issues + +### Slow Response Times + +**Check**: + +```bash +kubectl top pods -n smallest +kubectl top nodes +kubectl logs -l app=lightning-asr -n smallest | grep -i "latency\|duration" +``` + +**Solutions**: + +- Increase pod resources +- Scale up replicas +- Check GPU utilization: `kubectl exec -it -- nvidia-smi` +- Review model configuration +- Check network latency + +### High CPU/Memory Usage + +**Check**: + +```bash +kubectl top pods -n 
smallest +kubectl describe pod -n smallest | grep -A5 "Limits" +``` + +**Solutions**: + +- Increase resource limits +- Scale horizontally (more pods) +- Investigate memory leaks in logs +- Enable monitoring with Grafana + +## Debugging Tools + +### Interactive Shell + +```bash +kubectl exec -it -n smallest -- /bin/sh +``` + +### Debug Container + +```bash +kubectl debug -n smallest -it --image=ubuntu -- bash +``` + +### Network Debugging + +```bash +kubectl run netdebug --rm -it --restart=Never \ + --image=nicolaka/netshoot \ + --namespace=smallest +``` + +Inside the debug pod: +```bash +nslookup api-server +curl http://api-server:7100/health +traceroute lightning-asr +``` + +### Copy Files + +```bash +kubectl cp :/path/to/file ./local-file -n smallest +kubectl cp ./local-file :/path/to/file -n smallest +``` + +## Getting Help + +### Collect Diagnostic Information + +Before contacting support, collect: + +```bash +kubectl get all -n smallest > status.txt +kubectl describe pods -n smallest > pods.txt +kubectl logs -l app=lightning-asr -n smallest --tail=500 > asr-logs.txt +kubectl logs -l app=api-server -n smallest --tail=500 > api-logs.txt +kubectl logs -l app=license-proxy -n smallest --tail=500 > license-logs.txt +kubectl get events -n smallest --sort-by='.lastTimestamp' > events.txt +kubectl top nodes > nodes.txt +kubectl top pods -n smallest > pod-resources.txt +helm get values smallest-self-host -n smallest > values.txt +``` + +### Contact Support + +Email: **support@smallest.ai** + +Include: +- Description of the issue +- Steps to reproduce +- Diagnostic files collected above +- Cluster information (EKS version, node types, etc.) +- Helm chart version + +## What's Next? 
+ + + + Platform-agnostic troubleshooting guide + + + + API integration documentation + + + diff --git a/fern/products/waves/pages/v4.0.0/on-prem/kubernetes/prerequisites/credentials.mdx b/fern/products/waves/pages/v4.0.0/on-prem/kubernetes/prerequisites/credentials.mdx new file mode 100644 index 0000000..cf7aad7 --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/on-prem/kubernetes/prerequisites/credentials.mdx @@ -0,0 +1,85 @@ +--- +title: Credentials & Access +description: License keys and registry credentials for Kubernetes STT deployment +--- + +## Required Credentials + +Obtain the following from Smallest.ai before installation: + + + + Your unique license key for validation + + **Contact**: support@smallest.ai + + You'll add this to `values.yaml`: + ```yaml + global: + licenseKey: "your-license-key-here" + ``` + + + + Credentials to pull Docker images from `quay.io`: + - Username + - Password + - Email + + **Contact**: support@smallest.ai + + You'll add these to `values.yaml`: + ```yaml + global: + imageCredentials: + username: "your-username" + password: "your-password" + email: "your-email" + ``` + + + + Download URL for ASR models + + **Contact**: support@smallest.ai + + You'll add this to `values.yaml`: + ```yaml + models: + asrModelUrl: "your-model-url" + ``` + + + +## Create Kubernetes Secret + +Alternatively, create a secret for registry credentials: + +```bash +kubectl create secret docker-registry smallest-registry \ + --docker-server=quay.io \ + --docker-username= \ + --docker-password= \ + --docker-email= \ + -n smallest +``` + +## Namespace Setup + + + + Deploy to the default namespace: + ```bash + kubectl config set-context --current --namespace=default + ``` + + + + Create and use a dedicated namespace: + ```bash + kubectl create namespace smallest + kubectl config set-context --current --namespace=smallest + ``` + + + diff --git a/fern/products/waves/pages/v4.0.0/on-prem/kubernetes/prerequisites/hardware-requirements.mdx 
b/fern/products/waves/pages/v4.0.0/on-prem/kubernetes/prerequisites/hardware-requirements.mdx new file mode 100644 index 0000000..c3f8015 --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/on-prem/kubernetes/prerequisites/hardware-requirements.mdx @@ -0,0 +1,84 @@ +--- +title: Hardware Requirements +description: Cluster and hardware specifications for Kubernetes STT deployment +--- + +## Cluster Requirements + + + + **v1.19 or higher** + + v1.24+ recommended + + + + **Minimum 2 nodes** + + - 1 CPU node (control plane/general) + - 1 GPU node (Lightning ASR) + + + + **Minimum cluster capacity** + + - 8 CPU cores + - 32 GB RAM + - 1 NVIDIA GPU + + + + **Persistent volume support** + + - Storage class available + - 100 GB minimum capacity + + + + +We recommend using L4 or L40s GPUs for the best performance. + + +## Network Requirements + +Ensure the following ports are accessible within the cluster: + +| Port | Service | Purpose | +|------|---------|---------| +| 7100 | API Server | Client API requests | +| 2269 | Lightning ASR | Internal ASR processing | +| 3369 | License Proxy | Internal license validation | +| 6379 | Redis | Internal caching | + +### External Access + +The License Proxy requires outbound HTTPS access to: +- `api.smallest.ai` (port 443) + + +Ensure your cluster's network policies and security groups allow outbound HTTPS traffic from pods. + + +## Storage Requirements + +### Storage Class + +Verify a storage class is available: + +```bash +kubectl get storageclass +``` + +You should see at least one storage class marked as `(default)` or available. + +### For AWS Deployments + +If deploying on AWS EKS, you'll need: + +- **EBS CSI Driver** for block storage +- **EFS CSI Driver** for shared file storage (recommended for model storage) + + +See the [AWS EKS Setup](/waves/self-host/kubernetes-setup/aws/eks-setup) guide for detailed setup instructions. 
+ + diff --git a/fern/products/waves/pages/v4.0.0/on-prem/kubernetes/prerequisites/software-requirements.mdx b/fern/products/waves/pages/v4.0.0/on-prem/kubernetes/prerequisites/software-requirements.mdx new file mode 100644 index 0000000..f351d3d --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/on-prem/kubernetes/prerequisites/software-requirements.mdx @@ -0,0 +1,146 @@ +--- +title: Software Requirements +description: Tools and software for Kubernetes STT deployment +--- + +## Required Tools + +Install the following tools on your local machine. + +### Helm + +Helm 3.0 or higher is required. + + + + ```bash + brew install helm + ``` + + + + ```bash + curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash + ``` + + + + ```powershell + choco install kubernetes-helm + ``` + + + +Verify installation: +```bash +helm version +``` + +### kubectl + +Kubernetes CLI tool for cluster management. + + + + ```bash + brew install kubectl + ``` + + + + ```bash + curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl" + chmod +x kubectl + sudo mv kubectl /usr/local/bin/ + ``` + + + + ```powershell + choco install kubernetes-cli + ``` + + + +Verify installation: +```bash +kubectl version --client +``` + +## Cluster Access + +### Configure kubectl + +Ensure kubectl is configured to access your cluster: + +```bash +kubectl cluster-info +kubectl get nodes +``` + +Expected output should show your cluster nodes. + +### Test Cluster Access + +Verify you have sufficient permissions: + +```bash +kubectl auth can-i create deployments +kubectl auth can-i create services +kubectl auth can-i create secrets +``` + +All should return `yes`. + +## GPU Support + +### NVIDIA GPU Operator + +For Kubernetes clusters, install the NVIDIA GPU Operator to manage GPU resources. + + +The Smallest Self-Host Helm chart includes the GPU Operator as an optional dependency. 
You can enable it during installation or install it separately. + + +#### Verify GPU Nodes + +Check that GPU nodes are properly labeled: + +```bash +kubectl get nodes -l node.kubernetes.io/instance-type +``` + +Verify GPU resources are available: + +```bash +kubectl get nodes -o json | jq '.items[].status.capacity' +``` + +Look for `nvidia.com/gpu` in the capacity. + +## Optional Components + +### Prometheus & Grafana + +For monitoring and autoscaling based on custom metrics: + +- **Prometheus Operator** (included in chart) +- **Grafana** (included in chart) +- **Prometheus Adapter** (included in chart) + +These are required for: +- Custom metrics-based autoscaling +- Advanced monitoring dashboards +- Performance visualization + +### Cluster Autoscaler + +For automatic node scaling on AWS EKS: + +- IAM role with autoscaling permissions +- IRSA (IAM Roles for Service Accounts) configured + + +See the [Cluster Autoscaler](/waves/self-host/kubernetes-setup/autoscaling/cluster-autoscaler) guide for setup. 
+ + diff --git a/fern/products/waves/pages/v4.0.0/on-prem/kubernetes/prerequisites/verification.mdx b/fern/products/waves/pages/v4.0.0/on-prem/kubernetes/prerequisites/verification.mdx new file mode 100644 index 0000000..4677cda --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/on-prem/kubernetes/prerequisites/verification.mdx @@ -0,0 +1,98 @@ +--- +title: Verification Checklist +description: Verify all prerequisites before deploying STT on Kubernetes +--- + +## Pre-Deployment Checklist + +Before proceeding, ensure each item passes: + + + + ```bash + kubectl get nodes + ``` + Shows all cluster nodes in Ready state + + + + ```bash + kubectl get nodes -o json | jq '.items[].status.capacity."nvidia.com/gpu"' + ``` + Shows GPU count for GPU nodes + + + + ```bash + helm version + ``` + Shows Helm 3.x + + + + ```bash + kubectl get storageclass + ``` + Shows at least one storage class + + + + - [ ] License key obtained + - [ ] Container registry credentials + - [ ] Model download URL + + + + ```bash + kubectl top nodes + ``` + Shows available resources for deployment + + + + ```bash + kubectl auth can-i create deployments + kubectl auth can-i create services + kubectl auth can-i create secrets + ``` + All return `yes` + + + +## Quick Verification Script + +Run this script to check all prerequisites at once: + +```bash +#!/bin/bash +echo "=== Kubernetes STT Prerequisites Check ===" + +echo -n "kubectl: " +kubectl version --client &>/dev/null && echo "OK" || echo "MISSING" + +echo -n "Helm: " +helm version &>/dev/null && echo "OK" || echo "MISSING" + +echo -n "Cluster Access: " +kubectl cluster-info &>/dev/null && echo "OK" || echo "FAILED" + +echo -n "Nodes Ready: " +kubectl get nodes | grep -q "Ready" && echo "OK" || echo "FAILED" + +echo -n "Storage Class: " +kubectl get storageclass &>/dev/null && echo "OK" || echo "MISSING" + +echo -n "GPU Resources: " +kubectl get nodes -o json | jq -e '.items[].status.capacity."nvidia.com/gpu"' &>/dev/null && echo "OK" || echo 
"NOT DETECTED" + +echo "=== Check Complete ===" +``` + +## AWS-Specific Prerequisites + +If deploying on AWS EKS, see: + + + Complete guide for setting up EKS cluster with GPU support + + diff --git a/fern/products/waves/pages/v4.0.0/on-prem/kubernetes/quick-start.mdx b/fern/products/waves/pages/v4.0.0/on-prem/kubernetes/quick-start.mdx new file mode 100644 index 0000000..899b4b2 --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/on-prem/kubernetes/quick-start.mdx @@ -0,0 +1,318 @@ +--- +title: Quick Start +description: Deploy Smallest Self-Host on Kubernetes with Helm +--- + + +Kubernetes deployment is currently available for **ASR (Speech-to-Text)** only. For TTS deployments, use [Docker](/waves/self-host/docker-setup/tts-deployment/quick-start). + + + +Ensure you've completed all [prerequisites](/waves/self-host/kubernetes-setup/prerequisites/hardware-requirements) before starting. + + +## Add Helm Repository + +```bash +helm repo add smallest-self-host https://smallest-inc.github.io/smallest-self-host +helm repo update +``` + +## Create Namespace + +```bash +kubectl create namespace smallest +kubectl config set-context --current --namespace=smallest +``` + +## Configure Values + +Create a `values.yaml` file: + +```yaml values.yaml +global: + licenseKey: "your-license-key-here" + imageCredentials: + create: true + registry: quay.io + username: "your-registry-username" + password: "your-registry-password" + email: "your-email@example.com" + +models: + asrModelUrl: "your-model-url-here" + +scaling: + replicas: + lightningAsr: 1 + licenseProxy: 1 + +lightningAsr: + nodeSelector: + tolerations: + +redis: + enabled: true + auth: + enabled: true +``` + + +Replace placeholder values with credentials provided by Smallest.ai support. 
+ + +## Install + +```bash +helm install smallest-self-host smallest-self-host/smallest-self-host \ + -f values.yaml \ + --namespace smallest +``` + +Monitor the deployment: + +```bash +kubectl get pods -w +``` + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
| Component | Startup Time | Ready Indicator |
|-----------|--------------|-----------------|
| Redis | ~30s | 1/1 Running |
| License Proxy | ~1m | 1/1 Running |
| Lightning ASR | 2-10m | 1/1 Running (model download on first run) |
| API Server | ~30s | 1/1 Running |
+ + +Model downloads are cached when using shared storage (EFS). Subsequent starts complete in under a minute. + + +## Verify Installation + +```bash +kubectl get pods,svc +``` + +All pods should show `Running` status with the following services available: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
| Service | Port | Description |
|---------|------|-------------|
| api-server | 7100 | REST API endpoint |
| lightning-asr-internal | 2269 | ASR inference service |
| license-proxy | 3369 | License validation |
| redis-master | 6379 | Request queue |
+ +## Test the API + +Port forward and send a health check: + +```bash +kubectl port-forward svc/api-server 7100:7100 +``` + +```bash +curl http://localhost:7100/health +``` + +## Autoscaling + +Enable automatic scaling based on real-time inference load: + +```yaml values.yaml +scaling: + auto: + enabled: true +``` + +This deploys HorizontalPodAutoscalers that scale based on active requests: + + + + + + + + + + + + + + + + + + + + + + + + +
| Component | Metric | Default Target | Behavior |
|-----------|--------|----------------|----------|
| Lightning ASR | `asr_active_requests` | 4 per pod | Scales GPU workers based on inference queue depth |
| API Server | `lightning_asr_replica_count` | 2:1 ratio | Maintains API capacity proportional to ASR workers |
+ +### How It Works + +1. **Lightning ASR** exposes `asr_active_requests` metric on port 9090 +2. **Prometheus** scrapes this metric via ServiceMonitor +3. **Prometheus Adapter** makes it available to the Kubernetes metrics API +4. **HPA** scales pods when average requests per pod exceeds target + +### Configuration + +```yaml values.yaml +scaling: + auto: + enabled: true + lightningAsr: + hpa: + minReplicas: 1 + maxReplicas: 10 + targetActiveRequests: 4 +``` + +### Verify Autoscaling + +```bash +kubectl get hpa +``` + +``` +NAME REFERENCE TARGETS MINPODS MAXPODS REPLICAS +lightning-asr Deployment/lightning-asr 0/4 1 10 1 +api-server Deployment/api-server 1/2 1 10 1 +``` + +The `TARGETS` column shows `current/target`. When current exceeds target, pods scale up. + + +Autoscaling requires the Prometheus stack. It's included as a dependency and enabled by default. + + +## Helm Operations + + + +```bash Upgrade +helm upgrade smallest-self-host smallest-self-host/smallest-self-host \ + -f values.yaml -n smallest +``` + +```bash Rollback +helm rollback smallest-self-host -n smallest +``` + +```bash Uninstall +helm uninstall smallest-self-host -n smallest +``` + +```bash View Config +helm get values smallest-self-host -n smallest +``` + + + +## Troubleshooting + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
<tr>
<th>Issue</th>
<th>Cause</th>
<th>Resolution</th>
</tr>
<tr>
<td>Pods Pending</td>
<td>Insufficient resources or missing GPU nodes</td>
<td>Check <code>kubectl describe pod &lt;name&gt;</code> for scheduling errors</td>
</tr>
<tr>
<td>ImagePullBackOff</td>
<td>Invalid registry credentials</td>
<td>Verify <code>imageCredentials</code> in <code>values.yaml</code></td>
</tr>
<tr>
<td>CrashLoopBackOff</td>
<td>Invalid license or insufficient memory</td>
<td>Check logs with <code>kubectl logs &lt;pod&gt; --previous</code></td>
</tr>
<tr>
<td>Slow model download</td>
<td>Large model size (~20GB)</td>
<td>Use shared storage (EFS) for caching</td>
</tr>
+ +For detailed troubleshooting, see [Troubleshooting Guide](/waves/self-host/kubernetes-setup/k8s-troubleshooting). + +## Next Steps + + + + EKS-specific configuration + + + + Shared storage for faster cold starts + + + + Fine-tune scaling behavior and thresholds + + + + Grafana dashboards and alerting + + diff --git a/fern/products/waves/pages/v4.0.0/on-prem/kubernetes/storage/efs-configuration.mdx b/fern/products/waves/pages/v4.0.0/on-prem/kubernetes/storage/efs-configuration.mdx new file mode 100644 index 0000000..713084b --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/on-prem/kubernetes/storage/efs-configuration.mdx @@ -0,0 +1,470 @@ +--- +title: EFS Configuration +description: Set up Amazon EFS for shared storage in AWS EKS +--- + +## Overview + +Amazon Elastic File System (EFS) provides shared, persistent file storage for Kubernetes pods. This is ideal for storing AI models that can be shared across multiple Lightning ASR pods, eliminating duplicate downloads and reducing startup time. 
+ +## Benefits of EFS + + + + Multiple pods can read/write simultaneously (ReadWriteMany) + + + + Storage grows and shrinks automatically + + + + Models cached once, used by all pods + + + + Pay only for storage used, no upfront provisioning + + + +## Prerequisites + + + + Install the EFS CSI driver (see [IAM & IRSA](/waves/self-host/kubernetes-setup/aws/iam-irsa) guide) + + ```bash + kubectl get pods -n kube-system -l app=efs-csi-controller + ``` + + + + Note your EKS cluster's VPC ID and subnet IDs: + + ```bash + aws eks describe-cluster \ + --name smallest-cluster \ + --region us-east-1 \ + --query 'cluster.resourcesVpcConfig.{vpcId:vpcId,subnetIds:subnetIds}' + ``` + + + + Note your cluster security group ID: + + ```bash + aws eks describe-cluster \ + --name smallest-cluster \ + --region us-east-1 \ + --query 'cluster.resourcesVpcConfig.clusterSecurityGroupId' + ``` + + + +## Create EFS File System + +### Using AWS Console + + + + Go to AWS Console → EFS → Create file system + + + + - **Name**: `smallest-models` + - **VPC**: Select your EKS cluster VPC + - **Availability and Durability**: Regional (recommended) + - Click "Customize" + + + + - **Performance mode**: General Purpose + - **Throughput mode**: Bursting (or Elastic for production) + - **Encryption**: Enable encryption at rest + - Click "Next" + + + + - Select all subnets where EKS nodes run + - Security group: Select cluster security group + - Click "Next" + + + + Review settings and click "Create" + + Note the **File system ID** (e.g., `fs-0123456789abcdef`) + + + +### Using AWS CLI + +```bash +VPC_ID=$(aws eks describe-cluster \ + --name smallest-cluster \ + --region us-east-1 \ + --query 'cluster.resourcesVpcConfig.vpcId' \ + --output text) + +SG_ID=$(aws eks describe-cluster \ + --name smallest-cluster \ + --region us-east-1 \ + --query 'cluster.resourcesVpcConfig.clusterSecurityGroupId' \ + --output text) + +FILE_SYSTEM_ID=$(aws efs create-file-system \ + --region us-east-1 \ + --performance-mode 
generalPurpose \ + --throughput-mode bursting \ + --encrypted \ + --tags Key=Name,Value=smallest-models \ + --query 'FileSystemId' \ + --output text) + +echo "Created EFS: $FILE_SYSTEM_ID" + +SUBNET_IDS=$(aws eks describe-cluster \ + --name smallest-cluster \ + --region us-east-1 \ + --query 'cluster.resourcesVpcConfig.subnetIds[*]' \ + --output text) + +for subnet in $SUBNET_IDS; do + aws efs create-mount-target \ + --file-system-id $FILE_SYSTEM_ID \ + --subnet-id $subnet \ + --security-groups $SG_ID \ + --region us-east-1 +done + +echo "EFS File System ID: $FILE_SYSTEM_ID" +``` + +## Configure Security Group + +Ensure the security group allows NFS traffic (port 2049) from cluster nodes: + +```bash +SG_ID=$(aws eks describe-cluster \ + --name smallest-cluster \ + --region us-east-1 \ + --query 'cluster.resourcesVpcConfig.clusterSecurityGroupId' \ + --output text) + +aws ec2 authorize-security-group-ingress \ + --group-id $SG_ID \ + --protocol tcp \ + --port 2049 \ + --source-group $SG_ID \ + --region us-east-1 +``` + + +If the rule already exists, you'll see an error. This is safe to ignore. + + +## Deploy with EFS in Helm + +Update your `values.yaml` to enable EFS: + +```yaml values.yaml +models: + asrModelUrl: "your-model-url-here" + volumes: + aws: + efs: + enabled: true + fileSystemId: "fs-0123456789abcdef" + namePrefix: "models" +``` + +Replace `fs-0123456789abcdef` with your actual EFS file system ID. 
+ +### Deploy or Upgrade + +```bash +helm upgrade --install smallest-self-host smallest-self-host/smallest-self-host \ + -f values.yaml \ + --namespace smallest +``` + +## Verify EFS Configuration + +### Check Storage Class + +```bash +kubectl get storageclass +``` + +Should show: +``` +NAME PROVISIONER RECLAIMPOLICY VOLUMEBINDINGMODE AGE +models-aws-efs-sc efs.csi.aws.com Delete Immediate 1m +``` + +### Check Persistent Volume + +```bash +kubectl get pv +``` + +Should show: +``` +NAME CAPACITY ACCESS MODES RECLAIM POLICY STATUS CLAIM +models-aws-efs-pv 5Gi RWX Retain Bound smallest/models-aws-efs-pvc +``` + +### Check Persistent Volume Claim + +```bash +kubectl get pvc -n smallest +``` + +Should show: +``` +NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE +models-aws-efs-pvc Bound models-aws-efs-pv 5Gi RWX models-aws-efs-sc 1m +``` + +### Verify Mount in Pod + +```bash +kubectl get pods -l app=lightning-asr -n smallest +kubectl exec -it -n smallest -- df -h | grep efs +``` + +Should show the EFS mount: +``` +fs-0123456789abcdef.efs.us-east-1.amazonaws.com:/ 8.0E 0 8.0E 0% /app/models +``` + +## Test EFS + +Create a test file in one pod and verify it's visible in another: + +### Write test file: + +```bash +kubectl exec -it -n smallest -- sh -c "echo 'test' > /app/models/test.txt" +``` + +### Read from another pod: + +```bash +kubectl exec -it -n smallest -- cat /app/models/test.txt +``` + +Should output: `test` + +## How Model Caching Works + +With EFS enabled: + +1. **First Pod Startup**: + - Pod downloads model from `asrModelUrl` + - Saves model to `/app/models` (EFS mount) + - Takes 5-10 minutes (one-time download) + +2. **Subsequent Pod Startups**: + - Pod checks `/app/models` for existing model + - Finds model already downloaded + - Skips download, loads from EFS + - Takes 30-60 seconds + + +This is especially valuable when using autoscaling, as new pods start much faster. 
+ + +## Performance Tuning + +### Choose Throughput Mode + + + + **Best for**: Development, testing, variable workloads + + - Throughput scales with storage size + - 50 MB/s per TB of storage + - Bursting to 100 MB/s + - Most cost-effective + + + + **Best for**: Production with unpredictable load + + - Automatically scales throughput + - Up to 3 GB/s for reads + - Up to 1 GB/s for writes + - Pay for throughput used + + Update via console or CLI: + ```bash + aws efs update-file-system \ + --file-system-id fs-0123456789abcdef \ + --throughput-mode elastic + ``` + + + + **Best for**: Production with consistent high throughput + + - Fixed throughput independent of size + - Up to 1 GB/s throughput + - Higher cost + + ```bash + aws efs update-file-system \ + --file-system-id fs-0123456789abcdef \ + --throughput-mode provisioned \ + --provisioned-throughput-in-mibps 100 + ``` + + + +### Enable Lifecycle Management + +Automatically move infrequently accessed files to lower-cost storage: + +```bash +aws efs put-lifecycle-configuration \ + --file-system-id fs-0123456789abcdef \ + --lifecycle-policies \ + '[{"TransitionToIA":"AFTER_30_DAYS"},{"TransitionToPrimaryStorageClass":"AFTER_1_ACCESS"}]' +``` + +## Cost Optimization + +### Monitor EFS Usage + +```bash +aws efs describe-file-systems \ + --file-system-id fs-0123456789abcdef \ + --query 'FileSystems[0].SizeInBytes' +``` + +### Estimate Costs + +EFS pricing (us-east-1): +- **Standard storage**: ~$0.30/GB/month +- **Infrequent Access**: ~$0.025/GB/month +- **Data transfer**: Free within same AZ + +For 50 GB model: +- Standard: ~$15/month +- With IA (after 30 days): ~$1.25/month + + +Use lifecycle policies to automatically move old models to Infrequent Access storage. 
+ + +## Backup and Recovery + +### Enable AWS Backup + +```bash +aws backup create-backup-plan \ + --backup-plan '{ + "BackupPlanName": "smallest-efs-backup", + "Rules": [{ + "RuleName": "daily-backup", + "TargetBackupVaultName": "Default", + "ScheduleExpression": "cron(0 2 * * ? *)", + "Lifecycle": { + "DeleteAfterDays": 30 + } + }] + }' +``` + +### Manual Backup + +EFS automatically creates point-in-time backups. Access via AWS Console → EFS → Backups. + +## Troubleshooting + +### Mount Failed + +**Check EFS CSI driver**: + +```bash +kubectl get pods -n kube-system -l app=efs-csi-controller +kubectl logs -n kube-system -l app=efs-csi-controller +``` + +**Verify security group rules**: + +```bash +aws ec2 describe-security-groups --group-ids $SG_ID +``` + +Ensure port 2049 is open. + +### Slow Performance + +**Check throughput mode**: + +```bash +aws efs describe-file-systems \ + --file-system-id fs-0123456789abcdef \ + --query 'FileSystems[0].ThroughputMode' +``` + +Consider upgrading to Elastic or Provisioned. + +**Monitor CloudWatch metrics**: +- `PermittedThroughput` +- `BurstCreditBalance` +- `ClientConnections` + +### Permission Denied + +**Check mount options** in PV: + +```bash +kubectl get pv models-aws-efs-pv -o yaml +``` + +Should include: +```yaml +mountOptions: + - tls +``` + +## Alternative: EBS for Single Pod + +If you don't need shared storage (single replica only): + +```yaml values.yaml +models: + volumes: + aws: + efs: + enabled: false + +scaling: + replicas: + lightningAsr: 1 + +lightningAsr: + persistence: + enabled: true + storageClass: gp3 + size: 100Gi +``` + + +EBS volumes can only be attached to one pod at a time. This prevents horizontal scaling. + + +## What's Next? 
+ + + + Optimize model storage and caching strategies + + + + Enable autoscaling with shared model storage + + + diff --git a/fern/products/waves/pages/v4.0.0/on-prem/kubernetes/storage/model-storage.mdx b/fern/products/waves/pages/v4.0.0/on-prem/kubernetes/storage/model-storage.mdx new file mode 100644 index 0000000..b378d45 --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/on-prem/kubernetes/storage/model-storage.mdx @@ -0,0 +1,496 @@ +--- +title: Model Storage +description: Optimize model storage and caching strategies for Lightning ASR +--- + +## Overview + +AI models for Lightning ASR are large files (20-30 GB) that significantly impact startup time. This guide covers strategies for efficient model storage and caching to minimize download time and enable fast scaling. + +## Storage Strategies + +### Strategy 1: Shared EFS Volume (Recommended) + +Best for production with autoscaling. + +**Advantages**: +- Models downloaded once, shared across all pods +- New pods start in 30-60 seconds +- No storage duplication +- Enables horizontal scaling + +**Implementation**: + +```yaml values.yaml +models: + asrModelUrl: "https://example.com/model.bin" + volumes: + aws: + efs: + enabled: true + fileSystemId: "fs-0123456789abcdef" + namePrefix: "models" + +scaling: + auto: + enabled: true + lightningAsr: + hpa: + enabled: true + maxReplicas: 10 +``` + +See [EFS Configuration](/waves/self-host/kubernetes-setup/storage-pvc/efs-configuration) for setup. + +### Strategy 2: Container Image with Baked Model + +Best for fixed deployments with infrequent updates. 
+ +**Advantages**: +- Fastest startup (model pre-loaded) +- No external download required +- Works offline + +**Disadvantages**: +- Very large container images (20+ GB) +- Slow image pulls +- Updates require new image build + +**Implementation**: + +Build custom image: + +```dockerfile Dockerfile +FROM quay.io/smallestinc/lightning-asr:latest + +RUN wget -O /app/models/model.bin https://example.com/model.bin + +ENV MODEL_PATH=/app/models/model.bin +``` + +Build and push: + +```bash +docker build -t myregistry/lightning-asr:with-model . +docker push myregistry/lightning-asr:with-model +``` + +Update values: + +```yaml values.yaml +lightningAsr: + image: "myregistry/lightning-asr:with-model" + +models: + asrModelUrl: "" +``` + +### Strategy 3: EmptyDir Volume + +Best for development/testing only. + +**Advantages**: +- Simple configuration +- No external storage required + +**Disadvantages**: +- Model downloaded on every pod start +- Cannot scale beyond single node +- Data lost on pod restart + +**Implementation**: + +```yaml values.yaml +models: + asrModelUrl: "https://example.com/model.bin" + volumes: + aws: + efs: + enabled: false + +lightningAsr: + persistence: + enabled: false +``` + +Each pod downloads the model independently. + +### Strategy 4: Init Container with S3 + +Best for AWS deployments without EFS. + +**Advantages**: +- Fast downloads from S3 within AWS +- No EFS cost +- Works with ReadWriteOnce volumes + +**Disadvantages**: +- Each pod downloads independently +- Slower scaling than EFS +- Requires S3 bucket + +**Implementation**: + +Upload model to S3: + +```bash +aws s3 cp model.bin s3://my-bucket/models/model.bin +``` + +Create custom deployment with init container: + +```yaml +initContainers: + - name: download-model + image: amazon/aws-cli + command: + - sh + - -c + - | + if [ ! 
-f /models/model.bin ]; then + aws s3 cp s3://my-bucket/models/model.bin /models/model.bin + fi + volumeMounts: + - name: model-cache + mountPath: /models + env: + - name: AWS_REGION + value: us-east-1 +``` + +## Model Download Optimization + +### Parallel Downloads + +For multiple model files, download in parallel: + +```yaml +lightningAsr: + env: + - name: MODEL_DOWNLOAD_WORKERS + value: "4" +``` + +### Resume on Failure + +Enable download resume for interrupted downloads: + +```yaml +lightningAsr: + env: + - name: MODEL_DOWNLOAD_RESUME + value: "true" +``` + +### CDN Acceleration + +Use CloudFront for faster downloads: + +```yaml +models: + asrModelUrl: "https://d111111abcdef8.cloudfront.net/model.bin" +``` + +## Model Versioning + +### Multiple Models + +Support multiple model versions: + +```yaml values.yaml +models: + asrModelUrl: "https://example.com/model-v1.bin" + +lightningAsr: + env: + - name: MODEL_VERSION + value: "v1" + - name: MODEL_CACHE_DIR + value: "/app/models/v1" +``` + +### Blue-Green Deployments + +Deploy new model version alongside old: + +```bash +helm install smallest-v2 smallest-self-host/smallest-self-host \ + -f values.yaml \ + --set models.asrModelUrl="https://example.com/model-v2.bin" \ + --set lightningAsr.namePrefix="lightning-asr-v2" \ + --namespace smallest +``` + +Test v2, then switch traffic: + +```yaml +apiServer: + env: + - name: LIGHTNING_ASR_BASE_URL + value: "http://lightning-asr-v2:2269" +``` + +## Storage Quotas + +### Limit Model Cache Size + +Prevent unbounded growth: + +```yaml +lightningAsr: + persistence: + enabled: true + size: 100Gi + + env: + - name: MODEL_CACHE_MAX_SIZE + value: "50GB" + - name: MODEL_CACHE_EVICTION + value: "lru" +``` + +### Monitor Storage Usage + +Check PVC usage: + +```bash +kubectl get pvc -n smallest +kubectl describe pvc models-aws-efs-pvc -n smallest +``` + +Check actual usage in pod: + +```bash +kubectl exec -it -n smallest -- df -h /app/models +``` + +## Pre-warming Models + +### 
Pre-download Before Scaling + +Download models before peak traffic: + +```bash +kubectl create job model-preload \ + --image=quay.io/smallestinc/lightning-asr:latest \ + --namespace=smallest \ + -- sh -c "wget -O /app/models/model.bin $MODEL_URL && exit 0" +``` + +### Scheduled Pre-warming + +Use CronJob for regular pre-warming: + +```yaml +apiVersion: batch/v1 +kind: CronJob +metadata: + name: model-preload + namespace: smallest +spec: + schedule: "0 8 * * *" + jobTemplate: + spec: + template: + spec: + containers: + - name: preload + image: quay.io/smallestinc/lightning-asr:latest + command: + - sh + - -c + - wget -O /app/models/model.bin $MODEL_URL || true + env: + - name: MODEL_URL + value: "https://example.com/model.bin" + volumeMounts: + - name: models + mountPath: /app/models + volumes: + - name: models + persistentVolumeClaim: + claimName: models-aws-efs-pvc + restartPolicy: OnFailure +``` + +## Model Integrity + +### Checksum Validation + +Verify model integrity after download: + +```yaml +lightningAsr: + env: + - name: MODEL_CHECKSUM + value: "sha256:abc123..." + - name: MODEL_VALIDATE + value: "true" +``` + +### Automatic Retry + +Retry failed downloads: + +```yaml +lightningAsr: + env: + - name: MODEL_DOWNLOAD_RETRIES + value: "3" + - name: MODEL_DOWNLOAD_TIMEOUT + value: "3600" +``` + +## Performance Comparison + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
<tr>
<th>Strategy</th>
<th>First Start</th>
<th>Subsequent Starts</th>
<th>Scaling Speed</th>
<th>Cost</th>
</tr>
<tr>
<td>EFS Shared</td>
<td>5-10 min</td>
<td>30-60 sec</td>
<td>Fast</td>
<td>Medium</td>
</tr>
<tr>
<td>Baked Image</td>
<td>3-5 min</td>
<td>3-5 min</td>
<td>Medium</td>
<td>Low</td>
</tr>
<tr>
<td>EmptyDir</td>
<td>5-10 min</td>
<td>5-10 min</td>
<td>Slow</td>
<td>Low</td>
</tr>
<tr>
<td>S3 Init</td>
<td>2-5 min</td>
<td>2-5 min</td>
<td>Medium</td>
<td>Low</td>
</tr>
+ +## Best Practices + + + + Always use shared storage (EFS) for production deployments with autoscaling. + + The cost savings from reduced download time and faster scaling far outweigh EFS costs. + + + + Watch logs during first deployment: + + ```bash + kubectl logs -f -l app=lightning-asr -n smallest + ``` + + Look for download progress indicators. + + + + Ensure sufficient storage for models: + + ```yaml + models: + volumes: + aws: + efs: + enabled: true + + lightningAsr: + resources: + ephemeral-storage: "50Gi" + ``` + + + + Test new models in separate deployment before updating production: + + ```bash + helm install test smallest-self-host/smallest-self-host \ + --set models.asrModelUrl="new-model-url" \ + --namespace smallest-test + ``` + + + +## Troubleshooting + +### Model Download Stalled + +Check pod logs: + +```bash +kubectl logs -l app=lightning-asr -n smallest --tail=100 +``` + +Check network connectivity: + +```bash +kubectl exec -it -n smallest -- wget --spider $MODEL_URL +``` + +### Insufficient Storage + +Check available space: + +```bash +kubectl exec -it -n smallest -- df -h +``` + +Increase PVC size: + +```yaml +models: + volumes: + aws: + efs: + enabled: true + +lightningAsr: + persistence: + size: 200Gi +``` + +### Model Corruption + +Delete cached model and restart: + +```bash +kubectl exec -it -n smallest -- rm -rf /app/models/* +kubectl delete pod -n smallest +``` + +## What's Next? 
+ + + + Set up EFS for shared model storage + + + + Configure Redis data persistence + + + + Enable autoscaling with fast pod startup + + + diff --git a/fern/products/waves/pages/v4.0.0/on-prem/kubernetes/storage/redis-persistence.mdx b/fern/products/waves/pages/v4.0.0/on-prem/kubernetes/storage/redis-persistence.mdx new file mode 100644 index 0000000..5b5f0f7 --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/on-prem/kubernetes/storage/redis-persistence.mdx @@ -0,0 +1,589 @@ +--- +title: Redis Persistence +description: Configure Redis data persistence and high availability +--- + +## Overview + +Redis provides caching and state management for Smallest Self-Host. This guide covers configuring Redis persistence, high availability, and performance optimization. + +## Redis Deployment Options + +### Option 1: Embedded Redis (Default) + +Smallest Self-Host includes Redis as a subchart. + +**Advantages**: +- Simple setup +- Automatic configuration +- Included in Helm chart + +**Disadvantages**: +- Single point of failure +- No data persistence by default +- Limited to cluster resources + +**Configuration**: + +```yaml values.yaml +redis: + enabled: true + auth: + enabled: true + password: "your-secure-password" + master: + persistence: + enabled: false + replica: + replicaCount: 1 + persistence: + enabled: false +``` + +### Option 2: External Redis + +Use Amazon ElastiCache or self-managed Redis. 
+ +**Advantages**: +- Managed service (ElastiCache) +- High availability +- Better performance +- Independent scaling + +**Disadvantages**: +- Additional cost +- More complex setup + +**Configuration**: + +```yaml values.yaml +redis: + enabled: false + externalHost: "my-redis.abc123.0001.use1.cache.amazonaws.com" + port: 6379 + ssl: false + auth: + enabled: true + password: "redis-password" +``` + +## Enable Redis Persistence + +### With Embedded Redis + +Enable AOF (Append-Only File) persistence: + +```yaml values.yaml +redis: + enabled: true + auth: + enabled: true + password: "your-secure-password" + master: + persistence: + enabled: true + storageClass: "gp3" + size: 8Gi + accessModes: + - ReadWriteOnce + replica: + replicaCount: 2 + persistence: + enabled: true + storageClass: "gp3" + size: 8Gi +``` + +This creates: +- 1 master pod with persistent volume +- 2 replica pods with persistent volumes +- Automatic failover + +### Verify Persistence + +Check PVCs created: + +```bash +kubectl get pvc -n smallest | grep redis +``` + +Expected output: +``` +redis-data-smallest-redis-master-0 Bound 8Gi +redis-data-smallest-redis-replicas-0 Bound 8Gi +redis-data-smallest-redis-replicas-1 Bound 8Gi +``` + +## High Availability + +### Sentinel Mode + +Redis Sentinel provides automatic failover: + +```yaml values.yaml +redis: + enabled: true + sentinel: + enabled: true + quorum: 2 + master: + persistence: + enabled: true + size: 8Gi + replica: + replicaCount: 2 + persistence: + enabled: true + size: 8Gi +``` + +### Cluster Mode + +For very high throughput: + +```yaml values.yaml +redis: + enabled: true + architecture: replication + master: + count: 3 + replica: + replicaCount: 2 +``` + +## AWS ElastiCache Integration + +### Create ElastiCache Cluster + +Using AWS Console: + + + + AWS Console → ElastiCache → Redis → Create + + + + - **Cluster mode**: Disabled (for simplicity) + - **Name**: smallest-redis + - **Engine version**: 7.0+ + - **Node type**: cache.r6g.large (or 
larger) + + + + Select subnet group in same VPC as EKS cluster + + + + - **Security group**: Allow port 6379 from EKS cluster + - **Encryption in transit**: Enabled + - **Encryption at rest**: Enabled + + + + - **Automatic backups**: Enabled + - **Retention**: 7 days + + + + Review and create (takes 10-15 minutes) + + Note the **Primary endpoint** + + + +### Configure Helm Chart + +```yaml values.yaml +redis: + enabled: false + externalHost: "smallest-redis.abc123.0001.use1.cache.amazonaws.com" + port: 6379 + ssl: true + auth: + enabled: false + +lightningAsr: + env: + - name: REDIS_URL + value: "rediss://smallest-redis.abc123.0001.use1.cache.amazonaws.com:6379" + - name: REDIS_TLS + value: "true" +``` + +## Performance Tuning + +### Memory Configuration + +Set memory limits for embedded Redis: + +```yaml values.yaml +redis: + master: + resources: + limits: + memory: 2Gi + requests: + memory: 1Gi + replica: + resources: + limits: + memory: 2Gi + requests: + memory: 1Gi +``` + +### Eviction Policy + +Configure memory eviction: + +```yaml values.yaml +redis: + master: + configuration: | + maxmemory-policy allkeys-lru + maxmemory 1gb +``` + +### Disable Persistence for Performance + +For non-critical data (faster performance): + +```yaml values.yaml +redis: + master: + configuration: | + save "" + appendonly no + persistence: + enabled: false +``` + + +Without persistence, all data is lost if Redis restarts. Only use for truly ephemeral data. 
+ + +## Monitoring Redis + +### Check Redis Status + +```bash +kubectl get pods -l app.kubernetes.io/name=redis -n smallest +``` + +### Connect to Redis CLI + +```bash +kubectl exec -it -n smallest -- redis-cli +``` + +Inside redis-cli: + +```redis +AUTH your-password +INFO +DBSIZE +KEYS * +``` + +### Monitor Memory Usage + +```bash +kubectl exec -it -n smallest -- redis-cli INFO memory +``` + +### Monitor Performance + +```bash +kubectl exec -it -n smallest -- redis-cli INFO stats +``` + +## Backup and Recovery + +### Manual Backup + +Create snapshot: + +```bash +kubectl exec -it -n smallest -- redis-cli BGSAVE +``` + +Copy RDB file: + +```bash +kubectl cp :/data/dump.rdb ./redis-backup.rdb -n smallest +``` + +### Scheduled Backups + +Create CronJob for automatic backups: + +```yaml redis-backup-cronjob.yaml +apiVersion: batch/v1 +kind: CronJob +metadata: + name: redis-backup + namespace: smallest +spec: + schedule: "0 2 * * *" + jobTemplate: + spec: + template: + spec: + containers: + - name: backup + image: redis:7-alpine + command: + - sh + - -c + - | + redis-cli -h smallest-redis-master BGSAVE + sleep 60 + kubectl cp smallest-redis-master-0:/data/dump.rdb /backup/redis-$(date +%Y%m%d).rdb + volumeMounts: + - name: backup + mountPath: /backup + volumes: + - name: backup + persistentVolumeClaim: + claimName: redis-backup-pvc + restartPolicy: OnFailure +``` + +### Restore from Backup + +```bash +kubectl cp ./redis-backup.rdb :/data/dump.rdb -n smallest + +kubectl exec -it -n smallest -- redis-cli SHUTDOWN NOSAVE + +kubectl delete pod -n smallest +``` + +Pod will restart and load from backup. 
+ +## Security + +### Enable Authentication + +Always use password authentication: + +```yaml values.yaml +redis: + auth: + enabled: true + password: "strong-random-password" +``` + +Or use existing secret: + +```yaml values.yaml +redis: + auth: + enabled: true + existingSecret: "redis-secret" + existingSecretPasswordKey: "redis-password" +``` + +### Enable TLS + +For embedded Redis: + +```yaml values.yaml +redis: + tls: + enabled: true + authClients: true + certFilename: "tls.crt" + certKeyFilename: "tls.key" + certCAFilename: "ca.crt" +``` + +### Network Policies + +Restrict access to Redis: + +```yaml redis-network-policy.yaml +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: redis-policy + namespace: smallest +spec: + podSelector: + matchLabels: + app.kubernetes.io/name: redis + policyTypes: + - Ingress + ingress: + - from: + - podSelector: + matchLabels: + app: lightning-asr + - podSelector: + matchLabels: + app: api-server + ports: + - protocol: TCP + port: 6379 +``` + +## Scaling Redis + +### Vertical Scaling + +Increase resources: + +```yaml values.yaml +redis: + master: + resources: + limits: + memory: 4Gi + cpu: 2 +``` + +Restart pods: + +```bash +kubectl rollout restart statefulset smallest-redis-master -n smallest +``` + +### Horizontal Scaling + +Add more replicas: + +```yaml values.yaml +redis: + replica: + replicaCount: 3 +``` + +## Troubleshooting + +### Connection Refused + +Check Redis pod is running: + +```bash +kubectl get pods -l app.kubernetes.io/name=redis -n smallest +kubectl logs -l app.kubernetes.io/name=redis -n smallest +``` + +Test connection: + +```bash +kubectl run redis-test --rm -it --restart=Never \ + --image=redis:7-alpine \ + --command -- redis-cli -h smallest-redis-master -a your-password ping +``` + +### Out of Memory + +Check memory usage: + +```bash +kubectl exec -it -n smallest -- redis-cli INFO memory +``` + +Increase memory limit or enable eviction: + +```yaml +redis: + master: + resources: + 
limits: + memory: 4Gi + configuration: | + maxmemory-policy allkeys-lru +``` + +### Slow Performance + +Check latency: + +```bash +kubectl exec -it -n smallest -- redis-cli --latency +``` + +Check slow queries: + +```bash +kubectl exec -it -n smallest -- redis-cli SLOWLOG GET 10 +``` + +### Data Loss + +Check if persistence is enabled: + +```bash +kubectl exec -it -n smallest -- redis-cli CONFIG GET save +kubectl exec -it -n smallest -- redis-cli CONFIG GET appendonly +``` + +## Best Practices + + + + Enable password authentication even for internal Redis: + + ```yaml + redis: + auth: + enabled: true + password: "strong-password" + ``` + + + + Use AOF for maximum durability: + + ```yaml + redis: + master: + persistence: + enabled: true + configuration: | + appendonly yes + appendfsync everysec + ``` + + + + At least 2 replicas for high availability: + + ```yaml + redis: + replica: + replicaCount: 2 + ``` + + + + Use Redis exporter for Prometheus: + + ```bash + helm install redis-exporter prometheus-community/prometheus-redis-exporter \ + --set redisAddress=redis://smallest-redis-master:6379 + ``` + + + + Schedule automatic backups: + + - ElastiCache: Enable automatic backups + - Self-managed: Use CronJob for BGSAVE + + + +## What's Next? + + + + Configure autoscaling for Lightning ASR + + + + Set up Prometheus metrics collection + + + diff --git a/fern/products/waves/pages/v4.0.0/on-prem/troubleshooting/common-issues.mdx b/fern/products/waves/pages/v4.0.0/on-prem/troubleshooting/common-issues.mdx new file mode 100644 index 0000000..66afc48 --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/on-prem/troubleshooting/common-issues.mdx @@ -0,0 +1,469 @@ +--- +title: Common Issues +description: Quick solutions to frequently encountered problems +--- + +## Overview + +This guide provides quick solutions to the most common issues encountered with Smallest Self-Host across Docker and Kubernetes deployments. 
+ +## Installation Issues + +### License Key Invalid + +**Symptoms**: +- `License validation failed` +- `Invalid license key` +- Services fail to start + +**Quick Fix**: + + + + Check for extra spaces, quotes, or line breaks + + ```bash + echo $LICENSE_KEY | wc -c + ``` + + Should be exact length without whitespace + + + + Contact license server directly: + + ```bash + curl -H "Authorization: Bearer ${LICENSE_KEY}" https://api.smallest.ai/validate + ``` + + + + If key appears correct: + + Email: **support@smallest.ai** + + Include: License key, error logs + + + +### Image Pull Failed + +**Symptoms**: +- `ImagePullBackOff` +- `unauthorized: authentication required` +- `manifest unknown` + +**Quick Fix**: + + + + ```bash + docker login quay.io + Username: your-username + Password: your-password + + docker pull quay.io/smallestinc/lightning-asr:latest + ``` + + + + Verify secret exists: + ```bash + kubectl get secrets -n smallest | grep registry + ``` + + Recreate if needed: + ```bash + kubectl create secret docker-registry registry-secret \ + --docker-server=quay.io \ + --docker-username=your-username \ + --docker-password=your-password \ + --docker-email=your-email \ + -n smallest + ``` + + + +### Model Download Failed + +**Symptoms**: +- Lightning ASR stuck at startup +- `Failed to download model` +- `Connection timeout` + +**Quick Fix**: + +1. **Verify URL**: + ```bash + curl -I $MODEL_URL + ``` + +2. **Check disk space**: + ```bash + df -h + ``` + Need at least 30 GB free + +3. **Test network**: + ```bash + wget --spider $MODEL_URL + ``` + +4. 
**Increase timeout** (Kubernetes): + ```yaml + lightningAsr: + env: + - name: DOWNLOAD_TIMEOUT + value: "3600" + ``` + +## Runtime Issues + +### High Latency + +**Symptoms**: +- Requests taking >1 second +- Slow transcription +- Timeouts + +**Quick Fix**: + + + + ```bash + nvidia-smi + kubectl exec -it -- nvidia-smi + ``` + + **If GPU util < 50%**: + - Model not loaded properly + - CPU bottleneck + - Check logs for errors + + **If GPU util > 90%**: + - Scale up replicas + - Add more GPU nodes + + + + ```bash + kubectl describe pod | grep -A5 "Limits" + ``` + + Increase if needed: + ```yaml + lightningAsr: + resources: + limits: + memory: 16Gi + cpu: 8 + ``` + + + + ```bash + sudo nvidia-smi -pm 1 + ``` + + Or in Kubernetes: + ```yaml + gpu-operator: + driver: + env: + - name: NVIDIA_DRIVER_CAPABILITIES + value: "compute,utility" + ``` + + + +### Out of Memory + +**Symptoms**: +- Pod killed (exit code 137) +- `OOMKilled` status +- Memory errors in logs + +**Quick Fix**: + +1. **Increase memory limit**: + ```yaml + lightningAsr: + resources: + limits: + memory: 20Gi + requests: + memory: 16Gi + ``` + +2. **Check memory leaks**: + ```bash + kubectl top pod + ``` + +3. **Restart pod**: + ```bash + kubectl delete pod + ``` + +### Connection Refused + +**Symptoms**: +- Cannot connect to API +- `Connection refused` +- Service unavailable + +**Quick Fix**: + + + + ```bash + kubectl get pods -n smallest + docker compose ps + ``` + + All should be `Running` or `Up` + + + + ```bash + kubectl get endpoints -n smallest + curl http://localhost:7100/health + ``` + + + + ```bash + sudo iptables -L + netstat -tuln | grep 7100 + ``` + + + +## Performance Issues + +### Slow Autoscaling + +**Symptoms**: +- HPA not scaling fast enough +- Pods stuck in Pending +- Cluster Autoscaler delayed + +**Quick Fix**: + +1. **Reduce HPA stabilization**: + ```yaml + scaling: + auto: + lightningAsr: + hpa: + scaleUpStabilizationWindowSeconds: 0 + ``` + +2. 
**Check metrics available**: + ```bash + kubectl get hpa + kubectl get --raw "/apis/custom.metrics.k8s.io/v1beta1" + ``` + +3. **Verify node capacity**: + ```bash + kubectl describe nodes | grep -A5 "Allocated resources" + ``` + +### Request Queue Building Up + +**Symptoms**: +- Increasing active requests +- Users experiencing delays +- HPA shows high metrics + +**Quick Fix**: + +1. **Manual scale up**: + ```bash + kubectl scale deployment lightning-asr --replicas=10 + ``` + +2. **Check autoscaling limits**: + ```yaml + scaling: + auto: + lightningAsr: + hpa: + maxReplicas: 20 + ``` + +3. **Add cluster capacity**: + ```bash + eksctl scale nodegroup --cluster=smallest-cluster --name=gpu-nodes --nodes=5 + ``` + +## Data Issues + +### Transcription Quality Poor + +**Symptoms**: +- Low confidence scores +- Incorrect transcriptions +- Missing words + +**Quick Fix**: + +1. **Check audio quality**: + - Sample rate: 16 kHz minimum (44.1 kHz recommended) + - Format: WAV or FLAC preferred + - Channels: Mono for best results + +2. **Enable punctuation**: + ```json + { + "url": "...", + "punctuate": true, + "language": "en" + } + ``` + +3. **Verify correct language**: + ```json + { + "url": "...", + "language": "es" + } + ``` + +### Missing Timestamps + +**Symptoms**: +- No word-level timing data +- Unable to sync with video + +**Quick Fix**: + +Enable timestamps in request: + +```json +{ + "url": "...", + "timestamps": true +} +``` + +Response will include: +```json +{ + "words": [ + {"word": "Hello", "start": 0.0, "end": 0.5} + ] +} +``` + +## Network Issues + +### Cannot Reach License Server + +**Symptoms**: +- `Grace period activated` +- `Connection to license server failed` +- Services still working but warnings + +**Quick Fix**: + +1. **Test connectivity**: + ```bash + curl -v https://api.smallest.ai + ``` + +2. **Check firewall rules**: + - Allow outbound HTTPS (port 443) + - Whitelist `api.smallest.ai` + +3. 
**Review network policies** (Kubernetes): + ```bash + kubectl get networkpolicy -n smallest + ``` + +4. **Monitor grace period**: + ```bash + kubectl logs -l app=license-proxy | grep -i "grace" + ``` + +### Slow Downloads + +**Symptoms**: +- Model download taking >30 minutes +- Audio file upload slow + +**Quick Fix**: + +1. **Use faster network**: + - AWS S3 in same region + - CloudFront CDN + +2. **Enable parallel downloads**: + ```yaml + lightningAsr: + env: + - name: DOWNLOAD_WORKERS + value: "4" + ``` + +3. **Use shared storage** (Kubernetes): + ```yaml + models: + volumes: + aws: + efs: + enabled: true + ``` + +## Quick Diagnostics + +### One-Command Health Check + +```bash +curl http://localhost:7100/health && \ + kubectl get pods -n smallest && \ + kubectl top nodes && \ + kubectl top pods -n smallest +``` + +### Collect All Logs + +```bash +kubectl logs -l app=lightning-asr -n smallest --tail=100 > asr-logs.txt +kubectl logs -l app=api-server -n smallest --tail=100 > api-logs.txt +kubectl logs -l app=license-proxy -n smallest --tail=100 > license-logs.txt +``` + +### Test Transcription + +```bash +curl -X POST http://localhost:7100/v1/listen \ + -H "Authorization: Token ${LICENSE_KEY}" \ + -H "Content-Type: application/json" \ + -d '{"url": "https://www2.cs.uic.edu/~i101/SoundFiles/StarWars60.wav"}' +``` + +## Getting Help + +If issues persist: + + + + Advanced debugging techniques + + + + Interpret logs and error messages + + + + Email: **support@smallest.ai** + + Include: + - Error logs + - System information + - Steps to reproduce + + + diff --git a/fern/products/waves/pages/v4.0.0/on-prem/troubleshooting/debugging-guide.mdx b/fern/products/waves/pages/v4.0.0/on-prem/troubleshooting/debugging-guide.mdx new file mode 100644 index 0000000..e44b15c --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/on-prem/troubleshooting/debugging-guide.mdx @@ -0,0 +1,538 @@ +--- +title: Debugging Guide +description: Advanced debugging techniques for Smallest Self-Host 
+--- + +## Overview + +This guide covers advanced debugging techniques for troubleshooting complex issues with Smallest Self-Host. + +## Debugging Tools + +### Docker Debugging + +#### Enter Running Container + +```bash +docker exec -it /bin/bash +``` + +Inside the container: +```bash +ls -la +ps aux +df -h +nvidia-smi +env +``` + +#### Debug Failed Container + +View logs of crashed container: + +```bash +docker logs +docker logs --tail=100 --follow +``` + +Inspect container configuration: + +```bash +docker inspect +``` + +#### Network Debugging + +Check container networking: + +```bash +docker network ls +docker network inspect +docker exec ping license-proxy +docker exec curl http://license-proxy:3369/health +``` + +### Kubernetes Debugging + +#### Debug Pod + +Interactive debug container: + +```bash +kubectl debug -it --image=ubuntu --target= +``` + +Copy debug tools into pod: + +```bash +kubectl cp ./debug-script.sh :/tmp/debug.sh +kubectl exec -it -- bash /tmp/debug.sh +``` + +#### Ephemeral Debug Container + +Add temporary container to running pod: + +```bash +kubectl debug -it --image=nicolaka/netshoot --target=lightning-asr +``` + +Inside debug container: +```bash +nslookup license-proxy +curl http://api-server:7100/health +tcpdump -i eth0 +``` + +#### Get Previous Logs + +If pod crashed and restarted: + +```bash +kubectl logs --previous +kubectl logs -c --previous +``` + +## Network Debugging + +### Test Service Connectivity + +From inside cluster: + +```bash +kubectl run netdebug --rm -it --restart=Never \ + --image=nicolaka/netshoot \ + --namespace=smallest \ + -- bash +``` + +Inside debug pod: +```bash +nslookup api-server +nslookup license-proxy +nslookup lightning-asr + +curl http://api-server:7100/health +curl http://license-proxy:3369/health + +traceroute api-server +ping -c 3 lightning-asr +``` + +### DNS Resolution + +Check DNS is working: + +```bash +kubectl run dnstest --rm -it --restart=Never \ + --image=busybox \ + -- nslookup 
kubernetes.default +``` + +Check CoreDNS logs: + +```bash +kubectl logs -n kube-system -l k8s-app=kube-dns +``` + +### Network Policies + +List network policies: + +```bash +kubectl get networkpolicy -n smallest +kubectl describe networkpolicy -n smallest +``` + +Temporarily disable for testing: + +```bash +kubectl delete networkpolicy -n smallest +``` + + +Remember to recreate network policies after testing! + + +## Performance Debugging + +### Resource Usage + +Check pod resource consumption: + +```bash +kubectl top pods -n smallest +kubectl top pods -n smallest --sort-by=memory +kubectl top pods -n smallest --sort-by=cpu +``` + +Check node resource usage: + +```bash +kubectl top nodes +kubectl describe node | grep -A 10 "Allocated resources" +``` + +### GPU Debugging + +Check GPU availability in pod: + +```bash +kubectl exec -it -- nvidia-smi + +kubectl exec -it -- nvidia-smi dmon +``` + +Watch GPU utilization: + +```bash +kubectl exec -it -- watch -n 1 nvidia-smi +``` + +Check GPU events: + +```bash +kubectl exec -it -- nvidia-smi -q -d MEMORY,UTILIZATION,POWER,CLOCK,PERFORMANCE +``` + +### Application Profiling + +Profile Lightning ASR: + +```bash +kubectl exec -it -- sh -c 'apt-get update && apt-get install -y python3-pip && pip3 install py-spy' + +kubectl exec -it -- py-spy top --pid 1 +``` + +Memory profiling: + +```bash +kubectl exec -it -- sh -c 'cat /proc/1/status | grep -i mem' +``` + +## Log Analysis + +### Structured Log Parsing + +Extract errors from logs: + +```bash +kubectl logs | grep -i "error\|exception\|failed" +``` + +Count errors: + +```bash +kubectl logs | grep -i "error" | wc -l +``` + +Show errors with context: + +```bash +kubectl logs | grep -B 5 -A 5 "error" +``` + +### Log Aggregation + +Combine logs from all replicas: + +```bash +kubectl logs -l app=lightning-asr -n smallest --tail=100 --all-containers=true +``` + +Follow logs from multiple pods: + +```bash +kubectl logs -l app=lightning-asr -f --max-log-requests=10 +``` + +### Parse 
JSON Logs + +Using `jq`: + +```bash +kubectl logs | jq 'select(.level=="error")' +kubectl logs | jq 'select(.duration > 1000)' +kubectl logs | jq '.message' -r +``` + +## Database Debugging + +### Redis Debugging + +Connect to Redis: + +```bash +kubectl exec -it -- redis-cli +``` + +Inside Redis CLI: +```redis +AUTH your-password +INFO +DBSIZE +KEYS * +GET some_key +MONITOR +``` + +Check Redis memory: + +```redis +INFO memory +``` + +Check slow queries: + +```redis +SLOWLOG GET 10 +``` + +## API Debugging + +### Test API Endpoints + +Health check: + +```bash +kubectl port-forward svc/api-server 7100:7100 +curl http://localhost:7100/health +``` + +Test transcription: + +```bash +curl -X POST http://localhost:7100/v1/listen \ + -H "Authorization: Token ${LICENSE_KEY}" \ + -H "Content-Type: application/json" \ + -d '{"url": "https://www2.cs.uic.edu/~i101/SoundFiles/StarWars60.wav"}' \ + -v +``` + +### Request Tracing + +Add request ID tracking: + +```bash +curl -X POST http://localhost:7100/v1/listen \ + -H "Authorization: Token ${LICENSE_KEY}" \ + -H "X-Request-ID: debug-123" \ + -H "Content-Type: application/json" \ + -d '{"url": "..."}' \ + -v +``` + +Grep logs for request: + +```bash +kubectl logs -l app=api-server | grep "debug-123" +kubectl logs -l app=lightning-asr | grep "debug-123" +``` + +### Packet Capture + +Capture network traffic: + +```bash +kubectl exec -it -- apt-get update && apt-get install -y tcpdump + +kubectl exec -it -- tcpdump -i any -w /tmp/capture.pcap port 7100 + +kubectl cp :/tmp/capture.pcap ./capture.pcap +``` + +Analyze with Wireshark or: + +```bash +tcpdump -r capture.pcap -A +``` + +## Event Debugging + +### Watch Events + +Real-time events: + +```bash +kubectl get events -n smallest --watch +``` + +Filter by type: + +```bash +kubectl get events -n smallest --field-selector type=Warning +``` + +Sort by timestamp: + +```bash +kubectl get events -n smallest --sort-by='.lastTimestamp' +``` + +### Event Analysis + +Count events by reason: 
+ +```bash +kubectl get events -n smallest -o json | jq '.items | group_by(.reason) | map({reason: .[0].reason, count: length})' +``` + +## Metrics Debugging + +### Check Prometheus Metrics + +Port forward Prometheus: + +```bash +kubectl port-forward -n default svc/smallest-prometheus-stack-prometheus 9090:9090 +``` + +Query metrics: + +Open http://localhost:9090 and run: + +```promql +asr_active_requests +rate(asr_total_requests[5m]) +asr_gpu_utilization +``` + +### Check Custom Metrics + +Verify metrics available to HPA: + +```bash +kubectl get --raw "/apis/custom.metrics.k8s.io/v1beta1" | jq . +``` + +Query specific metric: + +```bash +kubectl get --raw "/apis/custom.metrics.k8s.io/v1beta1/namespaces/smallest/pods/*/asr_active_requests" | jq . +``` + +## Debugging Checklists + +### Startup Issues Checklist + + + + ```bash + kubectl describe pod | grep -A 10 "Events" + ``` + + + + ```bash + kubectl get secrets -n smallest + kubectl describe secret + ``` + + + + ```bash + kubectl describe node | grep "Allocated resources" -A 10 + ``` + + + + ```bash + kubectl logs --all-containers=true + ``` + + + +### Performance Issues Checklist + + + + ```bash + kubectl top pods -n smallest + kubectl top nodes + ``` + + + + ```bash + kubectl exec -- nvidia-smi + ``` + + + + ```bash + kubectl get hpa + kubectl describe hpa lightning-asr + ``` + + + + ```bash + kubectl get --raw "/apis/custom.metrics.k8s.io/v1beta1" + ``` + + + +## Advanced Techniques + +### Enable Debug Logging + +Increase log verbosity: + +```yaml +lightningAsr: + env: + - name: LOG_LEVEL + value: "DEBUG" +``` + +### Simulate Failures + +Test error handling: + +```bash +kubectl delete pod +kubectl drain --ignore-daemonsets +``` + +### Load Testing + +Generate load: + +```bash +kubectl run load-test --rm -it --image=williamyeh/hey \ + -- -z 5m -c 50 http://api-server:7100/health +``` + +### Chaos Engineering + +Test resilience (requires Chaos Mesh): + +```yaml +apiVersion: chaos-mesh.org/v1alpha1 +kind: PodChaos 
+metadata: + name: pod-failure +spec: + action: pod-failure + mode: one + selector: + namespaces: + - smallest + labelSelectors: + app: lightning-asr + duration: "30s" +``` + +## What's Next? + + + + Learn to interpret logs and errors + + + + Quick fixes for frequent problems + + + diff --git a/fern/products/waves/pages/v4.0.0/on-prem/troubleshooting/logs-analysis.mdx b/fern/products/waves/pages/v4.0.0/on-prem/troubleshooting/logs-analysis.mdx new file mode 100644 index 0000000..06c7d9e --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/on-prem/troubleshooting/logs-analysis.mdx @@ -0,0 +1,556 @@ +--- +title: Logs Analysis +description: Interpret logs and error messages from Smallest Self-Host +--- + +## Overview + +Understanding log messages is crucial for diagnosing issues. This guide helps you interpret logs from each component and identify common error patterns. + +## Log Levels + +All components use standard log levels: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
LevelDescriptionExample
DEBUGDetailed diagnostic infoVariable values, function calls
INFONormal operation eventsRequest received, model loaded
WARNINGPotential issuesSlow response, retry attempt
ERRORError that needs attentionFailed request, connection error
CRITICALSevere errorService crash, unrecoverable error
+ +## Lightning ASR Logs + +### Successful Startup + +```log +INFO: Starting Lightning ASR v1.0.0 +INFO: GPU detected: NVIDIA A10 (24GB) +INFO: Downloading model from URL... +INFO: Model downloaded: 23.5GB +INFO: Loading model into GPU memory... +INFO: Model loaded successfully (5.2GB GPU memory) +INFO: Warmup inference completed in 3.2s +INFO: Server ready on port 2269 +``` + +### Request Processing + +```log +INFO: Request received: req_abc123 +DEBUG: Audio duration: 60.5s, sample_rate: 44100 +DEBUG: Preprocessing audio... +DEBUG: Running inference... +INFO: Transcription completed in 3.1s (RTF: 0.05x) +INFO: Confidence: 0.95 +``` + +### Common Errors + + + + ```log + ERROR: No CUDA-capable device detected + ERROR: nvidia-smi command not found + CRITICAL: Cannot initialize GPU, exiting + ``` + + **Cause**: GPU not available or drivers not installed + + **Solution**: + - Check `nvidia-smi` works + - Verify GPU device plugin (Kubernetes) + - Check NVIDIA Container Toolkit (Docker) + + + + ```log + ERROR: CUDA out of memory + ERROR: Tried to allocate 2.5GB but only 1.2GB available + WARNING: Reducing batch size + ``` + + **Cause**: Not enough GPU memory + + **Solution**: + - Reduce concurrent requests + - Use larger GPU (A10 vs T4) + - Scale horizontally (more pods) + + + + ```log + INFO: Downloading model from https://example.com/model.bin + WARNING: Download attempt 1 failed: Connection timeout + WARNING: Retrying download... 
+ ERROR: Download failed after 3 attempts + ``` + + **Cause**: Network issues, invalid URL, disk full + + **Solution**: + - Verify MODEL_URL + - Check disk space: `df -h` + - Test URL: `curl -I $MODEL_URL` + - Use shared storage (EFS) + + + + ```log + ERROR: Failed to process audio: req_xyz789 + ERROR: Unsupported audio format: audio/webm + ERROR: Audio file corrupted or invalid + ``` + + **Cause**: Invalid audio file + + **Solution**: + - Verify audio format (WAV, MP3, FLAC supported) + - Check file is not corrupted + - Ensure proper sample rate (16kHz+) + + + +## API Server Logs + +### Successful Startup + +```log +INFO: Starting API Server v1.0.0 +INFO: Connecting to Lightning ASR at http://lightning-asr:2269 +INFO: Connected to Lightning ASR (2 replicas) +INFO: Connecting to License Proxy at http://license-proxy:3369 +INFO: License validated +INFO: API server listening on port 7100 +``` + +### Request Handling + +```log +INFO: POST /v1/listen from 10.0.1.5 +DEBUG: Request ID: req_abc123 +DEBUG: Audio URL: https://example.com/audio.wav +DEBUG: Routing to Lightning ASR pod: lightning-asr-0 +INFO: Response time: 3.2s +INFO: Status: 200 OK +``` + +### Common Errors + + + + ```log + WARNING: Invalid license key from 10.0.1.5 + WARNING: Missing Authorization header + ERROR: License validation failed: expired + ``` + + **Cause**: Invalid, missing, or expired license key + + **Solution**: + - Verify `Authorization: Token ` header + - Check license key is correct + - Renew expired license + + + + ```log + ERROR: No Lightning ASR workers available + WARNING: Request queued: req_abc123 + WARNING: Queue size: 15 + ``` + + **Cause**: All Lightning ASR pods busy or down + + **Solution**: + - Check Lightning ASR pods: `kubectl get pods` + - Scale up replicas + - Check HPA configuration + + + + ```log + ERROR: Request timeout after 300s + ERROR: Lightning ASR pod not responding: lightning-asr-0 + WARNING: Retrying with different pod + ``` + + **Cause**: Lightning ASR 
overloaded or crashed + + **Solution**: + - Check Lightning ASR logs + - Increase timeout + - Scale up pods + + + +## License Proxy Logs + +### Successful Validation + +```log +INFO: Starting License Proxy v1.0.0 +INFO: License key loaded +INFO: Connecting to api.smallest.ai +INFO: License validated successfully +INFO: License valid until: 2025-12-31T23:59:59Z +INFO: Grace period: 24 hours +INFO: Server listening on port 3369 +``` + +### Usage Reporting + +```log +DEBUG: Reporting usage batch: 150 requests +DEBUG: Total duration: 3600s +DEBUG: Features: [streaming, punctuation] +INFO: Usage reported successfully +``` + +### Common Errors + + + + ```log + ERROR: License validation failed: Invalid license key + ERROR: License server returned 401 Unauthorized + CRITICAL: Cannot start without valid license + ``` + + **Cause**: Invalid or expired license + + **Solution**: + - Verify LICENSE_KEY is correct + - Check license hasn't expired + - Contact support@smallest.ai + + + + ```log + WARNING: Connection to api.smallest.ai failed + WARNING: Connection timeout after 10s + INFO: Using cached validation + INFO: Grace period active (23h remaining) + ``` + + **Cause**: Network connectivity issue + + **Solution**: + - Test: `curl https://api.smallest.ai` + - Check firewall allows HTTPS + - Restore connectivity before grace period expires + + + + ```log + WARNING: Grace period expires in 1 hour + WARNING: Cannot connect to license server + ERROR: Grace period expired + CRITICAL: Service will stop accepting requests + ``` + + **Cause**: Extended network outage + + **Solution**: + - Restore network connectivity immediately + - Check firewall rules + - Contact support if persistent + + + +## Redis Logs + +### Normal Operation + +```log +Ready to accept connections +Client connected from 10.0.1.5:45678 +DB 0: 1523 keys (expires: 0) +``` + +### Common Errors + + + + ```log + WARNING: Memory usage: 95% + ERROR: OOM command not allowed when used memory > 'maxmemory' + ``` + + 
**Solution**: + - Increase memory limit + - Enable eviction policy + - Clear old keys + + + + ```log + ERROR: Failed writing the RDB file + ERROR: Disk is full + ``` + + **Solution**: + - Increase disk space + - Disable persistence if not needed + - Clean up old snapshots + + + +## Log Pattern Analysis + +### Error Rate Analysis + +Count errors in last 1000 lines: + +```bash +kubectl logs --tail=1000 | grep -c "ERROR" +``` + +Group errors by type: + +```bash +kubectl logs | grep "ERROR" | sort | uniq -c | sort -rn +``` + +### Performance Analysis + +Extract response times: + +```bash +kubectl logs | grep "Response time" | awk '{print $NF}' | sort -n +``` + +Calculate average: + +```bash +kubectl logs | grep "Response time" | awk '{sum+=$NF; count++} END {print sum/count}' +``` + +### Request Tracking + +Follow a specific request ID: + +```bash +kubectl logs | grep "req_abc123" +``` + +Across all pods: + +```bash +kubectl logs -l app=lightning-asr | grep "req_abc123" +``` + +## Log Aggregation + +### Using stern + +Install stern: + +```bash +brew install stern +``` + +Follow logs from all Lightning ASR pods: + +```bash +stern lightning-asr -n smallest +``` + +Filter by pattern: + +```bash +stern lightning-asr -n smallest --grep "ERROR" +``` + +### Using Loki (if installed) + +Query logs via LogQL: + +```logql +{app="lightning-asr"} |= "ERROR" +{app="api-server"} |= "req_abc123" +rate({app="lightning-asr"}[5m]) +``` + +## Structured Logging + +### Parse JSON Logs + +If logs are in JSON format: + +```bash +kubectl logs | jq 'select(.level=="ERROR")' +kubectl logs | jq 'select(.duration > 1000)' +kubectl logs | jq '.message' -r +``` + +### Filter by Field + +```bash +kubectl logs | jq 'select(.request_id=="req_abc123")' +kubectl logs | jq 'select(.component=="license_proxy")' +``` + +## Log Retention + +### Configure Log Rotation + +Docker: + +```yaml docker-compose.yml +services: + lightning-asr: + logging: + driver: "json-file" + options: + max-size: "10m" + 
max-file: "3" +``` + +Kubernetes: + +```yaml +apiVersion: v1 +kind: Pod +metadata: + name: lightning-asr +spec: + containers: + - name: lightning-asr + imagePullPolicy: Always +``` + +Kubernetes automatically rotates logs via kubelet. + +### Export Logs + +Save logs for analysis: + +```bash +kubectl logs > logs.txt +kubectl logs --since=1h > logs-last-hour.txt +kubectl logs --since-time=2024-01-15T10:00:00Z > logs-since.txt +``` + +## Debugging Log Issues + +### No Logs Appearing + +Check pod is running: + +```bash +kubectl get pods -n smallest +kubectl describe pod +``` + +Check stdout/stderr: + +```bash +kubectl exec -it -- sh -c "ls -la /proc/1/fd/" +``` + +### Logs Truncated + +Increase log size limits: + +```yaml +apiVersion: v1 +kind: Pod +metadata: + annotations: + kubernetes.io/psp: privileged +spec: + containers: + - name: app + env: + - name: LOG_MAX_SIZE + value: "100M" +``` + +## Best Practices + + + + Prefer JSON format for easier parsing: + + ```json + { + "timestamp": "2024-01-15T10:30:00Z", + "level": "ERROR", + "message": "Request failed", + "request_id": "req_abc123", + "duration_ms": 3200 + } + ``` + + + + Always include relevant context in logs: + + - Request ID + - Component name + - Timestamp + - User/session info (if applicable) + + + + Use correct log levels: + + - DEBUG: Development only + - INFO: Normal operation + - WARNING: Potential issues + - ERROR: Actual problems + - CRITICAL: Service-breaking issues + + + + Use centralized logging: + + - ELK Stack (Elasticsearch, Logstash, Kibana) + - Loki + Grafana + - CloudWatch Logs (AWS) + - Cloud Logging (GCP) + + + +## What's Next? 
+ + + + Quick solutions to frequent problems + + + + Advanced debugging techniques + + + diff --git a/fern/products/waves/pages/v4.0.0/product/projects.mdx b/fern/products/waves/pages/v4.0.0/product/projects.mdx new file mode 100644 index 0000000..752a469 --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/product/projects.mdx @@ -0,0 +1,151 @@ +--- +title: 'Projects' +description: 'Create and manage your projects in Waves.' +icon: 'sheet-plastic' +--- + +Projects + +## Introduction + +Welcome to the official documentation for our text-to-speech (TTS) project. Our platform is a state-of-the-art audio synthesis tool designed to convert written text into high-quality, natural-sounding speech. It is particularly useful for content creators, authors, educators, and businesses looking to create voice-driven experiences efficiently. + +## Key Features + + + + Multiple Voices + Access a diverse selection of AI-generated voices tailored for different use cases. Choose from various genders, age groups, and accents to find the perfect match for your project. + +
    +
  - Click on a voice avatar to preview it.
  - Click the + icon to add it to your project.
+ +
+ + Drag-and-Drop Content Management + Easily organize your content with an intuitive block-based editing system. Simply click and drag to rearrange content blocks for a seamless editing experience. + + + Easily transform text into speech with flexible conversion options. Generate audio for the entire text or select specific blocks as needed. +
    +
  - Click on the play button to preview the generated audio.
  - Click on the Generate Selected button to convert the selected text to speech.
  - Click on the Generate Till End button to convert the entire text to speech.
+
+ + Organize your content into chapters for better management and navigation. + + + Easily integrate cloned voices into your projects. Simply add the cloned voice to your project and start using it in your content seamlessly. + + + Fine-tune your voice output with advanced settings. Use the gear icon to adjust speed, consistency, and enhancement options for a more customized experience. + + +{' '} + + +

+ Protect finalized content from unintended modifications by locking + blocks. Ensure important sections remain unchanged. +

+
+ + +

+ Easily download individual voice outputs with a single click. + Streamline your workflow with quick export options. +

+
+ +
+ +## Use Cases + +### Content Creation + +- Transform blog posts, articles, and scripts into engaging audio content. +- Enhance storytelling with dynamic voice narration. + +### Education and Accessibility + +- Convert textbooks and educational materials into audio formats. +- Improve accessibility for visually impaired users. + +### Business and Marketing + +- Create audio advertisements and voiceovers for promotional content. +- Generate automated voice responses for customer support systems. + +## Getting Started + +### Installation & Setup + +1. Register for an account and + log into the platform. +2. Create a new project or open an existing one. +3. Add or paste your text content to the project. +4. Select a voice, adjust settings, and generate speech. +5. Use the drag-and-drop editor to organize your content. +6. Export the final output in your preferred format. + +### Best Practices + +- Use chapters to organize your content. +- Lock finalized blocks to prevent accidental edits. +- Experiment with different voice settings for the best results. +- Use the preview to check the generated speech before exporting. +- Use the clone feature to create a new voice with your own style. +- Use the gear icon to adjust speed, consistency, and enhancement options for a more customized experience. + +### Get in Touch + + + + Drop a mail at support@smallest.ai to talk to sales if you are + looking for enterprise support. + + + +Join our community and stay connected with the latest developments: + +- **Support**: Reach out to our support team at [support@smallest.ai](mailto:support@smallest.ai) for any queries or assistance. +- **Community**: Join our [Discord server](https://discord.gg/5evETqguJs) to connect with other developers and get real-time support. +- **Blog**: Follow our [blog](https://smallest.ai/blog) for insights, tutorials, and updates. + +Thank you for choosing Waves. We look forward to helping you create amazing voice experiences! 
diff --git a/fern/products/waves/versions/v4.0.0/speech-to-text-new/benchmarks/evaluation-walkthrough.mdx b/fern/products/waves/pages/v4.0.0/speech-to-text/benchmarks/evaluation-walkthrough.mdx similarity index 97% rename from fern/products/waves/versions/v4.0.0/speech-to-text-new/benchmarks/evaluation-walkthrough.mdx rename to fern/products/waves/pages/v4.0.0/speech-to-text/benchmarks/evaluation-walkthrough.mdx index b78d731..9c6242f 100644 --- a/fern/products/waves/versions/v4.0.0/speech-to-text-new/benchmarks/evaluation-walkthrough.mdx +++ b/fern/products/waves/pages/v4.0.0/speech-to-text/benchmarks/evaluation-walkthrough.mdx @@ -1,5 +1,6 @@ --- title: 'Evaluation Walkthrough' +description: 'Step-by-step guide to evaluate Pulse STT accuracy and performance' --- Our evaluation guide outlines a repeatable process: choose representative audio, generate transcripts, compute WER/CER/latency, and document findings. Use the streamlined steps below (with ready-to-run snippets) to mirror that workflow. @@ -23,7 +24,7 @@ dataset = [ pip install smallestai jiwer whisper-normalizer pandas ``` -- `smallestai` → Lightning STT client +- `smallestai` → Pulse STT client - `jiwer` → WER/CER computation - `whisper-normalizer` → normalization that matches the official guidance diff --git a/fern/products/waves/versions/v4.0.0/speech-to-text-new/benchmarks/metrics-overview.mdx b/fern/products/waves/pages/v4.0.0/speech-to-text/benchmarks/metrics-overview.mdx similarity index 67% rename from fern/products/waves/versions/v4.0.0/speech-to-text-new/benchmarks/metrics-overview.mdx rename to fern/products/waves/pages/v4.0.0/speech-to-text/benchmarks/metrics-overview.mdx index abe453c..7eb5247 100644 --- a/fern/products/waves/versions/v4.0.0/speech-to-text-new/benchmarks/metrics-overview.mdx +++ b/fern/products/waves/pages/v4.0.0/speech-to-text/benchmarks/metrics-overview.mdx @@ -1,9 +1,9 @@ --- title: 'Metrics Overview' -description: 'Key Lightning STT metrics for quality and latency.' 
+description: 'Key Pulse STT metrics for quality and latency.' --- -Lightning STT evaluations revolve around four pillars: +Pulse STT evaluations revolve around four pillars: 1. **Accuracy** – how close transcripts are to the ground truth. 2. **Latency & throughput** – how quickly and efficiently results arrive. @@ -34,38 +34,38 @@ Lightning STT evaluations revolve around four pillars: ### Real-Time Factor (RTF) - `RTF = Processing Time / Audio Duration`. -- Values less than 1 indicate faster-than-real-time processing; Lightning STT typically runs near 0.4 RTF on clean inputs. +- Values less than 1 indicate faster-than-real-time processing; Pulse STT typically runs near 0.4 RTF on clean inputs. ## Enrichment quality - - - + + + - - - + + + - - - + + + - - - + + + - - - + + +
MetricWhat to watchWhy it mattersMetricWhat to watchWhy it matters
Diarization accuracy% of words with correct speaker_idCall-center QA, coaching, complianceDiarization accuracy% of words with correct speaker_idCall-center QA, coaching, compliance
Word timestamp driftGap between predicted and reference timestampsSubtitle alignment and editingWord timestamp driftGap between predicted and reference timestampsSubtitle alignment and editing
Sentence-level timestamps% of audio covered by utterances segmentsChaptering, meeting notesSentence-level timestamps% of audio covered by utterances segmentsChaptering, meeting notes
Emotion/age/gender precisionConfidence distributionRouting, analytics, compliance flagsEmotion/age/gender precisionConfidence distributionRouting, analytics, compliance flags
@@ -79,7 +79,7 @@ Lightning STT evaluations revolve around four pillars: ## Operational metrics - **Requests per second / concurrent sessions**: validate you stay within quota and plan scaling needs. -- **Cost per minute**: Lightning STT bills per second at $0.025/minute list price—include enrichment toggles when modeling cost. +- **Cost per minute**: Pulse STT bills per second at $0.025/minute list price—include enrichment toggles when modeling cost. - **Retry volume**: differentiate infrastructure retries (HTTP 5xx) from transcription failures to spot upstream vs downstream issues. ## Reporting checklist diff --git a/fern/products/waves/versions/v4.0.0/speech-to-text-new/benchmarks/performance.mdx b/fern/products/waves/pages/v4.0.0/speech-to-text/benchmarks/performance.mdx similarity index 65% rename from fern/products/waves/versions/v4.0.0/speech-to-text-new/benchmarks/performance.mdx rename to fern/products/waves/pages/v4.0.0/speech-to-text/benchmarks/performance.mdx index 0a23429..26f442a 100644 --- a/fern/products/waves/versions/v4.0.0/speech-to-text-new/benchmarks/performance.mdx +++ b/fern/products/waves/pages/v4.0.0/speech-to-text/benchmarks/performance.mdx @@ -1,14 +1,13 @@ --- title: 'Performance' +description: 'Latency, accuracy, and throughput benchmarks for Pulse STT' --- -This page provides performance benchmarks for Lightning STT, including latency, accuracy, and throughput metrics. - ## Latency Metrics ### Time-to-First-Transcript (TTFT) -Our Lightning model provides State of the art TTFT latency of ~**64ms**, which is one of the least in the world. +Our Pulse STT model provides State of the art TTFT latency of ~**64ms**, which is one of the least in the world. @@ -17,26 +16,26 @@ TTFT (Time to First Transcript) measures the latency between when a user stops s - - + + - - + + - - + + - - + + - - + +
ModelLatency (ms)ModelLatency (ms)
Smallest Lightning64Smallest Pulse STT64
Deepgram Nova 276Deepgram Nova 276
Deepgram Nova 371Deepgram Nova 371
Assembly AI Universal698Assembly AI Universal698
@@ -53,26 +52,26 @@ All models were evaluated on the FLEURS dataset, a standardised multilingual spe - - + + - - + + - - + + - - + + - - + +
LanguageWERLanguageWER
English5.1%English5.1%
Italian4.2%Italian4.2%
Spanish5.4%Spanish5.4%
Hindi11.4%Hindi11.4%
@@ -84,22 +83,22 @@ All models were evaluated on the FLEURS dataset, a standardised multilingual spe - - + + - - + + - - + + - - + +
Audio LengthHTTP POSTAudio LengthHTTP POST
Short (< 5s)50-100Short (< 5s)50-100
Medium (5-30s)20-50Medium (5-30s)20-50
Long (30s+)10-20Long (30s+)10-20
@@ -181,8 +180,8 @@ All models were evaluated on the FLEURS dataset, a standardised multilingual spe ## Next Steps -- [Metrics Overview](/v4.0.0/content/speech-to-text-new/benchmarks/metrics-overview). -- [Evaluation Walkthrough](/v4.0.0/content/speech-to-text-new/benchmarks/evaluation-walkthrough). -- [Best Practices](/v4.0.0/content/speech-to-text-new/pre-recorded/best-practices). +- [Metrics Overview](/waves/documentation/speech-to-text/benchmarks/metrics-overview). +- [Evaluation Walkthrough](/waves/documentation/speech-to-text/benchmarks/evaluation-walkthrough). +- [Best Practices](/waves/documentation/speech-to-text/pre-recorded/best-practices). diff --git a/fern/products/waves/versions/v4.0.0/speech-to-text-new/features/age-and-gender-detection.mdx b/fern/products/waves/pages/v4.0.0/speech-to-text/features/age-and-gender-detection.mdx similarity index 82% rename from fern/products/waves/versions/v4.0.0/speech-to-text-new/features/age-and-gender-detection.mdx rename to fern/products/waves/pages/v4.0.0/speech-to-text/features/age-and-gender-detection.mdx index 74873b7..b26d8b4 100644 --- a/fern/products/waves/versions/v4.0.0/speech-to-text-new/features/age-and-gender-detection.mdx +++ b/fern/products/waves/pages/v4.0.0/speech-to-text/features/age-and-gender-detection.mdx @@ -7,13 +7,13 @@ description: 'Predict demographic attributes alongside every transcription' ## Enabling age & gender detection -Append `age_detection=true` and/or `gender_detection=true` to your Lightning STT query string. You can toggle them independently as well. +Append `age_detection=true` and/or `gender_detection=true` to your Pulse STT query string. You can toggle them independently as well. 
### Sample request ```bash curl --request POST \ - --url "https://waves-api.smallest.ai/api/v1/lightning/get_text?model=lightning&language=en&age_detection=true&gender_detection=true" \ + --url "https://api.smallest.ai/waves/v1/pulse/get_text?language=en&age_detection=true&gender_detection=true" \ --header "Authorization: Bearer $SMALLEST_API_KEY" \ --header "Content-Type: audio/wav" \ --data-binary "@/path/to/audio.wav" diff --git a/fern/products/waves/versions/v4.0.0/speech-to-text-new/features/diarization.mdx b/fern/products/waves/pages/v4.0.0/speech-to-text/features/diarization.mdx similarity index 70% rename from fern/products/waves/versions/v4.0.0/speech-to-text-new/features/diarization.mdx rename to fern/products/waves/pages/v4.0.0/speech-to-text/features/diarization.mdx index 68e360f..b2a30f5 100644 --- a/fern/products/waves/versions/v4.0.0/speech-to-text-new/features/diarization.mdx +++ b/fern/products/waves/pages/v4.0.0/speech-to-text/features/diarization.mdx @@ -9,11 +9,11 @@ description: 'Label each word and utterance with turn-by-turn speaker IDs' ### Pre-Recorded API -Pass `diarize=true` when calling the Lightning STT POST endpoint. The parameter can be combined with other enrichment options (timestamps, emotions, etc.) without changing your audio payload. +Pass `diarize=true` when calling the Pulse STT POST endpoint. The parameter can be combined with other enrichment options (timestamps, emotions, etc.) without changing your audio payload. 
```bash curl --request POST \ - --url "https://waves-api.smallest.ai/api/v1/lightning/get_text?model=lightning&language=en&diarize=true" \ + --url "https://api.smallest.ai/waves/v1/pulse/get_text?language=en&diarize=true" \ --header "Authorization: Bearer $SMALLEST_API_KEY" \ --header "Content-Type: audio/wav" \ --data-binary "@/path/to/audio.wav" @@ -21,10 +21,10 @@ curl --request POST \ ### Real-Time WebSocket API -Add `diarize=true` to your WebSocket connection query parameters when connecting to the Lightning STT WebSocket API. +Add `diarize=true` to your WebSocket connection query parameters when connecting to the Pulse STT WebSocket API. ```javascript -const url = new URL("wss://waves-api.smallest.ai/api/v1/lightning/get_text"); +const url = new URL("wss://api.smallest.ai/waves/v1/pulse/get_text"); url.searchParams.append("language", "en"); url.searchParams.append("encoding", "linear16"); url.searchParams.append("sample_rate", "16000"); @@ -49,7 +49,7 @@ When enabled, every entry in `words` includes a `speaker` field (integer ID: `0` ```bash curl --request POST \ - --url "https://waves-api.smallest.ai/api/v1/lightning/get_text?model=lightning&language=en&diarize=true" \ + --url "https://api.smallest.ai/waves/v1/pulse/get_text?language=en&diarize=true" \ --header "Authorization: Bearer $SMALLEST_API_KEY" \ --header "Content-Type: audio/wav" \ --data-binary "@/path/to/two-speaker.wav" @@ -140,24 +140,24 @@ curl --request POST \ - - - - + + + + - - - - + + + + - - - - + + + +
FieldTypeWhen IncludedDescriptionFieldTypeWhen IncludedDescription
`speaker`integer (realtime) / string (pre-recorded)`diarize=true`Speaker label. Real-time API uses integer IDs (0, 1, ...), pre-recorded API uses string labels (speaker_0, speaker_1, ...)`speaker`integer (realtime) / string (pre-recorded)`diarize=true`Speaker label. Real-time API uses integer IDs (0, 1, ...), pre-recorded API uses string labels (speaker_0, speaker_1, ...)
`speaker_confidence`number`diarize=true` (realtime only)Confidence score for the speaker assignment (0.0 to 1.0)`speaker_confidence`number`diarize=true` (realtime only)Confidence score for the speaker assignment (0.0 to 1.0)
diff --git a/fern/products/waves/versions/v4.0.0/speech-to-text-new/features/emotion-detection.mdx b/fern/products/waves/pages/v4.0.0/speech-to-text/features/emotion-detection.mdx similarity index 76% rename from fern/products/waves/versions/v4.0.0/speech-to-text-new/features/emotion-detection.mdx rename to fern/products/waves/pages/v4.0.0/speech-to-text/features/emotion-detection.mdx index da3f981..a6ebe0c 100644 --- a/fern/products/waves/versions/v4.0.0/speech-to-text-new/features/emotion-detection.mdx +++ b/fern/products/waves/pages/v4.0.0/speech-to-text/features/emotion-detection.mdx @@ -1,19 +1,19 @@ --- title: 'Emotion detection' -description: 'Capture per-emotion confidence scores from Lightning STT responses' +description: 'Capture per-emotion confidence scores from Pulse STT responses' --- Pre-Recorded ## Enabling emotion detection -Include `emotion_detection=true` in your Lightning STT query parameters. +Include `emotion_detection=true` in your Pulse STT query parameters. ### Sample request ```bash curl --request POST \ - --url "https://waves-api.smallest.ai/api/v1/lightning/get_text?model=lightning&language=en&emotion_detection=true" \ + --url "https://api.smallest.ai/waves/v1/pulse/get_text?language=en&emotion_detection=true" \ --header "Authorization: Bearer $SMALLEST_API_KEY" \ --header "Content-Type: audio/wav" \ --data-binary "@/path/to/audio.wav" diff --git a/fern/products/waves/versions/v4.0.0/speech-to-text-new/features/full-transcript.mdx b/fern/products/waves/pages/v4.0.0/speech-to-text/features/full-transcript.mdx similarity index 63% rename from fern/products/waves/versions/v4.0.0/speech-to-text-new/features/full-transcript.mdx rename to fern/products/waves/pages/v4.0.0/speech-to-text/features/full-transcript.mdx index bef1b91..9378a0c 100644 --- a/fern/products/waves/versions/v4.0.0/speech-to-text-new/features/full-transcript.mdx +++ b/fern/products/waves/pages/v4.0.0/speech-to-text/features/full-transcript.mdx @@ -9,12 +9,12 @@ The Full 
Transcript feature provides a cumulative transcript that accumulates al ## Enabling Full Transcript -Add `full_transcript=true` to your WebSocket connection query parameters when connecting to the Lightning STT WebSocket API. The default is `false`. +Add `full_transcript=true` to your WebSocket connection query parameters when connecting to the Pulse STT WebSocket API. The default is `false`. ### Real-Time WebSocket API ```javascript -const url = new URL("wss://waves-api.smallest.ai/api/v1/lightning/get_text"); +const url = new URL("wss://api.smallest.ai/waves/v1/pulse/get_text"); url.searchParams.append("language", "en"); url.searchParams.append("encoding", "linear16"); url.searchParams.append("sample_rate", "16000"); @@ -50,30 +50,30 @@ The `full_transcript` field contains the complete transcription text accumulated - - - - + + + + - - - - + + + + - - - - + + + + - - - - + + + +
FieldTypeWhen IncludedDescriptionFieldTypeWhen IncludedDescription
`full_transcript`string`full_transcript=true` AND `is_final=true`Complete transcription text accumulated from the start of the session`full_transcript`string`full_transcript=true` AND `is_final=true`Complete transcription text accumulated from the start of the session
`transcript`stringAlwaysPartial or complete transcription text for the current segment`transcript`stringAlwaysPartial or complete transcription text for the current segment
`is_final`booleanAlwaysIndicates if this is the final transcription for the current segment`is_final`booleanAlwaysIndicates if this is the final transcription for the current segment
diff --git a/fern/products/waves/versions/v4.0.0/speech-to-text-new/features/language-detection.mdx b/fern/products/waves/pages/v4.0.0/speech-to-text/features/language-detection.mdx similarity index 91% rename from fern/products/waves/versions/v4.0.0/speech-to-text-new/features/language-detection.mdx rename to fern/products/waves/pages/v4.0.0/speech-to-text/features/language-detection.mdx index c686381..b247bd3 100644 --- a/fern/products/waves/versions/v4.0.0/speech-to-text-new/features/language-detection.mdx +++ b/fern/products/waves/pages/v4.0.0/speech-to-text/features/language-detection.mdx @@ -1,6 +1,6 @@ --- title: 'Language detection' -description: 'Automatically detect and transcribe 30+ languages with Lightning STT' +description: 'Automatically detect and transcribe 30+ languages with Pulse STT' --- Pre-Recorded Real-Time @@ -17,7 +17,7 @@ View the full list of [supported languages](../overview#supported-languages). ```bash curl --request POST \ - --url "https://waves-api.smallest.ai/api/v1/lightning/get_text?model=lightning&language=multi&word_timestamps=true" \ + --url "https://api.smallest.ai/waves/v1/pulse/get_text?language=multi&word_timestamps=true" \ --header "Authorization: Bearer $SMALLEST_API_KEY" \ --header "Content-Type: audio/wav" \ --data-binary "@/path/to/audio.wav" @@ -26,7 +26,7 @@ curl --request POST \ ### Real-Time WebSocket API ```javascript -const url = new URL("wss://waves-api.smallest.ai/api/v1/lightning/get_text"); +const url = new URL("wss://api.smallest.ai/waves/v1/pulse/get_text"); url.searchParams.append("language", "multi"); url.searchParams.append("encoding", "linear16"); url.searchParams.append("sample_rate", "16000"); diff --git a/fern/products/waves/versions/v4.0.0/speech-to-text-new/features/numeric-formatting.mdx b/fern/products/waves/pages/v4.0.0/speech-to-text/features/numeric-formatting.mdx similarity index 96% rename from fern/products/waves/versions/v4.0.0/speech-to-text-new/features/numeric-formatting.mdx rename to 
fern/products/waves/pages/v4.0.0/speech-to-text/features/numeric-formatting.mdx index b8c8811..0f49145 100644 --- a/fern/products/waves/versions/v4.0.0/speech-to-text-new/features/numeric-formatting.mdx +++ b/fern/products/waves/pages/v4.0.0/speech-to-text/features/numeric-formatting.mdx @@ -18,7 +18,7 @@ Add a `numerals` parameter in the query string set to `true`, `false`, or `auto` ### Real-Time WebSocket API ```javascript -const url = new URL("wss://waves-api.smallest.ai/api/v1/lightning/get_text"); +const url = new URL("wss://api.smallest.ai/waves/v1/pulse/get_text"); url.searchParams.append("language", "en"); url.searchParams.append("encoding", "linear16"); url.searchParams.append("sample_rate", "16000"); diff --git a/fern/products/waves/versions/v4.0.0/speech-to-text-new/features/redaction.mdx b/fern/products/waves/pages/v4.0.0/speech-to-text/features/redaction.mdx similarity index 75% rename from fern/products/waves/versions/v4.0.0/speech-to-text-new/features/redaction.mdx rename to fern/products/waves/pages/v4.0.0/speech-to-text/features/redaction.mdx index a21d60d..edfb213 100644 --- a/fern/products/waves/versions/v4.0.0/speech-to-text-new/features/redaction.mdx +++ b/fern/products/waves/pages/v4.0.0/speech-to-text/features/redaction.mdx @@ -5,7 +5,7 @@ description: 'Automatically redact sensitive information from transcriptions' Real-Time -Redaction allows you to identify and mask sensitive information from transcriptions to protect privacy and comply with data protection regulations. The Lightning STT API supports two types of redaction: PII (Personally Identifiable Information) and PCI (Payment Card Information). +Redaction allows you to identify and mask sensitive information from transcriptions to protect privacy and comply with data protection regulations. The Pulse STT API supports two types of redaction: PII (Personally Identifiable Information) and PCI (Payment Card Information). 
## Enabling Redaction @@ -14,7 +14,7 @@ Add `redact_pii` and/or `redact_pci` parameters to your WebSocket connection que ### Real-Time WebSocket API ```javascript -const url = new URL("wss://waves-api.smallest.ai/api/v1/lightning/get_text"); +const url = new URL("wss://api.smallest.ai/waves/v1/pulse/get_text"); url.searchParams.append("language", "en"); url.searchParams.append("encoding", "linear16"); url.searchParams.append("sample_rate", "16000"); @@ -78,30 +78,30 @@ When redaction is enabled, the transcription text contains placeholder tokens in - - - - + + + + - - - - + + + + - - - - + + + + - - - - + + + +
FieldTypeWhen IncludedDescriptionFieldTypeWhen IncludedDescription
`redacted_entities`array`redact_pii=true` or `redact_pci=true`List of redacted entity placeholders (e.g., `[FIRSTNAME_1]`, `[CREDITCARDCVV_1]`)`redacted_entities`array`redact_pii=true` or `redact_pci=true`List of redacted entity placeholders (e.g., `[FIRSTNAME_1]`, `[CREDITCARDCVV_1]`)
`transcript`stringAlwaysTranscription text with redacted entities replaced by placeholder tokens`transcript`stringAlwaysTranscription text with redacted entities replaced by placeholder tokens
`full_transcript`string`full_transcript=true` AND `is_final=true`Cumulative transcript with redacted entities (when `full_transcript=true` is enabled)`full_transcript`string`full_transcript=true` AND `is_final=true`Cumulative transcript with redacted entities (when `full_transcript=true` is enabled)
diff --git a/fern/products/waves/versions/v4.0.0/speech-to-text-new/features/utterances.mdx b/fern/products/waves/pages/v4.0.0/speech-to-text/features/utterances.mdx similarity index 92% rename from fern/products/waves/versions/v4.0.0/speech-to-text-new/features/utterances.mdx rename to fern/products/waves/pages/v4.0.0/speech-to-text/features/utterances.mdx index 61b8fad..c938c5b 100644 --- a/fern/products/waves/versions/v4.0.0/speech-to-text-new/features/utterances.mdx +++ b/fern/products/waves/pages/v4.0.0/speech-to-text/features/utterances.mdx @@ -19,7 +19,7 @@ Sentence-level timestamps (utterances) are supported in both Pre-Recorded and Re ```bash curl --request POST \ - --url "https://waves-api.smallest.ai/api/v1/lightning/get_text?model=lightning&language=en&word_timestamps=true&diarize=true" \ + --url "https://api.smallest.ai/waves/v1/pulse/get_text?language=en&word_timestamps=true&diarize=true" \ --header "Authorization: Bearer $SMALLEST_API_KEY" \ --header "Content-Type: audio/wav" \ --data-binary "@/path/to/audio.wav" @@ -30,7 +30,7 @@ curl --request POST \ For the Real-Time WebSocket API, set `sentence_timestamps=true` as a query parameter when establishing the WebSocket connection. 
```javascript -const url = new URL("wss://waves-api.smallest.ai/api/v1/lightning/get_text"); +const url = new URL("wss://api.smallest.ai/waves/v1/pulse/get_text"); url.searchParams.append("language", "en"); url.searchParams.append("sentence_timestamps", "true"); diff --git a/fern/products/waves/pages/v4.0.0/speech-to-text/features/word-boosting.mdx b/fern/products/waves/pages/v4.0.0/speech-to-text/features/word-boosting.mdx new file mode 100644 index 0000000..3367c54 --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/speech-to-text/features/word-boosting.mdx @@ -0,0 +1,93 @@ +--- +title: 'Keyword Boosting' +description: 'Boost specific words or phrases so the speech-to-text model recognizes them correctly' +icon: "arrow-up-right-dots" +--- + +Real-Time + +Keyword boosting lets you bias the Pulse speech-to-text model toward specific words or phrases — useful for proper nouns, brand names, technical terms, or domain-specific vocabulary that the model might otherwise misrecognize. + +## Format + +Keywords are passed as a **comma-separated string** in the `keywords` query parameter. Each keyword follows the format: + +``` +KEYWORD:INTENSIFIER +``` + +| Part | Required | Description | +|---|---|---| +| `KEYWORD` | Yes | The word or phrase to boost | +| `INTENSIFIER` | No | A number controlling boost strength. Defaults to `1.0` if omitted | + +## Intensifier Scale + +| Value | Effect | +|---|---| +| `1` | Mild boost (default if omitted) | +| `2-3` | Moderate boost — good for uncommon proper nouns | +| `4-6` | Strong boost — for rare terms the model struggles with | +| `7-10` | Very strong boost — use sparingly, can over-bias results | + +Higher values create a stronger bias toward that word in the output. Start low and increase if the word still isn't recognized correctly. 
+ +## Enabling Keyword Boosting + +### Real-Time WebSocket API + +Add the `keywords` query parameter to your WebSocket connection URL with a comma-separated list of keywords and optional intensifiers. + +#### Single keyword + +```javascript +const url = new URL("wss://api.smallest.ai/waves/v1/pulse/get_text"); +url.searchParams.append("language", "en"); +url.searchParams.append("encoding", "linear16"); +url.searchParams.append("sample_rate", "16000"); +url.searchParams.append("keywords", "NVIDIA:5"); + +const ws = new WebSocket(url.toString(), { + headers: { + Authorization: `Bearer ${API_KEY}`, + }, +}); +``` + +#### Multiple keywords + +``` +wss://api.smallest.ai/waves/v1/pulse/get_text?language=en&encoding=linear16&sample_rate=16000&keywords=Hansi:6,Muller:6,CVV:9 +``` + +#### Mix of boosted and default-intensity keywords + +``` +wss://api.smallest.ai/waves/v1/pulse/get_text?language=en&encoding=linear16&sample_rate=16000&keywords=CEO:3,NVIDIA:5,Jensen +``` + +`Jensen` with no intensifier defaults to `1.0`. + +## Examples + +### Boost names in a meeting transcript + +``` +wss://api.smallest.ai/waves/v1/pulse/get_text?language=en&encoding=linear16&sample_rate=16000&keywords=Jensen:4,NVIDIA:5,Blackwell:6,CUDA:3 +``` + +### Boost brand names and product terms + +``` +wss://api.smallest.ai/waves/v1/pulse/get_text?language=en&encoding=linear16&sample_rate=16000&keywords=Anthropic:5,Claude:4,Sonnet:3 +``` + +## Limits + +- Max **100 keywords** per session +- Intensifier must be a **non-negative number** +- Each keyword must be a **string** + + +Start with lower intensifier values (1–3) and increase gradually. Very high values (7–10) can over-bias the model and should be used sparingly. 
+ diff --git a/fern/products/waves/versions/v4.0.0/speech-to-text-new/features/word-timestamps.mdx b/fern/products/waves/pages/v4.0.0/speech-to-text/features/word-timestamps.mdx similarity index 64% rename from fern/products/waves/versions/v4.0.0/speech-to-text-new/features/word-timestamps.mdx rename to fern/products/waves/pages/v4.0.0/speech-to-text/features/word-timestamps.mdx index 1f86679..84cff20 100644 --- a/fern/products/waves/versions/v4.0.0/speech-to-text-new/features/word-timestamps.mdx +++ b/fern/products/waves/pages/v4.0.0/speech-to-text/features/word-timestamps.mdx @@ -1,6 +1,6 @@ --- title: 'Word timestamps' -description: 'Return word-level timing metadata from Lightning STT' +description: 'Return word-level timing metadata from Pulse STT' --- Pre-Recorded Real-Time @@ -11,13 +11,13 @@ Word timestamps provide precise timing information for each word in the transcri ### Pre-Recorded API -Add `word_timestamps=true` to your Lightning STT query parameters. This works for both raw-byte uploads (`Content-Type: audio/wav`) and JSON requests with hosted audio URLs. +Add `word_timestamps=true` to your Pulse STT query parameters. This works for both raw-byte uploads (`Content-Type: audio/wav`) and JSON requests with hosted audio URLs. #### Sample request ```bash curl --request POST \ - --url "https://waves-api.smallest.ai/api/v1/lightning/get_text?model=lightning&language=en&word_timestamps=true" \ + --url "https://api.smallest.ai/waves/v1/pulse/get_text?language=en&word_timestamps=true" \ --header "Authorization: Bearer $SMALLEST_API_KEY" \ --header "Content-Type: audio/wav" \ --data-binary "@/path/to/audio.wav" @@ -25,10 +25,10 @@ curl --request POST \ ### Real-Time WebSocket API -Add `word_timestamps=true` to your WebSocket connection query parameters when connecting to the Lightning STT WebSocket API. +Add `word_timestamps=true` to your WebSocket connection query parameters when connecting to the Pulse STT WebSocket API. 
```javascript -const url = new URL("wss://waves-api.smallest.ai/api/v1/lightning/get_text"); +const url = new URL("wss://api.smallest.ai/waves/v1/pulse/get_text"); url.searchParams.append("language", "en"); url.searchParams.append("encoding", "linear16"); url.searchParams.append("sample_rate", "16000"); @@ -112,48 +112,48 @@ When `diarize=true` is enabled, the `words` array also includes `speaker` (integ - - - - + + + + - - - - + + + + - - - - + + + + - - - - + + + + - - - - + + + + - - - - + + + + - - - - + + + +
FieldTypeWhen IncludedDescriptionFieldTypeWhen IncludedDescription
`word`string`word_timestamps=true`The transcribed word`word`string`word_timestamps=true`The transcribed word
`start`number`word_timestamps=true`Start time in seconds`start`number`word_timestamps=true`Start time in seconds
`end`number`word_timestamps=true`End time in seconds`end`number`word_timestamps=true`End time in seconds
`confidence`number`word_timestamps=true` (realtime only)Confidence score for the word (0.0 to 1.0)`confidence`number`word_timestamps=true` (realtime only)Confidence score for the word (0.0 to 1.0)
`speaker`integer (realtime) / string (pre-recorded)`diarize=true`Speaker label. Real-time API uses integer IDs (0, 1, ...), pre-recorded API uses string labels (speaker_0, speaker_1, ...)`speaker`integer (realtime) / string (pre-recorded)`diarize=true`Speaker label. Real-time API uses integer IDs (0, 1, ...), pre-recorded API uses string labels (speaker_0, speaker_1, ...)
`speaker_confidence`number`diarize=true` (realtime only)Confidence score for the speaker assignment (0.0 to 1.0)`speaker_confidence`number`diarize=true` (realtime only)Confidence score for the speaker assignment (0.0 to 1.0)
diff --git a/fern/products/waves/pages/v4.0.0/speech-to-text/model-cards/pulse.mdx b/fern/products/waves/pages/v4.0.0/speech-to-text/model-cards/pulse.mdx new file mode 100644 index 0000000..6b4830b --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/speech-to-text/model-cards/pulse.mdx @@ -0,0 +1,266 @@ +--- +title: "Pulse" +description: "High-accuracy, low-latency speech-to-text with 39 languages, streaming and non-streaming support, speaker diarization, and PII redaction." +icon: "microphone" +--- + +High-accuracy, low-latency speech-to-text built for real-time transcription across 39 languages, with streaming and non-streaming support. + + + + TTFT at 1 concurrency + + + TTFT at 100 concurrency + + + Supported languages + + + Streaming + Non-streaming + + + +## Model Overview + +| | | +|---|---| +| **Developed by** | Smallest AI | +| **Model type** | Speech-to-Text | +| **Languages** | 39 supported | +| **License** | Proprietary | +| **Modes** | Streaming (WebSocket) and Non-streaming (HTTP) | +| **Console** | [app.smallest.ai](https://app.smallest.ai) | + +### Key Capabilities + + + + Ultra-low latency — 64ms TTFT at 1 concurrency, 300ms at 100 concurrent requests. Built for live transcription and conversational AI. + + + 39 languages across streaming and non-streaming modes, with automatic language detection and code-switching within a single session. + + + Built-in redaction of personal and payment card data, enterprise-ready for both streaming and non-streaming use cases. + + + + + + Automatic multi-speaker identification. Streaming diarization is enterprise-ready; non-streaming available with a cap of 4 speakers. + + + Background noise handling built into the model — enterprise-ready in streaming mode. + + + Supports multi-language audio within a single session. Best used by setting the known primary language. 
+ + + +--- + +## Performance & Benchmarks + +Coming soon + +--- + +## Features — Non-streaming + +| Feature | Available | Notes | +|---|---|---| +| Speaker diarization | Yes | Capped at 4 speakers | +| Keyword boosting | Yes | Custom vocabulary enhancement | +| PII redaction | Yes | Personal info redaction | +| PCI redaction | Yes | Payment card data redaction | +| Word-level timestamps | Yes | Per-word timing | +| Sentence-level timestamps | Yes | Per-sentence timing | +| Punctuation | Yes | Auto punctuation | +| Profanity filter | Yes | Explicit content filtering | +| Language detection | Yes | Auto language ID | +| Code-switching | Yes | Multi-language in same audio | +| Noise reduction | Yes | Background noise handling | +| Emotion, age & gender detection | Yes | Provides confidence scores | + +## Features — Streaming + +| Feature | Available | Notes | +|---|---|---| +| Speaker diarization | Yes | Multi-speaker identification | +| Keyword boosting | Yes | Custom vocabulary enhancement | +| PII redaction | Yes | Personal info redaction | +| PCI redaction | Yes | Payment card data redaction | +| Word-level timestamps | Yes | Per-word timing | +| Sentence-level timestamps | Yes | Per-sentence timing | +| Punctuation | Yes | Auto punctuation | +| Profanity filter | No | Not available in streaming | +| Language detection | Yes | Auto language ID | +| Code-switching | Yes | Multi-language in same audio | +| Noise reduction | Yes | Background noise handling | + +--- + +## Supported Languages — Non-streaming + +| Language | Code | Available | +|---|---|---| +| English | `en` | Yes | +| Italian | `it` | Yes | +| Spanish | `es` | Yes | +| Portuguese | `pt` | Yes | +| Hindi | `hi` | Yes | +| German | `de` | Yes | +| French | `fr` | Yes | +| Ukrainian | `uk` | Yes | +| Russian | `ru` | Yes | +| Kannada | `kn` | Yes | +| Malayalam | `ml` | Yes | +| Polish | `pl` | Yes | +| Marathi | `mr` | Yes | +| Gujarati | `gu` | Yes | +| Czech | `cs` | Yes | +| Slovak | `sk` | Yes | +| 
Telugu | `te` | Yes | +| Oriya (Odia) | `or` | Yes | +| Dutch | `nl` | Yes | +| Bengali | `bn` | Yes | +| Latvian | `lv` | Yes | +| Estonian | `et` | Yes | +| Romanian | `ro` | Yes | +| Punjabi | `pa` | Yes | +| Finnish | `fi` | Yes | +| Swedish | `sv` | Yes | +| Bulgarian | `bg` | Yes | +| Tamil | `ta` | Yes | +| Hungarian | `hu` | Yes | +| Danish | `da` | Yes | +| Lithuanian | `lt` | Yes | +| Maltese | `mt` | Yes | +| Japanese | `ja` | Yes | +| Cantonese | `yue` | Yes | +| Mandarin | `zh` | Yes | +| Korean | `ko` | Yes | +| Tagalog | `tl` | Yes | +| Indonesian | `id` | Yes | +| Malay | `ms` | Yes | + +## Supported Languages — Streaming + +| Language | Code | Available | +|---|---|---| +| English | `en` | Yes | +| Italian | `it` | Yes | +| Spanish | `es` | Yes | +| Portuguese | `pt` | Yes | +| Hindi | `hi` | Yes | +| German | `de` | Yes | +| French | `fr` | Yes | +| Ukrainian | `uk` | Yes | +| Russian | `ru` | Yes | +| Kannada | `kn` | Yes | +| Malayalam | `ml` | Yes | +| Polish | `pl` | Yes | +| Marathi | `mr` | Yes | +| Gujarati | `gu` | Yes | +| Czech | `cs` | Yes | +| Slovak | `sk` | Yes | +| Telugu | `te` | Yes | +| Oriya (Odia) | `or` | Yes | +| Dutch | `nl` | Yes | +| Bengali | `bn` | Yes | +| Latvian | `lv` | Yes | +| Estonian | `et` | Yes | +| Romanian | `ro` | Yes | +| Punjabi | `pa` | Yes | +| Finnish | `fi` | Yes | +| Swedish | `sv` | Yes | +| Bulgarian | `bg` | Yes | +| Tamil | `ta` | Yes | +| Hungarian | `hu` | Yes | +| Danish | `da` | Yes | +| Lithuanian | `lt` | Yes | +| Maltese | `mt` | Yes | +| Japanese | `ja` | Yes | +| Cantonese | `yue` | Yes | +| Mandarin | `zh` | Yes | +| Korean | `ko` | Yes | +| Tagalog | `tl` | Yes | +| Indonesian | `id` | Yes | +| Malay | `ms` | Yes | + +--- + +## Best Practices + +**Specify the language parameter when known** + +When the language of the audio is known in advance, always set it explicitly rather than relying on automatic detection. 
This yields better transcription accuracy because the model can optimize directly for that language without needing to first identify it. + +For example, setting the language parameter to `es` (Spanish) tells the model to expect Spanish audio, which also handles English+Spanish code-switching scenarios. + +| Parameter | Use case | +|---|---| +| `en` | English | +| `es` | Spanish (handles English+Spanish) | +| `hi` | Hindi (handles English+Hindi) | +| `multi` | Unknown or mixed-language audio only | + +**When to use `multi`:** +- When the language is truly unknown beforehand +- When processing audio from varied or unpredictable sources + +**Use features only when needed** + +Enable optional features (diarization, PII redaction, timestamps) only when the use case requires them. Unnecessary features add latency. + +--- + +## Use Cases + +### Direct Use + +- Real-time call transcription +- Voice assistant input +- Meeting transcription +- Accessibility and captioning +- Customer support recording analysis + +### Downstream Use + +- Multi-turn conversational agents +- Voice-to-text pipelines +- Telephony and IVR systems +- Content indexing and search +- Compliance and audit logging + +--- + +## Limitations & Safety + +### Known Limitations + +- **Hindi** — still training on proper nouns and order IDs; not enterprise-ready for non-streaming +- **Low-resource languages** — Kannada, Malayalam, Marathi, Gujarati, Telugu, Oriya, Bengali, Punjabi, Tamil, Japanese, Cantonese, Mandarin, Korean, Tagalog, Indonesian, and Malay are available but not yet enterprise-ready +- **Language detection (`multi`)** — automatic language identification does not perform reliably enough for production workloads; specify the known language parameter instead +- **Non-streaming speaker diarization** — capped at 4 speakers; contact support for higher speaker count requirements +- **Audio quality** — transcription accuracy is directly affected by input audio quality; background noise, low bitrate, 
or overlapping speech may degrade results +- **Code-switching** — works best when the primary language is explicitly set + + + Pulse must **not** be used for recording or transcribing individuals without their explicit consent, surveillance, stalking, or any form of unauthorized monitoring, or any illegal or unethical purposes. + + +### Safety & Compliance + +- Usage is monitored for policy compliance +- For compliance documentation (GDPR, SOC2, HIPAA), contact [support@smallest.ai](mailto:support@smallest.ai) + +--- + +| Channel | Details | +|---------|---------| +| **Support** | [support@smallest.ai](mailto:support@smallest.ai) | +| **Documentation** | [docs.smallest.ai/waves](https://docs.smallest.ai/waves) | +| **Console** | [app.smallest.ai](https://app.smallest.ai) | +| **Community** | [Discord](https://discord.gg/5evETqguJs) | diff --git a/fern/products/waves/versions/v4.0.0/speech-to-text-new/overview.mdx b/fern/products/waves/pages/v4.0.0/speech-to-text/overview.mdx similarity index 56% rename from fern/products/waves/versions/v4.0.0/speech-to-text-new/overview.mdx rename to fern/products/waves/pages/v4.0.0/speech-to-text/overview.mdx index 651ad31..960a1ac 100644 --- a/fern/products/waves/versions/v4.0.0/speech-to-text-new/overview.mdx +++ b/fern/products/waves/pages/v4.0.0/speech-to-text/overview.mdx @@ -1,29 +1,35 @@ --- title: 'Overview' +description: 'Convert speech to text with the Pulse API - supporting real-time streaming and pre-recorded audio transcription with industry-leading latency' icon: 'microphone' --- +The Waves Speech To Text (STT) stack processes audio via `https://api.smallest.ai/waves/v1/pulse/get_text` and returns low-latency transcripts with configurable languages, formats, and pricing tiers suited for enterprise deployments. -> Fastest real-time speech-to-text transcription using the Lightning STT API. + + Get started in minutes. Learn how to get your API key and transcribe your first audio file. 
+ -The Waves Speech-To-Text (STT) stack processes audio via `https://waves-api.smallest.ai/api/v1/lightning/get_text` and returns low-latency transcripts with configurable languages, formats, and pricing tiers suited for enterprise deployments. +## Transcription Modes + +We offer two transcription modes to cover a wide range of use cases. Choose the one that best fits your needs: - + Transcribe audio files using synchronous HTTPS POST requests. Perfect for batch processing, archived media, and offline transcription workflows. - + Stream audio and receive transcription results as the audio is processed. Ideal for live conversations, voice assistants, and low-latency applications. ## Feature highlights -Our models specialize in processing audio to preserve information that is often lost during conventional speech-to-text conversion. +Our models specialize in processing audio to preserve information that is often lost during conventional speech to text conversion. - - Support for 30+ languages with automatic language detection or ISO 639-1 codes (`en`, `hi`, etc.). Use `language=multi` to enable automatic language detection across all supported languages. + + Support for 32+ languages with automatic language detection or ISO 639-1 codes (`en`, `hi`, etc.). Use `language=multi` to enable automatic language detection across all supported languages. @@ -64,204 +70,204 @@ Our models specialize in processing audio to preserve information that is often - - - - + + + + - - - - + + + + - - - - + + + + - - - - + + + + - - - - + + + + - - - - + + + + - - - - + + + + - - - - + + + + - - - - + + + + - - - - + + + + - - - - + + + + - - - - + + + + - - - - + + + + - - - - + + + + - - - - + + + + - - - - + + + + - - - - + + + + - - - - + + + + - - - - + + + + - - - - + + + + - - - - + + + + - - - - + + + + - - - - + + + + - - - - + + + + - - - - + + + + - - - - + + + + - - - - + + + + - - - - + + + + - - - - + + + + - - - - + + + + - - - - + + + + - - - - + + + + - - - - + + + +
LanguageCodePre-RecordedReal-TimeLanguageCodePre-RecordedReal-Time
ItalianitYesYesItalianitYesYes
SpanishesYesYesSpanishesYesYes
EnglishenYesYesEnglishenYesYes
PortugueseptYesYesPortugueseptYesYes
HindihiYesYesHindihiYesYes
GermandeYesYesGermandeYesYes
FrenchfrYesYesFrenchfrYesYes
UkrainianukYesYesUkrainianukYesYes
RussianruYesYesRussianruYesYes
KannadaknYesYesKannadaknYesYes
MalayalammlYesYesMalayalammlYesYes
PolishplYesYesPolishplYesYes
MarathimrYesYesMarathimrYesYes
GujaratiguYesYesGujaratiguYesYes
CzechcsYesYesCzechcsYesYes
SlovakskYesYesSlovakskYesYes
TeluguteYesYesTeluguteYesYes
Oriya (Odia)orYesYesOriya (Odia)orYesYes
DutchnlYesYesDutchnlYesYes
BengalibnYesYesBengalibnYesYes
LatvianlvYesYesLatvianlvYesYes
EstonianetYesYesEstonianetYesYes
RomanianroYesYesRomanianroYesYes
PunjabipaYesYesPunjabipaYesYes
FinnishfiYesYesFinnishfiYesYes
SwedishsvYesYesSwedishsvYesYes
BulgarianbgYesYesBulgarianbgYesYes
TamiltaYesYesTamiltaYesYes
HungarianhuYesYesHungarianhuYesYes
DanishdaYesYesDanishdaYesYes
LithuanianltYesYesLithuanianltYesYes
MaltesemtYesYesMaltesemtYesYes
@@ -270,8 +276,7 @@ Use `language=multi` to auto-detect across the full list or specify one of the c ## Next steps -- Send your first POST request in the [Lightning Pre-Recorded quickstart](/v4.0.0/content/speech-to-text-new/pre-recorded/quickstart). -- Start your first WebSocket connection in the [Lightning WebSocket quickstart](/v4.0.0/content/speech-to-text-new/realtime/quickstart). -- Review [best practices](/v4.0.0/content/speech-to-text-new/pre-recorded/best-practices) for audio preprocessing and request hygiene. -- Use the [troubleshooting guide](/v4.0.0/content/speech-to-text-new/pre-recorded/troubleshooting) when you need quick fixes. - +- Send your first POST request in the [Pulse STT Pre-Recorded quickstart](/waves/documentation/speech-to-text-pulse/pre-recorded/quickstart). +- Start your first WebSocket connection in the [Pulse STT WebSocket quickstart](/waves/documentation/speech-to-text-pulse/realtime-web-socket/quickstart). +- Review [best practices](/waves/documentation/speech-to-text-pulse/pre-recorded/best-practices) for audio preprocessing and request hygiene. +- Use the [troubleshooting guide](/waves/documentation/speech-to-text-pulse/pre-recorded/troubleshooting) when you need quick fixes. 
diff --git a/fern/products/waves/versions/v4.0.0/speech-to-text-new/pre-recorded/audio-formats.mdx b/fern/products/waves/pages/v4.0.0/speech-to-text/pre-recorded/audio-formats.mdx similarity index 59% rename from fern/products/waves/versions/v4.0.0/speech-to-text-new/pre-recorded/audio-formats.mdx rename to fern/products/waves/pages/v4.0.0/speech-to-text/pre-recorded/audio-formats.mdx index 611d054..1f54c0b 100644 --- a/fern/products/waves/versions/v4.0.0/speech-to-text-new/pre-recorded/audio-formats.mdx +++ b/fern/products/waves/pages/v4.0.0/speech-to-text/pre-recorded/audio-formats.mdx @@ -1,9 +1,8 @@ --- title: 'Audio Specifications' +description: 'Supported formats, codecs, and recommendations for pre-recorded audio' --- -This guide covers supported formats, codecs, and recommendations for optimal transcription quality. - ## Input Methods Our API supports two input methods for transcribing audio: @@ -11,74 +10,74 @@ Our API supports two input methods for transcribing audio: - - - + + + - - - + + + - - - + + +
MethodContent TypeUse CaseMethodContent TypeUse Case
Raw Bytesapplication/octet-streamUpload audio files directly from your systemRaw Bytesapplication/octet-streamUpload audio files directly from your system
Audio URLapplication/jsonProcess audio files hosted on a remote serverAudio URLapplication/jsonProcess audio files hosted on a remote server
## Supported Formats -The Lightning STT API supports a wide range of audio formats for pre-recorded transcription. +The Pulse STT API supports a wide range of audio formats for pre-recorded transcription. - - - - + + + + - - - - + + + + - - - - + + + + - - - - + + + + - - - - + + + + - - - - + + + + - - - - + + + +
FormatExtensionCodecNotesFormatExtensionCodecNotes
WAV.wavPCM, Linear PCMRecommended for best qualityWAV.wavPCM, Linear PCMRecommended for best quality
MP3.mp3MPEG Audio Layer IIIWidely compatibleMP3.mp3MPEG Audio Layer IIIWidely compatible
FLAC.flacFree Lossless Audio CodecLossless compressionFLAC.flacFree Lossless Audio CodecLossless compression
OGG.oggVorbis, OpusOpen source formatOGG.oggVorbis, OpusOpen source format
M4A.m4aAAC, ALACApple formatM4A.m4aAAC, ALACApple format
WebM.webmOpus, VorbisWeb-optimizedWebM.webmOpus, VorbisWeb-optimized
diff --git a/fern/products/waves/versions/v4.0.0/speech-to-text-new/pre-recorded/best-practices.mdx b/fern/products/waves/pages/v4.0.0/speech-to-text/pre-recorded/best-practices.mdx similarity index 88% rename from fern/products/waves/versions/v4.0.0/speech-to-text-new/pre-recorded/best-practices.mdx rename to fern/products/waves/pages/v4.0.0/speech-to-text/pre-recorded/best-practices.mdx index 7a7fe7d..81f3a22 100644 --- a/fern/products/waves/versions/v4.0.0/speech-to-text-new/pre-recorded/best-practices.mdx +++ b/fern/products/waves/pages/v4.0.0/speech-to-text/pre-recorded/best-practices.mdx @@ -1,11 +1,11 @@ --- title: 'Best Practices' -description: 'Prepare audio inputs before submitting them to Lightning STT' +description: 'Prepare audio inputs before submitting them to Pulse STT' --- # Pre-recorded best practices -Follow these recommendations to keep Lightning STT latencies low while preserving transcript fidelity. +Follow these recommendations to keep Pulse STT latencies low while preserving transcript fidelity. ## Audio preprocessing workflow diff --git a/fern/products/waves/versions/v4.0.0/speech-to-text-new/pre-recorded/code-examples.mdx b/fern/products/waves/pages/v4.0.0/speech-to-text/pre-recorded/code-examples.mdx similarity index 95% rename from fern/products/waves/versions/v4.0.0/speech-to-text-new/pre-recorded/code-examples.mdx rename to fern/products/waves/pages/v4.0.0/speech-to-text/pre-recorded/code-examples.mdx index bcba148..56c0001 100644 --- a/fern/products/waves/versions/v4.0.0/speech-to-text-new/pre-recorded/code-examples.mdx +++ b/fern/products/waves/pages/v4.0.0/speech-to-text/pre-recorded/code-examples.mdx @@ -1,5 +1,6 @@ --- title: 'Code Examples' +description: 'Complete code samples for transcribing pre-recorded audio with Pulse STT' --- Below is a complete Python example demonstrating audio preprocessing, transcription with age/gender detection, emotion detection, and sentence-level timestamps (utterances). 
@@ -15,7 +16,7 @@ client = WavesClient(api_key=os.getenv("SMALLEST_API_KEY")) def preprocess_audio(input_path, output_path): """ - Preprocess audio file to optimal format for Lightning STT: + Preprocess audio file to optimal format for Pulse STT: - Convert to 16 kHz mono WAV - Normalize audio levels - Remove leading/trailing silence @@ -34,7 +35,7 @@ def transcribe_with_features(audio_path): """ response = client.transcribe( file_path=audio_path, - model="lightning", + model="pulse", language="en", word_timestamps=True, age_detection=True, diff --git a/fern/products/waves/pages/v4.0.0/speech-to-text/pre-recorded/features.mdx b/fern/products/waves/pages/v4.0.0/speech-to-text/pre-recorded/features.mdx new file mode 100644 index 0000000..1156ac0 --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/speech-to-text/pre-recorded/features.mdx @@ -0,0 +1,42 @@ +--- +title: 'Features' +description: 'Available features for Pre-Recorded Pulse STT API' +--- + +The Pre-Recorded Pulse STT API supports the following features: + +## Available Features + + + + Get precise timing information for each word in the transcription + + + Automatically detect the language of the audio + + + Identify and label different speakers in the audio + + + Predict demographic attributes alongside transcription + + + Detect emotional tone in the transcribed speech + + + Get the complete transcription of the audio + + + Automatically redact sensitive information from transcriptions + + + Format numbers, dates, and currencies in transcriptions + + + Segment transcription into meaningful utterances + + + Boost recognition accuracy for specific words, brand names, and domain terms + + + diff --git a/fern/products/waves/versions/v4.0.0/speech-to-text-new/pre-recorded/quickstart.mdx b/fern/products/waves/pages/v4.0.0/speech-to-text/pre-recorded/quickstart.mdx similarity index 76% rename from fern/products/waves/versions/v4.0.0/speech-to-text-new/pre-recorded/quickstart.mdx rename to 
fern/products/waves/pages/v4.0.0/speech-to-text/pre-recorded/quickstart.mdx index 4a4f971..af068d8 100644 --- a/fern/products/waves/versions/v4.0.0/speech-to-text-new/pre-recorded/quickstart.mdx +++ b/fern/products/waves/pages/v4.0.0/speech-to-text/pre-recorded/quickstart.mdx @@ -3,7 +3,7 @@ title: 'Quickstart' description: 'Get started with transcribing pre-recorded audio files using the Waves STT API' --- -This guide shows you how to convert an audio file into text using Smallest AI's Lightning STT model. +This guide shows you how to convert an audio file into text using Smallest AI's Pulse STT model. # Pre-Recorded Audio @@ -21,12 +21,12 @@ The Pre-Recorded API allows you to upload audio files and receive complete trans ## Endpoint ``` -POST https://waves-api.smallest.ai/api/v1/lightning/get_text +POST https://api.smallest.ai/waves/v1/pulse/get_text ``` ## Authentication -Head over to the [smallest console](https://console.smallest.ai/apikeys) to generate an API key, if not done previously. Also look at [Authentication guide](/v4.0.0/content/getting-started/authentication) for more information about API keys and their usage. +Head over to the [smallest console](https://app.smallest.ai/dashboard/settings/apikeys) to generate an API key, if not done previously. Also look at [Authentication guide](/waves/documentation/getting-started/authentication) for more information about API keys and their usage. Include your API key in the Authorization header: @@ -37,7 +37,7 @@ Authorization: Bearer SMALLEST_API_KEY ## Example Request -The API supports two input methods: **Raw Audio Bytes** and **Audio URL**. For details on both methods, see the [Audio Specifications](/v4.0.0/content/speech-to-text-new/pre-recorded/audio-formats) guide. +The API supports two input methods: **Raw Audio Bytes** and **Audio URL**. For details on both methods, see the [Audio Specifications](/waves/documentation/speech-to-text/pre-recorded/audio-formats) guide. 
### Method 1: Raw Audio Bytes @@ -46,7 +46,7 @@ Upload audio files directly by sending raw audio data: ```bash cURL curl --request POST \ - --url "https://waves-api.smallest.ai/api/v1/lightning/get_text?model=lightning&language=en&word_timestamps=true" \ + --url "https://api.smallest.ai/waves/v1/pulse/get_text?language=en&word_timestamps=true" \ --header "Authorization: Bearer $SMALLEST_API_KEY" \ --header "Content-Type: audio/wav" \ --data-binary "@/path/to/audio.wav" @@ -57,9 +57,8 @@ import os import requests API_KEY = os.environ["SMALLEST_API_KEY"] -endpoint = "https://waves-api.smallest.ai/api/v1/lightning/get_text" +endpoint = "https://api.smallest.ai/waves/v1/pulse/get_text" params = { - "model": "lightning", "language": "en", "word_timestamps": "true", } @@ -80,9 +79,8 @@ print(result["transcription"]) import fetch from "node-fetch"; import fs from "fs"; -const endpoint = "https://waves-api.smallest.ai/api/v1/lightning/get_text"; +const endpoint = "https://api.smallest.ai/waves/v1/pulse/get_text"; const params = new URLSearchParams({ - model: "lightning", language: "en", word_timestamps: "true", }); @@ -111,7 +109,7 @@ Provide a URL to an audio file hosted remotely. 
This is useful when your audio f ```bash cURL curl --request POST \ - --url "https://waves-api.smallest.ai/api/v1/lightning/get_text?model=lightning&language=en&word_timestamps=true" \ + --url "https://api.smallest.ai/waves/v1/pulse/get_text?language=en&word_timestamps=true" \ --header "Authorization: Bearer $SMALLEST_API_KEY" \ --header "Content-Type: application/json" \ --data '{ @@ -124,9 +122,8 @@ import os import requests API_KEY = os.environ["SMALLEST_API_KEY"] -endpoint = "https://waves-api.smallest.ai/api/v1/lightning/get_text" +endpoint = "https://api.smallest.ai/waves/v1/pulse/get_text" params = { - "model": "lightning", "language": "en", "word_timestamps": "true", } @@ -147,9 +144,8 @@ print(result["transcription"]) ```javascript JavaScript import fetch from "node-fetch"; -const endpoint = "https://waves-api.smallest.ai/api/v1/lightning/get_text"; +const endpoint = "https://api.smallest.ai/waves/v1/pulse/get_text"; const params = new URLSearchParams({ - model: "lightning", language: "en", word_timestamps: "true", }); @@ -195,8 +191,8 @@ A successful request returns a JSON object with the transcription: ## Next Steps -- Learn about [supported audio formats](/v4.0.0/content/speech-to-text-new/pre-recorded/audio-formats). -- Decide which enrichment options to enable in the [features guide](/v4.0.0/content/speech-to-text-new/pre-recorded/features). -- Configure asynchronous callbacks with [webhooks](/v4.0.0/content/speech-to-text-new/pre-recorded/webhooks). -- Review a full [code example](/v4.0.0/content/speech-to-text-new/pre-recorded/code-examples) here. +- Learn about [supported audio formats](/waves/documentation/speech-to-text/pre-recorded/audio-formats). +- Decide which enrichment options to enable in the [features guide](/waves/documentation/speech-to-text/pre-recorded/features). +- Configure asynchronous callbacks with [webhooks](/waves/documentation/speech-to-text/pre-recorded/webhooks). 
+- Review a full [code example](/waves/documentation/speech-to-text/pre-recorded/code-examples) here. diff --git a/fern/products/waves/versions/v4.0.0/speech-to-text-new/pre-recorded/troubleshooting.mdx b/fern/products/waves/pages/v4.0.0/speech-to-text/pre-recorded/troubleshooting.mdx similarity index 90% rename from fern/products/waves/versions/v4.0.0/speech-to-text-new/pre-recorded/troubleshooting.mdx rename to fern/products/waves/pages/v4.0.0/speech-to-text/pre-recorded/troubleshooting.mdx index 87d27f0..44dc9ce 100644 --- a/fern/products/waves/versions/v4.0.0/speech-to-text-new/pre-recorded/troubleshooting.mdx +++ b/fern/products/waves/pages/v4.0.0/speech-to-text/pre-recorded/troubleshooting.mdx @@ -1,11 +1,11 @@ --- title: 'Troubleshooting' -description: 'Resolve common issues when uploading pre-recorded audio to Lightning STT' +description: 'Resolve common issues when uploading pre-recorded audio to Pulse STT' --- # Troubleshooting pre-recorded uploads -Go through this guide to learn about common bugs and issues that might occur when transcribing audio with Lightning STT. +Go through this guide to learn about common bugs and issues that might occur when transcribing audio with Pulse STT. ## Low-quality transcripts @@ -15,7 +15,7 @@ Go through this guide to learn about common bugs and issues that might occur whe - Resample audio to at least 16 kHz and keep it mono. - Prefer lossless codecs (WAV, FLAC) whenever you control the capture pipeline. - Maintain 128 kbps or higher bitrate for compressed formats. -- Normalize and denoise audio before uploading (see [best practices](/v4.0.0/content/speech-to-text-new/pre-recorded/best-practices)). +- Normalize and denoise audio before uploading (see [best practices](/waves/documentation/speech-to-text/pre-recorded/best-practices)). 
## Large file sizes diff --git a/fern/products/waves/versions/v4.0.0/speech-to-text-new/pre-recorded/webhooks.mdx b/fern/products/waves/pages/v4.0.0/speech-to-text/pre-recorded/webhooks.mdx similarity index 74% rename from fern/products/waves/versions/v4.0.0/speech-to-text-new/pre-recorded/webhooks.mdx rename to fern/products/waves/pages/v4.0.0/speech-to-text/pre-recorded/webhooks.mdx index be9d348..c1b8135 100644 --- a/fern/products/waves/versions/v4.0.0/speech-to-text-new/pre-recorded/webhooks.mdx +++ b/fern/products/waves/pages/v4.0.0/speech-to-text/pre-recorded/webhooks.mdx @@ -1,11 +1,11 @@ --- title: 'Webhooks' -description: 'Receive asynchronous Lightning STT results without polling' +description: 'Receive asynchronous Pulse STT results without polling' --- # Webhooks for pre-recorded uploads -Use webhooks to process Lightning STT jobs asynchronously—ideal for long recordings or high-volume backfills. When a transcription finishes, Waves sends a POST request to your callback with the final payload. +Use webhooks to process Pulse STT jobs asynchronously—ideal for long recordings or high-volume backfills. When a transcription finishes, Waves sends a POST request to your callback with the final payload. ## Steps @@ -14,10 +14,10 @@ Use webhooks to process Lightning STT jobs asynchronously—ideal for long recor Route an HTTPS URL (e.g., `https://api.example.com/webhooks/stt`) that accepts POST requests. Implement signature checks or HMAC validation inside this handler to guard against spoofed traffic. - Include `webhook_url` and optional `webhook_extra` query parameters when calling `POST /api/v1/lightning/get_text`. `webhook_extra` accepts comma-separated `key:value` pairs that return verbatim in the webhook payload for correlation. + Include `webhook_url` and optional `webhook_extra` query parameters when calling `POST /waves/v1/pulse/get_text`. 
`webhook_extra` accepts comma-separated `key:value` pairs that return verbatim in the webhook payload for correlation. - Make your transcription request as usual (raw bytes or audio URL). Lightning STT queues the job, streams to the model, and emits the webhook once `status=success` (or `failed`). + Make your transcription request as usual (raw bytes or audio URL). Pulse STT queues the job, streams to the model, and emits the webhook once `status=success` (or `failed`). Parse the JSON payload, verify any signatures you added, and store the transcript, timestamps, and metadata in your system of record. Respond with `2xx` to acknowledge receipt; send `5xx` to trigger a retry. @@ -28,7 +28,7 @@ Use webhooks to process Lightning STT jobs asynchronously—ideal for long recor ```bash curl --request POST \ - --url "https://waves-api.smallest.ai/api/v1/lightning/get_text?model=lightning&language=en&word_timestamps=true&webhook_url=https://api.example.com/webhooks/stt&webhook_extra=case_id:42,region:us-east" \ + --url "https://api.smallest.ai/waves/v1/pulse/get_text?language=en&word_timestamps=true&webhook_url=https://api.example.com/webhooks/stt&webhook_extra=case_id:42,region:us-east" \ --header "Authorization: Bearer $SMALLEST_API_KEY" \ --header "Content-Type: audio/wav" \ --data-binary "@/path/to/audio.wav" diff --git a/fern/products/waves/pages/v4.0.0/speech-to-text/quickstart.mdx b/fern/products/waves/pages/v4.0.0/speech-to-text/quickstart.mdx new file mode 100644 index 0000000..4523b48 --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/speech-to-text/quickstart.mdx @@ -0,0 +1,141 @@ +--- +title: "Quickstart" +description: "Transcribe your first audio file in under 60 seconds with Pulse STT." +icon: "rocket" +--- + +## Step 1: Get Your API Key + +In the [Smallest AI Console](https://app.smallest.ai/dashboard/settings/apikeys?utm_source=documentation&utm_medium=speech-to-text), go to **Settings → API Keys** and click **Create API Key**. 
+ +API Keys settings page with Create API Key button highlighted + +Create New API Key dialog with name field + +Copy the key and export it: + +```bash +export SMALLEST_API_KEY="your-api-key-here" +``` + +New to Smallest AI? [Sign up here](https://app.smallest.ai?utm_source=documentation&utm_medium=speech-to-text) first. + +## Step 2: Transcribe Audio + +Here's the sample audio we'll transcribe: + + + +First, generate a test audio file (or use any WAV file you have): + +```bash +curl -X POST "https://api.smallest.ai/waves/v1/lightning-v3.1/get_speech" \ + -H "Authorization: Bearer $SMALLEST_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{"text": "Hello from Smallest AI.", "voice_id": "magnus", "sample_rate": 24000, "output_format": "wav"}' \ + --output audio.wav +``` + +Now transcribe it: + +```bash +curl -X POST "https://api.smallest.ai/waves/v1/pulse/get_text?language=en" \ + -H "Authorization: Bearer $SMALLEST_API_KEY" \ + -H "Content-Type: audio/wav" \ + --data-binary @audio.wav +``` + +You'll get back: + +```json +{ + "status": "success", + "transcription": "Hello from smallest AI!" 
+} +``` + +## Step 3: Build It Into Your App + + + +```bash cURL (file upload) +curl -X POST "https://api.smallest.ai/waves/v1/pulse/get_text?language=en" \ + -H "Authorization: Bearer $SMALLEST_API_KEY" \ + -H "Content-Type: audio/wav" \ + --data-binary "@audio.wav" +``` + +```python Python +import os +import requests + +API_KEY = os.environ["SMALLEST_API_KEY"] + +response = requests.post( + "https://api.smallest.ai/waves/v1/pulse/get_text", + params={"language": "en"}, + headers={ + "Authorization": f"Bearer {API_KEY}", + "Content-Type": "audio/wav", + }, + data=open("audio.wav", "rb").read(), + timeout=120, +) + +result = response.json() +print(result["transcription"]) +``` + +```javascript JavaScript +const fs = require("fs"); + +const audioData = fs.readFileSync("audio.wav"); + +const params = new URLSearchParams({ language: "en" }); +const response = await fetch( + `https://api.smallest.ai/waves/v1/pulse/get_text?${params}`, + { + method: "POST", + headers: { + Authorization: `Bearer ${process.env.SMALLEST_API_KEY}`, + "Content-Type": "audio/wav", + }, + body: audioData, + } +); + +const result = await response.json(); +console.log(result.transcription); +``` + + + +## Step 4: Explore Features + + + + Stream audio via WebSocket for live transcription. + + + Identify and label different speakers. + + + Precise timing for each transcribed word. + + + Analyze emotional tone in speech. + + + +Full endpoint spec: [Pulse API Reference](/waves/api-reference) + +## Need Help? + + + Ask questions and get help from the community. + + +Or email [support@smallest.ai](mailto:support@smallest.ai). 
diff --git a/fern/products/waves/versions/v4.0.0/speech-to-text-new/realtime/audio-formats.mdx b/fern/products/waves/pages/v4.0.0/speech-to-text/realtime/audio-formats.mdx similarity index 70% rename from fern/products/waves/versions/v4.0.0/speech-to-text-new/realtime/audio-formats.mdx rename to fern/products/waves/pages/v4.0.0/speech-to-text/realtime/audio-formats.mdx index 8221ef5..c3897ee 100644 --- a/fern/products/waves/versions/v4.0.0/speech-to-text-new/realtime/audio-formats.mdx +++ b/fern/products/waves/pages/v4.0.0/speech-to-text/realtime/audio-formats.mdx @@ -3,50 +3,48 @@ title: 'Audio Specifications' description: 'Supported audio encoding formats and requirements for real-time WebSocket transcription' --- -This guide covers supported encoding formats, sample rates, and recommendations for optimal real-time transcription quality. - ## Supported Encoding Formats -The Lightning STT WebSocket API supports the following audio encoding formats for real-time streaming: +The Pulse STT WebSocket API supports the following audio encoding formats for real-time streaming: - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + +
EncodingDescriptionUse CaseEncodingDescriptionUse Case
linear1616-bit linear PCMRecommended for best qualitylinear1616-bit linear PCMRecommended for best quality
linear3232-bit linear PCMHigh-fidelity audiolinear3232-bit linear PCMHigh-fidelity audio
alawA-law encodingTelephony systemsalawA-law encodingTelephony systems
mulawμ-law encodingTelephony systems (North America)mulawμ-law encodingTelephony systems (North America)
opusOpus compressed audioLow bandwidth, high qualityopusOpus compressed audioLow bandwidth, high quality
ogg_opusOgg Opus containerOgg container with Opus codecogg_opusOgg Opus containerOgg container with Opus codec
@@ -158,7 +156,7 @@ chunks = [audio_int16[i:i+chunk_size//2] for i in range(0, len(audio_int16), chu Specify encoding and sample rate in the WebSocket connection URL: ```javascript -const url = new URL("wss://waves-api.smallest.ai/api/v1/lightning/get_text"); +const url = new URL("wss://api.smallest.ai/waves/v1/pulse/get_text"); url.searchParams.append("encoding", "linear16"); url.searchParams.append("sample_rate", "16000"); ``` diff --git a/fern/products/waves/versions/v4.0.0/speech-to-text-new/realtime/best-practices.mdx b/fern/products/waves/pages/v4.0.0/speech-to-text/realtime/best-practices.mdx similarity index 91% rename from fern/products/waves/versions/v4.0.0/speech-to-text-new/realtime/best-practices.mdx rename to fern/products/waves/pages/v4.0.0/speech-to-text/realtime/best-practices.mdx index 6a4c541..14a92a4 100644 --- a/fern/products/waves/versions/v4.0.0/speech-to-text-new/realtime/best-practices.mdx +++ b/fern/products/waves/pages/v4.0.0/speech-to-text/realtime/best-practices.mdx @@ -5,7 +5,7 @@ description: 'Optimize your real-time WebSocket transcription for low latency an # Real-time streaming best practices -Follow these recommendations to keep Lightning STT latencies low while preserving transcript fidelity in real-time scenarios. +Follow these recommendations to keep Pulse STT latencies low while preserving transcript fidelity in real-time scenarios. ## Chunk Size and Streaming Rate @@ -149,17 +149,17 @@ function connect() { 1. **Establish connection**: Create WebSocket with proper authentication 2. **Stream audio**: Send chunks at regular intervals 3. **Handle responses**: Process partial and final transcripts -4. **End session**: Send `{"type": "end"}` when done +4. **End session**: Send `{"type": "finalize"}` when done 5. 
**Close connection**: Gracefully close the WebSocket ### Graceful Shutdown -To properly close a session, send the end token and wait for the server to respond with `is_last=true` before closing the WebSocket connection: +To properly close a session, send the finalize token and wait for the server to respond with `is_last=true` before closing the WebSocket connection: ```javascript function endTranscription(ws) { // Send end signal - ws.send(JSON.stringify({ type: "end" })); + ws.send(JSON.stringify({ type: "finalize" })); // Wait for is_last=true response before closing ws.onmessage = (event) => { @@ -172,7 +172,7 @@ function endTranscription(ws) { ``` -Do not close the WebSocket immediately after sending the end token. Always wait for the `is_last=true` response to ensure all audio has been processed and final transcripts are received. +Do not close the WebSocket immediately after sending the finalize token. Always wait for the `is_last=true` response to ensure all audio has been processed and final transcripts are received. 
## Latency Optimization diff --git a/fern/products/waves/versions/v4.0.0/speech-to-text-new/realtime/code-examples.mdx b/fern/products/waves/pages/v4.0.0/speech-to-text/realtime/code-examples.mdx similarity index 95% rename from fern/products/waves/versions/v4.0.0/speech-to-text-new/realtime/code-examples.mdx rename to fern/products/waves/pages/v4.0.0/speech-to-text/realtime/code-examples.mdx index 92b8f38..4e4ede6 100644 --- a/fern/products/waves/versions/v4.0.0/speech-to-text-new/realtime/code-examples.mdx +++ b/fern/products/waves/pages/v4.0.0/speech-to-text/realtime/code-examples.mdx @@ -38,7 +38,7 @@ import os import pathlib from urllib.parse import urlencode -BASE_WS_URL = "wss://waves-api.smallest.ai/api/v1/lightning/get_text" +BASE_WS_URL = "wss://api.smallest.ai/waves/v1/pulse/get_text" params = { "language": "en", "encoding": "linear16", @@ -73,7 +73,7 @@ async def stream_audio(): await asyncio.sleep(0.05) # 50ms delay between chunks print("Finished sending audio, sending end signal...") - await ws.send(json.dumps({"type": "end"})) + await ws.send(json.dumps({"type": "finalize"})) sender = asyncio.create_task(send_chunks()) @@ -119,7 +119,7 @@ const fs = require("fs"); const API_KEY = process.env.SMALLEST_API_KEY; const AUDIO_FILE = "path/to/audio.wav"; -const url = new URL("wss://waves-api.smallest.ai/api/v1/lightning/get_text"); +const url = new URL("wss://api.smallest.ai/waves/v1/pulse/get_text"); url.searchParams.append("language", "en"); url.searchParams.append("encoding", "linear16"); url.searchParams.append("sample_rate", "16000"); @@ -141,7 +141,7 @@ ws.on("open", () => { const sendChunk = () => { if (offset >= audioBuffer.length) { console.log("Finished sending audio, sending end signal..."); - ws.send(JSON.stringify({ type: "end" })); + ws.send(JSON.stringify({ type: "finalize" })); return; } @@ -198,7 +198,7 @@ This example shows how to stream audio from a file input in the browser: const API_KEY = "SMALLEST_API_KEY"; async function 
transcribeAudio(audioFile) { - const url = new URL("wss://waves-api.smallest.ai/api/v1/lightning/get_text"); + const url = new URL("wss://api.smallest.ai/waves/v1/pulse/get_text"); url.searchParams.append("language", "en"); url.searchParams.append("encoding", "linear16"); url.searchParams.append("sample_rate", "16000"); @@ -216,7 +216,7 @@ async function transcribeAudio(audioFile) { const sendChunk = () => { if (offset >= arrayBuffer.byteLength) { console.log("Finished sending audio"); - ws.send(JSON.stringify({ type: "end" })); + ws.send(JSON.stringify({ type: "finalize" })); return; } @@ -291,7 +291,7 @@ async function streamMicrophone() { // Create script processor for audio chunks const processor = audioContext.createScriptProcessor(4096, 1, 1); - const url = new URL("wss://waves-api.smallest.ai/api/v1/lightning/get_text"); + const url = new URL("wss://api.smallest.ai/waves/v1/pulse/get_text"); url.searchParams.append("language", "en"); url.searchParams.append("encoding", "linear16"); url.searchParams.append("sample_rate", "16000"); @@ -327,7 +327,7 @@ async function streamMicrophone() { processor.disconnect(); source.disconnect(); stream.getTracks().forEach(track => track.stop()); - ws.send(JSON.stringify({ type: "end" })); + ws.send(JSON.stringify({ type: "finalize" })); ws.close(); }, 30000); } diff --git a/fern/products/waves/pages/v4.0.0/speech-to-text/realtime/features.mdx b/fern/products/waves/pages/v4.0.0/speech-to-text/realtime/features.mdx new file mode 100644 index 0000000..d5bd2be --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/speech-to-text/realtime/features.mdx @@ -0,0 +1,35 @@ +--- +title: 'Features' +description: 'Available features for Real-Time Pulse STT WebSocket API' +--- + +The Real-Time Pulse STT WebSocket API supports the following features: + +## Available Features + + + + Get precise timing information for each word in the transcription with confidence scores + + + Automatically detect the language of the audio + + + Get 
sentence-level transcription segments with timing information + + + Automatically redact personally identifiable information and payment card information + + + Get cumulative transcript received up to this point in responses where is_final is true + + + Control how numbers are formatted in transcriptions (digits, words, or auto-detect) + + + Identify and label different speakers in the audio with speaker confidence scores + + + Boost recognition accuracy for specific words, brand names, and domain terms + + diff --git a/fern/products/waves/versions/v4.0.0/speech-to-text-new/realtime/quickstart.mdx b/fern/products/waves/pages/v4.0.0/speech-to-text/realtime/quickstart.mdx similarity index 70% rename from fern/products/waves/versions/v4.0.0/speech-to-text-new/realtime/quickstart.mdx rename to fern/products/waves/pages/v4.0.0/speech-to-text/realtime/quickstart.mdx index b43efba..252e869 100644 --- a/fern/products/waves/versions/v4.0.0/speech-to-text-new/realtime/quickstart.mdx +++ b/fern/products/waves/pages/v4.0.0/speech-to-text/realtime/quickstart.mdx @@ -1,9 +1,9 @@ --- title: 'Quickstart' -description: 'Get started with real-time transcription using the Lightning STT WebSocket API' +description: 'Get started with real-time transcription using the Pulse STT WebSocket API' --- -This guide shows you how to transcribe streaming audio using Smallest AI's Lightning STT model via the WebSocket API. The Lightning model provides state-of-the-art low latencies (64ms) for TTFT (Time to First Transcript), making it an ideal choice for speech-to-text conversion during live conversations. +This guide shows you how to transcribe streaming audio using Smallest AI's Pulse STT model via the WebSocket API. The Pulse model provides state-of-the-art low latencies (64ms) for TTFT (Time to First Transcript), making it an ideal choice for speech-to-text conversion during live conversations. 
# Real-Time Audio Transcription @@ -19,12 +19,12 @@ The Real-Time API allows you to stream audio data and receive transcription resu ## Endpoint ``` -WSS wss://waves-api.smallest.ai/api/v1/lightning/get_text +WSS wss://api.smallest.ai/waves/v1/pulse/get_text ``` ## Authentication -Head over to the [smallest console](https://console.smallest.ai/apikeys) to generate an API key if not done previously. Also look at [Authentication guide](/v4.0.0/content/getting-started/authentication) for more information about API keys and their usage. +Head over to the [smallest console](https://app.smallest.ai/dashboard/settings/apikeys?utm_source=documentation&utm_medium=speech-to-text) to generate an API key if not done previously. Also look at [Authentication guide](/waves/documentation/getting-started/authentication) for more information about API keys and their usage. Include your API key in the Authorization header when establishing the WebSocket connection: @@ -38,7 +38,7 @@ Authorization: Bearer SMALLEST_API_KEY ```javascript JavaScript const API_KEY = "SMALLEST_API_KEY"; -const url = new URL("wss://waves-api.smallest.ai/api/v1/lightning/get_text"); +const url = new URL("wss://api.smallest.ai/waves/v1/pulse/get_text"); url.searchParams.append("language", "en"); url.searchParams.append("encoding", "linear16"); url.searchParams.append("sample_rate", "16000"); @@ -69,7 +69,7 @@ import websockets import json from urllib.parse import urlencode -BASE_WS_URL = "wss://waves-api.smallest.ai/api/v1/lightning/get_text" +BASE_WS_URL = "wss://api.smallest.ai/waves/v1/pulse/get_text" params = { "language": "en", "encoding": "linear16", @@ -116,7 +116,7 @@ The server responds with JSON messages containing transcription results: } ``` -For detailed information about response fields, see the [response format documentation](/v4.0.0/content/speech-to-text-new/realtime/response-format). 
+For detailed information about response fields, see the [response format documentation](/waves/documentation/speech-to-text/realtime/response-format). ## Streaming Audio @@ -131,14 +131,14 @@ When you're done streaming, send an end signal: ```json { - "type": "end" + "type": "finalize" } ``` ## Next Steps -- Learn about [supported audio formats](/v4.0.0/content/speech-to-text-new/realtime/audio-formats) for WebSocket streaming. -- Review complete [code examples](/v4.0.0/content/speech-to-text-new/realtime/code-examples) for Python, Node.js, and Browser JavaScript. -- Follow [best practices](/v4.0.0/content/speech-to-text-new/realtime/best-practices) for optimal streaming performance. -- Troubleshoot common issues in the [troubleshooting guide](/v4.0.0/content/speech-to-text-new/realtime/troubleshooting). +- Learn about [supported audio formats](/waves/documentation/speech-to-text/realtime/audio-formats) for WebSocket streaming. +- Review complete [code examples](/waves/documentation/speech-to-text/realtime/code-examples) for Python, Node.js, and Browser JavaScript. +- Follow [best practices](/waves/documentation/speech-to-text/realtime/best-practices) for optimal streaming performance. +- Troubleshoot common issues in the [troubleshooting guide](/waves/documentation/speech-to-text/realtime/troubleshooting). 
diff --git a/fern/products/waves/versions/v4.0.0/speech-to-text-new/realtime/response-format.mdx b/fern/products/waves/pages/v4.0.0/speech-to-text/realtime/response-format.mdx similarity index 89% rename from fern/products/waves/versions/v4.0.0/speech-to-text-new/realtime/response-format.mdx rename to fern/products/waves/pages/v4.0.0/speech-to-text/realtime/response-format.mdx index 5c42de1..2455c15 100644 --- a/fern/products/waves/versions/v4.0.0/speech-to-text-new/realtime/response-format.mdx +++ b/fern/products/waves/pages/v4.0.0/speech-to-text/realtime/response-format.mdx @@ -61,7 +61,7 @@ We recommend processing responses of this kind for optimal transcription accurac ``` - Additionally, the `language` field is set to the specified language, or the detected language if the language parameter is set to `multi`. Other responses will not include the `language` field. -- The `full_transcript` is non-empty if the user sends the end token `{"type":"end"}` to signal end of session. +- The `full_transcript` is non-empty if the user sends the finalize token `{"type":"finalize"}` to signal end of session. ### `is_final = false` @@ -81,12 +81,12 @@ These are interim transcript responses sent for each chunk. They provide quick f - These responses may provide inaccurate results for the most recent words. This occurs when the audio for these words is not fully sent to the server in the respective chunk. -The `full_transcript` field is a feature that requires the `full_transcript` query parameter to be set to `true`. Learn more about the [Full Transcript feature](/v4.0.0/content/speech-to-text-new/features/full-transcript). +The `full_transcript` field is a feature that requires the `full_transcript` query parameter to be set to `true`. Learn more about the [Full Transcript feature](/waves/documentation/speech-to-text/features/full-transcript). 
### `is_last = true` -This response is similar to an `is_final=true` response, but it is the final response received after the user sends the end token `{"type":"end"}`. When `is_last=true`, the server has finished processing all audio and the session is complete. +This response is similar to an `is_final=true` response, but it is the final response received after the user sends the finalize token `{"type":"finalize"}`. When `is_last=true`, the server has finished processing all audio and the session is complete. ```json { @@ -105,5 +105,5 @@ This response is similar to an `is_final=true` response, but it is the final res - This is the last response of the live transcription session and contains all the fields of the `is_final=true` response. -Do not close the WebSocket connection immediately after sending the end token. Wait for this `is_last=true` response to ensure all audio has been processed and you receive the complete transcript. +Do not close the WebSocket connection immediately after sending the finalize token. Wait for this `is_last=true` response to ensure all audio has been processed and you receive the complete transcript. diff --git a/fern/products/waves/versions/v4.0.0/speech-to-text-new/realtime/troubleshooting.mdx b/fern/products/waves/pages/v4.0.0/speech-to-text/realtime/troubleshooting.mdx similarity index 96% rename from fern/products/waves/versions/v4.0.0/speech-to-text-new/realtime/troubleshooting.mdx rename to fern/products/waves/pages/v4.0.0/speech-to-text/realtime/troubleshooting.mdx index add39b9..4bdd358 100644 --- a/fern/products/waves/versions/v4.0.0/speech-to-text-new/realtime/troubleshooting.mdx +++ b/fern/products/waves/pages/v4.0.0/speech-to-text/realtime/troubleshooting.mdx @@ -27,7 +27,7 @@ description: 'Common issues and solutions for real-time WebSocket transcription' 2. 
**Check WebSocket URL**: ```javascript // Correct URL format - const url = new URL("wss://waves-api.smallest.ai/api/v1/lightning/get_text"); + const url = new URL("wss://api.smallest.ai/waves/v1/pulse/get_text"); // Not "ws://" or "https://" ``` @@ -38,7 +38,7 @@ description: 'Common issues and solutions for real-time WebSocket transcription' -H "Upgrade: websocket" \ -H "Sec-WebSocket-Version: 13" \ -H "Sec-WebSocket-Key: test" \ - https://waves-api.smallest.ai/api/v1/lightning/get_text + https://api.smallest.ai/waves/v1/pulse/get_text ``` ### Connection Drops Unexpectedly @@ -201,7 +201,7 @@ description: 'Common issues and solutions for real-time WebSocket transcription' 3. **Check Network**: ```bash # Test latency to API - ping waves-api.smallest.ai + ping api.smallest.ai ``` ### Memory Issues @@ -241,7 +241,7 @@ description: 'Common issues and solutions for real-time WebSocket transcription' 1. **Use WSS (Secure WebSocket)**: ```javascript // Always use wss:// in browsers - const url = new URL("wss://waves-api.smallest.ai/api/v1/lightning/get_text"); + const url = new URL("wss://api.smallest.ai/waves/v1/pulse/get_text"); ``` 2. **Handle Authentication**: diff --git a/fern/products/waves/pages/v4.0.0/text-to-speech/get-voice-models-langs.mdx b/fern/products/waves/pages/v4.0.0/text-to-speech/get-voice-models-langs.mdx new file mode 100644 index 0000000..33b2027 --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/text-to-speech/get-voice-models-langs.mdx @@ -0,0 +1,71 @@ +--- +title: "Voices & Languages" +description: "Learn how to retrieve available voices, models, and languages." +icon: "toolbox" +--- + +In this tutorial, you will learn how to use the Smallest AI platform to fetch the available languages, models, and voices. By the end of this guide, you'll be able to retrieve and display this information using the Smallest AI SDK. + +## Requirements + +Before you begin, ensure you have the following: + +- Python (3.9 or higher) installed on your machine. 
+- An API key from the Smallest AI [platform](https://app.smallest.ai/dashboard/settings/apikeys?utm_source=documentation&utm_medium=text-to-speech). +- The Smallest AI Python SDK installed. If you haven't installed it yet, follow the instructions below: + +### Install the SDK +```bash +pip install smallestai +``` + +Set your API key as an environment variable: +```bash +export SMALLEST_API_KEY=YOUR_API_KEY +``` + +## Fetch Available Voices, Models, and Languages + +The Smallest AI SDK allows you to query the available languages, voices, and models for your TTS needs. Here's how you can do it: + + +```python python +from smallestai.waves import WavesClient + +def main(): + client = WavesClient(api_key="YOUR_API_KEY") + + # Get available languages + languages = client.get_languages() + print(f"Available Languages: {languages}") + + # Get available voices for the "lightning-v3.1" model + voices = client.get_voices(model="lightning-v3.1") + print(f"Available Voices (Model: 'lightning-v3.1'): {voices}") + + # Get user-specific cloned voices + cloned_voices = client.get_cloned_voices() + print(f"Available Cloned Voices: {cloned_voices}") + + # Get available models + models = client.get_models() + print(f"Available Models: {models}") + +if __name__ == "__main__": + main() +``` + + +## Explanation of Functions + +- `get_languages()`: Retrieves the list of supported languages for Text-to-Speech. +- `get_voices(model="model_name")`: Retrieves the voices available for a specific model (e.g., "lightning-v3.1"). +- `get_cloned_voices()`: Fetches all user-specific cloned voices. +- `get_models()`: Retrieves the TTS models on the platform available through API. + + +## Need Help? + +If you have any questions or encounter issues, our community is here to help! +- Join our [Discord server](https://discord.gg/ywShEyXHBW) to connect with other developers and get real-time support. +- Contact us via email: [support@smallest.ai](mailto:support@smallest.ai). 
\ No newline at end of file diff --git a/fern/products/waves/pages/v4.0.0/text-to-speech/how-to-tts.mdx b/fern/products/waves/pages/v4.0.0/text-to-speech/how-to-tts.mdx new file mode 100644 index 0000000..35aa552 --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/text-to-speech/how-to-tts.mdx @@ -0,0 +1,199 @@ +--- +title: "Sync & Async Synthesis" +sidebarTitle: "Sync & Async" +description: "Generate speech synchronously or concurrently — REST API and SDK examples." +icon: "wave-square" +--- + +Generate speech via the REST API or Python SDK — synchronously (one request, complete audio) or asynchronously (multiple requests in parallel). + +**Sample output (sync, voice: magnus):** + + + +## Requirements + +- An API key from the [Smallest AI Console](https://app.smallest.ai/dashboard/settings/apikeys?utm_source=documentation&utm_medium=text-to-speech) +- For Python: `requests` (or `smallestai` for the SDK) +- For JavaScript: Node.js 18+ (built-in `fetch`) + +```bash +export SMALLEST_API_KEY="your-api-key-here" +``` + +## Synchronous Text to Speech + +Send text, receive complete audio in the response: + + + +```bash cURL +curl -X POST "https://api.smallest.ai/waves/v1/lightning-v3.1/get_speech" \ + -H "Authorization: Bearer $SMALLEST_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "text": "Hello, this is a test of synchronous speech synthesis.", + "voice_id": "magnus", + "sample_rate": 24000, + "output_format": "wav" + }' --output sync_output.wav +``` + +```python Python +import os +import requests + +API_KEY = os.environ["SMALLEST_API_KEY"] + +response = requests.post( + "https://api.smallest.ai/waves/v1/lightning-v3.1/get_speech", + headers={ + "Authorization": f"Bearer {API_KEY}", + "Content-Type": "application/json", + }, + json={ + "text": "Hello, this is a test of synchronous speech synthesis.", + "voice_id": "magnus", + "sample_rate": 24000, + "output_format": "wav", + }, +) + +with open("sync_output.wav", "wb") as f: + f.write(response.content) 
+```
+
+```javascript JavaScript
+const fs = require("fs");
+
+const response = await fetch(
+  "https://api.smallest.ai/waves/v1/lightning-v3.1/get_speech",
+  {
+    method: "POST",
+    headers: {
+      Authorization: `Bearer ${process.env.SMALLEST_API_KEY}`,
+      "Content-Type": "application/json",
+    },
+    body: JSON.stringify({
+      text: "Hello, this is a test of synchronous speech synthesis.",
+      voice_id: "magnus",
+      sample_rate: 24000,
+      output_format: "wav",
+    }),
+  }
+);
+
+const buffer = Buffer.from(await response.arrayBuffer());
+fs.writeFileSync("sync_output.wav", buffer);
+```
+
+```python Python SDK
+from smallestai.waves import WavesClient
+
+client = WavesClient(api_key="SMALLEST_API_KEY")
+audio = client.synthesize(
+    "Hello, this is a test of synchronous speech synthesis.",
+)
+with open("sync_output.wav", "wb") as f:
+    f.write(audio)
+```
+
+
+
+## Asynchronous Text to Speech
+
+For concurrent requests (e.g., generating multiple audio files in parallel):
+
+
+
+```python Python (asyncio)
+import os
+import asyncio
+import aiohttp
+
+API_KEY = os.environ["SMALLEST_API_KEY"]
+URL = "https://api.smallest.ai/waves/v1/lightning-v3.1/get_speech"
+
+async def synthesize(session, text, filename):
+    async with session.post(URL, headers={
+        "Authorization": f"Bearer {API_KEY}",
+        "Content-Type": "application/json",
+    }, json={
+        "text": text,
+        "voice_id": "magnus",
+        "sample_rate": 24000,
+        "output_format": "wav",
+    }) as resp:
+        audio = await resp.read()
+        with open(filename, "wb") as f:
+            f.write(audio)
+        print(f"Saved {filename}")
+
+async def main():
+    async with aiohttp.ClientSession() as session:
+        await asyncio.gather(
+            synthesize(session, "First sentence.", "async_1.wav"),
+            synthesize(session, "Second sentence.", "async_2.wav"),
+            synthesize(session, "Third sentence.", "async_3.wav"),
+        )
+
+asyncio.run(main())
+```
+
+```python Python SDK
+import asyncio
+import aiofiles
+from smallestai.waves import AsyncWavesClient
+
+async def main():
+    client = 
AsyncWavesClient(api_key="SMALLEST_API_KEY") + async with client as tts: + audio_bytes = await tts.synthesize( + "Hello, this is a test of the async synthesis function." + ) + async with aiofiles.open("async_output.wav", "wb") as f: + await f.write(audio_bytes) + +asyncio.run(main()) +``` + + + +## Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `text` | string | *required* | Text to synthesize (max ~250 chars recommended) | +| `voice_id` | string | *required* | Voice to use (e.g., `magnus`, `olivia`, `aarush`) | +| `sample_rate` | int | `44100` | `8000`, `16000`, `24000`, or `44100` Hz | +| `speed` | float | `1.0` | Speech rate multiplier (`0.5` to `2.0`) | +| `language` | string | `auto` | Language code: `en`, `hi`, `es`, `ta`, or `auto` | +| `output_format` | string | `pcm` | Audio format: `pcm`, `wav`, `mp3`, or `mulaw` | +| `pronunciation_dicts` | array | — | List of [pronunciation dictionary](/waves/documentation/text-to-speech/pronunciation-dictionaries) IDs | + +You can override any parameter per request: + +```python +# Override speed and sample rate for a single call +response = requests.post(URL, headers=headers, json={ + "text": "Fast and high quality.", + "voice_id": "magnus", + "speed": 1.5, + "sample_rate": 44100, + "output_format": "mp3", +}) +``` + +## When to Use Each Mode + +- **Synchronous**: Real-time voice assistants, chatbot responses, single audio generation +- **Asynchronous**: Batch processing, generating multiple audio files, audiobook chapters, concurrent API calls + +For real-time streaming where audio starts playing before generation completes, see [Streaming TTS](/waves/documentation/text-to-speech/stream-tts). + +## Need Help? + +Check out the [API Reference](/waves/documentation/api-references/lightning-v3.1) for the full endpoint specification, or ask on [Discord](https://discord.gg/5evETqguJs). 
diff --git a/fern/products/waves/pages/v4.0.0/text-to-speech/model-cards/lightning-v3-1.mdx b/fern/products/waves/pages/v4.0.0/text-to-speech/model-cards/lightning-v3-1.mdx new file mode 100644 index 0000000..779accd --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/text-to-speech/model-cards/lightning-v3-1.mdx @@ -0,0 +1,284 @@ +--- +title: "Lightning v3.1" +description: "Model card for Lightning v3.1 — high-fidelity, low-latency text-to-speech at 44 kHz with voice cloning, streaming, and multi-language support." +icon: "sparkles" +--- + +Lightning v3.1 is a high-fidelity, low-latency text-to-speech model delivering natural, expressive, and realistic speech at 44 kHz. Optimized for real-time applications with ultra-low latency and voice cloning support, it delivers broadcast-quality audio with genuinely conversational characteristics. + + + + Native sample rate + + + Latency at 20 concurrent requests + + + English, Hindi, Spanish, Tamil + + + Real-time factor (faster than playback) + + + +## Model Overview + +| | | +|---|---| +| **Developed by** | Smallest AI | +| **Model type** | Text-to-Speech / Speech Synthesis | +| **Languages** | English, Hindi, Spanish, Tamil | +| **License** | Proprietary | +| **Version** | v3.1 | +| **Native sample rate** | 44,100 Hz | + +### Key Capabilities + + + + Ultra-low latency architecture designed for conversational AI and live streaming. + + + Instant voice cloning with just 5-15 seconds of audio. Professional cloning available on demand. + + + HTTP, SSE, and WebSocket support for real-time applications. + + + +--- + +## Performance & Benchmarks + +In blind listening tests against OpenAI GPT-4o-mini-TTS, Lightning v3.1 was preferred by listeners **76.2% of the time** — a 3.4x preference ratio. + + + **Evaluation:** Seed TTS dataset, 1,088 samples across English, Hindi, Spanish, and Tamil. LLM-as-a-Judge framework with ASR-based intelligibility testing. 
+ + + + + +| Category | Metric | Score | Notes | +|----------|--------|-------|-------| +| **Audio Quality** | WVMOS | 5.06 | Broadcast-quality audio | +| | Naturalness | 4.33 | Predominantly human-like | +| | Overall Quality | 4.42 | Premium-tier experience | +| | Native Sample Rate | 44.1 kHz | Highest fidelity among Lightning models | +| **Intelligibility** | Word Error Rate (WER) | 6.3% | 93.7% word accuracy | +| | Character Error Rate (CER) | 1.6% | Excellent character-level accuracy | +| **Latency & Speed** | Latency | 200ms | At 20 concurrent requests | +| | Real-Time Factor (RTF) | 0.3 | 3.3x faster than playback | +| | Speed Control | 0.5x - 2.0x | Adjustable playback speed | +| | Max Chunk Size | 250 chars | Optimal: 140 characters per request | +| **Prosody** | Pronunciation | 4.70 / 5.0 | Near-perfect articulation | +| | Intonation | 4.71 / 5.0 | Highly expressive pitch variation | +| | Prosody | 4.47 / 5.0 | Natural conversational rhythm | + + + + +| Category | Metric | Score | +|----------|--------|-------| +| **Audio Quality** | WVMOS | 4.64 | +| | Naturalness | 4.36 | +| | Overall Quality | 4.50 | +| **Intelligibility** | Word Error Rate (WER) | 5.93% | +| | Character Error Rate (CER) | 1.47% | +| **Latency & Speed** | Latency | 250-300ms | +| **Prosody** | Pronunciation | 4.83 | +| | Intonation | 4.81 | +| | Prosody | 4.5 | + + + + +--- + +## Supported Languages + + + **Automatic Language Detection & Language Switching:** Set `language` to `"auto"` (default) and Lightning v3.1 will automatically detect the language from input text. The model also supports language switching within a single session — no need to restart or reconnect when switching between supported languages. 
+ + +| Language | Code | Status | +|----------|------|--------| +| English | `en` | Available | +| Hindi | `hi` | Available | +| Spanish | `es` | Available | +| Tamil | `ta` | Available | +| Italian | `it` | Coming soon | +| French | `fr` | Coming soon | +| Portuguese | `pt` | Coming soon | +| Swedish | `sv` | Coming soon | +| Dutch | `nl` | Coming soon | +| German | `de` | Coming soon | +| Telugu | `te` | Coming soon | +| Malayalam | `ml` | Coming soon | +| Kannada | `kn` | Coming soon | +| Marathi | `mr` | Coming soon | +| Gujarati | `gu` | Coming soon | + +--- + +## Voice Catalog + +### English Voices + +| Voice ID | Name | Gender | Accent | Languages | +|----------|------|--------|--------|-----------| +| `magnus` | Magnus | Male | American | English | +| `olivia` | Olivia | Female | American | English | +| `daniel` | Daniel | Male | American | English | +| `rachel` | Rachel | Female | American | English | +| `nicole` | Nicole | Female | American | English | +| `elizabeth` | Elizabeth | Female | American | English | +| `kyle` | Kyle | Male | American | English | + +### Hindi Voices + +| Voice ID | Name | Gender | Accent | Languages | +|----------|------|--------|--------|-----------| +| `aarush` | Aarush | Male | Indian | English, Hindi | +| `sakshi` | Sakshi | Female | Indian | English, Hindi | +| `parth` | Parth | Male | Indian | English, Hindi | +| `sana` | Sana | Female | Indian | English, Hindi | +| `vivaan` | Vivaan | Male | Indian | English, Hindi | + +### Voice Cloning + + + + **Audio required:** 5-15 seconds + + Self-serve voice cloning available via API and console. Captures core voice characteristics for quick replication. + + + **Audio required:** 45+ minutes (high-quality) + + Near-perfect voice match capturing intonation, accent, emotions, and vocal nuances. Available on demand — contact [support@smallest.ai](mailto:support@smallest.ai) to get started. 
+ + + +--- + +## API Reference + +### Endpoints + +| Endpoint | Method | Use Case | +|----------|--------|----------| +| `/waves/v1/lightning-v3.1/get_speech` | POST | Synchronous synthesis | +| `/waves/v1/lightning-v3.1/stream` | POST (SSE) | Server-sent events streaming | +| `/waves/v1/lightning-v3.1/get_speech/stream` | WebSocket | Real-time streaming | + +### Request Parameters + +| Parameter | Type | Required | Default | Description | +|-----------|------|----------|---------|-------------| +| `text` | string | Yes | — | Text to synthesize | +| `voice_id` | string | Yes | — | Voice identifier | +| `sample_rate` | integer | No | 44100 | Output sample rate (Hz) | +| `speed` | float | No | 1.0 | Speech speed (0.5-2.0) | +| `language` | string | No | `"auto"` | Language code (`en`, `hi`, `es`, `ta`) | +| `output_format` | string | No | `"pcm"` | Audio format | +| `pronunciation_dicts` | array | No | — | Custom pronunciation IDs (WebSocket only) | + + + Get started in minutes with synchronous or streaming synthesis. 
+ + +--- + +## Technical Specifications + +### Audio Output + +| Specification | Details | +|---------------|---------| +| **Native sample rate** | 44,100 Hz | +| **Supported sample rates** | 8,000 / 16,000 / 24,000 / 44,100 Hz | +| **Output formats** | PCM, MP3, WAV, mulaw | +| **Audio channels** | Mono | + +### Text Formatting Guidelines + +| Aspect | Recommendation | +|--------|----------------| +| **Language scripts** | English and Spanish in Latin script, Hindi in Devanagari | +| **Break points** | Natural punctuation (`.` `!` `?` `,`) | +| **Mixed language** | Avoid transliteration — use native script for each language | + +### Number & Date Handling + +| Type | Format | +|------|--------| +| Phone numbers | Default 3-4-3 grouping | +| Dates | DD/MM/YYYY or DD-MM-YYYY | +| Time | HH:MM or HH:MM:SS | + + + +**Hardware** +- Recommended GPU: NVIDIA L40S +- Recommended VRAM: 48 GB + +**Software** +- Server regions (AWS): India (Hyderabad), USA (Oregon) +- Automatic geo-location based routing for lowest latency + + + +--- + +## Use Cases + +### Direct Use + +- Voice assistants and conversational AI +- Interactive chatbots with voice output +- Real-time narration and live streaming +- Accessibility tools and screen readers +- Gaming (dynamic character voices) +- Customer service automation + +### Downstream Use + +- Multi-turn conversational agents +- Audio content generation pipelines +- Telephony and IVR systems +- Podcast and audiobook generation + +--- + +## Limitations & Safety + +### Known Limitations + +- Mixed-language text (transliteration) may produce suboptimal results. Hindi text should be in Devanagari script (e.g., "नमस्ते"), not Latin (e.g., "Namaste"). English text should be in Latin script, not Devanagari. + + + **Recommendations:** Use proper script for each language. Break long text at natural punctuation points. Use [pronunciation dictionaries](/waves/documentation/text-to-speech/pronunciation-dictionaries) for specialized vocabulary. 
Test voice selection for your specific use case. + + + + Lightning v3.1 must **not** be used for impersonation or fraud, generating deceptive audio content (deepfakes), creating content that violates consent or privacy, harassment or abuse, or any illegal or unethical purposes. + + +### Safety & Compliance + +- Voice cloning requires explicit consent +- No retention of synthesized audio +- No storage of personal voice data beyond cloning scope +- Usage monitoring for policy compliance + +For compliance documentation (GDPR, SOC2, HIPAA), contact [support@smallest.ai](mailto:support@smallest.ai). + +--- + +| Channel | Details | +|---------|---------| +| **Support** | [support@smallest.ai](mailto:support@smallest.ai) | +| **Documentation** | [docs.smallest.ai/waves](https://docs.smallest.ai/waves) | +| **Console** | [app.smallest.ai](https://app.smallest.ai) | +| **Community** | [Discord](https://discord.gg/5evETqguJs) | diff --git a/fern/products/waves/pages/v4.0.0/text-to-speech/overview.mdx b/fern/products/waves/pages/v4.0.0/text-to-speech/overview.mdx new file mode 100644 index 0000000..43ce313 --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/text-to-speech/overview.mdx @@ -0,0 +1,212 @@ +--- +title: 'Overview' +description: 'Lightning TTS API — generate speech from text with 80+ voices, 44.1 kHz audio, ~200ms latency, and streaming support.' +icon: 'waveform-lines' +--- + +The Lightning TTS API converts text into natural speech via `https://api.smallest.ai/waves/v1`. 80+ voices across 4 languages, 44.1 kHz native sample rate, ~200ms latency, with sync, SSE, and WebSocket streaming. + +**Hear Lightning v3.1 (voice: magnus):** + + + + + Generate your first audio in under 60 seconds. + + +## Synthesis Modes + +Choose the synthesis mode that best fits your application's needs: + + + + Generate complete audio files with a single HTTP request. Ideal for pre-rendering content, batch processing, and applications where immediate streaming isn't required. 
+ + + Receive audio chunks as they're generated via WebSocket. Perfect for real-time voice assistants, live narration, and low-latency conversational AI. + + + +## Available Models + + + + Deprecated High-quality multilingual TTS with 100ms TTFB. Supports 16+ languages including English, Hindi, and European languages. Includes voice cloning support. + + + Our most natural-sounding model with 44 kHz audio output. Ultra-low latency with expressive, human-like speech. Supports English, Hindi, Tamil, and Spanish with voice cloning. + + + +## Feature Highlights + + + + Optimized streaming pipeline delivers sub-100ms time-to-first-byte (TTFB) for real-time applications. Lightning v3.1 achieves even faster response times for conversational AI. + + + + Create custom voice profiles by uploading audio samples. Instant voice cloning works with just a few seconds of audio, while professional voice cloning delivers studio-quality results. + + + + Comprehensive language support including English, Hindi, Tamil, Kannada, Malayalam, Telugu, Gujarati, Bengali, Marathi, German, French, Spanish, Italian, Polish, Dutch, and Russian. + + + + Choose from PCM, WAV, MP3, or μ-law encoding. Configurable sample rates from 8kHz to 44kHz to match your application's requirements. + + + + Adjust speech rate with a simple multiplier. Slow down for clarity or speed up for faster content delivery without pitch distortion. + + + + Define custom pronunciations for brand names, technical terms, and acronyms. Ensure consistent, accurate pronunciation across all synthesized audio. + + + + Lightning v3.1 produces 44 kHz audio with natural prosody and expressiveness. Perfect for audiobooks, podcasts, and premium voice experiences. + + + + Persistent connections for continuous audio streaming. Ideal for voice bots and interactive applications where latency is critical. 
+ + + +## Supported Languages + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
LanguageCodeLightning v2Lightning v3.1
EnglishenYesYes
HindihiYesYes
TamiltaYesYes
KannadaknYes
MalayalammlYes
TeluguteYes
GujaratiguYes
BengalibnYes
MarathimrYes
GermandeYes
FrenchfrYes
SpanishesYesYes
ItalianitYes
PolishplYes
DutchnlYes
RussianruYes
+ +## Explore + + + + First API call in 60 seconds + + + Real-time audio via WebSocket + + + Clone from 5-15 seconds of audio + + + 20+ open-source examples on GitHub + + + See what developers have built + + + Lightning v3.1 specs and benchmarks + + diff --git a/fern/products/waves/pages/v4.0.0/text-to-speech/pronunciation-dictionaries.mdx b/fern/products/waves/pages/v4.0.0/text-to-speech/pronunciation-dictionaries.mdx new file mode 100644 index 0000000..4b6c750 --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/text-to-speech/pronunciation-dictionaries.mdx @@ -0,0 +1,321 @@ +--- +title: "Pronunciation Dictionaries" +icon: "speaker" +description: "Learn how to create and use pronunciation dictionaries to control how specific words are pronounced in your text-to-speech synthesis" +--- + +Pronunciation dictionaries allow you to customize how specific words are pronounced in your text-to-speech synthesis. This is particularly useful for: + +- Brand names, product names, or proper nouns +- Technical terms or acronyms +- Words that should be pronounced differently than their standard pronunciation +- Non-English words in English text (or vice versa) + +## How Pronunciation Dictionaries Work + +A pronunciation dictionary is a collection of word-pronunciation pairs that you create and manage through the Smallest AI API. Each dictionary has a unique ID that you can reference in your TTS requests to ensure consistent pronunciation across your applications. 
+ +### Key Concepts + +- **Word**: The text that appears in your input +- **Pronunciation**: The way the word is written out in normal words to show how it sounds (not IPA) +- **Dictionary ID**: A unique identifier for your pronunciation dictionary that you use in TTS requests + +## Creating a Pronunciation Dictionary + +### Step 1: Create Your Dictionary + +First, create a pronunciation dictionary with your custom word-pronunciation pairs: + +```bash +curl -X POST "https://api.smallest.ai/waves/v1/pronunciation-dicts" \ + -H "Authorization: Bearer YOUR_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "items": [ + { + "word": "API", + "pronunciation": "ay-pee-eye" + }, + { + "word": "GitHub", + "pronunciation": "git-hub" + }, + { + "word": "SQL", + "pronunciation": "sequel" + } + ] + }' +``` + +**Response:** + +```json +{ + "id": "64f1234567890abcdef12345", + "items": [ + { + "word": "API", + "pronunciation": "ay-pee-eye" + }, + { + "word": "GitHub", + "pronunciation": "git-hub" + }, + { + "word": "SQL", + "pronunciation": "sequel" + } + ], + "createdAt": "2023-09-01T12:00:00.000Z" +} +``` + +### Step 2: Save the Dictionary ID + +**Important:** Save the returned `id` from the response. You'll need this ID to reference your pronunciation dictionary in TTS requests and for future updates or deletions. + +```javascript +const dictionaryId = "64f1234567890abcdef12345"; // Save this! 
+``` + +## Managing Your Pronunciation Dictionaries + +### List All Dictionaries + +Retrieve all your pronunciation dictionaries: + +```bash +curl -X GET "https://api.smallest.ai/waves/v1/pronunciation-dicts" \ + -H "Authorization: Bearer YOUR_API_KEY" +``` + +### Update a Dictionary + +Modify an existing pronunciation dictionary: + +```bash +curl -X PUT "https://api.smallest.ai/waves/v1/pronunciation-dicts" \ + -H "Authorization: Bearer YOUR_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "id": "64f1234567890abcdef12345", + "items": [ + { + "word": "OpenAI", + "pronunciation": "open ay eye" + }, + ] + }' +``` + +### Delete a Dictionary + +Remove a pronunciation dictionary: + +```bash +curl -X DELETE "https://api.smallest.ai/waves/v1/pronunciation-dicts" \ + -H "Authorization: Bearer YOUR_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "id": "64f1234567890abcdef12345" + }' +``` + +## Using Pronunciation Dictionaries in TTS Requests + +Once you have created a pronunciation dictionary and obtained its ID, you can use it in your TTS requests by including the `pronunciation_dicts` parameter. This parameter accepts an array of dictionary IDs, allowing you to use multiple pronunciation dictionaries in a single request: + +### Example + +```bash +curl -X POST "https://api.smallest.ai/waves/v1/lightning-v3.1/get_speech" \ + -H "Authorization: Bearer YOUR_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "text": "Welcome to Smallest AI API! 
Our TTS service integrates with GitHub.", + "voice_id": "your_voice_id", + "pronunciation_dicts": ["64f1234567890abcdef12345"], + "sample_rate": 24000, + "speed": 1.0, + "language": "en" + }' +``` + +### Using Multiple Dictionaries + +You can also use multiple pronunciation dictionaries in a single request by providing an array of dictionary IDs: + +```bash +curl -X POST "https://api.smallest.ai/waves/v1/lightning-v3.1/get_speech" \ + -H "Authorization: Bearer YOUR_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "text": "Our API uses PostgreSQL and integrates with GitHub for CI/CD.", + "voice_id": "your_voice_id", + "pronunciation_dicts": [ + "64f1234567890abcdef12345", + "64f9876543210fedcba09876" + ], + "sample_rate": 24000, + "speed": 1.0, + "language": "en", + "output_format": "wav" + }' +``` + +## Complete Workflow Example + +Here's a complete example showing the full workflow from creating a dictionary to using it in synthesis: + +```python +import requests +import json + +# Your API configuration +API_KEY = "your_api_key_here" +BASE_URL = "https://api.smallest.ai/waves/v1" +headers = { + "Authorization": f"Bearer {API_KEY}", + "Content-Type": "application/json" +} + +# Step 1: Create pronunciation dictionary +pronunciation_data = { + "items": [ + {"word": "PostgreSQL", "pronunciation": "post-gres"}, + {"word": "Redis", "pronunciation": "red-iss"}, + {"word": "Kubernetes", "pronunciation": "koo-ber-net-ees"}, + {"word": "nginx", "pronunciation": "engine-x"} + ] +} + +# Create the dictionary +response = requests.post( + f"{BASE_URL}/pronunciation-dicts", + headers=headers, + json=pronunciation_data +) + +dict_data = response.json() +dictionary_id = dict_data["id"] +print(f"Created pronunciation dictionary with ID: {dictionary_id}") + +# Step 2: Use the dictionary in TTS synthesis +tts_request = { + "text": "Our infrastructure uses PostgreSQL, Redis, Kubernetes, and nginx.", + "voice_id": "your_voice_id", + "pronunciation_dicts": [dictionary_id], # 
Use the dictionary ID here + "sample_rate": 24000, + "speed": 1.0, + "language": "en", + "output_format": "wav" +} + +# Generate speech with custom pronunciations +audio_response = requests.post( + f"{BASE_URL}/lightning-v3.1/get_speech", + headers=headers, + json=tts_request +) + +# Save the audio file +with open("speech_with_custom_pronunciations.wav", "wb") as f: + f.write(audio_response.content) + +print("Speech generated with custom pronunciations!") +``` + +### Tips for Creating Pronunciations + +1. **Break down complex words**: For multi-syllable words, separate syllables with hyphens + + - "Kubernetes" → "koo-ber-net-ees" + +2. **Spell it how it sounds**: Write words the way you want them spoken, even if it’s not standard spelling + + - "SQL" → "sequel" + - "API" → "ay-pee-eye" + +3. **Stay consistent**: Use the same style across your dictionary (e.g., always use hyphens for syllables). + +4. **Test and refine**: Generate a small dictionary first, test the pronunciations, and adjust until they sound natural. + +--- + +## Best Practices + +### Dictionary Management + +- **Keep dictionaries focused**: Create separate dictionaries for different domains (e.g., one for technical terms, another for product names). +- **Combine multiple dictionaries**: Use the array format to apply multiple pronunciation dictionaries in a single TTS request. +- **Update regularly**: Add or refine pronunciations as your vocabulary grows. + +### Pronunciation Quality + +- **Verify pronunciations**: Listen to the output to confirm it matches expectations. +- **Consider context**: Some words may have multiple valid pronunciations—pick the one that makes sense for your use case. +- **Language consistency**: Ensure pronunciations match the language setting of your TTS requests. + +### Performance Considerations + +- **Cache dictionary IDs**: Store dictionary IDs in your application to avoid repeated API calls. 
+- **Batch updates**: When possible, update multiple pronunciations in a single API call. +- **Monitor usage**: Track which dictionaries are actively used in production. + +--- + +## Troubleshooting + +### Common Issues + +**Dictionary not found** + +- Make sure you’re using the correct dictionary ID and that the dictionary hasn’t been deleted. + +**Pronunciations not applied** + +- Verify that the dictionary ID is included in your TTS request. +- Ensure the words in your text match exactly (case-sensitive) with your dictionary entries. +- Confirm the pronunciation is written in plain text (not IPA). + +**Unexpected pronunciations** + +- Simplify your spelling. +- Test with shorter words first and adjust gradually. + +--- + +### Error Responses + +The API will return specific error messages for common issues: + +```json +{ + "error": "Invalid request body", + "details": [ + { + "code": "invalid_type", + "expected": "string", + "received": "undefined", + "path": ["items", 0, "pronunciation"], + "message": "Required" + } + ] +} +``` + +## Next Steps + + + + Explore detailed parameter information for pronunciation dictionary endpoints. + + + Optimization tips for text formatting and audio generation. + + + Create custom voices from short audio samples. + + diff --git a/fern/products/waves/pages/v4.0.0/text-to-speech/quickstart.mdx b/fern/products/waves/pages/v4.0.0/text-to-speech/quickstart.mdx new file mode 100644 index 0000000..1b2d5f6 --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/text-to-speech/quickstart.mdx @@ -0,0 +1,167 @@ +--- +title: "Quickstart" +description: "Generate your first speech audio in under 60 seconds with Lightning TTS." +icon: "rocket" +--- + +## Step 1: Get Your API Key + +In the [Smallest AI Console](https://app.smallest.ai/dashboard/settings/apikeys?utm_source=documentation&utm_medium=text-to-speech), go to **Settings → API Keys** and click **Create API Key**. 
+ +API Keys settings page with Create API Key button highlighted + +Create New API Key dialog with name field + +Copy the key and export it: + +```bash +export SMALLEST_API_KEY="your-api-key-here" +``` + +New to Smallest AI? [Sign up here](https://app.smallest.ai?utm_source=documentation&utm_medium=text-to-speech) first — it takes 30 seconds. + +## Step 2: Hear Audio in 30 Seconds + +Paste this in your terminal — no install required: + +```bash +curl -X POST "https://api.smallest.ai/waves/v1/lightning-v3.1/get_speech" \ + -H "Authorization: Bearer $SMALLEST_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{"text": "Hello from Smallest AI! This is Lightning v3.1.", "voice_id": "magnus", "sample_rate": 24000, "output_format": "wav"}' \ + --output hello.wav +``` + +Play `hello.wav` — it should sound like this: + + + +That's broadcast-quality TTS with ~200ms latency. + +## Step 3: Build It Into Your App + + + +```bash cURL +curl -X POST "https://api.smallest.ai/waves/v1/lightning-v3.1/get_speech" \ + -H "Authorization: Bearer $SMALLEST_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "text": "Modern problems require modern solutions.", + "voice_id": "magnus", + "sample_rate": 24000, + "speed": 1.0, + "language": "en", + "output_format": "wav" + }' --output output.wav +``` + +```python Python +import os +import requests + +API_KEY = os.environ["SMALLEST_API_KEY"] + +response = requests.post( + "https://api.smallest.ai/waves/v1/lightning-v3.1/get_speech", + headers={ + "Authorization": f"Bearer {API_KEY}", + "Content-Type": "application/json", + }, + json={ + "text": "Modern problems require modern solutions.", + "voice_id": "magnus", + "sample_rate": 24000, + "speed": 1.0, + "language": "en", + "output_format": "wav", + }, +) + +with open("output.wav", "wb") as f: + f.write(response.content) +print(f"Saved output.wav ({len(response.content):,} bytes)") +``` + +```javascript JavaScript +const fs = require("fs"); + +const API_KEY = 
process.env.SMALLEST_API_KEY; + +const response = await fetch( + "https://api.smallest.ai/waves/v1/lightning-v3.1/get_speech", + { + method: "POST", + headers: { + Authorization: `Bearer ${API_KEY}`, + "Content-Type": "application/json", + }, + body: JSON.stringify({ + text: "Modern problems require modern solutions.", + voice_id: "magnus", + sample_rate: 24000, + speed: 1.0, + language: "en", + output_format: "wav", + }), + } +); + +const buffer = Buffer.from(await response.arrayBuffer()); +fs.writeFileSync("output.wav", buffer); +console.log(`Saved output.wav (${buffer.length} bytes)`); +``` + +```python Python SDK +from smallestai.waves import WavesClient + +client = WavesClient(api_key="YOUR_API_KEY") +audio = client.synthesize( + "Modern problems require modern solutions.", + sample_rate=24000, + speed=1.0, +) +with open("output.wav", "wb") as f: + f.write(audio) +``` + + + +## Step 4: Explore More + + + + Browse 80+ voices across English, Hindi, Spanish, and Tamil. + + + Real-time audio streaming via WebSocket for voice assistants. + + + Clone any voice from just 5-15 seconds of audio. + + + Custom pronunciations for brand names and technical terms. + + + +## Key Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `text` | string | *required* | Text to synthesize (max ~250 chars recommended) | +| `voice_id` | string | *required* | Voice to use (e.g., `magnus`, `olivia`) | +| `sample_rate` | int | `44100` | `8000`, `16000`, `24000`, or `44100` Hz | +| `speed` | float | `1.0` | Speech rate: `0.5` to `2.0` | +| `language` | string | `auto` | `en`, `hi`, `es`, `ta`, or `auto` | +| `output_format` | string | `pcm` | `pcm`, `wav`, `mp3`, or `mulaw` | + +## Need Help? + + + Ask questions, share what you're building, and connect with other developers on Discord. + + +If you need direct assistance, reach out at [support@smallest.ai](mailto:support@smallest.ai). 
diff --git a/fern/products/waves/pages/v4.0.0/text-to-speech/stream-tts.mdx b/fern/products/waves/pages/v4.0.0/text-to-speech/stream-tts.mdx new file mode 100644 index 0000000..c3622ea --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/text-to-speech/stream-tts.mdx @@ -0,0 +1,287 @@ +--- +title: "Streaming" +sidebarTitle: "Streaming" +description: "Stream TTS audio in real-time via WebSocket or SSE — first chunk in ~200ms." +icon: "bars-staggered" +--- + +Streaming TTS delivers audio chunks as they're generated — playback starts immediately instead of waiting for the full file. First chunk arrives in ~200ms. + +**Streamed audio output:** + + + +```mermaid +sequenceDiagram + participant Client + participant API as Lightning TTS + Note over Client,API: Synchronous — wait for full audio + Client->>API: POST /get_speech + API-->>Client: ⏳ Generating... + API->>Client: Complete audio file + + Note over Client,API: Streaming — chunks as generated + Client->>API: Connect WebSocket + API->>Client: Chunk 1 (~200ms) + Note right of Client: ▶ Start playback + API->>Client: Chunk 2 + API->>Client: Chunk 3 + API->>Client: ... + API->>Client: Complete ✓ +``` + +## WebSocket Streaming + +Persistent connections for continuous, low-latency audio. Best for conversational AI and real-time apps. 
+ +**Endpoint:** `wss://api.smallest.ai/waves/v1/lightning-v3.1/get_speech/stream` + + + +```python Python +import asyncio +import json +import base64 +import wave +import os +import websockets + +API_KEY = os.environ["SMALLEST_API_KEY"] +WS_URL = "wss://api.smallest.ai/waves/v1/lightning-v3.1/get_speech/stream" + +async def stream_tts(text): + audio_chunks = [] + + async with websockets.connect( + WS_URL, + extra_headers={"Authorization": f"Bearer {API_KEY}"}, + ) as ws: + await ws.send(json.dumps({ + "text": text, + "voice_id": "magnus", + "sample_rate": 24000, + })) + + while True: + response = await ws.recv() + data = json.loads(response) + + if data["status"] == "chunk": + audio = base64.b64decode(data["data"]["audio"]) + audio_chunks.append(audio) + elif data["status"] == "complete": + break + + # Save as WAV + raw = b"".join(audio_chunks) + with wave.open("streamed.wav", "wb") as wf: + wf.setnchannels(1) + wf.setsampwidth(2) + wf.setframerate(24000) + wf.writeframes(raw) + + print(f"Saved streamed.wav ({len(audio_chunks)} chunks)") + +asyncio.run(stream_tts("Streaming delivers audio in real-time for voice assistants and chatbots.")) +``` + +```javascript JavaScript +const WebSocket = require("ws"); +const fs = require("fs"); + +const API_KEY = process.env.SMALLEST_API_KEY; + +const ws = new WebSocket( + "wss://api.smallest.ai/waves/v1/lightning-v3.1/get_speech/stream", + { headers: { Authorization: `Bearer ${API_KEY}` } } +); + +const audioChunks = []; + +ws.on("open", () => { + ws.send(JSON.stringify({ + text: "Streaming delivers audio in real-time for voice assistants and chatbots.", + voice_id: "magnus", + sample_rate: 24000, + })); +}); + +ws.on("message", (raw) => { + const data = JSON.parse(raw); + + if (data.status === "chunk") { + audioChunks.push(Buffer.from(data.data.audio, "base64")); + } else if (data.status === "complete") { + const audio = Buffer.concat(audioChunks); + // Add WAV header and save + fs.writeFileSync("streamed.pcm", audio); + 
console.log(`Saved streamed.pcm (${audioChunks.length} chunks)`); + ws.close(); + } +}); +``` + +```python Python SDK +from smallestai.waves import TTSConfig, WavesStreamingTTS +import wave + +config = TTSConfig( + voice_id="magnus", + api_key="YOUR_SMALLEST_API_KEY", + sample_rate=24000, + speed=1.0, + max_buffer_flush_ms=100, +) + +streaming_tts = WavesStreamingTTS(config) + +text = "Streaming delivers audio in real-time for voice assistants and chatbots." +audio_chunks = list(streaming_tts.synthesize(text)) + +with wave.open("streamed.wav", "wb") as wf: + wf.setnchannels(1) + wf.setsampwidth(2) + wf.setframerate(24000) + wf.writeframes(b"".join(audio_chunks)) +``` + + + +## SSE Streaming + +Server-Sent Events over HTTP — simpler to set up, no persistent connection needed. + +**Endpoint:** `POST https://api.smallest.ai/waves/v1/lightning-v3.1/stream` + + + +```python Python +import os +import json +import base64 +import wave +import requests + +API_KEY = os.environ["SMALLEST_API_KEY"] + +response = requests.post( + "https://api.smallest.ai/waves/v1/lightning-v3.1/stream", + headers={ + "Authorization": f"Bearer {API_KEY}", + "Content-Type": "application/json", + "Accept": "text/event-stream", + }, + json={ + "text": "SSE streaming is simpler to set up than WebSocket.", + "voice_id": "magnus", + "sample_rate": 24000, + }, + stream=True, +) + +audio_chunks = [] +for line in response.iter_lines(): + if not line: + continue + line = line.decode() + if not line.startswith("data: "): + continue + + data = json.loads(line[6:]) + if data["status"] == "chunk": + audio_chunks.append(base64.b64decode(data["data"]["audio"])) + elif data["status"] == "complete": + break + +raw = b"".join(audio_chunks) +with wave.open("sse_output.wav", "wb") as wf: + wf.setnchannels(1) + wf.setsampwidth(2) + wf.setframerate(24000) + wf.writeframes(raw) +``` + +```bash cURL +curl -N -X POST "https://api.smallest.ai/waves/v1/lightning-v3.1/stream" \ + -H "Authorization: Bearer $SMALLEST_API_KEY" 
\ + -H "Content-Type: application/json" \ + -H "Accept: text/event-stream" \ + -d '{ + "text": "SSE streaming is simpler to set up than WebSocket.", + "voice_id": "magnus", + "sample_rate": 24000 + }' +``` + + + +## Streaming Text Input (SDK) + +For real-time applications where text arrives incrementally (e.g., from an LLM), the SDK supports streaming text input: + +```python +from smallestai.waves import TTSConfig, WavesStreamingTTS + +config = TTSConfig(voice_id="magnus", api_key="YOUR_API_KEY", sample_rate=24000) +streaming_tts = WavesStreamingTTS(config) + +def text_stream(): + """Simulates text arriving word by word (e.g., from an LLM).""" + text = "Streaming synthesis with chunked text input." + for word in text.split(): + yield word + " " + +audio_chunks = [] +for chunk in streaming_tts.synthesize_streaming(text_stream()): + audio_chunks.append(chunk) + # In a real app, play each chunk immediately +``` + +## WebSocket vs SSE + +| | WebSocket | SSE | +|---|---|---| +| **Connection** | Persistent, bidirectional | New HTTP request each time | +| **Multiple messages** | Reuse same connection | New request per message | +| **Best for** | Voice assistants, chatbots | Simple one-off streaming | +| **Latency** | Lowest (no reconnect overhead) | Slightly higher | +| **Concurrency** | Up to 5 connections per unit | Per-request | + + +Use **WebSocket** when sending multiple TTS requests over time (conversations, voice bots). Use **SSE** for simple one-shot streaming where you don't need a persistent connection. 
+ + +## Response Format + +Each WebSocket/SSE message is JSON: + +**Audio chunk:** +```json +{ + "status": "chunk", + "data": { "audio": "base64_encoded_pcm_data" } +} +``` + +**Stream complete:** +```json +{ + "status": "complete", + "message": "All chunks sent", + "done": true +} +``` + +## Configuration Parameters + +| Parameter | Default | Description | +|-----------|---------|-------------| +| `voice_id` | *required* | Voice identifier | +| `sample_rate` | `44100` | Audio sample rate (8000–44100 Hz) | +| `speed` | `1.0` | Speech speed (0.5–2.0) | +| `language` | `auto` | Language code | +| `output_format` | `pcm` | `pcm`, `mp3`, `wav`, or `mulaw` | + +For concurrency limits and connection management, see [Concurrency and Limits](/waves/documentation/api-references/concurrency-and-limits). diff --git a/fern/products/waves/pages/v4.0.0/video/angry_gen_t.mp4 b/fern/products/waves/pages/v4.0.0/video/angry_gen_t.mp4 new file mode 100644 index 0000000..0b3628e Binary files /dev/null and b/fern/products/waves/pages/v4.0.0/video/angry_gen_t.mp4 differ diff --git a/fern/products/waves/pages/v4.0.0/video/angry_ref_t.mp4 b/fern/products/waves/pages/v4.0.0/video/angry_ref_t.mp4 new file mode 100644 index 0000000..aca4571 Binary files /dev/null and b/fern/products/waves/pages/v4.0.0/video/angry_ref_t.mp4 differ diff --git a/fern/products/waves/pages/v4.0.0/video/bg_ref_t.mp4 b/fern/products/waves/pages/v4.0.0/video/bg_ref_t.mp4 new file mode 100644 index 0000000..7d1d723 Binary files /dev/null and b/fern/products/waves/pages/v4.0.0/video/bg_ref_t.mp4 differ diff --git a/fern/products/waves/pages/v4.0.0/video/fast_gen_t.mp4 b/fern/products/waves/pages/v4.0.0/video/fast_gen_t.mp4 new file mode 100644 index 0000000..9c66deb Binary files /dev/null and b/fern/products/waves/pages/v4.0.0/video/fast_gen_t.mp4 differ diff --git a/fern/products/waves/pages/v4.0.0/video/fast_ref_t.mp4 b/fern/products/waves/pages/v4.0.0/video/fast_ref_t.mp4 new file mode 100644 index 0000000..1be0d9c 
Binary files /dev/null and b/fern/products/waves/pages/v4.0.0/video/fast_ref_t.mp4 differ diff --git a/fern/products/waves/pages/v4.0.0/video/good_ref_t.mp4 b/fern/products/waves/pages/v4.0.0/video/good_ref_t.mp4 new file mode 100644 index 0000000..8b16dec Binary files /dev/null and b/fern/products/waves/pages/v4.0.0/video/good_ref_t.mp4 differ diff --git a/fern/products/waves/pages/v4.0.0/video/inconsistent_ref_t.mp4 b/fern/products/waves/pages/v4.0.0/video/inconsistent_ref_t.mp4 new file mode 100644 index 0000000..dec6109 Binary files /dev/null and b/fern/products/waves/pages/v4.0.0/video/inconsistent_ref_t.mp4 differ diff --git a/fern/products/waves/pages/v4.0.0/video/overlap_ref_t.mp4 b/fern/products/waves/pages/v4.0.0/video/overlap_ref_t.mp4 new file mode 100644 index 0000000..ebc3285 Binary files /dev/null and b/fern/products/waves/pages/v4.0.0/video/overlap_ref_t.mp4 differ diff --git a/fern/products/waves/pages/v4.0.0/video/whisper_gen_t.mp4 b/fern/products/waves/pages/v4.0.0/video/whisper_gen_t.mp4 new file mode 100644 index 0000000..2c6188a Binary files /dev/null and b/fern/products/waves/pages/v4.0.0/video/whisper_gen_t.mp4 differ diff --git a/fern/products/waves/pages/v4.0.0/video/whisper_ref_t.mp4 b/fern/products/waves/pages/v4.0.0/video/whisper_ref_t.mp4 new file mode 100644 index 0000000..6a2edeb Binary files /dev/null and b/fern/products/waves/pages/v4.0.0/video/whisper_ref_t.mp4 differ diff --git a/fern/products/waves/pages/v4.0.0/voice-cloning/how-to-delete-vc.mdx b/fern/products/waves/pages/v4.0.0/voice-cloning/how-to-delete-vc.mdx new file mode 100644 index 0000000..8674576 --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/voice-cloning/how-to-delete-vc.mdx @@ -0,0 +1,73 @@ +--- +title: "How to delete your Voice Clone using Python SDK" +icon: "compact-disc" +--- + +In this tutorial, you will learn how to use the Smallest AI platform to delete your voice clone. By the end of this tutorial, you will be able to clone your voice using our API. 
+ +You can access the source code for the Python SDK on our [GitHub repository](https://github.com/smallest-inc/smallest-python-sdk). + +## Requirements + +Before you begin, ensure you have the following: + +- Python (3.9 or higher) installed on your machine. +- An API key from the Smallest AI [platform](https://app.smallest.ai/dashboard/settings/apikeys?utm_source=documentation&utm_medium=voice-cloning). + +## Setup + +### Install our SDK +```bash +pip install smallestai +``` + +Set your API key as an environment variable. +```bash +export SMALLEST_API_KEY=YOUR_API_KEY +``` + +## Delete your Voice +The Smallest AI SDK allows you to delete your cloned voice. This feature is available both synchronously and asynchronously, making it flexible for different use cases. Below are examples of how to use this functionality. + +### Synchronously + + +```python python +from smallestai.waves import WavesClient + +def main(): + client = WavesClient(api_key="SMALLEST_API_KEY") + res = client.delete_voice(voice_id="voice_id") + print(res) + +if __name__ == "__main__": + main() +``` + + +### Asynchronously + +```python python +import asyncio +from smallestai.waves import AsyncWavesClient + +async def main(): + client = AsyncWavesClient(api_key="SMALLEST_API_KEY") + res = await client.delete_voice(voice_id="voice_id") + print(res) + +if __name__ == "__main__": + asyncio.run(main()) +``` + + +## Parameters + +- `api_key`: Your API key (can be set via SMALLEST_API_KEY environment variable). +- `voice_id`: Unique Voice ID of the voice to be deleted. + + +If you have any questions or run into any issues, our community is here to help! + +- Join our [Discord server](https://discord.gg/ywShEyXHBW) to connect with other developers and get real-time support. +- Reach out to our team via email: [support@smallest.ai](mailto:support@smallest.ai). 
diff --git a/fern/products/waves/pages/v4.0.0/voice-cloning/how-to-pvc.mdx b/fern/products/waves/pages/v4.0.0/voice-cloning/how-to-pvc.mdx new file mode 100644 index 0000000..c54eca7 --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/voice-cloning/how-to-pvc.mdx @@ -0,0 +1,31 @@ +--- +title: "How to Create a Professional Voice Clone" +description: "Train our model on your voice and generate a high-quality professional voice clone." +icon: "compact-disc" +--- + +In this tutorial, you will learn how to use the Smallest AI platform to create a professional voice clone by uploading an audio file. + +# Creating a Professional Voice Clone + +1. **Go to the Smallest AI Platform** + Navigate to the [platform](https://app.smallest.ai/waves/voice-cloning?utm_source=documentation&utm_medium=voice-cloning) and click on **Create New**. In the modal that appears, select **Professional Voice Clone**. This will direct you to the setup page: + + ![Voice Clone Setup](../../images/pvc_page.png) + +2. **Upload Your Audio File** + Follow the instructions provided on the page to upload your audio file. Ensure that the recording is clear for the best results. + +3. **Enable Denoise (Optional)** + If your audio contains background noise, toggle **Denoise** on to improve quality. + +4. **Wait for Model to get trained** + The voice cloning process typically takes **3 to 6 hours**, but may take longer depending on demand. The Voice Clone will be available to Use on platform and you will also get mail for that. + +### **Note:** +**Creation of Professional Voice Clones (PVC) is not available via the SDK** due to the requirement of larger audio files. Please use the Smallest AI platform for this process. + +If you have any questions or run into any issues, our community is here to help! + +- Join our [Discord server](https://discord.gg/ywShEyXHBW) to connect with other developers and get real-time support. +- Reach out to our team via email: [support@smallest.ai](mailto:support@smallest.ai). 
\ No newline at end of file diff --git a/fern/products/waves/pages/v4.0.0/voice-cloning/how-to-vc-ui.mdx b/fern/products/waves/pages/v4.0.0/voice-cloning/how-to-vc-ui.mdx new file mode 100644 index 0000000..a4c6e89 --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/voice-cloning/how-to-vc-ui.mdx @@ -0,0 +1,49 @@ +--- +title: "Instant Voice Clone (Web UI)" +sidebarTitle: "Instant Clone (UI)" +description: "Clone a voice from a short audio sample using the web console." +icon: "compact-disc" +--- + +Clone any voice from just 5-15 seconds of audio. Upload a sample, get a voice ID, and use it in your TTS calls immediately. + + +**Professional voice cloning** (45+ minutes of studio audio, higher fidelity) is available on demand. Contact [support@smallest.ai](mailto:support@smallest.ai) or reach out on [Discord](https://discord.gg/5evETqguJs). + + +# Creating an Instant Voice Clone + + 1️. **Go to the Smallest AI Platform** + - Navigate to the **[platform](https://app.smallest.ai/waves/voice-cloning?utm_source=documentation&utm_medium=voice-cloning)** and click on **Create New**. + - In the modal that appears, select **Instant Voice Clone**. + + ![Voice Clone Setup](../../images/ivc-image-1.png) + + 2️. **Upload Your Clean Reference Audio** + - Select a **short, high-quality** audio clip (5-15 seconds). + - Ensure the recording is **clear and noise-free** for the best results. + - Follow the recommended **[best practices](/waves/documentation/best-practices/vc-best-practices)** to maximize quality. + + ![Upload your clean reference audio](../../images/ivc-image-2.png) + + 3️. **Review Generated Testing Examples** + - The platform will process your reference audio and generate **sample outputs**. + - Listen to the test clips to verify the voice match. + + ![Testing Examples](../../images/ivc-image-3.png) + + 4️. **Customize & Save Your Voice Clone** + - Fill in details like **Name, Tags, and Description** for your voice. 
+ - Click **Generate** to store your cloned voice. + + ![Create Voice](../../images/ivc-image-4.png) + +**Next:** Use your cloned voice in TTS calls by passing the voice ID as `voice_id` — or clone via the [Python SDK](/waves/documentation/voice-cloning/how-to-vc) for programmatic workflows. + +## Need Help? + + + Ask questions, share what you're building, and connect with other developers on Discord. + + +If you need direct assistance, reach out at [support@smallest.ai](mailto:support@smallest.ai). diff --git a/fern/products/waves/pages/v4.0.0/voice-cloning/how-to-vc.mdx b/fern/products/waves/pages/v4.0.0/voice-cloning/how-to-vc.mdx new file mode 100644 index 0000000..050827c --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/voice-cloning/how-to-vc.mdx @@ -0,0 +1,91 @@ +--- +title: "How to create an Instant Voice Clone using Python SDK" +icon: "compact-disc" +--- + +In this tutorial, you will learn how to use the Smallest AI platform to create a voice by uploading an audio file. By the end of this tutorial, you will be able to clone your voice using our API. + +You can access the source code for the Python SDK on our [GitHub repository](https://github.com/smallest-inc/smallest-python-sdk). + +## Requirements + +Before you begin, ensure you have the following: + +- Python (3.9 or higher) installed on your machine. +- An API key from the Smallest AI [platform](https://app.smallest.ai/dashboard/settings/apikeys?utm_source=documentation&utm_medium=voice-cloning). + +## Setup + +### Install our SDK +```bash +pip install smallestai +``` + +Set your API key as an environment variable. +```bash +export SMALLEST_API_KEY=YOUR_API_KEY +``` + +## Add your Voice +The Smallest AI SDK allows you to clone your voice by uploading an audio file. This feature is available both synchronously and asynchronously, making it flexible for different use cases. Below are examples of how to use this functionality. 
+ +### Synchronously + + +```python python +from smallestai.waves import WavesClient + +def main(): + client = WavesClient(api_key="YOUR_API_KEY") + res = client.add_voice(display_name="My Voice", file_path="my_voice.wav") + print(res) + +if __name__ == "__main__": + main() +``` + + +### Asynchronously + +```python python +import asyncio +from smallestai.waves import AsyncWavesClient + +async def main(): + client = AsyncWavesClient(api_key="YOUR_API_KEY") + res = await client.add_voice(display_name="My Voice", file_path="my_voice.wav") + print(res) + +if __name__ == "__main__": + asyncio.run(main()) +``` + + +## Parameters + +- `api_key`: Your API key (can be set via SMALLEST_API_KEY environment variable). +- `display_name`: Name of the voice to be created. +- `file_path`: Path to the audio file to be cloned. + +These parameters are part of the add_voice function. They can be set when calling the function as shown above. + +## Get All Cloned Voices + +Once you have cloned your voices, you can retrieve a list of all cloned voices associated with your account using the following code: + +```python python +from smallestai.waves import WavesClient + +client = WavesClient(api_key="YOUR_API_KEY") +print(f"Available Voices: {client.get_cloned_voices()}") +``` + + + +## Need Help? + + + Connect with other developers and get real-time support on Discord. + + +If you need direct assistance, reach out at [support@smallest.ai](mailto:support@smallest.ai). 
diff --git a/fern/products/waves/pages/v4.0.0/voice-cloning/images/agent-dashboard-conversions.png b/fern/products/waves/pages/v4.0.0/voice-cloning/images/agent-dashboard-conversions.png new file mode 100644 index 0000000..edf540d Binary files /dev/null and b/fern/products/waves/pages/v4.0.0/voice-cloning/images/agent-dashboard-conversions.png differ diff --git a/fern/products/waves/pages/v4.0.0/voice-cloning/images/agent-dashboard.png b/fern/products/waves/pages/v4.0.0/voice-cloning/images/agent-dashboard.png new file mode 100644 index 0000000..888291a Binary files /dev/null and b/fern/products/waves/pages/v4.0.0/voice-cloning/images/agent-dashboard.png differ diff --git a/fern/products/waves/pages/v4.0.0/voice-cloning/images/checks-passed.png b/fern/products/waves/pages/v4.0.0/voice-cloning/images/checks-passed.png new file mode 100644 index 0000000..3303c77 Binary files /dev/null and b/fern/products/waves/pages/v4.0.0/voice-cloning/images/checks-passed.png differ diff --git a/fern/products/waves/pages/v4.0.0/voice-cloning/images/conversions-list.png b/fern/products/waves/pages/v4.0.0/voice-cloning/images/conversions-list.png new file mode 100644 index 0000000..aa46a94 Binary files /dev/null and b/fern/products/waves/pages/v4.0.0/voice-cloning/images/conversions-list.png differ diff --git a/fern/products/waves/pages/v4.0.0/voice-cloning/images/create-audience.png b/fern/products/waves/pages/v4.0.0/voice-cloning/images/create-audience.png new file mode 100644 index 0000000..e1e062d Binary files /dev/null and b/fern/products/waves/pages/v4.0.0/voice-cloning/images/create-audience.png differ diff --git a/fern/products/waves/pages/v4.0.0/voice-cloning/images/create-campaign.png b/fern/products/waves/pages/v4.0.0/voice-cloning/images/create-campaign.png new file mode 100644 index 0000000..a7efb9d Binary files /dev/null and b/fern/products/waves/pages/v4.0.0/voice-cloning/images/create-campaign.png differ diff --git 
a/fern/products/waves/pages/v4.0.0/voice-cloning/images/create-conversion.png b/fern/products/waves/pages/v4.0.0/voice-cloning/images/create-conversion.png new file mode 100644 index 0000000..2e95316 Binary files /dev/null and b/fern/products/waves/pages/v4.0.0/voice-cloning/images/create-conversion.png differ diff --git a/fern/products/waves/pages/v4.0.0/voice-cloning/images/download.svg b/fern/products/waves/pages/v4.0.0/voice-cloning/images/download.svg new file mode 100644 index 0000000..f41f94e --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/voice-cloning/images/download.svg @@ -0,0 +1,3 @@ + + + diff --git a/fern/products/waves/pages/v4.0.0/voice-cloning/images/hero-dark.svg b/fern/products/waves/pages/v4.0.0/voice-cloning/images/hero-dark.svg new file mode 100644 index 0000000..c6a30e8 --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/voice-cloning/images/hero-dark.svg @@ -0,0 +1,161 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/fern/products/waves/pages/v4.0.0/voice-cloning/images/hero-light.svg b/fern/products/waves/pages/v4.0.0/voice-cloning/images/hero-light.svg new file mode 100644 index 0000000..297d68f --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/voice-cloning/images/hero-light.svg @@ -0,0 +1,155 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/fern/products/waves/pages/v4.0.0/voice-cloning/images/ivc-image-1.png 
b/fern/products/waves/pages/v4.0.0/voice-cloning/images/ivc-image-1.png new file mode 100644 index 0000000..5c18503 Binary files /dev/null and b/fern/products/waves/pages/v4.0.0/voice-cloning/images/ivc-image-1.png differ diff --git a/fern/products/waves/pages/v4.0.0/voice-cloning/images/ivc-image-2.png b/fern/products/waves/pages/v4.0.0/voice-cloning/images/ivc-image-2.png new file mode 100644 index 0000000..b4d3f32 Binary files /dev/null and b/fern/products/waves/pages/v4.0.0/voice-cloning/images/ivc-image-2.png differ diff --git a/fern/products/waves/pages/v4.0.0/voice-cloning/images/ivc-image-3.png b/fern/products/waves/pages/v4.0.0/voice-cloning/images/ivc-image-3.png new file mode 100644 index 0000000..0cf238e Binary files /dev/null and b/fern/products/waves/pages/v4.0.0/voice-cloning/images/ivc-image-3.png differ diff --git a/fern/products/waves/pages/v4.0.0/voice-cloning/images/ivc-image-4.png b/fern/products/waves/pages/v4.0.0/voice-cloning/images/ivc-image-4.png new file mode 100644 index 0000000..5ecf5d6 Binary files /dev/null and b/fern/products/waves/pages/v4.0.0/voice-cloning/images/ivc-image-4.png differ diff --git a/fern/products/waves/pages/v4.0.0/voice-cloning/images/lightning_cover.png b/fern/products/waves/pages/v4.0.0/voice-cloning/images/lightning_cover.png new file mode 100644 index 0000000..acaebb8 Binary files /dev/null and b/fern/products/waves/pages/v4.0.0/voice-cloning/images/lightning_cover.png differ diff --git a/fern/products/waves/pages/v4.0.0/voice-cloning/images/pvc_page.png b/fern/products/waves/pages/v4.0.0/voice-cloning/images/pvc_page.png new file mode 100644 index 0000000..8bf1c5e Binary files /dev/null and b/fern/products/waves/pages/v4.0.0/voice-cloning/images/pvc_page.png differ diff --git a/fern/products/waves/pages/v4.0.0/voice-cloning/images/save-campaign.png b/fern/products/waves/pages/v4.0.0/voice-cloning/images/save-campaign.png new file mode 100644 index 0000000..972b0df Binary files /dev/null and 
b/fern/products/waves/pages/v4.0.0/voice-cloning/images/save-campaign.png differ diff --git a/fern/products/waves/pages/v4.0.0/voice-cloning/images/smallest_cover.jpeg b/fern/products/waves/pages/v4.0.0/voice-cloning/images/smallest_cover.jpeg new file mode 100644 index 0000000..2f584f4 Binary files /dev/null and b/fern/products/waves/pages/v4.0.0/voice-cloning/images/smallest_cover.jpeg differ diff --git a/fern/products/waves/pages/v4.0.0/voice-cloning/images/started-campaign.png b/fern/products/waves/pages/v4.0.0/voice-cloning/images/started-campaign.png new file mode 100644 index 0000000..0c1857a Binary files /dev/null and b/fern/products/waves/pages/v4.0.0/voice-cloning/images/started-campaign.png differ diff --git a/fern/products/waves/pages/v4.0.0/voice-cloning/images/test-agent.png b/fern/products/waves/pages/v4.0.0/voice-cloning/images/test-agent.png new file mode 100644 index 0000000..f90fbec Binary files /dev/null and b/fern/products/waves/pages/v4.0.0/voice-cloning/images/test-agent.png differ diff --git a/fern/products/waves/pages/v4.0.0/voice-cloning/images/thunder.png b/fern/products/waves/pages/v4.0.0/voice-cloning/images/thunder.png new file mode 100644 index 0000000..2a64ee9 Binary files /dev/null and b/fern/products/waves/pages/v4.0.0/voice-cloning/images/thunder.png differ diff --git a/fern/products/waves/pages/v4.0.0/voice-cloning/images/thunder.svg b/fern/products/waves/pages/v4.0.0/voice-cloning/images/thunder.svg new file mode 100644 index 0000000..35f6a0e --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/voice-cloning/images/thunder.svg @@ -0,0 +1,26 @@ + + + + + + + + + + image/svg+xml + + + + + Openclipart + + + + + + + + + + + diff --git a/fern/products/waves/pages/v4.0.0/voice-cloning/types-of-clone.mdx b/fern/products/waves/pages/v4.0.0/voice-cloning/types-of-clone.mdx new file mode 100644 index 0000000..515f3fa --- /dev/null +++ b/fern/products/waves/pages/v4.0.0/voice-cloning/types-of-clone.mdx @@ -0,0 +1,28 @@ +--- +title: "Types 
of Cloning: Instant vs Professional" +description: "Train our model on your voice and generate a high-quality professional voice clone." +icon: "compact-disc" +--- + +## Instant vs Professional Cloning + +Voice cloning technology offers two primary methods: **Instant Cloning** and **Professional Voice Cloning (PVC)**. Each method varies in terms of time, accuracy, and overall quality. + +### **Instant Cloning** +- Requires **5 to 15 seconds** of audio. +- Provides a **quick and accessible** voice clone. +- Captures **basic voice characteristics** but lacks deep refinement. +- Best suited for casual applications where **speed matters more than accuracy**. + +### **Professional Voice Cloning (PVC)** +- Requires **at least 45 minutes** of high-quality recorded audio. +- Uses extensive training and **fine-tuning** for a near-perfect voice match. +- Captures **intonation, accent, emotions, and vocal nuances** accurately. +- Ideal for professional applications like **content creation, dubbing, and virtual assistants**. + +While **Instant Cloning** offers a fast and convenient solution, **Professional Voice Cloning** ensures a **high-fidelity, natural, and expressive** result tailored for professional use. + +If you have any questions or run into any issues, our community is here to help! + +- Join our [Discord server](https://discord.gg/ywShEyXHBW) to connect with other developers and get real-time support. +- Reach out to our team via email: [support@smallest.ai](mailto:support@smallest.ai). 
diff --git a/fern/products/waves/pages/video/angry_gen_t.mp4 b/fern/products/waves/pages/video/angry_gen_t.mp4 new file mode 100644 index 0000000..0b3628e Binary files /dev/null and b/fern/products/waves/pages/video/angry_gen_t.mp4 differ diff --git a/fern/products/waves/pages/video/angry_ref_t.mp4 b/fern/products/waves/pages/video/angry_ref_t.mp4 new file mode 100644 index 0000000..aca4571 Binary files /dev/null and b/fern/products/waves/pages/video/angry_ref_t.mp4 differ diff --git a/fern/products/waves/pages/video/bg_ref_t.mp4 b/fern/products/waves/pages/video/bg_ref_t.mp4 new file mode 100644 index 0000000..7d1d723 Binary files /dev/null and b/fern/products/waves/pages/video/bg_ref_t.mp4 differ diff --git a/fern/products/waves/pages/video/fast_gen_t.mp4 b/fern/products/waves/pages/video/fast_gen_t.mp4 new file mode 100644 index 0000000..9c66deb Binary files /dev/null and b/fern/products/waves/pages/video/fast_gen_t.mp4 differ diff --git a/fern/products/waves/pages/video/fast_ref_t.mp4 b/fern/products/waves/pages/video/fast_ref_t.mp4 new file mode 100644 index 0000000..1be0d9c Binary files /dev/null and b/fern/products/waves/pages/video/fast_ref_t.mp4 differ diff --git a/fern/products/waves/pages/video/good_ref_t.mp4 b/fern/products/waves/pages/video/good_ref_t.mp4 new file mode 100644 index 0000000..8b16dec Binary files /dev/null and b/fern/products/waves/pages/video/good_ref_t.mp4 differ diff --git a/fern/products/waves/pages/video/inconsistent_ref_t.mp4 b/fern/products/waves/pages/video/inconsistent_ref_t.mp4 new file mode 100644 index 0000000..dec6109 Binary files /dev/null and b/fern/products/waves/pages/video/inconsistent_ref_t.mp4 differ diff --git a/fern/products/waves/pages/video/overlap_ref_t.mp4 b/fern/products/waves/pages/video/overlap_ref_t.mp4 new file mode 100644 index 0000000..ebc3285 Binary files /dev/null and b/fern/products/waves/pages/video/overlap_ref_t.mp4 differ diff --git a/fern/products/waves/pages/video/whisper_gen_t.mp4 
b/fern/products/waves/pages/video/whisper_gen_t.mp4 new file mode 100644 index 0000000..2c6188a Binary files /dev/null and b/fern/products/waves/pages/video/whisper_gen_t.mp4 differ diff --git a/fern/products/waves/pages/video/whisper_ref_t.mp4 b/fern/products/waves/pages/video/whisper_ref_t.mp4 new file mode 100644 index 0000000..6a2edeb Binary files /dev/null and b/fern/products/waves/pages/video/whisper_ref_t.mp4 differ diff --git a/fern/products/waves/versions/v2.2.0.yml b/fern/products/waves/versions/v2.2.0.yml index 0da417e..cd0d49b 100644 --- a/fern/products/waves/versions/v2.2.0.yml +++ b/fern/products/waves/versions/v2.2.0.yml @@ -1,59 +1,139 @@ tabs: - docs: - display-name: Docs - icon: home + documentation: + display-name: Documentation + icon: fa-solid fa-book api-reference: display-name: API Reference - icon: puzzle + icon: fa-solid fa-puzzle-piece + client-libraries: + display-name: Client Libraries + icon: fa-solid fa-code + changelog: + display-name: Changelog + icon: fa-solid fa-clock-rotate-left navigation: - - tab: docs + - tab: documentation layout: - - section: Getting Started + - section: Introduction contents: - page: Introduction - path: ./v2.2.0/introduction/introduction.mdx + path: ../pages/v2.2.0/introduction/introduction.mdx + icon: fa-regular fa-book-open + - section: Getting Started + contents: - page: Quickstart - path: ./v2.2.0/getting-started/quickstart.mdx + path: ../pages/v2.2.0/getting-started/quickstart.mdx + icon: fa-solid fa-rocket + - page: Models + path: ../pages/v2.2.0/getting-started/models.mdx + icon: fa-solid fa-cube - page: Authentication - path: ./v2.2.0/getting-started/authentication.mdx + path: ../pages/v2.2.0/getting-started/authentication.mdx + icon: fa-solid fa-lock + - page: HTTP Streaming + path: ../pages/v2.2.0/getting-started/http-stream.mdx + icon: fa-solid fa-bars-staggered - section: Text to Speech contents: - page: How to TTS - path: ./v2.2.0/text-to-speech/how-to-tts.mdx + path: 
../pages/v2.2.0/text-to-speech/how-to-tts.mdx + icon: fa-solid fa-diagram-project - page: LLM to TTS - path: ./v2.2.0/text-to-speech/llm-to-tts.mdx + path: ../pages/v2.2.0/text-to-speech/llm-to-tts.mdx + icon: fa-solid fa-robot - page: Voice Models & Languages - path: ./v2.2.0/text-to-speech/get-voice-models-langs.mdx + path: ../pages/v2.2.0/text-to-speech/get-voice-models-langs.mdx + icon: fa-solid fa-print - section: Voice Cloning contents: - page: Types of Cloning - path: ./v2.2.0/voice-cloning/types-of-clone.mdx - - page: How to Voice Clone - path: ./v2.2.0/voice-cloning/how-to-vc.mdx + path: ../pages/v2.2.0/voice-cloning/types-of-clone.mdx + icon: fa-solid fa-circle-dot - page: Voice Clone via UI - path: ./v2.2.0/voice-cloning/how-to-vc-ui.mdx - - page: Professional Voice Cloning - path: ./v2.2.0/voice-cloning/how-to-pvc.mdx + path: ../pages/v2.2.0/voice-cloning/how-to-vc-ui.mdx + icon: fa-solid fa-desktop + - page: How to Voice Clone + path: ../pages/v2.2.0/voice-cloning/how-to-vc.mdx + icon: fa-solid fa-circle-dot - page: Delete Cloned Voice - path: ./v2.2.0/voice-cloning/how-to-delete-vc.mdx - - section: Best Practices - contents: - - page: TTS Best Practices - path: ./v2.2.0/best-practices/tts-best-practices.mdx - - page: Voice Cloning Best Practices - path: ./v2.2.0/best-practices/vc-best-practices.mdx - - page: PVC Best Practices - path: ./v2.2.0/best-practices/pvc-best-practices.mdx + path: ../pages/v2.2.0/voice-cloning/how-to-delete-vc.mdx + icon: fa-solid fa-trash + - page: Professional Voice Cloning + path: ../pages/v2.2.0/voice-cloning/how-to-pvc.mdx + icon: fa-solid fa-star - section: Integrations contents: - page: LiveKit - path: ./v2.2.0/integrations/livekit.mdx + path: ../pages/v2.2.0/integrations/livekit.mdx + icon: fa-solid fa-plug - page: Plivo - path: ./v2.2.0/integrations/plivo.mdx + path: ../pages/v2.2.0/integrations/plivo.mdx + icon: fa-solid fa-plug - page: Vonage - path: ./v2.2.0/integrations/vonage.mdx + path: 
../pages/v2.2.0/integrations/vonage.mdx + icon: fa-solid fa-plug + - section: Product + contents: + - page: Projects + path: ../pages/v2.2.0/product/projects.mdx + icon: fa-solid fa-folder + - section: Best Practices + contents: + - page: Voice Cloning Best Practices + path: ../pages/v2.2.0/best-practices/vc-best-practices.mdx + icon: fa-solid fa-lightbulb + - page: PVC Best Practices + path: ../pages/v2.2.0/best-practices/pvc-best-practices.mdx + icon: fa-solid fa-lightbulb + - page: TTS Best Practices + path: ../pages/v2.2.0/best-practices/tts-best-practices.mdx + icon: fa-solid fa-lightbulb + - tab: api-reference layout: + - section: API References + contents: + - page: Authentication + path: ../pages/v2.2.0/api-references/authentication.mdx + icon: fa-solid fa-key + - page: WebSocket + path: ../pages/v2.2.0/api-references/websocket.mdx + icon: fa-solid fa-plug - api: API Reference api-name: waves + snippets: + python: smallest-ai + typescript: smallest-ai + audiences: + - v2 + flattened: true + layout: + - section: Lightning v2 + contents: + - endpoint: POST /waves/v1/lightning-v2/get_speech + title: "Text to Speech" + - endpoint: POST /waves/v1/lightning-v2/stream + title: "Text to Speech (SSE)" + - endpoint: WSS /waves/v1/lightning-v2/get_speech/stream + title: "Text to Speech (WebSocket)" + - Lightning Large: + title: Lightning Large + - Lightning: + title: Lightning + - Voices: + title: Voices + - Voice Cloning: + title: Voice Cloning + + - tab: client-libraries + layout: + - section: Client Libraries + contents: + - page: Overview + path: ../pages/v2.2.0/client-libraries/overview.mdx + icon: fa-solid fa-code + + - tab: changelog + layout: + - changelog: ../pages/v2.2.0/changelog-entries diff --git a/fern/products/waves/versions/v2.2.0/api-references/add-voice-api.mdx b/fern/products/waves/versions/v2.2.0/api-references/add-voice-api.mdx index fa4fc8c..b1911c3 100644 --- a/fern/products/waves/versions/v2.2.0/api-references/add-voice-api.mdx +++ 
b/fern/products/waves/versions/v2.2.0/api-references/add-voice-api.mdx @@ -1,7 +1,7 @@ --- title: "Add your Voice" description: "Add your voice using the Waves API." -openapi: "POST /api/v1/lightning-large/add_voice" +openapi: "POST /waves/v1/lightning-large/add_voice" hideApiMarker: False --- @@ -9,7 +9,7 @@ hideApiMarker: False ## Sample cURL Example ```bash -curl -X POST https://waves-api.smallest.ai/api/v1/lightning-large/add_voice \ +curl -X POST https://api.smallest.ai/waves/v1/lightning-large/add_voice \ -H "Authorization: Bearer YOUR_API_KEY" \ -F "displayName=my voice" \ -F "file=@my_voice.wav;type=audio/wav" @@ -20,7 +20,7 @@ Here is a Python example using the `requests` library: ```python python import requests -url = "https://waves-api.smallest.ai/api/v1/lightning-large/add_voice" +url = "https://api.smallest.ai/waves/v1/lightning-large/add_voice" payload = {'displayName': 'my voice'} files=[ ('file', ('my_voice.wav', open('my_voice.wav','rb'), 'audio/wav')) diff --git a/fern/products/waves/versions/v2.2.0/api-references/authentication.mdx b/fern/products/waves/versions/v2.2.0/api-references/authentication.mdx index bddd057..6617424 100644 --- a/fern/products/waves/versions/v2.2.0/api-references/authentication.mdx +++ b/fern/products/waves/versions/v2.2.0/api-references/authentication.mdx @@ -10,7 +10,7 @@ Our API requires authentication using API keys to ensure secure access. ## Obtaining Your API Key -To access our API, you need an API key. You can find your API key by visiting [Smallest AI Console](https://console.smallest.ai/apikeys). +To access our API, you need an API key. You can find your API key by visiting [Smallest AI Console](https://api.smallest.ai/apikeys). ## Using API Keys @@ -31,4 +31,4 @@ Authorization: Bearer YOUR_API_KEY_HERE - **Use environment variables**: Store API keys securely instead of hardcoding them in your source code. - **Monitor API usage**: Keep track of your API calls to detect any unauthorized access. 
-For more details, visit our [API Documentation](/v2.2.0/content/getting-started/authentication). +For more details, visit our [API Documentation](/waves/v-2-2-0/guides/getting-started/authentication). diff --git a/fern/products/waves/versions/v2.2.0/api-references/delete-cloned-voice.mdx b/fern/products/waves/versions/v2.2.0/api-references/delete-cloned-voice.mdx index 29b3853..a9d01e8 100644 --- a/fern/products/waves/versions/v2.2.0/api-references/delete-cloned-voice.mdx +++ b/fern/products/waves/versions/v2.2.0/api-references/delete-cloned-voice.mdx @@ -1,6 +1,6 @@ --- title: 'Delete Cloned Voice' description: 'Delete a cloned voice using the new Waves API.' -openapi: 'DELETE /api/v1/lightning-large' +openapi: 'DELETE /waves/v1/lightning-large' hideApiMarker: False --- diff --git a/fern/products/waves/versions/v2.2.0/api-references/get-cloned-voices-api.mdx b/fern/products/waves/versions/v2.2.0/api-references/get-cloned-voices-api.mdx index 6658398..397899e 100644 --- a/fern/products/waves/versions/v2.2.0/api-references/get-cloned-voices-api.mdx +++ b/fern/products/waves/versions/v2.2.0/api-references/get-cloned-voices-api.mdx @@ -1,6 +1,6 @@ --- title: "Get your cloned Voices" description: "Retrieve your cloned voices." -openapi: "GET /api/v1/lightning-large/get_cloned_voices" +openapi: "GET /waves/v1/lightning-large/get_cloned_voices" hideApiMarker: False --- \ No newline at end of file diff --git a/fern/products/waves/versions/v2.2.0/api-references/get-voices-api.mdx b/fern/products/waves/versions/v2.2.0/api-references/get-voices-api.mdx index 3b25253..6491278 100644 --- a/fern/products/waves/versions/v2.2.0/api-references/get-voices-api.mdx +++ b/fern/products/waves/versions/v2.2.0/api-references/get-voices-api.mdx @@ -1,6 +1,6 @@ --- title: "Get Voices" description: "Get voices supported for a given model using the new Waves API." 
-openapi: "GET /api/v1/{model}/get_voices" +openapi: "GET /waves/v1/{model}/get_voices" hideApiMarker: False --- \ No newline at end of file diff --git a/fern/products/waves/versions/v2.2.0/api-references/lighntning-tts.mdx b/fern/products/waves/versions/v2.2.0/api-references/lighntning-tts.mdx index ca23c1e..34c162f 100644 --- a/fern/products/waves/versions/v2.2.0/api-references/lighntning-tts.mdx +++ b/fern/products/waves/versions/v2.2.0/api-references/lighntning-tts.mdx @@ -2,6 +2,6 @@ title: 'Text to speech' sidebarTitle: 'Text to speech' description: 'Get speech for given text using the Waves API' -openapi: 'POST /api/v1/lightning/get_speech' +openapi: 'POST /waves/v1/lightning/get_speech' hideApiMarker: False --- diff --git a/fern/products/waves/versions/v2.2.0/api-references/lightning-large-stream.mdx b/fern/products/waves/versions/v2.2.0/api-references/lightning-large-stream.mdx index 9e98093..b8909cf 100644 --- a/fern/products/waves/versions/v2.2.0/api-references/lightning-large-stream.mdx +++ b/fern/products/waves/versions/v2.2.0/api-references/lightning-large-stream.mdx @@ -2,7 +2,7 @@ title: 'Text to Speech' sidebarTitle: 'Text to Speech (SSE)' description: 'Stream speech for given text using the Lightning-Large SSE API' -openapi: 'POST /api/v1/lightning-large/stream' +openapi: 'POST /waves/v1/lightning-large/stream' --- ## Overview diff --git a/fern/products/waves/versions/v2.2.0/api-references/lightning-large-ws.mdx b/fern/products/waves/versions/v2.2.0/api-references/lightning-large-ws.mdx index 32d3cbf..f86af89 100644 --- a/fern/products/waves/versions/v2.2.0/api-references/lightning-large-ws.mdx +++ b/fern/products/waves/versions/v2.2.0/api-references/lightning-large-ws.mdx @@ -15,7 +15,7 @@ Connect to the WebSocket endpoint: ```javascript const socket = new WebSocket( - 'wss://api.smallest.ai/api/v1/lightning-large/stream' + 'wss://api.smallest.ai/waves/v1/lightning-large/stream' ); ``` @@ -25,7 +25,7 @@ Authentication is required. 
Include your API key in the connection headers: ```javascript const socket = new WebSocket( - 'wss://api.smallest.ai/api/v1/lightning-large/stream', + 'wss://api.smallest.ai/waves/v1/lightning-large/stream', { headers: { Authorization: 'Bearer YOUR_API_KEY' @@ -110,7 +110,7 @@ If an error occurs: const WebSocket = require('ws'); const ws = new WebSocket( - 'wss://waves-api.smallest.ai/api/v1/lightning-large/get_speech/stream', + 'wss://api.smallest.ai/waves/v1/lightning-large/get_speech/stream', { headers: { Authorization: 'Bearer YOUR_API_KEY' @@ -164,7 +164,7 @@ import json import base64 API_KEY = "YOUR_API_KEY" -WS_URL = "wss://waves-api.smallest.ai/api/v1/lightning-large/get_speech/stream" +WS_URL = "wss://api.smallest.ai/waves/v1/lightning-large/get_speech/stream" async def text_to_speech(): async with websockets.connect(WS_URL, extra_headers={"Authorization": f"Bearer {API_KEY}"}) as ws: diff --git a/fern/products/waves/versions/v2.2.0/api-references/lightning-large.mdx b/fern/products/waves/versions/v2.2.0/api-references/lightning-large.mdx index 69784e5..d4fc470 100644 --- a/fern/products/waves/versions/v2.2.0/api-references/lightning-large.mdx +++ b/fern/products/waves/versions/v2.2.0/api-references/lightning-large.mdx @@ -1,6 +1,6 @@ --- title: 'Text to Speech' description: 'Get speech for given text using the Waves API' -openapi: 'POST /api/v1/lightning-large/get_speech' +openapi: 'POST /waves/v1/lightning-large/get_speech' hideApiMarker: False --- diff --git a/fern/products/waves/versions/v2.2.0/api-references/lightning-tts-ws.mdx b/fern/products/waves/versions/v2.2.0/api-references/lightning-tts-ws.mdx index b5cf4f9..7cfc53e 100644 --- a/fern/products/waves/versions/v2.2.0/api-references/lightning-tts-ws.mdx +++ b/fern/products/waves/versions/v2.2.0/api-references/lightning-tts-ws.mdx @@ -15,7 +15,7 @@ Connect to the SSE endpoint: ```javascript const eventSource = new EventSource( - 'https://api.smallest.ai/api/v1/lightning/stream', + 
'https://api.smallest.ai/waves/v1/lightning/stream', { headers: { Authorization: 'Bearer YOUR_API_KEY' @@ -50,7 +50,7 @@ Send a POST request with the following JSON structure: ```javascript const eventSource = new EventSource( - 'https://api.smallest.ai/api/v1/lightning/stream', + 'https://api.smallest.ai/waves/v1/lightning/stream', { headers: { Authorization: 'Bearer YOUR_API_KEY' diff --git a/fern/products/waves/versions/v2.2.0/api-references/lightning-v2-stream.mdx b/fern/products/waves/versions/v2.2.0/api-references/lightning-v2-stream.mdx index e89bc36..007f89d 100644 --- a/fern/products/waves/versions/v2.2.0/api-references/lightning-v2-stream.mdx +++ b/fern/products/waves/versions/v2.2.0/api-references/lightning-v2-stream.mdx @@ -2,7 +2,7 @@ title: 'Text to Speech' sidebarTitle: 'Text to Speech (SSE)' description: 'Stream speech for given text using the Lightning v2 SSE API' -openapi: 'POST /api/v1/lightning-v2/stream' +openapi: 'POST /waves/v1/lightning-v2/stream' --- ## Overview diff --git a/fern/products/waves/versions/v2.2.0/api-references/lightning-v2-ws.mdx b/fern/products/waves/versions/v2.2.0/api-references/lightning-v2-ws.mdx index 6776587..a035d69 100644 --- a/fern/products/waves/versions/v2.2.0/api-references/lightning-v2-ws.mdx +++ b/fern/products/waves/versions/v2.2.0/api-references/lightning-v2-ws.mdx @@ -1,6 +1,6 @@ --- title: 'Text to Speech (WebSocket)' description: The Lightning v2 WebSocket API provides real-time text-to-speech streaming capabilities with high-quality voice synthesis. This API uses WebSocket to deliver audio chunks as they're generated, enabling low-latency audio playback without waiting for the entire audio file to process. Perfect for interactive applications, voice assistants, and real-time communication systems that require immediate audio feedback. 
For an end-to-end example of how to use the Lightning v2 WebSocket API, check out [Text to Speech (WS) Example](https://github.com/smallest-inc/waves-examples/tree/main/lightning_v2/ws_streaming) -asyncapi: "/asyncapi-spec/lightning-v2-ws.json /api/v1/lightning-v2/get_speech/stream" +asyncapi: "/asyncapi-spec/lightning-v2-ws.json /waves/v1/lightning-v2/get_speech/stream" --- diff --git a/fern/products/waves/versions/v2.2.0/api-references/lightning-v2.mdx b/fern/products/waves/versions/v2.2.0/api-references/lightning-v2.mdx index 0d51417..51853d1 100644 --- a/fern/products/waves/versions/v2.2.0/api-references/lightning-v2.mdx +++ b/fern/products/waves/versions/v2.2.0/api-references/lightning-v2.mdx @@ -1,6 +1,6 @@ --- title: 'Text to Speech' description: 'Get speech for given text using the Waves API' -openapi: 'POST /api/v1/lightning-v2/get_speech' +openapi: 'POST /waves/v1/lightning-v2/get_speech' hideApiMarker: False --- diff --git a/fern/products/waves/versions/v2.2.0/api-references/lightning.mdx b/fern/products/waves/versions/v2.2.0/api-references/lightning.mdx index ca23c1e..34c162f 100644 --- a/fern/products/waves/versions/v2.2.0/api-references/lightning.mdx +++ b/fern/products/waves/versions/v2.2.0/api-references/lightning.mdx @@ -2,6 +2,6 @@ title: 'Text to speech' sidebarTitle: 'Text to speech' description: 'Get speech for given text using the Waves API' -openapi: 'POST /api/v1/lightning/get_speech' +openapi: 'POST /waves/v1/lightning/get_speech' hideApiMarker: False --- diff --git a/fern/products/waves/versions/v2.2.0/api-references/websocket.mdx b/fern/products/waves/versions/v2.2.0/api-references/websocket.mdx index 43faf49..b7be891 100644 --- a/fern/products/waves/versions/v2.2.0/api-references/websocket.mdx +++ b/fern/products/waves/versions/v2.2.0/api-references/websocket.mdx @@ -46,7 +46,7 @@ To support longer sessions for use cases where clients need more time (e.g., lon You can include the `timeout` parameter in the WebSocket URL like so: ```json 
-wss://waves-api.smallest.ai/api/v1/lightning-v2/get_speech/stream?timeout=60 +wss://api.smallest.ai/waves/v1/lightning-v2/get_speech/stream?timeout=60 ``` @@ -103,4 +103,4 @@ The WebSocket TTS API is optimized to handle real-time text-to-speech conversion } ``` -For implementation details, check our [WebSocket API documentation](/v3.0.1/content/api-references/lightning-v2-ws). +For implementation details, check our [WebSocket API documentation](/waves/v-2-2-0/api-reference/text-to-speech/lightning-v2-web-socket). diff --git a/fern/products/waves/versions/v2.2.0/best-practices/pvc-best-practices.mdx b/fern/products/waves/versions/v2.2.0/best-practices/pvc-best-practices.mdx index 39dcc20..1a0ed52 100644 --- a/fern/products/waves/versions/v2.2.0/best-practices/pvc-best-practices.mdx +++ b/fern/products/waves/versions/v2.2.0/best-practices/pvc-best-practices.mdx @@ -6,7 +6,7 @@ icon: "user-tie" To get the most accurate and natural voice clone, it's essential to provide **high-quality reference audio**. The best practices for recording remain the same as those for **Instant Voice Cloning**, which you can find here: -🔗 **[Instant Voice Cloning - Best Practices](/v2.2.0/content/best-practices/vc-best-practices)** +🔗 **[Instant Voice Cloning - Best Practices](/waves/v-2-2-0/guides/best-practices/vc-best-practices)** However, **Professional Voice Cloning (PVC) significantly improves upon Instant Voice Cloning** in the following ways: diff --git a/fern/products/waves/versions/v2.2.0/best-practices/vc-best-practices.mdx b/fern/products/waves/versions/v2.2.0/best-practices/vc-best-practices.mdx index 115b010..74ec697 100644 --- a/fern/products/waves/versions/v2.2.0/best-practices/vc-best-practices.mdx +++ b/fern/products/waves/versions/v2.2.0/best-practices/vc-best-practices.mdx @@ -6,7 +6,7 @@ icon: "clone" To achieve the best results when cloning your voice, it's essential to provide high-quality reference audio. 
Below are some best practices, dos and don'ts, and examples to guide you. -Ready to Clone Your Voice? Try it out on our platform [waves.smallest.ai](https://waves.smallest.ai/studio/create) +Ready to Clone Your Voice? Try it out on our platform [api.smallest.ai](https://api.smallest.ai/studio/create) --- diff --git a/fern/products/waves/versions/v2.2.0/changelog/announcements.mdx b/fern/products/waves/versions/v2.2.0/changelog/announcements.mdx index ba62d8b..f41edaf 100644 --- a/fern/products/waves/versions/v2.2.0/changelog/announcements.mdx +++ b/fern/products/waves/versions/v2.2.0/changelog/announcements.mdx @@ -13,7 +13,7 @@ mode: "center" - **100ms TTFB**: Superfast and scalable to support your realtime applications. - **0.05 per 10K characters**: 3x cheaper than other providers. - Experience the new capabilities of Lightning v2 on our [Platform](https://waves.smallest.ai). + Experience the new capabilities of Lightning v2 on our [Platform](https://api.smallest.ai). @@ -25,7 +25,7 @@ mode: "center" - **Versatile Applications**: Ideal for global applications requiring diverse language support. - **Beta Stage**: Currently in beta, with ongoing improvements and updates. - Experience the new capabilities of Lightning Multilingual on our [Platform](https://waves.smallest.ai). + Experience the new capabilities of Lightning Multilingual on our [Platform](https://api.smallest.ai). @@ -35,26 +35,26 @@ mode: "center" - **Enhanced Voice Cloning**: More accurate and lifelike voice clones. - **Best-in-Market Latency**: Instant voice cloning with a latency of under 300ms. - Experience the new capabilities of Lightning Large via our [Platform](https://waves.smallest.ai) or the [API](/v2.2.0/content/api-references/lightning). + Experience the new capabilities of Lightning Large via our [Platform](https://api.smallest.ai) or the [API](/waves/v-2-2-0/api-reference/text-to-speech/lightning-v-31). 
## Introducing Waves - [Waves](https://waves.smallest.ai) is our new unified platform for speech synthesis. It supports various models designed for real-time applications such as voicebots. + [Waves](https://api.smallest.ai) is our new unified platform for speech synthesis. It supports various models designed for real-time applications such as voicebots. However, waves will also be supporting various features such as voice cloning, accent cloning, style transfer, speech to speech etc in the near future, making it a ubiquitous platform for anyone who needs AI generated speech. - The voices available on waves platform are available via the [Waves API](/v2.2.0/content/api-references/lightning) + The voices available on waves platform are available via the [Waves API](/waves/v-2-2-0/api-reference/text-to-speech/lightning-v-31) ## Introducing Lightning - World's Fastest Text to Speech Lightning is the world's fastest text to speech model, generating around 10 seconds of hyper-realistic audio in just 100ms, all at once, no streaming. - Read more about lightning in our release post [here](https://smallest.ai/blog/lightning-fast-text-to-speech). + Read more about lightning in our release post [here](https://smallest.ai). 
- You can access lightning via the [Waves API](/v2.2.0/content/api-references/lightning) + You can access lightning via the [Waves API](/waves/v-2-2-0/api-reference/text-to-speech/lightning-v-31) *A lot more coming up, very soon* \ No newline at end of file diff --git a/fern/products/waves/versions/v2.2.0/getting-started/authentication.mdx b/fern/products/waves/versions/v2.2.0/getting-started/authentication.mdx index a12a7a1..15413b5 100644 --- a/fern/products/waves/versions/v2.2.0/getting-started/authentication.mdx +++ b/fern/products/waves/versions/v2.2.0/getting-started/authentication.mdx @@ -25,7 +25,7 @@ Authorization: Bearer YOUR_API_KEY Test the API with this curl command by replacing `YOUR_API_KEY` with your actual key: ```bash -curl 'https://waves-api.smallest.ai/api/v1/lightning/get_voices' \ +curl 'https://api.smallest.ai/waves/v1/lightning/get_voices' \ -H 'Content-Type: application/json' \ -H 'Authorization: Bearer YOUR_API_KEY' ``` diff --git a/fern/products/waves/versions/v2.2.0/getting-started/quickstart.mdx b/fern/products/waves/versions/v2.2.0/getting-started/quickstart.mdx index cd8022f..0e9b573 100644 --- a/fern/products/waves/versions/v2.2.0/getting-started/quickstart.mdx +++ b/fern/products/waves/versions/v2.2.0/getting-started/quickstart.mdx @@ -6,7 +6,7 @@ icon: "rocket" ## Step 1: Sign Up & get the API Key -1. Visit [waves.smallest.ai](https://waves.smallest.ai/) and sign up for an account or log in if you already have an account. +1. Visit [api.smallest.ai](https://api.smallest.ai/) and sign up for an account or log in if you already have an account. 2. Navigate to `API Key` tab in your account dashboard. 3. Create a new API Key and copy it. 4. Export the API Key in your environment with the name `SMALLEST_API_KEY`, ensuring that your application can access it securely for authentication. 
diff --git a/fern/products/waves/versions/v2.2.0/introduction/introduction.mdx b/fern/products/waves/versions/v2.2.0/introduction/introduction.mdx index d53aed8..66a4a8c 100644 --- a/fern/products/waves/versions/v2.2.0/introduction/introduction.mdx +++ b/fern/products/waves/versions/v2.2.0/introduction/introduction.mdx @@ -6,7 +6,7 @@ icon: "globe" ## About Waves -Welcome to [Waves](https://waves.smallest.ai/), the cutting-edge platform designed to deliver real-time, hyper-realistic text-to-speech solutions. Our platform leverages advanced AI models to provide seamless and natural-sounding speech synthesis, catering to a wide range of applications including voicebots, interactive systems, and more. +Welcome to [Waves](https://api.smallest.ai/), the cutting-edge platform designed to deliver real-time, hyper-realistic text-to-speech solutions. Our platform leverages advanced AI models to provide seamless and natural-sounding speech synthesis, catering to a wide range of applications including voicebots, interactive systems, and more. ### Key Features @@ -16,7 +16,7 @@ Welcome to [Waves](https://waves.smallest.ai/), the cutting-edge platform design ### Stay Updated -We are committed to continuous improvement and innovation. All new releases and updates are announced [here](/v2.2.0/content/changelog/announcements). +We are committed to continuous improvement and innovation. All new releases and updates are announced [here](/waves/v-2-2-0/guides/changelog/announcements). ### Get in Touch diff --git a/fern/products/waves/versions/v2.2.0/product/projects.mdx b/fern/products/waves/versions/v2.2.0/product/projects.mdx index 1d603fe..32cffb9 100644 --- a/fern/products/waves/versions/v2.2.0/product/projects.mdx +++ b/fern/products/waves/versions/v2.2.0/product/projects.mdx @@ -112,7 +112,7 @@ Welcome to the official documentation for our text-to-speech (TTS) project. Our ### Installation & Setup -1. Register for an account and +1. 
Register for an account and log into the platform. 2. Create a new project or open an existing one. 3. Add or paste your text content to the project. diff --git a/fern/products/waves/versions/v2.2.0/text-to-speech/get-voice-models-langs.mdx b/fern/products/waves/versions/v2.2.0/text-to-speech/get-voice-models-langs.mdx index 8e931d1..7a152ca 100644 --- a/fern/products/waves/versions/v2.2.0/text-to-speech/get-voice-models-langs.mdx +++ b/fern/products/waves/versions/v2.2.0/text-to-speech/get-voice-models-langs.mdx @@ -11,7 +11,7 @@ In this tutorial, you will learn how to use the Smallest AI platform to fetch th Before you begin, ensure you have the following: - Python (3.9 or higher) installed on your machine. -- An API key from the Smallest AI platform (sign up [here](https://waves.smallest.ai)). +- An API key from the Smallest AI platform (sign up [here](https://api.smallest.ai)). - The Smallest AI Python SDK installed. If you haven't installed it yet, follow the instructions below: ### Install the SDK diff --git a/fern/products/waves/versions/v2.2.0/text-to-speech/how-to-tts.mdx b/fern/products/waves/versions/v2.2.0/text-to-speech/how-to-tts.mdx index 23a4c7b..d176b21 100644 --- a/fern/products/waves/versions/v2.2.0/text-to-speech/how-to-tts.mdx +++ b/fern/products/waves/versions/v2.2.0/text-to-speech/how-to-tts.mdx @@ -13,7 +13,7 @@ You can access the source code for the Python SDK on our [GitHub repository](htt Before you begin, ensure you have the following: - Python (3.9 or higher) installed on your machine. -- An API key from the Smallest AI platform (sign up [here](https://waves.smallest.ai)). +- An API key from the Smallest AI platform (sign up [here](https://api.smallest.ai)). 
## Setup diff --git a/fern/products/waves/versions/v2.2.0/voice-cloning/how-to-delete-vc.mdx b/fern/products/waves/versions/v2.2.0/voice-cloning/how-to-delete-vc.mdx index 4bedc84..bd238a1 100644 --- a/fern/products/waves/versions/v2.2.0/voice-cloning/how-to-delete-vc.mdx +++ b/fern/products/waves/versions/v2.2.0/voice-cloning/how-to-delete-vc.mdx @@ -12,7 +12,7 @@ You can access the source code for the Python SDK on our [GitHub repository](htt Before you begin, ensure you have the following: - Python (3.9 or higher) installed on your machine. -- An API key from the Smallest AI platform (sign up [here](https://waves.smallest.ai)). +- An API key from the Smallest AI platform (sign up [here](https://api.smallest.ai)). ## Setup diff --git a/fern/products/waves/versions/v2.2.0/voice-cloning/how-to-pvc.mdx b/fern/products/waves/versions/v2.2.0/voice-cloning/how-to-pvc.mdx index ddb07ff..ec40dbf 100644 --- a/fern/products/waves/versions/v2.2.0/voice-cloning/how-to-pvc.mdx +++ b/fern/products/waves/versions/v2.2.0/voice-cloning/how-to-pvc.mdx @@ -9,7 +9,7 @@ In this tutorial, you will learn how to use the Smallest AI platform to create a # Creating a Professional Voice Clone 1. **Go to the Smallest AI Platform** - Navigate to [smallest.ai](https://waves.smallest.ai/voice-clone) and click on **Create New**. In the modal that appears, select **Professional Voice Clone**. This will direct you to the setup page: + Navigate to [smallest.ai](https://api.smallest.ai/voice-clone) and click on **Create New**. In the modal that appears, select **Professional Voice Clone**. 
This will direct you to the setup page: ![Voice Clone Setup](../../../images/pvc_page.png) diff --git a/fern/products/waves/versions/v2.2.0/voice-cloning/how-to-vc-ui.mdx b/fern/products/waves/versions/v2.2.0/voice-cloning/how-to-vc-ui.mdx index 7b110df..af84419 100644 --- a/fern/products/waves/versions/v2.2.0/voice-cloning/how-to-vc-ui.mdx +++ b/fern/products/waves/versions/v2.2.0/voice-cloning/how-to-vc-ui.mdx @@ -8,7 +8,7 @@ In this tutorial, you will learn how to use the **Smallest AI** platform to crea # Creating an Instant Voice Clone 1️. **Go to the Smallest AI Platform** - - Navigate to **[smallest.ai](https://waves.smallest.ai/voice-clone)** and click on **Create New**. + - Navigate to **[smallest.ai](https://api.smallest.ai/voice-clone)** and click on **Create New**. - In the modal that appears, select **Instant Voice Clone**. ![Voice Clone Setup](../../../images/ivc-image-1.png) @@ -16,7 +16,7 @@ In this tutorial, you will learn how to use the **Smallest AI** platform to crea 2️. **Upload Your Clean Reference Audio** - Select a **short, high-quality** audio clip (5-15 seconds). - Ensure the recording is **clear and noise-free** for the best results. - - Follow the recommended **[best practices](/v2.2.0/content/best-practices/vc-best-practices)** to maximize quality. + - Follow the recommended **[best practices](/waves/v-2-2-0/guides/best-practices/vc-best-practices)** to maximize quality. 
![Upload your clean reference audio](../../../images/ivc-image-2.png) diff --git a/fern/products/waves/versions/v2.2.0/voice-cloning/how-to-vc.mdx b/fern/products/waves/versions/v2.2.0/voice-cloning/how-to-vc.mdx index 534c085..a642a0d 100644 --- a/fern/products/waves/versions/v2.2.0/voice-cloning/how-to-vc.mdx +++ b/fern/products/waves/versions/v2.2.0/voice-cloning/how-to-vc.mdx @@ -12,7 +12,7 @@ You can access the source code for the Python SDK on our [GitHub repository](htt Before you begin, ensure you have the following: - Python (3.9 or higher) installed on your machine. -- An API key from the Smallest AI platform (sign up [here](https://waves.smallest.ai)). +- An API key from the Smallest AI platform (sign up [here](https://api.smallest.ai)). ## Setup diff --git a/fern/products/waves/versions/v3.0.1.yml b/fern/products/waves/versions/v3.0.1.yml index 6770a5d..fb369f4 100644 --- a/fern/products/waves/versions/v3.0.1.yml +++ b/fern/products/waves/versions/v3.0.1.yml @@ -1,59 +1,139 @@ tabs: - docs: - display-name: Docs - icon: home + documentation: + display-name: Documentation + icon: fa-solid fa-book api-reference: display-name: API Reference - icon: puzzle + icon: fa-solid fa-puzzle-piece + client-libraries: + display-name: Client Libraries + icon: fa-solid fa-code + changelog: + display-name: Changelog + icon: fa-solid fa-clock-rotate-left navigation: - - tab: docs + - tab: documentation layout: - - section: Getting Started + - section: Introduction contents: - page: Introduction - path: ./v3.0.1/introduction/introduction.mdx + path: ../pages/v3.0.1/introduction/introduction.mdx + icon: fa-regular fa-book-open + - section: Getting Started + contents: - page: Quickstart - path: ./v3.0.1/getting-started/quickstart.mdx + path: ../pages/v3.0.1/getting-started/quickstart.mdx + icon: fa-solid fa-rocket + - page: Models + path: ../pages/v3.0.1/getting-started/models.mdx + icon: fa-solid fa-cube - page: Authentication - path: 
./v3.0.1/getting-started/authentication.mdx + path: ../pages/v3.0.1/getting-started/authentication.mdx + icon: fa-solid fa-lock + - page: HTTP Streaming + path: ../pages/v3.0.1/getting-started/http-stream.mdx + icon: fa-solid fa-bars-staggered - section: Text to Speech contents: - page: How to TTS - path: ./v3.0.1/text-to-speech/how-to-tts.mdx + path: ../pages/v3.0.1/text-to-speech/how-to-tts.mdx + icon: fa-solid fa-diagram-project - page: LLM to TTS - path: ./v3.0.1/text-to-speech/llm-to-tts.mdx + path: ../pages/v3.0.1/text-to-speech/llm-to-tts.mdx + icon: fa-solid fa-robot - page: Voice Models & Languages - path: ./v3.0.1/text-to-speech/get-voice-models-langs.mdx + path: ../pages/v3.0.1/text-to-speech/get-voice-models-langs.mdx + icon: fa-solid fa-print - section: Voice Cloning contents: - page: Types of Cloning - path: ./v3.0.1/voice-cloning/types-of-clone.mdx - - page: How to Voice Clone - path: ./v3.0.1/voice-cloning/how-to-vc.mdx + path: ../pages/v3.0.1/voice-cloning/types-of-clone.mdx + icon: fa-solid fa-circle-dot - page: Voice Clone via UI - path: ./v3.0.1/voice-cloning/how-to-vc-ui.mdx - - page: Professional Voice Cloning - path: ./v3.0.1/voice-cloning/how-to-pvc.mdx + path: ../pages/v3.0.1/voice-cloning/how-to-vc-ui.mdx + icon: fa-solid fa-desktop + - page: How to Voice Clone + path: ../pages/v3.0.1/voice-cloning/how-to-vc.mdx + icon: fa-solid fa-circle-dot - page: Delete Cloned Voice - path: ./v3.0.1/voice-cloning/how-to-delete-vc.mdx - - section: Best Practices - contents: - - page: TTS Best Practices - path: ./v3.0.1/best-practices/tts-best-practices.mdx - - page: Voice Cloning Best Practices - path: ./v3.0.1/best-practices/vc-best-practices.mdx - - page: PVC Best Practices - path: ./v3.0.1/best-practices/pvc-best-practices.mdx + path: ../pages/v3.0.1/voice-cloning/how-to-delete-vc.mdx + icon: fa-solid fa-trash + - page: Professional Voice Cloning + path: ../pages/v3.0.1/voice-cloning/how-to-pvc.mdx + icon: fa-solid fa-star - section: Integrations 
contents: - page: LiveKit - path: ./v3.0.1/integrations/livekit.mdx + path: ../pages/v3.0.1/integrations/livekit.mdx + icon: fa-solid fa-plug - page: Plivo - path: ./v3.0.1/integrations/plivo.mdx + path: ../pages/v3.0.1/integrations/plivo.mdx + icon: fa-solid fa-plug - page: Vonage - path: ./v3.0.1/integrations/vonage.mdx + path: ../pages/v3.0.1/integrations/vonage.mdx + icon: fa-solid fa-plug + - section: Product + contents: + - page: Projects + path: ../pages/v3.0.1/product/projects.mdx + icon: fa-solid fa-folder + - section: Best Practices + contents: + - page: Voice Cloning Best Practices + path: ../pages/v3.0.1/best-practices/vc-best-practices.mdx + icon: fa-solid fa-lightbulb + - page: PVC Best Practices + path: ../pages/v3.0.1/best-practices/pvc-best-practices.mdx + icon: fa-solid fa-lightbulb + - page: TTS Best Practices + path: ../pages/v3.0.1/best-practices/tts-best-practices.mdx + icon: fa-solid fa-lightbulb + - tab: api-reference layout: + - section: API References + contents: + - page: Authentication + path: ../pages/v3.0.1/api-references/authentication.mdx + icon: fa-solid fa-key + - page: WebSocket + path: ../pages/v3.0.1/api-references/websocket.mdx + icon: fa-solid fa-plug - api: API Reference api-name: waves + snippets: + python: smallest-ai + typescript: smallest-ai + audiences: + - v3 + flattened: true + layout: + - section: Lightning v2 + contents: + - endpoint: POST /waves/v1/lightning-v2/get_speech + title: "Text to Speech" + - endpoint: POST /waves/v1/lightning-v2/stream + title: "Text to Speech (SSE)" + - endpoint: WSS /waves/v1/lightning-v2/get_speech/stream + title: "Text to Speech (WebSocket)" + - Lightning Large: + title: Lightning Large + - Lightning: + title: Lightning + - Voices: + title: Voices + - Voice Cloning: + title: Voice Cloning + + - tab: client-libraries + layout: + - section: Client Libraries + contents: + - page: Overview + path: ../pages/v3.0.1/client-libraries/overview.mdx + icon: fa-solid fa-code + + - tab: changelog + 
layout: + - changelog: ../pages/v3.0.1/changelog-entries diff --git a/fern/products/waves/versions/v3.0.1/api-references/add-voice-api.mdx b/fern/products/waves/versions/v3.0.1/api-references/add-voice-api.mdx index fa4fc8c..b1911c3 100644 --- a/fern/products/waves/versions/v3.0.1/api-references/add-voice-api.mdx +++ b/fern/products/waves/versions/v3.0.1/api-references/add-voice-api.mdx @@ -1,7 +1,7 @@ --- title: "Add your Voice" description: "Add your voice using the Waves API." -openapi: "POST /api/v1/lightning-large/add_voice" +openapi: "POST /waves/v1/lightning-large/add_voice" hideApiMarker: False --- @@ -9,7 +9,7 @@ hideApiMarker: False ## Sample cURL Example ```bash -curl -X POST https://waves-api.smallest.ai/api/v1/lightning-large/add_voice \ +curl -X POST https://api.smallest.ai/waves/v1/lightning-large/add_voice \ -H "Authorization: Bearer YOUR_API_KEY" \ -F "displayName=my voice" \ -F "file=@my_voice.wav;type=audio/wav" @@ -20,7 +20,7 @@ Here is a Python example using the `requests` library: ```python python import requests -url = "https://waves-api.smallest.ai/api/v1/lightning-large/add_voice" +url = "https://api.smallest.ai/waves/v1/lightning-large/add_voice" payload = {'displayName': 'my voice'} files=[ ('file', ('my_voice.wav', open('my_voice.wav','rb'), 'audio/wav')) diff --git a/fern/products/waves/versions/v3.0.1/api-references/authentication.mdx b/fern/products/waves/versions/v3.0.1/api-references/authentication.mdx index cb07dd3..a80208e 100644 --- a/fern/products/waves/versions/v3.0.1/api-references/authentication.mdx +++ b/fern/products/waves/versions/v3.0.1/api-references/authentication.mdx @@ -10,7 +10,7 @@ Our API requires authentication using API keys to ensure secure access. ## Obtaining Your API Key -To access our API, you need an API key. You can find your API key by visiting [Smallest AI Console](https://console.smallest.ai/apikeys). +To access our API, you need an API key. 
You can find your API key by visiting [Smallest AI Console](https://api.smallest.ai/apikeys). ## Using API Keys @@ -31,4 +31,4 @@ Authorization: Bearer YOUR_API_KEY_HERE - **Use environment variables**: Store API keys securely instead of hardcoding them in your source code. - **Monitor API usage**: Keep track of your API calls to detect any unauthorized access. -For more details, visit our [API Documentation](/v3.0.1/content/getting-started/authentication). +For more details, visit our [API Documentation](/waves/v-3-0-1/guides/getting-started/authentication). diff --git a/fern/products/waves/versions/v3.0.1/api-references/delete-cloned-voice.mdx b/fern/products/waves/versions/v3.0.1/api-references/delete-cloned-voice.mdx index 29b3853..a9d01e8 100644 --- a/fern/products/waves/versions/v3.0.1/api-references/delete-cloned-voice.mdx +++ b/fern/products/waves/versions/v3.0.1/api-references/delete-cloned-voice.mdx @@ -1,6 +1,6 @@ --- title: 'Delete Cloned Voice' description: 'Delete a cloned voice using the new Waves API.' -openapi: 'DELETE /api/v1/lightning-large' +openapi: 'DELETE /waves/v1/lightning-large' hideApiMarker: False --- diff --git a/fern/products/waves/versions/v3.0.1/api-references/get-cloned-voices-api.mdx b/fern/products/waves/versions/v3.0.1/api-references/get-cloned-voices-api.mdx index 6658398..397899e 100644 --- a/fern/products/waves/versions/v3.0.1/api-references/get-cloned-voices-api.mdx +++ b/fern/products/waves/versions/v3.0.1/api-references/get-cloned-voices-api.mdx @@ -1,6 +1,6 @@ --- title: "Get your cloned Voices" description: "Retrieve your cloned voices." 
-openapi: "GET /api/v1/lightning-large/get_cloned_voices" +openapi: "GET /waves/v1/lightning-large/get_cloned_voices" hideApiMarker: False --- \ No newline at end of file diff --git a/fern/products/waves/versions/v3.0.1/api-references/get-voices-api.mdx b/fern/products/waves/versions/v3.0.1/api-references/get-voices-api.mdx index 3b25253..6491278 100644 --- a/fern/products/waves/versions/v3.0.1/api-references/get-voices-api.mdx +++ b/fern/products/waves/versions/v3.0.1/api-references/get-voices-api.mdx @@ -1,6 +1,6 @@ --- title: "Get Voices" description: "Get voices supported for a given model using the new Waves API." -openapi: "GET /api/v1/{model}/get_voices" +openapi: "GET /waves/v1/{model}/get_voices" hideApiMarker: False --- \ No newline at end of file diff --git a/fern/products/waves/versions/v3.0.1/api-references/lighntning-tts.mdx b/fern/products/waves/versions/v3.0.1/api-references/lighntning-tts.mdx index ca23c1e..34c162f 100644 --- a/fern/products/waves/versions/v3.0.1/api-references/lighntning-tts.mdx +++ b/fern/products/waves/versions/v3.0.1/api-references/lighntning-tts.mdx @@ -2,6 +2,6 @@ title: 'Text to speech' sidebarTitle: 'Text to speech' description: 'Get speech for given text using the Waves API' -openapi: 'POST /api/v1/lightning/get_speech' +openapi: 'POST /waves/v1/lightning/get_speech' hideApiMarker: False --- diff --git a/fern/products/waves/versions/v3.0.1/api-references/lightning-large-stream.mdx b/fern/products/waves/versions/v3.0.1/api-references/lightning-large-stream.mdx index 54008dc..e314d6e 100644 --- a/fern/products/waves/versions/v3.0.1/api-references/lightning-large-stream.mdx +++ b/fern/products/waves/versions/v3.0.1/api-references/lightning-large-stream.mdx @@ -2,7 +2,7 @@ title: 'Text to Speech' sidebarTitle: 'Text to Speech (SSE)' description: 'Stream speech for given text using the Lightning-Large SSE API' -openapi: 'POST /api/v1/lightning-large/stream' +openapi: 'POST /waves/v1/lightning-large/stream' --- ## Overview diff 
--git a/fern/products/waves/versions/v3.0.1/api-references/lightning-large-ws.mdx b/fern/products/waves/versions/v3.0.1/api-references/lightning-large-ws.mdx index b97812e..9660c2b 100644 --- a/fern/products/waves/versions/v3.0.1/api-references/lightning-large-ws.mdx +++ b/fern/products/waves/versions/v3.0.1/api-references/lightning-large-ws.mdx @@ -15,7 +15,7 @@ Connect to the WebSocket endpoint: ```javascript const socket = new WebSocket( - 'wss://api.smallest.ai/api/v1/lightning-large/stream' + 'wss://api.smallest.ai/waves/v1/lightning-large/stream' ); ``` @@ -25,7 +25,7 @@ Authentication is required. Include your API key in the connection headers: ```javascript const socket = new WebSocket( - 'wss://api.smallest.ai/api/v1/lightning-large/stream', + 'wss://api.smallest.ai/waves/v1/lightning-large/stream', { headers: { Authorization: 'Bearer YOUR_API_KEY' @@ -110,7 +110,7 @@ If an error occurs: const WebSocket = require('ws'); const ws = new WebSocket( - 'wss://waves-api.smallest.ai/api/v1/lightning-large/get_speech/stream', + 'wss://api.smallest.ai/waves/v1/lightning-large/get_speech/stream', { headers: { Authorization: 'Bearer YOUR_API_KEY' @@ -164,7 +164,7 @@ import json import base64 API_KEY = "YOUR_API_KEY" -WS_URL = "wss://waves-api.smallest.ai/api/v1/lightning-large/get_speech/stream" +WS_URL = "wss://api.smallest.ai/waves/v1/lightning-large/get_speech/stream" async def text_to_speech(): async with websockets.connect(WS_URL, extra_headers={"Authorization": f"Bearer {API_KEY}"}) as ws: diff --git a/fern/products/waves/versions/v3.0.1/api-references/lightning-large.mdx b/fern/products/waves/versions/v3.0.1/api-references/lightning-large.mdx index 69784e5..d4fc470 100644 --- a/fern/products/waves/versions/v3.0.1/api-references/lightning-large.mdx +++ b/fern/products/waves/versions/v3.0.1/api-references/lightning-large.mdx @@ -1,6 +1,6 @@ --- title: 'Text to Speech' description: 'Get speech for given text using the Waves API' -openapi: 'POST 
/api/v1/lightning-large/get_speech' +openapi: 'POST /waves/v1/lightning-large/get_speech' hideApiMarker: False --- diff --git a/fern/products/waves/versions/v3.0.1/api-references/lightning-tts-ws.mdx b/fern/products/waves/versions/v3.0.1/api-references/lightning-tts-ws.mdx index 3cfeb08..bfb2538 100644 --- a/fern/products/waves/versions/v3.0.1/api-references/lightning-tts-ws.mdx +++ b/fern/products/waves/versions/v3.0.1/api-references/lightning-tts-ws.mdx @@ -15,7 +15,7 @@ Connect to the SSE endpoint: ```javascript const eventSource = new EventSource( - 'https://api.smallest.ai/api/v1/lightning/stream', + 'https://api.smallest.ai/waves/v1/lightning/stream', { headers: { Authorization: 'Bearer YOUR_API_KEY' @@ -50,7 +50,7 @@ Send a POST request with the following JSON structure: ```javascript const eventSource = new EventSource( - 'https://api.smallest.ai/api/v1/lightning/stream', + 'https://api.smallest.ai/waves/v1/lightning/stream', { headers: { Authorization: 'Bearer YOUR_API_KEY' diff --git a/fern/products/waves/versions/v3.0.1/api-references/lightning-v2-stream.mdx b/fern/products/waves/versions/v3.0.1/api-references/lightning-v2-stream.mdx index e89bc36..007f89d 100644 --- a/fern/products/waves/versions/v3.0.1/api-references/lightning-v2-stream.mdx +++ b/fern/products/waves/versions/v3.0.1/api-references/lightning-v2-stream.mdx @@ -2,7 +2,7 @@ title: 'Text to Speech' sidebarTitle: 'Text to Speech (SSE)' description: 'Stream speech for given text using the Lightning v2 SSE API' -openapi: 'POST /api/v1/lightning-v2/stream' +openapi: 'POST /waves/v1/lightning-v2/stream' --- ## Overview diff --git a/fern/products/waves/versions/v3.0.1/api-references/lightning-v2-ws.mdx b/fern/products/waves/versions/v3.0.1/api-references/lightning-v2-ws.mdx index 5343f6d..f7d82d7 100644 --- a/fern/products/waves/versions/v3.0.1/api-references/lightning-v2-ws.mdx +++ b/fern/products/waves/versions/v3.0.1/api-references/lightning-v2-ws.mdx @@ -1,6 +1,6 @@ --- title: 'Text to Speech 
(WebSocket)' description: The Lightning v2 WebSocket API provides real-time text-to-speech streaming capabilities with high-quality voice synthesis. This API uses WebSocket to deliver audio chunks as they're generated, enabling low-latency audio playback without waiting for the entire audio file to process. For an end-to-end example of how to use the Lightning v2 WebSocket API, check out [Text to Speech (WS) Example](https://github.com/smallest-inc/waves-examples/tree/main/lightning_v2/ws_streaming) -asyncapi: "/asyncapi-spec/lightning-v2-ws.json /api/v1/lightning-v2/get_speech/stream" +asyncapi: "/asyncapi-spec/lightning-v2-ws.json /waves/v1/lightning-v2/get_speech/stream" --- diff --git a/fern/products/waves/versions/v3.0.1/api-references/lightning-v2.mdx b/fern/products/waves/versions/v3.0.1/api-references/lightning-v2.mdx index 0d51417..51853d1 100644 --- a/fern/products/waves/versions/v3.0.1/api-references/lightning-v2.mdx +++ b/fern/products/waves/versions/v3.0.1/api-references/lightning-v2.mdx @@ -1,6 +1,6 @@ --- title: 'Text to Speech' description: 'Get speech for given text using the Waves API' -openapi: 'POST /api/v1/lightning-v2/get_speech' +openapi: 'POST /waves/v1/lightning-v2/get_speech' hideApiMarker: False --- diff --git a/fern/products/waves/versions/v3.0.1/api-references/lightning.mdx b/fern/products/waves/versions/v3.0.1/api-references/lightning.mdx index ca23c1e..34c162f 100644 --- a/fern/products/waves/versions/v3.0.1/api-references/lightning.mdx +++ b/fern/products/waves/versions/v3.0.1/api-references/lightning.mdx @@ -2,6 +2,6 @@ title: 'Text to speech' sidebarTitle: 'Text to speech' description: 'Get speech for given text using the Waves API' -openapi: 'POST /api/v1/lightning/get_speech' +openapi: 'POST /waves/v1/lightning/get_speech' hideApiMarker: False --- diff --git a/fern/products/waves/versions/v3.0.1/api-references/websocket.mdx b/fern/products/waves/versions/v3.0.1/api-references/websocket.mdx index ae738c7..24bb215 100644 --- 
a/fern/products/waves/versions/v3.0.1/api-references/websocket.mdx +++ b/fern/products/waves/versions/v3.0.1/api-references/websocket.mdx @@ -46,7 +46,7 @@ To support longer sessions for use cases where clients need more time (e.g., lon You can include the `timeout` parameter in the WebSocket URL like so: ```link -wss://waves-api.smallest.ai/api/v1/lightning-v2/get_speech/stream?timeout=60 +wss://api.smallest.ai/waves/v1/lightning-v2/get_speech/stream?timeout=60 ``` @@ -103,4 +103,4 @@ The WebSocket TTS API is optimized to handle real-time text-to-speech conversion } ``` -For implementation details, check our [WebSocket API documentation](/v3.0.1/content/api-references/lightning-v2-ws). +For implementation details, check our [WebSocket API documentation](/waves/v-3-0-1/api-reference/text-to-speech/lightning-v2-web-socket). diff --git a/fern/products/waves/versions/v3.0.1/best-practices/pvc-best-practices.mdx b/fern/products/waves/versions/v3.0.1/best-practices/pvc-best-practices.mdx index 8a1e183..2837886 100644 --- a/fern/products/waves/versions/v3.0.1/best-practices/pvc-best-practices.mdx +++ b/fern/products/waves/versions/v3.0.1/best-practices/pvc-best-practices.mdx @@ -6,7 +6,7 @@ icon: "user-tie" To get the most accurate and natural voice clone, it's essential to provide **high-quality reference audio**. 
The best practices for recording remain the same as those for **Instant Voice Cloning**, which you can find here: -🔗 **[Instant Voice Cloning - Best Practices](/v3.0.1/content/best-practices/vc-best-practices)** +🔗 **[Instant Voice Cloning - Best Practices](/waves/v-3-0-1/guides/best-practices/vc-best-practices)** However, **Professional Voice Cloning (PVC) significantly improves upon Instant Voice Cloning** in the following ways: diff --git a/fern/products/waves/versions/v3.0.1/best-practices/vc-best-practices.mdx b/fern/products/waves/versions/v3.0.1/best-practices/vc-best-practices.mdx index 115b010..74ec697 100644 --- a/fern/products/waves/versions/v3.0.1/best-practices/vc-best-practices.mdx +++ b/fern/products/waves/versions/v3.0.1/best-practices/vc-best-practices.mdx @@ -6,7 +6,7 @@ icon: "clone" To achieve the best results when cloning your voice, it's essential to provide high-quality reference audio. Below are some best practices, dos and don'ts, and examples to guide you. -Ready to Clone Your Voice? Try it out on our platform [waves.smallest.ai](https://waves.smallest.ai/studio/create) +Ready to Clone Your Voice? Try it out on our platform [api.smallest.ai](https://api.smallest.ai/studio/create) --- diff --git a/fern/products/waves/versions/v3.0.1/changelog/announcements.mdx b/fern/products/waves/versions/v3.0.1/changelog/announcements.mdx index af82811..3d8ead4 100644 --- a/fern/products/waves/versions/v3.0.1/changelog/announcements.mdx +++ b/fern/products/waves/versions/v3.0.1/changelog/announcements.mdx @@ -13,7 +13,7 @@ mode: "center" - **100ms TTFB**: Superfast and scalable to support your realtime applications. - **0.05 per 10K characters**: 3x cheaper than other providers. - Experience the new capabilities of Lightning v2 on our [Platform](https://waves.smallest.ai). + Experience the new capabilities of Lightning v2 on our [Platform](https://api.smallest.ai). 
@@ -25,7 +25,7 @@ mode: "center" - **Versatile Applications**: Ideal for global applications requiring diverse language support. - **Beta Stage**: Currently in beta, with ongoing improvements and updates. - Experience the new capabilities of Lightning Multilingual on our [Platform](https://waves.smallest.ai). + Experience the new capabilities of Lightning Multilingual on our [Platform](https://api.smallest.ai). @@ -35,26 +35,26 @@ mode: "center" - **Enhanced Voice Cloning**: More accurate and lifelike voice clones. - **Best-in-Market Latency**: Instant voice cloning with a latency of under 300ms. - Experience the new capabilities of Lightning Large via our [Platform](https://waves.smallest.ai) or the [API](/v3.0.1/content/api-references/lightning). + Experience the new capabilities of Lightning Large via our [Platform](https://api.smallest.ai) or the [API](/waves/v-3-0-1/api-reference/text-to-speech/lightning-v-31). ## Introducing Waves - [Waves](https://waves.smallest.ai) is our new unified platform for speech synthesis. It supports various models designed for real-time applications such as voicebots. + [Waves](https://api.smallest.ai) is our new unified platform for speech synthesis. It supports various models designed for real-time applications such as voicebots. However, waves will also be supporting various features such as voice cloning, accent cloning, style transfer, speech to speech etc in the near future, making it a ubiquitous platform for anyone who needs AI generated speech. - The voices available on waves platform are available via the [Waves API](/v3.0.1/content/api-references/lightning) + The voices available on waves platform are available via the [Waves API](/waves/v-3-0-1/api-reference/text-to-speech/lightning-v-31) ## Introducing Lightning - World's Fastest Text to Speech Lightning is the world's fastest text to speech model, generating around 10 seconds of hyper-realistic audio in just 100ms, all at once, no streaming. 
- Read more about lightning in our release post [here](https://smallest.ai/blog/lightning-fast-text-to-speech). + Read more about lightning in our release post [here](https://smallest.ai). - You can access lightning via the [Waves API](/v3.0.1/content/api-references/lightning) + You can access lightning via the [Waves API](/waves/v-3-0-1/api-reference/text-to-speech/lightning-v-31) *A lot more coming up, very soon* \ No newline at end of file diff --git a/fern/products/waves/versions/v3.0.1/getting-started/authentication.mdx b/fern/products/waves/versions/v3.0.1/getting-started/authentication.mdx index 405aff5..5c635c5 100644 --- a/fern/products/waves/versions/v3.0.1/getting-started/authentication.mdx +++ b/fern/products/waves/versions/v3.0.1/getting-started/authentication.mdx @@ -25,7 +25,7 @@ Authorization: Bearer YOUR_API_KEY Test the API with this curl command by replacing `YOUR_API_KEY` with your actual key: ```bash -curl 'https://waves-api.smallest.ai/api/v1/lightning/get_voices' \ +curl 'https://api.smallest.ai/waves/v1/lightning/get_voices' \ -H 'Content-Type: application/json' \ -H 'Authorization: Bearer YOUR_API_KEY' ``` diff --git a/fern/products/waves/versions/v3.0.1/getting-started/quickstart.mdx b/fern/products/waves/versions/v3.0.1/getting-started/quickstart.mdx index f87dda2..b85222d 100644 --- a/fern/products/waves/versions/v3.0.1/getting-started/quickstart.mdx +++ b/fern/products/waves/versions/v3.0.1/getting-started/quickstart.mdx @@ -6,7 +6,7 @@ icon: "rocket" ## Step 1: Sign Up & get the API Key -1. Visit [waves.smallest.ai](https://waves.smallest.ai/) and sign up for an account or log in if you already have an account. +1. Visit [api.smallest.ai](https://api.smallest.ai/) and sign up for an account or log in if you already have an account. 2. Navigate to `API Key` tab in your account dashboard. 3. Create a new API Key and copy it. 4. 
Export the API Key in your environment with the name `SMALLEST_API_KEY`, ensuring that your application can access it securely for authentication. diff --git a/fern/products/waves/versions/v3.0.1/introduction/introduction.mdx b/fern/products/waves/versions/v3.0.1/introduction/introduction.mdx index d42681b..d8dff20 100644 --- a/fern/products/waves/versions/v3.0.1/introduction/introduction.mdx +++ b/fern/products/waves/versions/v3.0.1/introduction/introduction.mdx @@ -6,7 +6,7 @@ icon: "globe" ## About Waves -Welcome to [Waves](https://waves.smallest.ai/), the cutting-edge platform designed to deliver real-time, hyper-realistic text-to-speech solutions. Our platform leverages advanced AI models to provide seamless and natural-sounding speech synthesis, catering to a wide range of applications including voicebots, interactive systems, and more. +Welcome to [Waves](https://api.smallest.ai/), the cutting-edge platform designed to deliver real-time, hyper-realistic text-to-speech solutions. Our platform leverages advanced AI models to provide seamless and natural-sounding speech synthesis, catering to a wide range of applications including voicebots, interactive systems, and more. ### Key Features @@ -16,7 +16,7 @@ Welcome to [Waves](https://waves.smallest.ai/), the cutting-edge platform design ### Stay Updated -We are committed to continuous improvement and innovation. All new releases and updates are announced [here](/v3.0.1/content/changelog/announcements). +We are committed to continuous improvement and innovation. All new releases and updates are announced [here](/waves/v-3-0-1/guides/changelog/announcements). 
### Get in Touch diff --git a/fern/products/waves/versions/v3.0.1/product/projects.mdx b/fern/products/waves/versions/v3.0.1/product/projects.mdx index 1d603fe..32cffb9 100644 --- a/fern/products/waves/versions/v3.0.1/product/projects.mdx +++ b/fern/products/waves/versions/v3.0.1/product/projects.mdx @@ -112,7 +112,7 @@ Welcome to the official documentation for our text-to-speech (TTS) project. Our ### Installation & Setup -1. Register for an account and +1. Register for an account and log into the platform. 2. Create a new project or open an existing one. 3. Add or paste your text content to the project. diff --git a/fern/products/waves/versions/v3.0.1/text-to-speech/get-voice-models-langs.mdx b/fern/products/waves/versions/v3.0.1/text-to-speech/get-voice-models-langs.mdx index 742c613..afadb7e 100644 --- a/fern/products/waves/versions/v3.0.1/text-to-speech/get-voice-models-langs.mdx +++ b/fern/products/waves/versions/v3.0.1/text-to-speech/get-voice-models-langs.mdx @@ -11,7 +11,7 @@ In this tutorial, you will learn how to use the Smallest AI platform to fetch th Before you begin, ensure you have the following: - Python (3.9 or higher) installed on your machine. -- An API key from the Smallest AI platform (sign up [here](https://waves.smallest.ai)). +- An API key from the Smallest AI platform (sign up [here](https://api.smallest.ai)). - The Smallest AI Python SDK installed. 
If you haven't installed it yet, follow the instructions below: ### Install the SDK diff --git a/fern/products/waves/versions/v3.0.1/text-to-speech/how-to-tts.mdx b/fern/products/waves/versions/v3.0.1/text-to-speech/how-to-tts.mdx index e504443..3f0ce74 100644 --- a/fern/products/waves/versions/v3.0.1/text-to-speech/how-to-tts.mdx +++ b/fern/products/waves/versions/v3.0.1/text-to-speech/how-to-tts.mdx @@ -13,7 +13,7 @@ You can access the source code for the Python SDK on our [GitHub repository](htt Before you begin, ensure you have the following: - Python (3.9 or higher) installed on your machine. -- An API key from the Smallest AI platform (sign up [here](https://waves.smallest.ai)). +- An API key from the Smallest AI platform (sign up [here](https://api.smallest.ai)). ## Setup diff --git a/fern/products/waves/versions/v3.0.1/voice-cloning/how-to-delete-vc.mdx b/fern/products/waves/versions/v3.0.1/voice-cloning/how-to-delete-vc.mdx index aaacb19..3d5326c 100644 --- a/fern/products/waves/versions/v3.0.1/voice-cloning/how-to-delete-vc.mdx +++ b/fern/products/waves/versions/v3.0.1/voice-cloning/how-to-delete-vc.mdx @@ -12,7 +12,7 @@ You can access the source code for the Python SDK on our [GitHub repository](htt Before you begin, ensure you have the following: - Python (3.9 or higher) installed on your machine. -- An API key from the Smallest AI platform (sign up [here](https://waves.smallest.ai)). +- An API key from the Smallest AI platform (sign up [here](https://api.smallest.ai)). ## Setup diff --git a/fern/products/waves/versions/v3.0.1/voice-cloning/how-to-pvc.mdx b/fern/products/waves/versions/v3.0.1/voice-cloning/how-to-pvc.mdx index ddb07ff..ec40dbf 100644 --- a/fern/products/waves/versions/v3.0.1/voice-cloning/how-to-pvc.mdx +++ b/fern/products/waves/versions/v3.0.1/voice-cloning/how-to-pvc.mdx @@ -9,7 +9,7 @@ In this tutorial, you will learn how to use the Smallest AI platform to create a # Creating a Professional Voice Clone 1. 
**Go to the Smallest AI Platform** - Navigate to [smallest.ai](https://waves.smallest.ai/voice-clone) and click on **Create New**. In the modal that appears, select **Professional Voice Clone**. This will direct you to the setup page: + Navigate to [smallest.ai](https://api.smallest.ai/voice-clone) and click on **Create New**. In the modal that appears, select **Professional Voice Clone**. This will direct you to the setup page: ![Voice Clone Setup](../../../images/pvc_page.png) diff --git a/fern/products/waves/versions/v3.0.1/voice-cloning/how-to-vc-ui.mdx b/fern/products/waves/versions/v3.0.1/voice-cloning/how-to-vc-ui.mdx index e6e6f10..c4c00d3 100644 --- a/fern/products/waves/versions/v3.0.1/voice-cloning/how-to-vc-ui.mdx +++ b/fern/products/waves/versions/v3.0.1/voice-cloning/how-to-vc-ui.mdx @@ -8,7 +8,7 @@ In this tutorial, you will learn how to use the **Smallest AI** platform to crea # Creating an Instant Voice Clone 1️. **Go to the Smallest AI Platform** - - Navigate to **[smallest.ai](https://waves.smallest.ai/voice-clone)** and click on **Create New**. + - Navigate to **[smallest.ai](https://api.smallest.ai/voice-clone)** and click on **Create New**. - In the modal that appears, select **Instant Voice Clone**. ![Voice Clone Setup](../../../images/ivc-image-1.png) @@ -16,7 +16,7 @@ In this tutorial, you will learn how to use the **Smallest AI** platform to crea 2️. **Upload Your Clean Reference Audio** - Select a **short, high-quality** audio clip (5-15 seconds). - Ensure the recording is **clear and noise-free** for the best results. - - Follow the recommended **[best practices](/v3.0.1/content/best-practices/vc-best-practices)** to maximize quality. + - Follow the recommended **[best practices](/waves/v-3-0-1/guides/best-practices/vc-best-practices)** to maximize quality. 
![Upload your clean reference audio](../../../images/ivc-image-2.png) diff --git a/fern/products/waves/versions/v3.0.1/voice-cloning/how-to-vc.mdx b/fern/products/waves/versions/v3.0.1/voice-cloning/how-to-vc.mdx index 27c95ea..05d11e8 100644 --- a/fern/products/waves/versions/v3.0.1/voice-cloning/how-to-vc.mdx +++ b/fern/products/waves/versions/v3.0.1/voice-cloning/how-to-vc.mdx @@ -12,7 +12,7 @@ You can access the source code for the Python SDK on our [GitHub repository](htt Before you begin, ensure you have the following: - Python (3.9 or higher) installed on your machine. -- An API key from the Smallest AI platform (sign up [here](https://waves.smallest.ai)). +- An API key from the Smallest AI platform (sign up [here](https://api.smallest.ai)). ## Setup diff --git a/fern/products/waves/versions/v4.0.0.yml b/fern/products/waves/versions/v4.0.0.yml index 6fd812f..a6ef232 100644 --- a/fern/products/waves/versions/v4.0.0.yml +++ b/fern/products/waves/versions/v4.0.0.yml @@ -1,149 +1,405 @@ tabs: - docs: - display-name: Docs - icon: home + documentation: + display-name: Documentation + icon: fa-solid fa-book api-reference: display-name: API Reference - icon: puzzle + icon: fa-solid fa-puzzle-piece + self-host: + display-name: Self Host + icon: fa-solid fa-server + model-cards: + display-name: Model Cards + icon: fa-solid fa-id-card + client-libraries: + display-name: Client Libraries + icon: fa-solid fa-code + changelog: + display-name: Changelog + icon: fa-solid fa-clock-rotate-left navigation: - - tab: docs + - tab: documentation layout: - section: Getting Started contents: - page: Introduction - path: ./v4.0.0/introduction/introduction.mdx - - page: Quickstart - path: ./v4.0.0/getting-started/quickstart.mdx + path: ../pages/v4.0.0/getting-started/introduction.mdx + icon: fa-regular fa-book-open + - page: Models + path: ../pages/v4.0.0/getting-started/models.mdx + icon: fa-solid fa-cube - page: Authentication - path: ./v4.0.0/getting-started/authentication.mdx - - 
section: Text to Speech + path: ../pages/v4.0.0/getting-started/authentication.mdx + icon: fa-solid fa-lock + - section: Text to Speech (Lightning) contents: - - page: How to TTS - path: ./v4.0.0/text-to-speech/how-to-tts.mdx - - page: Stream TTS - path: ./v4.0.0/text-to-speech/stream-tts.mdx - - page: Voice Models & Languages - path: ./v4.0.0/text-to-speech/get-voice-models-langs.mdx + - page: Quickstart + path: ../pages/v4.0.0/text-to-speech/quickstart.mdx + icon: fa-solid fa-rocket + - page: Overview + path: ../pages/v4.0.0/text-to-speech/overview.mdx + icon: fa-solid fa-volume-high + - page: Sync & Async + path: ../pages/v4.0.0/text-to-speech/how-to-tts.mdx + icon: fa-solid fa-diagram-project + - page: Streaming + path: ../pages/v4.0.0/text-to-speech/stream-tts.mdx + icon: fa-solid fa-bars-staggered - page: Pronunciation Dictionaries - path: ./v4.0.0/text-to-speech/pronunciation-dictionaries.mdx - - section: Speech to Text + path: ../pages/v4.0.0/text-to-speech/pronunciation-dictionaries.mdx + icon: fa-solid fa-brackets-curly + - page: Voices & Languages + path: ../pages/v4.0.0/text-to-speech/get-voice-models-langs.mdx + icon: fa-solid fa-print + - page: HTTP vs Streaming vs WebSockets + path: ../pages/v4.0.0/getting-started/http-stream.mdx + icon: fa-solid fa-arrows-split-up-and-left + - section: Speech to Text (Pulse) contents: + - page: Quickstart + path: ../pages/v4.0.0/speech-to-text/quickstart.mdx + icon: fa-solid fa-rocket - page: Overview - path: ./v4.0.0/speech-to-text-new/overview.mdx - - section: Real-time + path: ../pages/v4.0.0/speech-to-text/overview.mdx + icon: fa-solid fa-microphone + - section: Pre-Recorded + icon: fa-solid fa-play contents: - page: Quickstart - path: ./v4.0.0/speech-to-text-new/realtime/quickstart.mdx + path: ../pages/v4.0.0/speech-to-text/pre-recorded/quickstart.mdx + icon: fa-solid fa-rocket - page: Audio Formats - path: ./v4.0.0/speech-to-text-new/realtime/audio-formats.mdx + path: 
../pages/v4.0.0/speech-to-text/pre-recorded/audio-formats.mdx + icon: fa-solid fa-file-audio + - page: Webhooks + path: ../pages/v4.0.0/speech-to-text/pre-recorded/webhooks.mdx + icon: fa-solid fa-webhook - page: Features - path: ./v4.0.0/speech-to-text-new/realtime/features.mdx - - page: Response Format - path: ./v4.0.0/speech-to-text-new/realtime/response-format.mdx - - page: Code Examples - path: ./v4.0.0/speech-to-text-new/realtime/code-examples.mdx - - page: Best Practices - path: ./v4.0.0/speech-to-text-new/realtime/best-practices.mdx + path: ../pages/v4.0.0/speech-to-text/pre-recorded/features.mdx + icon: fa-solid fa-list-check - page: Troubleshooting - path: ./v4.0.0/speech-to-text-new/realtime/troubleshooting.mdx - - section: Pre-recorded + path: ../pages/v4.0.0/speech-to-text/pre-recorded/troubleshooting.mdx + icon: fa-solid fa-wrench + - page: Best Practices + path: ../pages/v4.0.0/speech-to-text/pre-recorded/best-practices.mdx + icon: fa-solid fa-star + - page: Code Examples + path: ../pages/v4.0.0/speech-to-text/pre-recorded/code-examples.mdx + icon: fa-solid fa-terminal + - section: Realtime (WebSocket) + icon: fa-solid fa-radio contents: - page: Quickstart - path: ./v4.0.0/speech-to-text-new/pre-recorded/quickstart.mdx + path: ../pages/v4.0.0/speech-to-text/realtime/quickstart.mdx + icon: fa-solid fa-rocket + - page: Response Format + path: ../pages/v4.0.0/speech-to-text/realtime/response-format.mdx + icon: fa-solid fa-code - page: Audio Formats - path: ./v4.0.0/speech-to-text-new/pre-recorded/audio-formats.mdx + path: ../pages/v4.0.0/speech-to-text/realtime/audio-formats.mdx + icon: fa-solid fa-file-audio - page: Features - path: ./v4.0.0/speech-to-text-new/pre-recorded/features.mdx - - page: Webhooks - path: ./v4.0.0/speech-to-text-new/pre-recorded/webhooks.mdx - - page: Code Examples - path: ./v4.0.0/speech-to-text-new/pre-recorded/code-examples.mdx - - page: Best Practices - path: ./v4.0.0/speech-to-text-new/pre-recorded/best-practices.mdx + 
path: ../pages/v4.0.0/speech-to-text/realtime/features.mdx + icon: fa-solid fa-list-check - page: Troubleshooting - path: ./v4.0.0/speech-to-text-new/pre-recorded/troubleshooting.mdx + path: ../pages/v4.0.0/speech-to-text/realtime/troubleshooting.mdx + icon: fa-solid fa-wrench + - page: Best Practices + path: ../pages/v4.0.0/speech-to-text/realtime/best-practices.mdx + icon: fa-solid fa-star - section: Features + icon: fa-solid fa-layer-group contents: - page: Word Timestamps - path: ./v4.0.0/speech-to-text-new/features/word-timestamps.mdx - - page: Diarization - path: ./v4.0.0/speech-to-text-new/features/diarization.mdx + path: ../pages/v4.0.0/speech-to-text/features/word-timestamps.mdx + icon: fa-solid fa-clock - page: Language Detection - path: ./v4.0.0/speech-to-text-new/features/language-detection.mdx - - page: Redaction - path: ./v4.0.0/speech-to-text-new/features/redaction.mdx - - page: Word Boosting - path: ./v4.0.0/speech-to-text-new/features/word-boosting.mdx - - page: Numeric Formatting - path: ./v4.0.0/speech-to-text-new/features/numeric-formatting.mdx + path: ../pages/v4.0.0/speech-to-text/features/language-detection.mdx + icon: fa-solid fa-language - page: Utterances - path: ./v4.0.0/speech-to-text-new/features/utterances.mdx + path: ../pages/v4.0.0/speech-to-text/features/utterances.mdx + icon: fa-solid fa-comment + - page: Diarization + path: ../pages/v4.0.0/speech-to-text/features/diarization.mdx + icon: fa-solid fa-users + - page: Redaction + path: ../pages/v4.0.0/speech-to-text/features/redaction.mdx + icon: fa-solid fa-eye-slash - page: Full Transcript - path: ./v4.0.0/speech-to-text-new/features/full-transcript.mdx - - page: Emotion Detection - path: ./v4.0.0/speech-to-text-new/features/emotion-detection.mdx + path: ../pages/v4.0.0/speech-to-text/features/full-transcript.mdx + icon: fa-solid fa-file-lines + - page: Numeric Formatting + path: ../pages/v4.0.0/speech-to-text/features/numeric-formatting.mdx + icon: fa-solid fa-hashtag - page: Age 
and Gender Detection - path: ./v4.0.0/speech-to-text-new/features/age-and-gender-detection.mdx + path: ../pages/v4.0.0/speech-to-text/features/age-and-gender-detection.mdx + icon: fa-solid fa-user + - page: Emotion Detection + path: ../pages/v4.0.0/speech-to-text/features/emotion-detection.mdx + icon: fa-solid fa-face-smile + - page: Keyword Boosting + path: ../pages/v4.0.0/speech-to-text/features/word-boosting.mdx + icon: fa-solid fa-arrow-up-right-dots - section: Benchmarks + icon: fa-solid fa-chart-line contents: - page: Performance - path: ./v4.0.0/speech-to-text-new/benchmarks/performance.mdx + path: ../pages/v4.0.0/speech-to-text/benchmarks/performance.mdx + icon: fa-solid fa-gauge-high - page: Metrics Overview - path: ./v4.0.0/speech-to-text-new/benchmarks/metrics-overview.mdx + path: ../pages/v4.0.0/speech-to-text/benchmarks/metrics-overview.mdx + icon: fa-solid fa-chart-bar - page: Evaluation Walkthrough - path: ./v4.0.0/speech-to-text-new/benchmarks/evaluation-walkthrough.mdx + path: ../pages/v4.0.0/speech-to-text/benchmarks/evaluation-walkthrough.mdx + icon: fa-solid fa-clipboard-check + - section: Cookbooks + contents: + - page: Speech to Text + path: ../pages/v4.0.0/cookbooks/speech-to-text.mdx + icon: fa-solid fa-book + - page: Text to Speech + path: ../pages/v4.0.0/cookbooks/text-to-speech.mdx + icon: fa-solid fa-book - section: Voice Cloning contents: - - page: Types of Cloning - path: ./v4.0.0/voice-cloning/types-of-clone.mdx + - page: Instant Clone (UI) + path: ../pages/v4.0.0/voice-cloning/how-to-vc-ui.mdx + icon: fa-solid fa-desktop - page: How to Voice Clone - path: ./v4.0.0/voice-cloning/how-to-vc.mdx - - page: Voice Clone via UI - path: ./v4.0.0/voice-cloning/how-to-vc-ui.mdx - - page: Professional Voice Cloning - path: ./v4.0.0/voice-cloning/how-to-pvc.mdx + path: ../pages/v4.0.0/voice-cloning/how-to-vc.mdx + icon: fa-solid fa-circle-dot - page: Delete Cloned Voice - path: ./v4.0.0/voice-cloning/how-to-delete-vc.mdx - - section: Best 
Practices - contents: - - page: TTS Best Practices - path: ./v4.0.0/best-practices/tts-best-practices.mdx - - page: Voice Cloning Best Practices - path: ./v4.0.0/best-practices/vc-best-practices.mdx - - page: PVC Best Practices - path: ./v4.0.0/best-practices/pvc-best-practices.mdx + path: ../pages/v4.0.0/voice-cloning/how-to-delete-vc.mdx + icon: fa-solid fa-trash - section: Integrations contents: + - page: Vercel AI SDK + path: ../pages/v4.0.0/integrations/vercel-ai-sdk.mdx + icon: fa-solid fa-triangle + - page: OpenClaw + path: ../pages/v4.0.0/integrations/openclaw.mdx + icon: fa-solid fa-message-bot - page: LiveKit - path: ./v4.0.0/integrations/livekit.mdx + path: ../pages/v4.0.0/integrations/livekit.mdx + icon: fa-solid fa-plug - page: Plivo - path: ./v4.0.0/integrations/plivo.mdx + path: ../pages/v4.0.0/integrations/plivo.mdx + icon: fa-solid fa-plug - page: Vonage - path: ./v4.0.0/integrations/vonage.mdx - - section: On-Premise + path: ../pages/v4.0.0/integrations/vonage.mdx + icon: fa-solid fa-plug + - section: Best Practices + contents: + - page: Voice Cloning Best Practices + path: ../pages/v4.0.0/best-practices/vc-best-practices.mdx + icon: fa-solid fa-lightbulb + - page: TTS Best Practices + path: ../pages/v4.0.0/best-practices/tts-best-practices.mdx + icon: fa-solid fa-lightbulb + + - tab: api-reference + layout: + - section: API References + contents: + - page: Authentication + path: ../pages/v4.0.0/api-references/authentication.mdx + icon: fa-solid fa-key + - page: Concurrency and Limits + path: ../pages/v4.0.0/api-references/concurrency-and-limits.mdx + icon: fa-solid fa-gauge-high + - page: WebSocket + path: ../pages/v4.0.0/api-references/websocket.mdx + icon: fa-solid fa-plug + - api: API Reference + api-name: waves-v4 + snippets: + python: smallest-ai + typescript: smallest-ai + audiences: + - v4docs + flattened: true + layout: + - section: Text to Speech + contents: + - endpoint: POST /waves/v1/lightning-v3.1/get_speech + title: "Lightning v3.1" 
+ - endpoint: POST /waves/v1/lightning-v3.1/stream + title: "Lightning v3.1 SSE" + - endpoint: WSS /waves/v1/lightning-v3.1/get_speech/stream + title: "Lightning v3.1 WebSocket" + - endpoint: POST /waves/v1/lightning-v2/get_speech + title: "Lightning v2" + - endpoint: POST /waves/v1/lightning-v2/stream + title: "Lightning v2 SSE" + - endpoint: WSS /waves/v1/lightning-v2/get_speech/stream + title: "Lightning v2 WebSocket" + - section: Speech to Text + contents: + - endpoint: POST /waves/v1/pulse/get_text + title: "Pulse (Pre-Recorded)" + - endpoint: WSS /waves/v1/pulse/get_text + title: "Pulse (Realtime)" + - section: Voices + contents: + - endpoint: GET /waves/v1/{model}/get_voices + title: "Get Voices" + - section: Voice Cloning + contents: + - endpoint: POST /waves/v1/lightning-large/add_voice + title: "Add Voice" + - endpoint: GET /waves/v1/lightning-large/get_cloned_voices + title: "Get Cloned Voices" + - endpoint: DELETE /waves/v1/lightning-large + title: "Delete Cloned Voice" + - section: Pronunciation Dictionaries + contents: + - endpoint: GET /waves/v1/pronunciation-dicts + title: "Get Pronunciation Dictionaries" + - endpoint: POST /waves/v1/pronunciation-dicts + title: "Create Pronunciation Dictionary" + - endpoint: PUT /waves/v1/pronunciation-dicts + title: "Update Pronunciation Dictionary" + - endpoint: DELETE /waves/v1/pronunciation-dicts + title: "Delete Pronunciation Dictionary" + + - tab: self-host + layout: + - section: Getting Started contents: - page: Introduction - path: ./v4.0.0/on-prem/introduction.mdx - - section: Docker + path: ../pages/v4.0.0/on-prem/getting-started/introduction.mdx + icon: fa-solid fa-server + - page: Prerequisites + path: ../pages/v4.0.0/on-prem/getting-started/prerequisites.mdx + icon: fa-solid fa-list-check + - page: Why Self-Host? 
+ path: ../pages/v4.0.0/on-prem/getting-started/why-self-host.mdx + icon: fa-solid fa-question + - page: Architecture + path: ../pages/v4.0.0/on-prem/getting-started/architecture.mdx + icon: fa-solid fa-diagram-project + - section: Docker Setup + contents: + - section: STT Deployment + icon: fa-solid fa-microphone contents: - - page: Prerequisites - path: ./v4.0.0/on-prem/docker/prerequisites.mdx + - section: Prerequisites + contents: + - page: Hardware Requirements + path: ../pages/v4.0.0/on-prem/docker/stt/prerequisites/hardware-requirements.mdx + - page: Software Requirements + path: ../pages/v4.0.0/on-prem/docker/stt/prerequisites/software-requirements.mdx + - page: Credentials + path: ../pages/v4.0.0/on-prem/docker/stt/prerequisites/credentials.mdx + - page: Verification + path: ../pages/v4.0.0/on-prem/docker/stt/prerequisites/verification.mdx - page: Quick Start - path: ./v4.0.0/on-prem/docker/quick-start.mdx + path: ../pages/v4.0.0/on-prem/docker/stt/quick-start.mdx - page: Configuration - path: ./v4.0.0/on-prem/docker/configuration.mdx + path: ../pages/v4.0.0/on-prem/docker/stt/configuration.mdx - page: Troubleshooting - path: ./v4.0.0/on-prem/docker/docker-troubleshooting.mdx - - section: Kubernetes + path: ../pages/v4.0.0/on-prem/docker/stt/docker-troubleshooting.mdx + - section: TTS Deployment + icon: fa-solid fa-volume-high contents: - - page: Prerequisites - path: ./v4.0.0/on-prem/kubernetes/prerequisites.mdx + - section: Prerequisites + contents: + - page: Hardware Requirements + path: ../pages/v4.0.0/on-prem/docker/tts/prerequisites/hardware-requirements.mdx + - page: Software Requirements + path: ../pages/v4.0.0/on-prem/docker/tts/prerequisites/software-requirements.mdx + - page: Credentials + path: ../pages/v4.0.0/on-prem/docker/tts/prerequisites/credentials.mdx + - page: Verification + path: ../pages/v4.0.0/on-prem/docker/tts/prerequisites/verification.mdx - page: Quick Start - path: ./v4.0.0/on-prem/kubernetes/quick-start.mdx + path: 
../pages/v4.0.0/on-prem/docker/tts/quick-start.mdx + - page: Configuration + path: ../pages/v4.0.0/on-prem/docker/tts/configuration.mdx - page: Troubleshooting - path: ./v4.0.0/on-prem/kubernetes/k8s-troubleshooting.mdx - - tab: api-reference + path: ../pages/v4.0.0/on-prem/docker/tts/docker-troubleshooting.mdx + - section: Kubernetes Setup + contents: + - section: Prerequisites + icon: fa-solid fa-list-check + contents: + - page: Hardware Requirements + path: ../pages/v4.0.0/on-prem/kubernetes/prerequisites/hardware-requirements.mdx + - page: Software Requirements + path: ../pages/v4.0.0/on-prem/kubernetes/prerequisites/software-requirements.mdx + - page: Credentials + path: ../pages/v4.0.0/on-prem/kubernetes/prerequisites/credentials.mdx + - page: Verification + path: ../pages/v4.0.0/on-prem/kubernetes/prerequisites/verification.mdx + - page: Quick Start + path: ../pages/v4.0.0/on-prem/kubernetes/quick-start.mdx + icon: fa-solid fa-rocket + - section: AWS + icon: fa-brands fa-aws + contents: + - page: EKS Setup + path: ../pages/v4.0.0/on-prem/kubernetes/aws/eks-setup.mdx + - page: GPU Nodes + path: ../pages/v4.0.0/on-prem/kubernetes/aws/gpu-nodes.mdx + - page: IAM IRSA + path: ../pages/v4.0.0/on-prem/kubernetes/aws/iam-irsa.mdx + - section: Storage & PVC + icon: fa-solid fa-hard-drive + contents: + - page: EFS Configuration + path: ../pages/v4.0.0/on-prem/kubernetes/storage/efs-configuration.mdx + - page: Model Storage + path: ../pages/v4.0.0/on-prem/kubernetes/storage/model-storage.mdx + - page: Redis Persistence + path: ../pages/v4.0.0/on-prem/kubernetes/storage/redis-persistence.mdx + - section: Autoscaling + icon: fa-solid fa-chart-line + contents: + - page: HPA Configuration + path: ../pages/v4.0.0/on-prem/kubernetes/autoscaling/hpa-configuration.mdx + - page: Cluster Autoscaler + path: ../pages/v4.0.0/on-prem/kubernetes/autoscaling/cluster-autoscaler.mdx + - page: Metrics Setup + path: ../pages/v4.0.0/on-prem/kubernetes/autoscaling/metrics-setup.mdx + - 
page: Grafana Dashboards + path: ../pages/v4.0.0/on-prem/kubernetes/autoscaling/grafana-dashboards.mdx + - page: Troubleshooting + path: ../pages/v4.0.0/on-prem/kubernetes/k8s-troubleshooting.mdx + icon: fa-solid fa-wrench + - section: Troubleshooting + contents: + - page: Common Issues + path: ../pages/v4.0.0/on-prem/troubleshooting/common-issues.mdx + icon: fa-solid fa-circle-exclamation + - page: Debugging Guide + path: ../pages/v4.0.0/on-prem/troubleshooting/debugging-guide.mdx + icon: fa-solid fa-bug + - page: Logs Analysis + path: ../pages/v4.0.0/on-prem/troubleshooting/logs-analysis.mdx + icon: fa-solid fa-file-lines + + - tab: model-cards layout: - - api: API Reference - api-name: waves + - section: Text to Speech + contents: + - page: Lightning v3.1 + path: ../pages/v4.0.0/text-to-speech/model-cards/lightning-v3-1.mdx + icon: fa-solid fa-sparkles + - section: Speech to Text + contents: + - page: Pulse + path: ../pages/v4.0.0/speech-to-text/model-cards/pulse.mdx + icon: fa-solid fa-microphone + + - tab: client-libraries + layout: + - section: Client Libraries + contents: + - page: Overview + path: ../pages/v4.0.0/client-libraries/overview.mdx + icon: fa-solid fa-code + + - tab: changelog + layout: + - changelog: ../pages/v4.0.0/changelog-entries diff --git a/fern/products/waves/versions/v4.0.0/api-references/add-voice-api.mdx b/fern/products/waves/versions/v4.0.0/api-references/add-voice-api.mdx index fa4fc8c..b1911c3 100644 --- a/fern/products/waves/versions/v4.0.0/api-references/add-voice-api.mdx +++ b/fern/products/waves/versions/v4.0.0/api-references/add-voice-api.mdx @@ -1,7 +1,7 @@ --- title: "Add your Voice" description: "Add your voice using the Waves API." 
-openapi: "POST /api/v1/lightning-large/add_voice" +openapi: "POST /waves/v1/lightning-large/add_voice" hideApiMarker: False --- @@ -9,7 +9,7 @@ hideApiMarker: False ## Sample cURL Example ```bash -curl -X POST https://waves-api.smallest.ai/api/v1/lightning-large/add_voice \ +curl -X POST https://api.smallest.ai/waves/v1/lightning-large/add_voice \ -H "Authorization: Bearer YOUR_API_KEY" \ -F "displayName=my voice" \ -F "file=@my_voice.wav;type=audio/wav" @@ -20,7 +20,7 @@ Here is a Python example using the `requests` library: ```python python import requests -url = "https://waves-api.smallest.ai/api/v1/lightning-large/add_voice" +url = "https://api.smallest.ai/waves/v1/lightning-large/add_voice" payload = {'displayName': 'my voice'} files=[ ('file', ('my_voice.wav', open('my_voice.wav','rb'), 'audio/wav')) diff --git a/fern/products/waves/versions/v4.0.0/api-references/asr-websocket-api.mdx b/fern/products/waves/versions/v4.0.0/api-references/asr-websocket-api.mdx deleted file mode 100644 index 2c98218..0000000 --- a/fern/products/waves/versions/v4.0.0/api-references/asr-websocket-api.mdx +++ /dev/null @@ -1,355 +0,0 @@ ---- -title: "ASR (Websocket)" -description: "Complete parameter and response reference for the ASR WebSocket API" -asyncapi: "/asyncapi-spec/asr-ws.json /api/v1/asr" ---- - -# ASR WebSocket API Reference - -Complete reference documentation for the Waves ASR WebSocket API parameters, responses, and error codes. - -## Endpoint - -**Production URL**: `wss://waves-api.smallest.ai/api/v1/asr` - -## Connection Parameters - -All parameters are passed as query strings in the WebSocket URL. Parameters are **case-sensitive** and unknown or invalid parameters will result in an error response and connection closure. 
- -### Core Parameters - - - Language of the audio input - - **Currently Supported Languages:** - - **IN Region:** - - `en` - English - - `hi` - Hindi - - **US Region:** - - `en` - English - - `hi` - Hindi - - `es` - Spanish - - `fr` - French - - `de` - German - - `ru` - Russian - - `pt` - Portuguese - - `ja` - Japanese - - `it` - Italian - - `nl` - Dutch - - **Available on Request (Both Regions):** - - `zh` - Chinese Mandarin - - `zh-hk` - Chinese Cantonese - - `tr` - Turkish - - `vi` - Vietnamese - - `th` - Thai - - `id` - Indonesian - - `uk` - Ukrainian - - `ta` - Tamil - - `mr` - Marathi - - `te` - Telugu - - `pl` - Polish - - `el` - Greek - - `hu` - Hungarian - - `ro` - Romanian - - `cs` - Czech - - `sv` - Swedish - - `bg` - Bulgarian - - `da` - Danish - - `fi` - Finnish - - - - - Audio encoding format - - **Valid Values:** - - `linear16` - 16-bit linear PCM - - `flac` - FLAC compressed - - `mulaw` - μ-law encoded - - `opus` - Opus compressed - - - - Sample rate in Hz - - **Range:** 8000-48000 (integer) - **Recommended:** 16000 for optimal performance - - - - Number of audio channels - - **Range:** 1+ (integer) - **Recommended:** 1 (mono) for efficiency - - -### Optional Parameters - - - Add punctuation to transcripts - - **Values:** `true`, `false` - - - - Duration in milliseconds to determine end of speech - - **Range:** 10-60000 (integer) - **Default:** 300ms - - - - Emit voice activity detection events - - **Values:** `true`, `false` - - - - Redact sensitive data types - - **Values:** Comma-separated array - - `"pci"` - Payment card information - - `"ssn"` - Social security numbers - - `"numbers"` - Generic number redaction - - - - - Speech endpointing behavior - - **Values:** - - `"true"` - Enable automatic endpointing - - `"false"` - Disable endpointing - - `10-60000` - Custom threshold in milliseconds - - -## URL Construction Examples - -### Basic Connection -``` 
-wss://waves-api.smallest.ai/api/v1/asr?api_key=YOUR_API_KEY&audioEncoding=linear16&audioSampleRate=16000&audioChannels=1 -``` - -### Advanced Configuration -``` -wss://waves-api.smallest.ai/api/v1/asr?api_key=YOUR_API_KEY&audioLanguage=en&audioEncoding=linear16&audioSampleRate=16000&audioChannels=1&addPunctuation=true&speechEndThreshold=500&redactSensitiveData=pci,ssn -``` - -## Audio Data Format - -### Sending Audio - -Send audio data as **binary messages** that match your specified encoding, sample rate, and channels. - -#### Format Specifications - - - - **16-bit Linear PCM** - - Bit depth: 16-bit signed integers - - Byte order: Little-endian - - Sample rate: Match `audioSampleRate` parameter - - Channels: Match `audioChannels` parameter - - Recommended chunk size: 32,000 bytes (1 second at 16kHz mono) - - - - **FLAC Compressed** - - Standard FLAC encoding - - Sample rate: Match `audioSampleRate` parameter - - Channels: Match `audioChannels` parameter - - Chunk size: Variable based on compression - - - - **μ-law Encoded** - - 8-bit μ-law encoding - - Typically used for telephony (8kHz) - - Sample rate: Match `audioSampleRate` parameter - - Channels: Match `audioChannels` parameter - - - - **Opus Compressed** - - Standard Opus encoding - - Supports browser-native formats (WebM/Opus) - - Sample rate: Match `audioSampleRate` parameter - - Channels: Match `audioChannels` parameter - - - -### Recommended Chunk Sizes - -For optimal real-time performance: - -| Sample Rate | Bit Depth | Channels | Duration | Chunk Size | -|-------------|-----------|----------|----------|------------| -| 16kHz | 16-bit | 1 (mono) | 1 second | 32,000 bytes | -| 16kHz | 16-bit | 1 (mono) | 2 seconds | 64,000 bytes | -| 8kHz | 16-bit | 1 (mono) | 1 second | 16,000 bytes | -| 44.1kHz | 16-bit | 1 (mono) | 1 second | 88,200 bytes | - -## Response Format - -The API returns JSON responses with the following structure: - -### Standard Response - -```json -{ - "text": "transcribed text here", - 
"isEndOfTurn": false -} -``` - -### Response Fields - - - The transcribed text content - - - - Indicates if this marks the end of a speech turn - - `true`: End of speech segment detected - - `false`: More speech expected - - -### Response Flow Examples - -#### 1. End of Turn Detection -```json -// Final result with end of turn -{ "text": "Hello, this is the end.", "isEndOfTurn": true } -``` - -### Voice Activity Events - -When `emitVoiceActivity=true`, additional events may be sent: - -```json -{ - "event": "voice_activity", - "speaking": true, - "timestamp": 1234567890 -} -``` - -## Error Responses - -Errors are sent as JSON before closing the connection: - -### Error Format - -```json -{ - "message": "error message", - "error": "detailed error info" -} -``` - -### Common Error Types - -#### Parameter Validation Errors - - -```json -{ - "message": "Invalid input data", - "error": "audioSampleRate must be at least 8000" -} -``` -**Cause:** Parameter value outside allowed range -**Solution:** Check parameter constraints and adjust values - - - -```json -{ - "message": "Invalid input data", - "error": "audioLanguage must be one of the following values: en, hi" -} -``` -**Cause:** Invalid enum value provided -**Solution:** Use only supported enum values - - - -```json -{ - "message": "Invalid input data", - "error": "audioEncoding is required" -} -``` -**Cause:** Required parameter not provided -**Solution:** Include all required parameters - - -#### Authentication Errors - - -```json -{ - "message": "Unauthorized", - "error": "Invalid or missing API key" -} -``` -**Cause:** Invalid, missing, or malformed API key -**Solution:** Verify API key format and validity - - - -```json -{ - "message": "No subscription", - "error": "ASR requires Enterprise plan" -} -``` -**Cause:** Account lacks required Enterprise subscription -**Solution:** Upgrade to Enterprise Monthly or Enterprise Yearly - - - -```json -{ - "message": "Insufficient credits", - "error": "Account balance too 
low" -} -``` -**Cause:** Account balance insufficient for operation -**Solution:** Add credits to account or upgrade plan - - -#### Connection Errors - - -```json -{ - "message": "socket timeout" -} -``` -**Cause:** No audio received for 30 seconds -**Solution:** Ensure continuous audio streaming or implement keep-alive - - - -```json -{ - "message": "Rate limit exceeded", - "error": "Too many concurrent connections" -} -``` -**Cause:** Exceeded concurrent connection limits -**Solution:** Implement connection pooling and respect rate limits - - -## Rate Limits & Quotas - -| Limit Type | Enterprise Plan | Notes | -|------------|-----------------|-------| -| Connection Timeout | 30 seconds | If no audio received | -| Concurrent Connections | Plan-dependent | Contact support for limits | -| Audio Duration | Plan-dependent | Based on subscription tier | -| API Rate | Plan-dependent | Requests per minute limit | diff --git a/fern/products/waves/versions/v4.0.0/api-references/authentication.mdx b/fern/products/waves/versions/v4.0.0/api-references/authentication.mdx index 11d559d..f69f4a7 100644 --- a/fern/products/waves/versions/v4.0.0/api-references/authentication.mdx +++ b/fern/products/waves/versions/v4.0.0/api-references/authentication.mdx @@ -4,13 +4,9 @@ description: Learn how to authenticate requests using API keys. icon: key --- -# Authentication - -Our API requires authentication using API keys to ensure secure access. - ## Obtaining Your API Key -To access our API, you need an API key. You can find your API key by visiting [Smallest AI Console](https://console.smallest.ai/apikeys). +To access our API, you need an API key. You can generate your API key by visiting the [Smallest AI Console](https://app.smallest.ai/dashboard/settings/apikeys?utm_source=documentation&utm_medium=api-references). 
## Using API Keys @@ -31,4 +27,4 @@ Authorization: Bearer YOUR_API_KEY_HERE - **Use environment variables**: Store API keys securely instead of hardcoding them in your source code. - **Monitor API usage**: Keep track of your API calls to detect any unauthorized access. -For more details, visit our [API Documentation](/v4.0.0/content/getting-started/authentication#model-overview). +For more details, visit our [API Documentation](/waves/documentation/getting-started/authentication#model-overview). diff --git a/fern/products/waves/versions/v4.0.0/api-references/concurrency-and-limits.mdx b/fern/products/waves/versions/v4.0.0/api-references/concurrency-and-limits.mdx index 054ed2e..fc18626 100644 --- a/fern/products/waves/versions/v4.0.0/api-references/concurrency-and-limits.mdx +++ b/fern/products/waves/versions/v4.0.0/api-references/concurrency-and-limits.mdx @@ -13,7 +13,7 @@ Waves API implements concurrency limits to ensure fair usage and optimal perform **Concurrency** refers to the number of simultaneous requests that can be processed at any given moment. 
In the context of Waves API: - **1 TTS request concurrency**: Only 1 Text-to-Speech request can be actively processed at a time per account -- This applies to all TTS endpoints including Lightning, Lightning v2, Lightning Large, and streaming variants +- This applies to all TTS endpoints including Lightning v2, Lightning v3.1, and streaming variants ## How Concurrency Works @@ -39,7 +39,7 @@ Check your usage patterns in the Waves dashboard to: - Identify peak usage times - Plan capacity requirements -Link to dashboard: https://waves.smallest.ai/developers/usage +Link to dashboard: https://app.smallest.ai/waves/developers/usage?utm_source=documentation&utm_medium=api-references ## Parallel Conversational Bots diff --git a/fern/products/waves/versions/v4.0.0/api-references/delete-cloned-voice.mdx b/fern/products/waves/versions/v4.0.0/api-references/delete-cloned-voice.mdx index 29b3853..a9d01e8 100644 --- a/fern/products/waves/versions/v4.0.0/api-references/delete-cloned-voice.mdx +++ b/fern/products/waves/versions/v4.0.0/api-references/delete-cloned-voice.mdx @@ -1,6 +1,6 @@ --- title: 'Delete Cloned Voice' description: 'Delete a cloned voice using the new Waves API.' -openapi: 'DELETE /api/v1/lightning-large' +openapi: 'DELETE /waves/v1/lightning-large' hideApiMarker: False --- diff --git a/fern/products/waves/versions/v4.0.0/api-references/get-cloned-voices-api.mdx b/fern/products/waves/versions/v4.0.0/api-references/get-cloned-voices-api.mdx index 6658398..397899e 100644 --- a/fern/products/waves/versions/v4.0.0/api-references/get-cloned-voices-api.mdx +++ b/fern/products/waves/versions/v4.0.0/api-references/get-cloned-voices-api.mdx @@ -1,6 +1,6 @@ --- title: "Get your cloned Voices" description: "Retrieve your cloned voices." 
-openapi: "GET /api/v1/lightning-large/get_cloned_voices" +openapi: "GET /waves/v1/lightning-large/get_cloned_voices" hideApiMarker: False --- \ No newline at end of file diff --git a/fern/products/waves/versions/v4.0.0/api-references/get-voices-api.mdx b/fern/products/waves/versions/v4.0.0/api-references/get-voices-api.mdx index 3b25253..6491278 100644 --- a/fern/products/waves/versions/v4.0.0/api-references/get-voices-api.mdx +++ b/fern/products/waves/versions/v4.0.0/api-references/get-voices-api.mdx @@ -1,6 +1,6 @@ --- title: "Get Voices" description: "Get voices supported for a given model using the new Waves API." -openapi: "GET /api/v1/{model}/get_voices" +openapi: "GET /waves/v1/{model}/get_voices" hideApiMarker: False --- \ No newline at end of file diff --git a/fern/products/waves/versions/v4.0.0/api-references/lightning-asr-ws.mdx b/fern/products/waves/versions/v4.0.0/api-references/lightning-asr-ws.mdx index 467a05b..586c440 100644 --- a/fern/products/waves/versions/v4.0.0/api-references/lightning-asr-ws.mdx +++ b/fern/products/waves/versions/v4.0.0/api-references/lightning-asr-ws.mdx @@ -1,7 +1,7 @@ --- title: "Lightning (Realtime)" description: The Lightning STT WebSocket API provides real-time speech-to-text transcription capabilities with streaming audio input. This API uses WebSocket to deliver transcription results as audio is processed, enabling low-latency transcription without waiting for the entire audio file to upload. Perfect for live transcription, voice assistants, and real-time communication systems that require immediate speech recognition. Supports multiple languages, word-level timestamps, sentence-level timestamps (utterances), PII and PCI redaction, cumulative transcripts, and more advanced features. 
-asyncapi: "/asyncapi-spec/lightning-asr-ws.json /api/v1/lightning/get_text" +asyncapi: "/asyncapi-spec/lightning-asr-ws.json /waves/v1/lightning/get_text" --- ## Query Parameters @@ -31,7 +31,6 @@ The WebSocket connection accepts the following query parameters: | `redact_pii` | string | `false` | Redact personally identifiable information (name, surname, address). Options: `true`, `false` | | `redact_pci` | string | `false` | Redact payment card information (credit card, CVV, zip, account number). Options: `true`, `false` | | `numerals` | string | `auto` | "Convert spoken numerals into digit form (e.g., 'twenty five' to '25') and `auto` enables automatic detection based on context. Options: `true`, `false`, `auto` | -| `keywords` | list | None | List of keywords to boost during transcription. Each keyword is formatted as 'word:weight' where weight is a positive number indicating boost intensity (e.g., ['word:5.0', 'name:4.0']). Higher weights increase the likelihood of the keyword being recognized. Recommended weight range is 1 to 10; extremely high values may degrade transcription accuracy. | | `diarize` | string | `false` | Enable speaker diarization to identify and label different speakers in the audio. When enabled, each word in the transcription includes `speaker` (integer ID) and `speaker_confidence` (float 0-1) fields. 
Options: `true`, `false` | ### Webhook Configuration @@ -41,7 +40,7 @@ The WebSocket connection accepts the following query parameters: ### Example Connection URL ```javascript -const url = new URL("wss://waves-api.smallest.ai/api/v1/lightning/get_text"); +const url = new URL("wss://api.smallest.ai/waves/v1/lightning/get_text"); url.searchParams.append("language", "en"); url.searchParams.append("encoding", "linear16"); url.searchParams.append("sample_rate", "16000"); @@ -51,7 +50,6 @@ url.searchParams.append("sentence_timestamps", "true"); url.searchParams.append("redact_pii", "true"); url.searchParams.append("redact_pci", "true"); url.searchParams.append("numerals", "true"); -url.searchParams.append("keywords", JSON.stringify(["product:5.0"])); url.searchParams.append("diarize", "true"); const ws = new WebSocket(url.toString(), { @@ -74,11 +72,11 @@ ws.send(audioChunk); ### End Signal (JSON) -Signal the end of audio stream: +Signal the end of audio stream. This is used to flush the transcription and receive the final response with `is_last=true`: ```json { - "type": "end" + "type": "finalize" } ``` @@ -194,9 +192,9 @@ import websockets import librosa from urllib.parse import urlencode -BASE_WS_URL = "wss://waves-api.smallest.ai/api/v1/lightning/get_text" +BASE_WS_URL = "wss://api.smallest.ai/waves/v1/lightning/get_text" -async def stream_audio(audio_file, api_key, language="en", encoding="linear16", sample_rate=16000, word_timestamps="true", full_transcript="false", sentence_timestamps="false", redact_pii="false", redact_pci="false", numerals="auto", keywords="auto", diarize="false"): +async def stream_audio(audio_file, api_key, language="en", encoding="linear16", sample_rate=16000, word_timestamps="true", full_transcript="false", sentence_timestamps="false", redact_pii="false", redact_pci="false", numerals="auto", diarize="false"): params = { "language": language, "encoding": encoding, @@ -207,7 +205,6 @@ async def stream_audio(audio_file, api_key, language="en", 
encoding="linear16", "redact_pii": redact_pii, "redact_pci": redact_pci, "numerals": numerals, - "keywords": keywords, "diarize": diarize } ws_url = f"{BASE_WS_URL}?{urlencode(params)}" @@ -224,7 +221,7 @@ async def stream_audio(audio_file, api_key, language="en", encoding="linear16", await ws.send((chunk * 32768.0).astype(np.int16).tobytes()) await asyncio.sleep(len(chunk) / sample_rate) - await ws.send(json.dumps({"type": "end"})) + await ws.send(json.dumps({"type": "finalize"})) sender = asyncio.create_task(send()) @@ -249,7 +246,6 @@ if __name__ == "__main__": parser.add_argument("--redact-pii", default="false") parser.add_argument("--redact-pci", default="false") parser.add_argument("--numerals", default="auto") - parser.add_argument("--keywords", default=None) parser.add_argument("--diarize", default="false") args = parser.parse_args() @@ -265,7 +261,6 @@ if __name__ == "__main__": args.redact_pii, args.redact_pci, args.numerals, - args.keywords, args.diarize )) ``` diff --git a/fern/products/waves/versions/v4.0.0/api-references/lightning-asr.mdx b/fern/products/waves/versions/v4.0.0/api-references/lightning-asr.mdx index c6fd33b..be3d5a1 100644 --- a/fern/products/waves/versions/v4.0.0/api-references/lightning-asr.mdx +++ b/fern/products/waves/versions/v4.0.0/api-references/lightning-asr.mdx @@ -1,7 +1,7 @@ --- title: "Lightning (Pre-Recorded)" description: "Convert speech to text using file upload with the Lightning STT POST API" -openapi: "POST /api/v1/lightning/get_text" +openapi: "POST /waves/v1/lightning/get_text" --- The STT POST API allows you to convert speech to text using two different input methods: @@ -36,7 +36,7 @@ Choose the input method that best fits your use case: ```bash cURL - Raw Bytes curl --request POST \ - --url "https://waves-api.smallest.ai/api/v1/lightning/get_text?model=lightning&language=en&word_timestamps=true&diarize=true&age_detection=true&gender_detection=true&emotion_detection=true" \ + --url 
"https://api.smallest.ai/waves/v1/lightning/get_text?language=en&word_timestamps=true&diarize=true&age_detection=true&gender_detection=true&emotion_detection=true" \ --header 'Authorization: Bearer ' \ --header 'Content-Type: audio/wav' \ --data-binary '@/path/to/your/audio.wav' @@ -45,13 +45,12 @@ curl --request POST \ ```python Python - Raw Bytes import requests -url = "https://waves-api.smallest.ai/api/v1/lightning/get_text" +url = "https://api.smallest.ai/waves/v1/lightning/get_text" headers = { "Authorization": "Bearer YOUR_API_KEY", "Content-Type": "audio/wav" } params = { - "model": "lightning", "language": "en", "word_timestamps": "true", "diarize": "true", @@ -73,7 +72,6 @@ const audioFile = await fetch("/path/to/audio.wav"); const audioBuffer = await audioFile.arrayBuffer(); const params = new URLSearchParams({ - model: "lightning", language: "en", word_timestamps: "true", diarize: "true", @@ -83,7 +81,7 @@ const params = new URLSearchParams({ }); const response = await fetch( - `https://waves-api.smallest.ai/api/v1/lightning/get_text?${params}`, + `https://api.smallest.ai/waves/v1/lightning/get_text?${params}`, { method: "POST", headers: { @@ -106,7 +104,7 @@ console.log("Transcription:", result.transcription); ```bash cURL - Audio URL curl --request POST \ - --url "https://waves-api.smallest.ai/api/v1/lightning/get_text?model=lightning&language=en&word_timestamps=true&diarize=true&age_detection=true&gender_detection=true&emotion_detection=true" \ + --url "https://api.smallest.ai/waves/v1/lightning/get_text?language=en&word_timestamps=true&diarize=true&age_detection=true&gender_detection=true&emotion_detection=true" \ --header 'Authorization: Bearer ' \ --header 'Content-Type: application/json' \ --data '{ @@ -118,13 +116,12 @@ curl --request POST \ import requests import json -url = "https://waves-api.smallest.ai/api/v1/lightning/get_text" +url = "https://api.smallest.ai/waves/v1/lightning/get_text" headers = { "Authorization": "Bearer YOUR_API_KEY", 
"Content-Type": "application/json" } params = { - "model": "lightning", "language": "en", "word_timestamps": "true", "diarize": "true", @@ -143,7 +140,6 @@ print(f"Transcription: {result['transcription']}") ```javascript JavaScript - Audio URL const params = new URLSearchParams({ - model: "lightning", language: "en", word_timestamps: "true", diarize: "true", @@ -157,7 +153,7 @@ const payload = { }; const response = await fetch( - `https://waves-api.smallest.ai/api/v1/lightning/get_text?${params}`, + `https://api.smallest.ai/waves/v1/lightning/get_text?${params}`, { method: "POST", headers: { @@ -178,7 +174,7 @@ console.log("Transcription:", result.transcription); The Lightning STT model supports **automatic language detection** and transcription across **30+ languages**. -For the full list of supported languages, please check [**STT Supported Languages**](/v4.0.0/content/getting-started/models#model-overview-stt). +For the full list of supported languages, please check [**STT Supported Languages**](/waves/documentation/getting-started/models#model-overview-stt). 
Specify the **language** of the input audio using its [ISO diff --git a/fern/products/waves/versions/v4.0.0/api-references/lightning-large-stream.mdx b/fern/products/waves/versions/v4.0.0/api-references/lightning-large-stream.mdx index 54008dc..e314d6e 100644 --- a/fern/products/waves/versions/v4.0.0/api-references/lightning-large-stream.mdx +++ b/fern/products/waves/versions/v4.0.0/api-references/lightning-large-stream.mdx @@ -2,7 +2,7 @@ title: 'Text to Speech' sidebarTitle: 'Text to Speech (SSE)' description: 'Stream speech for given text using the Lightning-Large SSE API' -openapi: 'POST /api/v1/lightning-large/stream' +openapi: 'POST /waves/v1/lightning-large/stream' --- ## Overview diff --git a/fern/products/waves/versions/v4.0.0/api-references/lightning-large-ws.mdx b/fern/products/waves/versions/v4.0.0/api-references/lightning-large-ws.mdx index b97812e..9660c2b 100644 --- a/fern/products/waves/versions/v4.0.0/api-references/lightning-large-ws.mdx +++ b/fern/products/waves/versions/v4.0.0/api-references/lightning-large-ws.mdx @@ -15,7 +15,7 @@ Connect to the WebSocket endpoint: ```javascript const socket = new WebSocket( - 'wss://api.smallest.ai/api/v1/lightning-large/stream' + 'wss://api.smallest.ai/waves/v1/lightning-large/stream' ); ``` @@ -25,7 +25,7 @@ Authentication is required. 
Include your API key in the connection headers: ```javascript const socket = new WebSocket( - 'wss://api.smallest.ai/api/v1/lightning-large/stream', + 'wss://api.smallest.ai/waves/v1/lightning-large/stream', { headers: { Authorization: 'Bearer YOUR_API_KEY' @@ -110,7 +110,7 @@ If an error occurs: const WebSocket = require('ws'); const ws = new WebSocket( - 'wss://waves-api.smallest.ai/api/v1/lightning-large/get_speech/stream', + 'wss://api.smallest.ai/waves/v1/lightning-large/get_speech/stream', { headers: { Authorization: 'Bearer YOUR_API_KEY' @@ -164,7 +164,7 @@ import json import base64 API_KEY = "YOUR_API_KEY" -WS_URL = "wss://waves-api.smallest.ai/api/v1/lightning-large/get_speech/stream" +WS_URL = "wss://api.smallest.ai/waves/v1/lightning-large/get_speech/stream" async def text_to_speech(): async with websockets.connect(WS_URL, extra_headers={"Authorization": f"Bearer {API_KEY}"}) as ws: diff --git a/fern/products/waves/versions/v4.0.0/api-references/lightning-large.mdx b/fern/products/waves/versions/v4.0.0/api-references/lightning-large.mdx index 69784e5..d4fc470 100644 --- a/fern/products/waves/versions/v4.0.0/api-references/lightning-large.mdx +++ b/fern/products/waves/versions/v4.0.0/api-references/lightning-large.mdx @@ -1,6 +1,6 @@ --- title: 'Text to Speech' description: 'Get speech for given text using the Waves API' -openapi: 'POST /api/v1/lightning-large/get_speech' +openapi: 'POST /waves/v1/lightning-large/get_speech' hideApiMarker: False --- diff --git a/fern/products/waves/versions/v4.0.0/api-references/lightning-tts-ws.mdx b/fern/products/waves/versions/v4.0.0/api-references/lightning-tts-ws.mdx index 3cfeb08..bfb2538 100644 --- a/fern/products/waves/versions/v4.0.0/api-references/lightning-tts-ws.mdx +++ b/fern/products/waves/versions/v4.0.0/api-references/lightning-tts-ws.mdx @@ -15,7 +15,7 @@ Connect to the SSE endpoint: ```javascript const eventSource = new EventSource( - 'https://api.smallest.ai/api/v1/lightning/stream', + 
'https://api.smallest.ai/waves/v1/lightning/stream', { headers: { Authorization: 'Bearer YOUR_API_KEY' @@ -50,7 +50,7 @@ Send a POST request with the following JSON structure: ```javascript const eventSource = new EventSource( - 'https://api.smallest.ai/api/v1/lightning/stream', + 'https://api.smallest.ai/waves/v1/lightning/stream', { headers: { Authorization: 'Bearer YOUR_API_KEY' diff --git a/fern/products/waves/versions/v4.0.0/api-references/lightning-v2-stream.mdx b/fern/products/waves/versions/v4.0.0/api-references/lightning-v2-stream.mdx index 900d62e..0285393 100644 --- a/fern/products/waves/versions/v4.0.0/api-references/lightning-v2-stream.mdx +++ b/fern/products/waves/versions/v4.0.0/api-references/lightning-v2-stream.mdx @@ -1,8 +1,8 @@ --- -title: "Lightning V2 SSE" -sidebarTitle: "Lightning V2 SSE" +title: "Lightning v2 SSE" +sidebarTitle: "Lightning v2 SSE" description: "Stream speech for given text using the Lightning v2 SSE API" -openapi: "POST /api/v1/lightning-v2/stream" +openapi: "POST /waves/v1/lightning-v2/stream" --- ## Overview diff --git a/fern/products/waves/versions/v4.0.0/api-references/lightning-v2-ws.mdx b/fern/products/waves/versions/v4.0.0/api-references/lightning-v2-ws.mdx index 6028c77..936a720 100644 --- a/fern/products/waves/versions/v4.0.0/api-references/lightning-v2-ws.mdx +++ b/fern/products/waves/versions/v4.0.0/api-references/lightning-v2-ws.mdx @@ -1,7 +1,7 @@ --- -title: "Lightning V2 WebSocket" +title: "Lightning v2 WebSocket" description: The Lightning v2 WebSocket API provides real-time text-to-speech streaming capabilities with high-quality voice synthesis. This API uses WebSocket to deliver audio chunks as they're generated, enabling low-latency audio playback without waiting for the entire audio file to process. 
For an end-to-end example of how to use the Lightning v2 WebSocket API, check out [Text to Speech (WS) Example](https://github.com/smallest-inc/waves-examples/tree/main/lightning_v2/ws_streaming) -asyncapi: "/asyncapi-spec/lightning-v2-ws.json /api/v1/lightning-v2/get_speech/stream" +asyncapi: "/asyncapi-spec/lightning-v2-ws.json /waves/v1/lightning-v2/get_speech/stream" --- ## Overview @@ -41,7 +41,7 @@ This WebSocket API is subject to concurrency limits to ensure optimal performanc While you can maintain multiple WebSocket connections, only your concurrency limit number of requests can be actively processed at once. Additional requests sent through any connection while at the concurrency limit will be rejected with an error. -For detailed information about concurrency limits, rate limiting, and best practices for handling these constraints, see our [Concurrency and Limits](/api-references/concurrency-and-limits) documentation. +For detailed information about concurrency limits, rate limiting, and best practices for handling these constraints, see our [Concurrency and Limits](concurrency-and-limits) documentation. 
When multiple requests are sent simultaneously beyond your concurrency limit, diff --git a/fern/products/waves/versions/v4.0.0/api-references/lightning-v2.mdx b/fern/products/waves/versions/v4.0.0/api-references/lightning-v2.mdx index 7613c19..17dca03 100644 --- a/fern/products/waves/versions/v4.0.0/api-references/lightning-v2.mdx +++ b/fern/products/waves/versions/v4.0.0/api-references/lightning-v2.mdx @@ -1,6 +1,6 @@ --- -title: "Lightning V2" +title: "Lightning v2" description: "Get speech for given text using the Waves API" -openapi: "POST /api/v1/lightning-v2/get_speech" +openapi: "POST /waves/v1/lightning-v2/get_speech" hideApiMarker: False --- diff --git a/fern/products/waves/versions/v4.0.0/api-references/lightning-v3.1-stream.mdx b/fern/products/waves/versions/v4.0.0/api-references/lightning-v3.1-stream.mdx new file mode 100644 index 0000000..eac93ac --- /dev/null +++ b/fern/products/waves/versions/v4.0.0/api-references/lightning-v3.1-stream.mdx @@ -0,0 +1,26 @@ +--- +title: "Lightning v3.1 SSE" +sidebarTitle: "Lightning v3.1 SSE" +description: "Stream speech for given text using the Lightning v3.1 SSE API" +openapi: "POST /waves/v1/lightning-v3.1/stream" +--- + +## Overview + +The Lightning v3.1 SSE API provides real-time text-to-speech streaming capabilities with natural, expressive voice synthesis. This API uses Server-Sent Events (SSE) to deliver audio chunks as they're generated, enabling low-latency audio playback without waiting for the entire audio file to process. + +Lightning v3.1 is a 44 kHz model that produces natural, expressive, and realistic speech, with support for voice cloning. 
+ +## When to Use + +- **Interactive Applications**: Perfect for chatbots, virtual assistants, and other applications requiring immediate voice responses +- **Long-Form Content**: Efficiently stream audio for articles, stories, or other long-form content without buffering delays +- **Voice User Interfaces**: Create natural-sounding voice interfaces with minimal perceived latency +- **Accessibility Solutions**: Provide real-time audio versions of written content for users with visual impairments + +## How It Works + +1. **Make a POST Request**: Send your text and voice settings to the API endpoint +2. **Receive Audio Chunks**: The API processes your text and streams audio back as base64-encoded chunks with 1024 byte size +3. **Process the Stream**: Handle the SSE events to decode and play audio chunks sequentially +4. **End of Stream**: The API sends a completion event when all audio has been delivered diff --git a/fern/products/waves/versions/v4.0.0/api-references/lightning-v3.1-ws.mdx b/fern/products/waves/versions/v4.0.0/api-references/lightning-v3.1-ws.mdx new file mode 100644 index 0000000..b8e7503 --- /dev/null +++ b/fern/products/waves/versions/v4.0.0/api-references/lightning-v3.1-ws.mdx @@ -0,0 +1,53 @@ +--- +title: "Lightning v3.1 WebSocket" +description: The Lightning v3.1 WebSocket API provides real-time text-to-speech streaming capabilities with natural, expressive voice synthesis. This API uses WebSocket to deliver audio chunks as they're generated, enabling low-latency audio playback without waiting for the entire audio file to process. +asyncapi: "/asyncapi-spec/lightning-v3.1-ws.json /waves/v1/lightning-v3.1/get_speech/stream" +--- + +## Overview + +The Lightning v3.1 WebSocket API delivers state-of-the-art text-to-speech synthesis through a persistent WebSocket connection, providing ultra-low latency audio streaming for applications that demand real-time voice generation. 
Unlike traditional HTTP-based TTS APIs, this WebSocket implementation streams audio chunks as they're processed, significantly reducing perceived latency and enabling seamless user experiences. + +Lightning v3.1 is a 44 kHz model that produces natural, expressive, and realistic speech, with support for voice cloning. + +## Key Benefits + +- **Ultra-Low Latency**: Audio chunks delivered as soon as they're generated +- **Real-Time Streaming**: Continuous audio delivery without waiting for complete text processing +- **Natural Speech**: Expressive synthesis that sounds realistic +- **Voice Cloning Support**: Compatible with cloned voices +- **Persistent Connection**: Maintains connection for multiple requests, reducing connection overhead +- **Interactive Applications**: Perfect for chatbots, voice assistants, and live communication systems + +## Use Cases + +- **Voice Assistants**: Real-time response generation for conversational AI +- **Interactive Chatbots**: Immediate audio feedback for user interactions +- **Live Streaming**: Real-time narration and commentary +- **Accessibility Tools**: Screen readers and text-to-speech applications +- **Gaming**: Dynamic voice generation for characters and narration +- **Customer Service**: Automated voice responses with natural speech patterns + +## Concurrency and Rate Limits + +This WebSocket API is subject to concurrency limits to ensure optimal performance for all users. 
Here's how it works: + +- **1 Concurrency Unit** = 1 active TTS request that can be processed at any given time +- **5 WebSocket Connections** can be established per concurrency unit +- **Total Connections** = Your concurrency limit × 5 + +**Examples:** + +- **1 concurrency** = Up to 5 WebSocket connections, but only 1 active request +- **3 concurrency** = Up to 15 WebSocket connections, but only 3 active requests simultaneously +- **5 concurrency** = Up to 25 WebSocket connections, but only 5 active requests simultaneously + +While you can maintain multiple WebSocket connections, only your concurrency limit number of requests can be actively processed at once. Additional requests sent through any connection while at the concurrency limit will be rejected with an error. + +For detailed information about concurrency limits, rate limiting, and best practices for handling these constraints, see our [Concurrency and Limits](concurrency-and-limits) documentation. + + + When multiple requests are sent simultaneously beyond your concurrency limit, + additional requests will be rejected with an error. Implement proper error + handling and request queuing to manage concurrency effectively. + diff --git a/fern/products/waves/versions/v4.0.0/api-references/lightning-v3.1.mdx b/fern/products/waves/versions/v4.0.0/api-references/lightning-v3.1.mdx new file mode 100644 index 0000000..69d72ef --- /dev/null +++ b/fern/products/waves/versions/v4.0.0/api-references/lightning-v3.1.mdx @@ -0,0 +1,19 @@ +--- +title: "Lightning v3.1" +description: "Get speech for given text using the Lightning v3.1 model" +openapi: "POST /waves/v1/lightning-v3.1/get_speech" +hideApiMarker: False +--- + +## Overview + +Lightning v3.1 is a 44 kHz text-to-speech model that delivers natural, expressive, and realistic speech synthesis. 
+ +## Key Features + +- **Voice Cloning Support**: Compatible with cloned voices +- **Ultra-Low Latency**: Optimized for real-time applications +- **Multi-Language**: Supports English (en) and Hindi (hi) +- **Multiple Output Formats**: PCM, MP3, WAV, and mulaw +- **Flexible Sample Rates**: 8000 Hz to 44100 Hz +- **Speed Control**: Adjustable from 0.5x to 2x speed diff --git a/fern/products/waves/versions/v4.0.0/api-references/lightning.mdx b/fern/products/waves/versions/v4.0.0/api-references/lightning.mdx index ca23c1e..34c162f 100644 --- a/fern/products/waves/versions/v4.0.0/api-references/lightning.mdx +++ b/fern/products/waves/versions/v4.0.0/api-references/lightning.mdx @@ -2,6 +2,6 @@ title: 'Text to speech' sidebarTitle: 'Text to speech' description: 'Get speech for given text using the Waves API' -openapi: 'POST /api/v1/lightning/get_speech' +openapi: 'POST /waves/v1/lightning/get_speech' hideApiMarker: False --- diff --git a/fern/products/waves/versions/v4.0.0/api-references/pronunciations-dict/create.mdx b/fern/products/waves/versions/v4.0.0/api-references/pronunciations-dict/create.mdx index 4cdbc04..59f0b72 100644 --- a/fern/products/waves/versions/v4.0.0/api-references/pronunciations-dict/create.mdx +++ b/fern/products/waves/versions/v4.0.0/api-references/pronunciations-dict/create.mdx @@ -2,6 +2,6 @@ title: 'Create Pronunciations Dict' sidebarTitle: 'Create' description: 'Create pronunciations dicts using the Waves API' -openapi: 'POST /api/v1/pronunciation-dicts' +openapi: 'POST /waves/v1/pronunciation-dicts' hideApiMarker: False --- diff --git a/fern/products/waves/versions/v4.0.0/api-references/pronunciations-dict/delete.mdx b/fern/products/waves/versions/v4.0.0/api-references/pronunciations-dict/delete.mdx index 18c2db7..d09435c 100644 --- a/fern/products/waves/versions/v4.0.0/api-references/pronunciations-dict/delete.mdx +++ b/fern/products/waves/versions/v4.0.0/api-references/pronunciations-dict/delete.mdx @@ -2,6 +2,6 @@ title: 'Delete 
Pronunciations Dict' sidebarTitle: 'Delete' description: 'Delete pronunciations dicts using the Waves API' -openapi: 'DELETE /api/v1/pronunciation-dicts' +openapi: 'DELETE /waves/v1/pronunciation-dicts' hideApiMarker: False --- diff --git a/fern/products/waves/versions/v4.0.0/api-references/pronunciations-dict/list.mdx b/fern/products/waves/versions/v4.0.0/api-references/pronunciations-dict/list.mdx index bae70e7..56b89bf 100644 --- a/fern/products/waves/versions/v4.0.0/api-references/pronunciations-dict/list.mdx +++ b/fern/products/waves/versions/v4.0.0/api-references/pronunciations-dict/list.mdx @@ -2,6 +2,6 @@ title: 'List Pronunciations Dicts' sidebarTitle: 'List' description: 'Get pronunciations dicts using the Waves API' -openapi: 'GET /api/v1/pronunciation-dicts' +openapi: 'GET /waves/v1/pronunciation-dicts' hideApiMarker: False --- diff --git a/fern/products/waves/versions/v4.0.0/api-references/pronunciations-dict/update.mdx b/fern/products/waves/versions/v4.0.0/api-references/pronunciations-dict/update.mdx index 3b8b420..58de225 100644 --- a/fern/products/waves/versions/v4.0.0/api-references/pronunciations-dict/update.mdx +++ b/fern/products/waves/versions/v4.0.0/api-references/pronunciations-dict/update.mdx @@ -2,6 +2,6 @@ title: 'Update Pronunciations Dict' sidebarTitle: 'Update' description: 'Update pronunciations dicts using the Waves API' -openapi: 'PUT /api/v1/pronunciation-dicts' +openapi: 'PUT /waves/v1/pronunciation-dicts' hideApiMarker: False --- diff --git a/fern/products/waves/versions/v4.0.0/api-references/pulse-stt-ws.mdx b/fern/products/waves/versions/v4.0.0/api-references/pulse-stt-ws.mdx new file mode 100644 index 0000000..12aaa1f --- /dev/null +++ b/fern/products/waves/versions/v4.0.0/api-references/pulse-stt-ws.mdx @@ -0,0 +1,267 @@ +--- +title: "Pulse (Realtime)" +description: The Pulse STT WebSocket API provides real-time speech-to-text transcription capabilities with streaming audio input. 
This API uses WebSocket to deliver transcription results as audio is processed, enabling low-latency transcription without waiting for the entire audio file to upload. Perfect for live transcription, voice assistants, and real-time communication systems that require immediate speech recognition. Supports multiple languages, word-level timestamps, sentence-level timestamps (utterances), PII and PCI redaction, cumulative transcripts, and more advanced features. +asyncapi: "/asyncapi-spec/pulse-stt-ws.json /waves/v1/pulse/get_text" +--- + +## Query Parameters + +The WebSocket connection accepts the following query parameters: + +### Audio Configuration + +| Parameter | Type | Default | Description | +| ------------- | ------ | ---------- | ------------------------------------------------------------------------------------- | +| `encoding` | string | `linear16` | Audio encoding format. Options: `linear16`, `linear32`, `alaw`, `mulaw`, `opus`, `ogg_opus` | +| `sample_rate` | string | `16000` | Audio sample rate in Hz. Options: `8000`, `16000`, `22050`, `24000`, `44100`, `48000` | + +### Language & Detection + +| Parameter | Type | Default | Description | +| ---------- | ------ | ------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `language` | string | `en` | Language code for transcription. Use `multi` for automatic language detection. 
Supported: `it`, `es`, `en`, `pt`, `hi`, `de`, `fr`, `uk`, `ru`, `kn`, `ml`, `pl`, `mr`, `gu`, `cs`, `sk`, `te`, `or`, `nl`, `bn`, `lv`, `et`, `ro`, `pa`, `fi`, `sv`, `bg`, `ta`, `hu`, `da`, `lt`, `mt`, `multi` | + +### Feature Flags + +| Parameter | Type | Default | Description | +| --------------------- | ------ | ------- | ------------------------------------------------------------------------ | +| `word_timestamps` | string | `true` | Include word-level timestamps in transcription. Options: `true`, `false` | +| `full_transcript` | string | `false` | Include cumulative transcript received till now in responses where `is_final` is `true`. Options: `true`, `false` | +| `sentence_timestamps` | string | `false` | Include sentence-level timestamps (utterances) in transcription. Options: `true`, `false` | +| `redact_pii` | string | `false` | Redact personally identifiable information (name, surname, address). Options: `true`, `false` | +| `redact_pci` | string | `false` | Redact payment card information (credit card, CVV, zip, account number). Options: `true`, `false` | +| `numerals` | string | `auto` | Convert spoken numerals into digit form (e.g., 'twenty five' to '25'). `auto` enables automatic detection based on context. Options: `true`, `false`, `auto` | +| `diarize` | string | `false` | Enable speaker diarization to identify and label different speakers in the audio. When enabled, each word in the transcription includes `speaker` (integer ID) and `speaker_confidence` (float 0-1) fields. 
Options: `true`, `false` | + +### Webhook Configuration + +## Connection Flow + +### Example Connection URL + +```javascript +const url = new URL("wss://api.smallest.ai/waves/v1/pulse/get_text"); +url.searchParams.append("language", "en"); +url.searchParams.append("encoding", "linear16"); +url.searchParams.append("sample_rate", "16000"); +url.searchParams.append("word_timestamps", "true"); +url.searchParams.append("full_transcript", "true"); +url.searchParams.append("sentence_timestamps", "true"); +url.searchParams.append("redact_pii", "true"); +url.searchParams.append("redact_pci", "true"); +url.searchParams.append("numerals", "true"); +url.searchParams.append("diarize", "true"); + +const ws = new WebSocket(url.toString(), { + headers: { + Authorization: `Bearer ${API_KEY}`, + }, +}); +``` + +## Input Messages + +### Audio Data (Binary) + +Send raw audio bytes as binary WebSocket messages: + +```javascript +const audioChunk = new Uint8Array(4096); +ws.send(audioChunk); +``` + +### End Signal (JSON) + +Signal the end of audio stream. 
This is used to flush the transcription and receive the final response with `is_last=true`: + +```json +{ + "type": "finalize" +} +``` + +## Response Format + +The server responds with JSON messages containing transcription results: + +```json +{ + "session_id": "sess_12345abcde", + "transcript": "Hello, how are you?", + "is_final": true, + "is_last": false, + "language": "en" +} +``` + +### Response Fields + +| Field | Type | Description | +| ----------------- | ------- | -------------------------------------------------------------------- | +| `session_id` | string | Unique identifier for the transcription session | +| `transcript` | string | Partial or complete transcription text for the current segment | +| `is_final` | boolean | Indicates if this is the final transcription for the current segment | +| `is_last` | boolean | Indicates if this is the last transcription in the session | +| `language` | string | Detected primary language code; returned only when `is_final=true` | +| `languages` | array | List of languages detected in the audio; included in responses where `is_final` is `true` | + +### Optional Response Fields (Based on Query Parameters) + +| Field | Type | When Included | Description | +| ----------------- | ------ | ------------------------ | ---------------------------------------------------------------------- | +| `full_transcript` | string | `full_transcript=true` AND `is_final=true` | Complete transcription text accumulated till now. Only present in responses when `full_transcript=true` query parameter is set AND `is_final=true` | +| `words` | array | `word_timestamps=true` | Word-level timestamps with `word`, `start`, `end`, and `confidence` fields. 
When `diarize=true`, also includes `speaker` and `speaker_confidence` fields | +| `utterances` | array | `sentence_timestamps=true` | Sentence-level timestamps with `text`, `start`, and `end` fields | +| `redacted_entities`| array | `redact_pii=true` or `redact_pci=true` | List of redacted entity placeholders (e.g., `[FIRSTNAME_1]`, `[CREDITCARDCVV_1]`) | + +### Example Response with All Features + +```json +{ + "session_id": "sess_12345abcde", + "transcript": "[CREDITCARDCVV_1] and expiry [TIME_2].", + "is_final": true, + "is_last": true, + "full_transcript": "Hi, my name is [FIRSTNAME_1] [FIRSTNAME_2] You can reach me at [PHONENUMBER_1] and I paid using my Visa card [ZIPCODE_1] [ACCOUNTNUMBER_1] with [CREDITCARDCVV_1] and expiry [TIME_1].", + "language": "en", + "languages": ["en"], + "words": [ + { + "word": "[creditcardcvv_1]", + "start": 15.44, + "end": 17.36, + "confidence": 0.97, + "speaker": 0, + "speaker_confidence": 0.67 + }, + { + "word": "and", + "start": 18.0, + "end": 18.32, + "confidence": 0.94, + "speaker": 0, + "speaker_confidence": 0.76 + }, + { + "word": "expiry", + "start": 18.32, + "end": 19.2, + "confidence": 1.0, + "speaker": 0, + "speaker_confidence": 0.91 + }, + { + "word": "[time_2]", + "start": 19.2, + "end": 19.92, + "confidence": 0.91, + "speaker": 0, + "speaker_confidence": 0.82 + }, + ], + "utterances": [ + { + "text": "Hi, my name is Hans Miller.", + "start": 0.0, + "end": 2.64, + "speaker": 0 + }, + { + "text": "You can reach me at [PHONENUMBER_1], and I paid using my Visa card 4242 42424242 with CVV123 and expiry [TIME_1].", + "start": 2.64, + "end": 21.04, + "speaker": 0 + } + ], + "redacted_entities": [ + "[CREDITCARDCVV_1]", + "[TIME_2]" + ] +} +``` + +## Code Examples + + +```python python +import asyncio +import json +import argparse +import numpy as np +import websockets +import librosa +from urllib.parse import urlencode + +BASE_WS_URL = "wss://api.smallest.ai/waves/v1/pulse/get_text" + +async def stream_audio(audio_file, 
api_key, language="en", encoding="linear16", sample_rate=16000, word_timestamps="true", full_transcript="false", sentence_timestamps="false", redact_pii="false", redact_pci="false", numerals="auto", diarize="false"): + params = { + "language": language, + "encoding": encoding, + "sample_rate": sample_rate, + "word_timestamps": word_timestamps, + "full_transcript": full_transcript, + "sentence_timestamps": sentence_timestamps, + "redact_pii": redact_pii, + "redact_pci": redact_pci, + "numerals": numerals, + "diarize": diarize + } + ws_url = f"{BASE_WS_URL}?{urlencode(params)}" + + async with websockets.connect(ws_url, additional_headers={"Authorization": f"Bearer {api_key}"}) as ws: + print(f"Connected: {ws_url}") + + async def send(): + audio, _ = librosa.load(audio_file, sr=sample_rate, mono=True) + chunk_size = int(0.160 * sample_rate) + + for i in range(0, len(audio), chunk_size): + chunk = audio[i:i + chunk_size] + await ws.send((chunk * 32768.0).astype(np.int16).tobytes()) + await asyncio.sleep(len(chunk) / sample_rate) + + await ws.send(json.dumps({"type": "finalize"})) + + sender = asyncio.create_task(send()) + + async for message in ws: + data = json.loads(message) + print("Received:", json.dumps(data, indent=2)) + if data.get("is_last"): + break + + await sender + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("audio_file", nargs="?", default="path/to/audio.wav") + parser.add_argument("--api-key", "-k", default="your_api_key_here") + parser.add_argument("--language", "-l", default="en") + parser.add_argument("--encoding", "-e", default="linear16") + parser.add_argument("--sample-rate", "-sr", type=int, default=16000) + parser.add_argument("--word-timestamps", "-wt", default="true") + parser.add_argument("--full-transcript", "-ft", default="false") + parser.add_argument("--sentence-timestamps", "-st", default="false") + parser.add_argument("--redact-pii", default="false") + parser.add_argument("--redact-pci", 
default="false") + parser.add_argument("--numerals", default="auto") + parser.add_argument("--diarize", default="false") + + args = parser.parse_args() + asyncio.run(stream_audio( + args.audio_file, + args.api_key, + args.language, + args.encoding, + args.sample_rate, + args.word_timestamps, + args.full_transcript, + args.sentence_timestamps, + args.redact_pii, + args.redact_pci, + args.numerals, + args.diarize + )) +``` + diff --git a/fern/products/waves/versions/v4.0.0/api-references/pulse-stt.mdx b/fern/products/waves/versions/v4.0.0/api-references/pulse-stt.mdx new file mode 100644 index 0000000..c9775e6 --- /dev/null +++ b/fern/products/waves/versions/v4.0.0/api-references/pulse-stt.mdx @@ -0,0 +1,184 @@ +--- +title: "Pulse (Pre-Recorded)" +description: "Convert speech to text using file upload with the Pulse STT POST API" +openapi: "POST /waves/v1/pulse/get_text" +--- + +The STT POST API allows you to convert speech to text using two different input methods: + +1. **Raw Audio Bytes** (`application/octet-stream`) - Send raw audio data with all parameters as query parameters +2. **Audio URL** (`application/json`) - Provide only a URL to an audio file in the JSON body, with all other parameters as query parameters + +Both methods use our Pulse STT model with automatic language detection across 30+ languages. 
+ +## Authentication + +This endpoint requires authentication using a Bearer token in the Authorization header: + +```bash +Authorization: Bearer YOUR_API_KEY +``` + +## Input Methods + +Choose the input method that best fits your use case: + +| Method | Content Type | Use Case | Parameters | +| ------------- | -------------------------- | ------------------------------------------ | ---------------- | +| **Raw Bytes** | `application/octet-stream` | Streaming audio data, real-time processing | Query parameters | +| **Audio URL** | `application/json` | Remote audio files, webhook processing | Query parameters | + +## Code Examples + +### Method 1: Raw Audio Bytes (application/octet-stream) + + + +```bash cURL - Raw Bytes +curl --request POST \ + --url "https://api.smallest.ai/waves/v1/pulse/get_text?language=en&word_timestamps=true&diarize=true&age_detection=true&gender_detection=true&emotion_detection=true" \ + --header 'Authorization: Bearer ' \ + --header 'Content-Type: audio/wav' \ + --data-binary '@/path/to/your/audio.wav' +``` + +```python Python - Raw Bytes +import requests + +url = "https://api.smallest.ai/waves/v1/pulse/get_text" +headers = { + "Authorization": "Bearer YOUR_API_KEY", + "Content-Type": "audio/wav" +} +params = { + "language": "en", + "word_timestamps": "true", + "diarize": "true", + "age_detection": "true", + "gender_detection": "true", + "emotion_detection": "true" +} + +with open("path/to/your/audio.wav", "rb") as audio_file: + audio_data = audio_file.read() + +response = requests.post(url, headers=headers, params=params, data=audio_data) +result = response.json() +print(f"Transcription: {result['transcription']}") +``` + +```javascript JavaScript - Raw Bytes +const audioFile = await fetch("/path/to/audio.wav"); +const audioBuffer = await audioFile.arrayBuffer(); + +const params = new URLSearchParams({ + language: "en", + word_timestamps: "true", + diarize: "true", + age_detection: "true", + gender_detection: "true", + emotion_detection: 
"true", +}); + +const response = await fetch( + `https://api.smallest.ai/waves/v1/pulse/get_text?${params}`, + { + method: "POST", + headers: { + Authorization: "Bearer YOUR_API_KEY", + "Content-Type": "audio/wav", + }, + body: audioBuffer, + } +); + +const result = await response.json(); +console.log("Transcription:", result.transcription); +``` + + + +### Method 2: Audio URL (application/json) + + + +```bash cURL - Audio URL +curl --request POST \ + --url "https://api.smallest.ai/waves/v1/pulse/get_text?language=en&word_timestamps=true&diarize=true&age_detection=true&gender_detection=true&emotion_detection=true" \ + --header 'Authorization: Bearer ' \ + --header 'Content-Type: application/json' \ + --data '{ + "url": "https://example.com/audio.mp3" + }' +``` + +```python Python - Audio URL +import requests +import json + +url = "https://api.smallest.ai/waves/v1/pulse/get_text" +headers = { + "Authorization": "Bearer YOUR_API_KEY", + "Content-Type": "application/json" +} +params = { + "language": "en", + "word_timestamps": "true", + "diarize": "true", + "age_detection": "true", + "gender_detection": "true", + "emotion_detection": "true" +} +payload = { + "url": "https://example.com/audio.mp3" +} + +response = requests.post(url, headers=headers, params=params, data=json.dumps(payload)) +result = response.json() +print(f"Transcription: {result['transcription']}") +``` + +```javascript JavaScript - Audio URL +const params = new URLSearchParams({ + language: "en", + word_timestamps: "true", + diarize: "true", + age_detection: "true", + gender_detection: "true", + emotion_detection: "true", +}); + +const payload = { + url: "https://example.com/audio.mp3", +}; + +const response = await fetch( + `https://api.smallest.ai/waves/v1/pulse/get_text?${params}`, + { + method: "POST", + headers: { + Authorization: "Bearer YOUR_API_KEY", + "Content-Type": "application/json", + }, + body: JSON.stringify(payload), + } +); + +const result = await response.json(); 
+console.log("Transcription:", result.transcription); +``` + + + +## Supported Languages + +The Pulse STT model supports **automatic language detection** and transcription across **30+ languages**. + +For the full list of supported languages, please check [**STT Supported Languages**](/waves/documentation/getting-started/models#model-overview-stt). + + + Specify the **language** of the input audio using its [ISO + 639-1](https://en.wikipedia.org/wiki/ISO_639-1) code. Use **`multi`** to + enable automatic language detection from the supported list. The default is + **`en`** (English). + diff --git a/fern/products/waves/versions/v4.0.0/api-references/websocket.mdx b/fern/products/waves/versions/v4.0.0/api-references/websocket.mdx index ae738c7..bfb415e 100644 --- a/fern/products/waves/versions/v4.0.0/api-references/websocket.mdx +++ b/fern/products/waves/versions/v4.0.0/api-references/websocket.mdx @@ -1,11 +1,11 @@ --- -title: WebSocket Support for TTS API +title: WebSocket Support for Text to Speech (TTS) API sidebarTitle: WebSocket description: Learn about WebSocket support for our Text-to-Speech (TTS) API, how it works, and when to use it. icon: arrow-down-wide-short --- -Our Text-to-Speech (TTS) API supports WebSocket communication, providing a real-time, low-latency streaming experience for applications that require instant speech synthesis. WebSockets allow continuous data exchange, making them ideal for use cases that demand uninterrupted audio generation. +Our Text to Speech (TTS) API supports WebSocket communication, providing a real-time, low-latency streaming experience for applications that require instant speech synthesis. WebSockets allow continuous data exchange, making them ideal for use cases that demand uninterrupted audio generation. 
--- @@ -46,7 +46,7 @@ To support longer sessions for use cases where clients need more time (e.g., lon You can include the `timeout` parameter in the WebSocket URL like so: ```link -wss://waves-api.smallest.ai/api/v1/lightning-v2/get_speech/stream?timeout=60 +wss://api.smallest.ai/waves/v1/lightning-v3.1/get_speech/stream?timeout=60 ``` @@ -103,4 +103,4 @@ The WebSocket TTS API is optimized to handle real-time text-to-speech conversion } ``` -For implementation details, check our [WebSocket API documentation](/v3.0.1/content/api-references/lightning-v2-ws). +For implementation details, check our [WebSocket API documentation](/waves/documentation/api-references/lightning-v3.1-ws). diff --git a/fern/products/waves/versions/v4.0.0/best-practices/tts-best-practices.mdx b/fern/products/waves/versions/v4.0.0/best-practices/tts-best-practices.mdx index e09c166..0a88271 100644 --- a/fern/products/waves/versions/v4.0.0/best-practices/tts-best-practices.mdx +++ b/fern/products/waves/versions/v4.0.0/best-practices/tts-best-practices.mdx @@ -1,5 +1,5 @@ --- -title: "Text to Speech - Best Practices" +title: "Text to Speech (TTS) - Best Practices" description: "Learn best practices for text formatting for optimal Audio Generation." icon: "comment-dots" --- diff --git a/fern/products/waves/versions/v4.0.0/best-practices/vc-best-practices.mdx b/fern/products/waves/versions/v4.0.0/best-practices/vc-best-practices.mdx index 115b010..538529e 100644 --- a/fern/products/waves/versions/v4.0.0/best-practices/vc-best-practices.mdx +++ b/fern/products/waves/versions/v4.0.0/best-practices/vc-best-practices.mdx @@ -6,7 +6,7 @@ icon: "clone" To achieve the best results when cloning your voice, it's essential to provide high-quality reference audio. Below are some best practices, dos and don'ts, and examples to guide you. -Ready to Clone Your Voice? Try it out on our platform [waves.smallest.ai](https://waves.smallest.ai/studio/create) +Ready to Clone Your Voice? 
Try it out on our [platform](https://app.smallest.ai/waves/voice-cloning?utm_source=documentation&utm_medium=best-practices) --- @@ -36,24 +36,24 @@ Ready to Clone Your Voice? Try it out on our platform [waves.smallest.ai](https: - High-quality, clear, and consistent tone. ### Bad Reference Audio 1. **With Background Noise** 2. **Inconsistent Speaking Style** 3. **Overlapping Voices** --- @@ -81,35 +81,35 @@ Our platform supports emotional reference audio, meaning the emotions, pitch or ### Angry Tone - **Reference Audio Sample**: - **Output Audio Example**: ### Silent Tone - **Reference Audio Sample**: - **Output Audio Example**: ### Fast-Paced Tone - **Reference Audio Sample**: - **Output Audio Example**: diff --git a/fern/products/waves/versions/v4.0.0/changelog/announcements.mdx b/fern/products/waves/versions/v4.0.0/changelog/announcements.mdx index af82811..3a49ae3 100644 --- a/fern/products/waves/versions/v4.0.0/changelog/announcements.mdx +++ b/fern/products/waves/versions/v4.0.0/changelog/announcements.mdx @@ -4,6 +4,20 @@ description: "New updates and improvements from Smallest AI." mode: "center" --- + + ## Introducing Lightning v3.1 + + We are excited to announce the release of Lightning v3.1, our most natural-sounding text-to-speech model yet. Key features include: + + - **Natural, Expressive Speech**: A 44 kHz model that sounds realistic. + - **Ultra-Low Latency**: Optimized for real-time applications. + - **Voice Cloning Support**: Full compatibility with cloned voices. + - **Multiple Output Formats**: PCM, MP3, WAV, and mulaw. + - **Language Support**: English, Hindi, Tamil, and Spanish. + + Experience the new capabilities of Lightning v3.1 on our [platform](https://app.smallest.ai/waves/studio/create?utm_source=documentation&utm_medium=changelog) or via the [API](/waves/documentation/api-references/lightning-v3.1). 
+ + ## Introducing Lightning v2 @@ -13,7 +27,7 @@ mode: "center" - **100ms TTFB**: Superfast and scalable to support your realtime applications. - **0.05 per 10K characters**: 3x cheaper than other providers. - Experience the new capabilities of Lightning v2 on our [Platform](https://waves.smallest.ai). + Experience the new capabilities of Lightning v2 on our [Platform](https://app.smallest.ai/waves/studio/create). @@ -25,7 +39,7 @@ mode: "center" - **Versatile Applications**: Ideal for global applications requiring diverse language support. - **Beta Stage**: Currently in beta, with ongoing improvements and updates. - Experience the new capabilities of Lightning Multilingual on our [Platform](https://waves.smallest.ai). + Experience the new capabilities of Lightning Multilingual on our [Platform](https://app.smallest.ai/waves/studio/create). @@ -35,13 +49,13 @@ mode: "center" - **Enhanced Voice Cloning**: More accurate and lifelike voice clones. - **Best-in-Market Latency**: Instant voice cloning with a latency of under 300ms. - Experience the new capabilities of Lightning Large via our [Platform](https://waves.smallest.ai) or the [API](/v3.0.1/content/api-references/lightning). + Experience the new capabilities of Lightning Large via our [Platform](https://app.smallest.ai/waves/studio/create) or the [API](/v3.0.1/content/api-references/lightning). ## Introducing Waves - [Waves](https://waves.smallest.ai) is our new unified platform for speech synthesis. It supports various models designed for real-time applications such as voicebots. + [Waves](https://app.smallest.ai/waves/studio/create) is our new unified platform for speech synthesis. It supports various models designed for real-time applications such as voicebots. However, waves will also be supporting various features such as voice cloning, accent cloning, style transfer, speech to speech etc in the near future, making it a ubiquitous platform for anyone who needs AI generated speech. 
diff --git a/fern/products/waves/versions/v4.0.0/cookbooks/speech-to-text.mdx b/fern/products/waves/versions/v4.0.0/cookbooks/speech-to-text.mdx new file mode 100644 index 0000000..09e62dd --- /dev/null +++ b/fern/products/waves/versions/v4.0.0/cookbooks/speech-to-text.mdx @@ -0,0 +1,25 @@ +--- +title: "Speech to Text Examples" +sidebarTitle: "Speech to Text" +description: "Production-ready code examples for Pulse STT - from real-time streaming to batch transcription." +icon: "code" +--- + +Explore complete, runnable examples from our cookbook repository. + + + + Stream audio from your microphone over WebSocket and get real-time transcriptions. + + + Automatically transcribe and take notes from online meetings with speaker identification. + + + Transcribe podcast episodes and generate concise summaries. + + + Generate SRT/VTT subtitle files from audio and video content. + + + +Browse all examples on our [GitHub repository](https://github.com/smallest-inc/cookbook/tree/main/speech-to-text). 
diff --git a/fern/products/waves/versions/v4.0.0/getting-started/authentication.mdx b/fern/products/waves/versions/v4.0.0/getting-started/authentication.mdx index 1c530c8..42a78c3 100644 --- a/fern/products/waves/versions/v4.0.0/getting-started/authentication.mdx +++ b/fern/products/waves/versions/v4.0.0/getting-started/authentication.mdx @@ -30,7 +30,7 @@ Authorization: Bearer YOUR_API_KEY Test the API with this curl command by replacing `YOUR_API_KEY` with your actual key: ```bash -curl 'https://waves-api.smallest.ai/api/v1/lightning/get_voices' \ +curl 'https://api.smallest.ai/waves/v1/lightning-v3.1/get_voices' \ -H 'Content-Type: application/json' \ -H 'Authorization: Bearer YOUR_API_KEY' ``` @@ -44,6 +44,6 @@ from smallestai.waves import WavesClient client = WavesClient(api_key="YOUR_API_KEY") # Retrieve available voices -print(f"Available Voices: {client.get_voices(model='lightning')}") +print(f"Available Voices: {client.get_voices(model='lightning-v3.1')}") ``` \ No newline at end of file diff --git a/fern/products/waves/versions/v4.0.0/getting-started/introduction.mdx b/fern/products/waves/versions/v4.0.0/getting-started/introduction.mdx new file mode 100644 index 0000000..054dfbe --- /dev/null +++ b/fern/products/waves/versions/v4.0.0/getting-started/introduction.mdx @@ -0,0 +1,53 @@ +--- +title: "Introduction" +description: "Build with Waves - real-time speech AI for text-to-speech and speech-to-text." +icon: "book-open" +--- + +Waves is a speech AI platform by [Smallest AI](https://smallest.ai?utm_source=documentation&utm_medium=getting-started) that provides fast, accurate text-to-speech and speech-to-text APIs. + +## Products + + + + Convert text to natural-sounding speech with multiple voices and languages. + + + Transcribe audio in real-time or from files with high accuracy and low latency. 
+ + + +## Get Started + + + + Console API Keys + + Head over to the [Smallest AI Console API Keys](https://app.smallest.ai/dashboard/settings/apikeys?utm_source=documentation&utm_medium=getting-started) to create your API key. + + + Follow the [TTS Quickstart](/waves/documentation/text-to-speech/quickstart) or [STT Quickstart](/waves/documentation/speech-to-text/quickstart) to start building. + + + +## Resources + + + + Explore all endpoints + + + Available TTS & STT models + + + Example projects + + + Latest updates + + + +## Support + +- **Email**: [support@smallest.ai](mailto:support@smallest.ai) +- **Discord**: [Join our community](https://discord.gg/5evETqguJs) diff --git a/fern/products/waves/versions/v4.0.0/getting-started/models.mdx b/fern/products/waves/versions/v4.0.0/getting-started/models.mdx index 5fcd235..bc6717f 100644 --- a/fern/products/waves/versions/v4.0.0/getting-started/models.mdx +++ b/fern/products/waves/versions/v4.0.0/getting-started/models.mdx @@ -4,31 +4,23 @@ description: "Find detailed description of each model along with their capabilit icon: "cube" --- -## Text-to-Speech (TTS) Models +## Text to Speech (TTS) Models - - Our fastest model, optimized for low-latency applications. It can generate - 10 seconds of audio in just 100 milliseconds, making it ideal for real-time - applications such as voicebots and interactive systems. - An upgrade from the Lightning Large model, offering improved performance and quality. It supports 16 languages, making it suitable for a wider range of applications requiring expressive and high-quality speech synthesis. - - Offers more emotional depth and expressiveness compared to the Lightning - model. It supports voice cloning and has a latency of just under 300 - milliseconds, making it suitable for applications requiring high-quality, - expressive speech. + + A 44 kHz model delivering natural, expressive, and realistic speech. Supports voice cloning with ultra-low latency. 
Supports English, Hindi, Tamil, and Spanish. ## Speech to Text (STT) Models - + High-accuracy, low-latency automatic speech recognition model built for real-time transcription. It supports automatic language detection across 32 languages and delivers fast, reliable results. @@ -48,15 +40,14 @@ Our routing system automatically detects the client's geographical location and | Model ID | Description | Languages Supported | | ------------------- | -------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| **lightning** | Fastest model with an RTF of 0.01, generating 10 seconds of audio in 100 ms. | `English`
`Hindi` | -| **lightning-large** | More emotional depth and expressiveness, supports voice cloning, latency under 300 ms. | `English`
`Hindi` | | **lightning-v2** | 100ms TTFB, Supports 16 languages with voice cloning. | `English`
`Hindi`
`Tamil`
`Kannada`
`Malayalam`
`Telugu`
`Gujarati`
`Bengali`
`Marathi`
`German`
`French`
`Spanish`
`Italian`
`Polish`
`Dutch`
`Russian`
`Arabic`
`Hebrew`
`Swedish` | +| **lightning-v3.1** | 44 kHz model, natural expressive speech, ultra-low latency, supports voice cloning. | `English`
`Hindi`
`Tamil`
`Spanish` | ## Model Overview (STT) -| Model ID | Description | Languages Supported | -| ------------- | ----------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| **lightning** | Low-latency speech-to-text model supporting automatic language detection and real-time transcription. | `Italian`
`Spanish`
`English`
`Portuguese`
`Hindi`
`German`
`French`
`Ukrainian`
`Russian`
`Kannada`
`Malayalam`
`Polish`
`Marathi`
`Gujarati`
`Czech`
`Slovak`
`Telugu`
`Oriya (Odia)`
`Dutch`
`Bengali`
`Latvian`
`Estonian`
`Romanian`
`Punjabi`
`Finnish`
`Swedish`
`Bulgarian`
`Tamil`
`Hungarian`
`Danish`
`Lithuanian`
`Maltese` | +| Model ID | Description | Languages Supported | +| --------- | ----------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| **pulse** | Low-latency speech-to-text model supporting automatic language detection and real-time transcription. | `Italian`
`Spanish`
`English`
`Portuguese`
`Hindi`
`German`
`French`
`Ukrainian`
`Russian`
`Kannada`
`Malayalam`
`Polish`
`Marathi`
`Gujarati`
`Czech`
`Slovak`
`Telugu`
`Oriya (Odia)`
`Dutch`
`Bengali`
`Latvian`
`Estonian`
`Romanian`
`Punjabi`
`Finnish`
`Swedish`
`Bulgarian`
`Tamil`
`Hungarian`
`Danish`
`Lithuanian`
`Maltese` | Note: The API uses [ISO 639-1 language codes - Set diff --git a/fern/products/waves/versions/v4.0.0/on-prem/api-reference/authentication.mdx b/fern/products/waves/versions/v4.0.0/on-prem/api-reference/authentication.mdx index 146f10e..174f2e6 100644 --- a/fern/products/waves/versions/v4.0.0/on-prem/api-reference/authentication.mdx +++ b/fern/products/waves/versions/v4.0.0/on-prem/api-reference/authentication.mdx @@ -117,15 +117,52 @@ Authorization: Token YOUR_LICENSE_KEY ## Response Codes -| Code | Status | Description | -|------|--------|-------------| -| 200 | OK | Request successful | -| 400 | Bad Request | Invalid request parameters | -| 401 | Unauthorized | Invalid or missing license key | -| 403 | Forbidden | License expired or quota exceeded | -| 429 | Too Many Requests | Rate limit exceeded | -| 500 | Internal Server Error | Server error | -| 503 | Service Unavailable | Service temporarily unavailable | + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
CodeStatusDescription
200OKRequest successful
400Bad RequestInvalid request parameters
401UnauthorizedInvalid or missing license key
403ForbiddenLicense expired or quota exceeded
429Too Many RequestsRate limit exceeded
500Internal Server ErrorServer error
503Service UnavailableService temporarily unavailable
## Error Responses @@ -175,7 +212,7 @@ Authorization: Token YOUR_LICENSE_KEY ## Security Best Practices - + Never hardcode license keys in source code. **Use environment variables**: @@ -189,7 +226,7 @@ Authorization: Token YOUR_LICENSE_KEY - Kubernetes Secrets - + Always use HTTPS for API requests in production: ```javascript @@ -205,7 +242,7 @@ Authorization: Token YOUR_LICENSE_KEY ``` - + Implement key rotation policy: - Rotate keys every 90 days @@ -213,7 +250,7 @@ Authorization: Token YOUR_LICENSE_KEY - Revoke compromised keys immediately - + Track API usage to detect anomalies: - Unusual traffic patterns @@ -221,7 +258,7 @@ Authorization: Token YOUR_LICENSE_KEY - Quota approaching limits - + Add client-side rate limiting: ```python @@ -307,15 +344,15 @@ Successful authentication returns transcription results. ## What's Next? - + Learn about the transcription API - + Monitor service health - + See complete integration examples diff --git a/fern/products/waves/versions/v4.0.0/on-prem/api-reference/endpoints/health-check.mdx b/fern/products/waves/versions/v4.0.0/on-prem/api-reference/endpoints/health-check.mdx index 716af23..5f2f355 100644 --- a/fern/products/waves/versions/v4.0.0/on-prem/api-reference/endpoints/health-check.mdx +++ b/fern/products/waves/versions/v4.0.0/on-prem/api-reference/endpoints/health-check.mdx @@ -298,7 +298,7 @@ Include health checks in CI/CD: ## Best Practices - + Configure reasonable timeouts: - **Timeout**: 5 seconds max @@ -313,7 +313,7 @@ Include health checks in CI/CD: ``` - + Always configure health checks in load balancers: - Prevents traffic to unhealthy instances @@ -321,7 +321,7 @@ Include health checks in CI/CD: - Reduces user-facing errors - + Set up continuous monitoring: - External uptime monitoring @@ -329,7 +329,7 @@ Include health checks in CI/CD: - Alerting on failures - + Regularly test health check behavior: ```bash @@ -385,11 +385,11 @@ kubectl logs -l app=api-server -n smallest ## What's Next? 
- + Learn about the transcription endpoint - + See complete integration examples diff --git a/fern/products/waves/versions/v4.0.0/on-prem/api-reference/endpoints/transcription.mdx b/fern/products/waves/versions/v4.0.0/on-prem/api-reference/endpoints/transcription.mdx index 199ec7a..c3e4155 100644 --- a/fern/products/waves/versions/v4.0.0/on-prem/api-reference/endpoints/transcription.mdx +++ b/fern/products/waves/versions/v4.0.0/on-prem/api-reference/endpoints/transcription.mdx @@ -21,7 +21,7 @@ Requires Bearer token authentication with your license key. Authorization: Token YOUR_LICENSE_KEY ``` -See [Authentication](/api-reference/authentication) for details. +See [Authentication](/waves/self-host/api-reference/authentication) for details. ## Request @@ -345,13 +345,42 @@ Later, webhook receives: ### Supported Formats -| Format | Extension | Notes | -|--------|-----------|-------| -| WAV | `.wav` | Recommended for best quality | -| MP3 | `.mp3` | Widely supported | -| FLAC | `.flac` | Lossless compression | -| OGG | `.ogg` | Open format | -| M4A | `.m4a` | Apple format | + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FormatExtensionNotes
WAV.wavRecommended for best quality
MP3.mp3Widely supported
FLAC.flacLossless compression
OGG.oggOpen format
M4A.m4aApple format
### Recommended Specifications @@ -385,12 +414,32 @@ Contact support@smallest.ai to increase limits for your license. Typical performance metrics: -| Metric | Value | -|--------|-------| -| Real-time Factor | 0.05-0.15x | -| Latency (1 min audio) | 3-9 seconds | -| Concurrent capacity | 100+ requests | -| Throughput | 100+ hours/hour | + + + + + + + + + + + + + + + + + + + + + + + + + +
MetricValue
Real-time Factor0.05-0.15x
Latency (1 min audio)3-9 seconds
Concurrent capacity100+ requests
Throughput100+ hours/hour
Performance varies based on: @@ -403,13 +452,13 @@ Performance varies based on: ## Best Practices - + - Use lossless formats (WAV, FLAC) when possible - Ensure clear audio with minimal background noise - Use appropriate sample rate (16 kHz minimum) - + Implement retry logic with exponential backoff: ```python @@ -428,7 +477,7 @@ Performance varies based on: ``` - + For audio longer than 5 minutes, use callback URL: ```json @@ -439,7 +488,7 @@ Performance varies based on: ``` - + Cache transcription results to avoid duplicate processing: ```python @@ -462,11 +511,11 @@ Performance varies based on: ## What's Next? - + Monitor service availability - + Complete integration examples diff --git a/fern/products/waves/versions/v4.0.0/on-prem/api-reference/examples.mdx b/fern/products/waves/versions/v4.0.0/on-prem/api-reference/examples.mdx index 0b3a686..f5189db 100644 --- a/fern/products/waves/versions/v4.0.0/on-prem/api-reference/examples.mdx +++ b/fern/products/waves/versions/v4.0.0/on-prem/api-reference/examples.mdx @@ -462,7 +462,7 @@ func main() { ## Best Practices - + Never hardcode credentials: ```bash @@ -471,7 +471,7 @@ func main() { ``` - + Always handle errors gracefully: ```python @@ -486,7 +486,7 @@ func main() { ``` - + Implement exponential backoff: ```python @@ -501,7 +501,7 @@ func main() { ``` - + Reuse connections for better performance: ```python @@ -510,7 +510,7 @@ func main() { ``` - + Track API usage and errors: ```python @@ -529,11 +529,11 @@ func main() { ## What's Next? - + Learn about API authentication - + Debug common integration issues diff --git a/fern/products/waves/versions/v4.0.0/on-prem/docker/configuration.mdx b/fern/products/waves/versions/v4.0.0/on-prem/docker/configuration.mdx index a1afe22..2cb856e 100644 --- a/fern/products/waves/versions/v4.0.0/on-prem/docker/configuration.mdx +++ b/fern/products/waves/versions/v4.0.0/on-prem/docker/configuration.mdx @@ -574,11 +574,11 @@ volumes: ## What's Next? 
- + Learn about each service component in detail - + Debug common issues and optimize performance diff --git a/fern/products/waves/versions/v4.0.0/on-prem/docker/docker-troubleshooting.mdx b/fern/products/waves/versions/v4.0.0/on-prem/docker/docker-troubleshooting.mdx index 7a2e60f..53fb0b5 100644 --- a/fern/products/waves/versions/v4.0.0/on-prem/docker/docker-troubleshooting.mdx +++ b/fern/products/waves/versions/v4.0.0/on-prem/docker/docker-troubleshooting.mdx @@ -19,14 +19,14 @@ docker run --rm --gpus all nvidia/cuda:11.8.0-base-ubuntu22.04 nvidia-smi ``` - + ```bash sudo systemctl restart docker docker compose up -d ``` - + ```bash sudo apt-get remove nvidia-container-toolkit sudo apt-get update @@ -36,7 +36,7 @@ docker run --rm --gpus all nvidia/cuda:11.8.0-base-ubuntu22.04 nvidia-smi ``` - + ```bash nvidia-smi ``` @@ -48,7 +48,7 @@ docker run --rm --gpus all nvidia/cuda:11.8.0-base-ubuntu22.04 nvidia-smi ``` - + Verify `/etc/docker/daemon.json` contains: ```json { @@ -83,7 +83,7 @@ docker compose logs license-proxy ``` - + Check `.env` file: ```bash cat .env | grep LICENSE_KEY @@ -100,10 +100,10 @@ docker compose logs license-proxy ``` - + Test connection to license server: ```bash - curl -v https://console-api.smallest.ai + curl -v https://api.smallest.ai ``` If this fails, check: @@ -112,7 +112,7 @@ docker compose logs license-proxy - DNS resolution - + If the key appears correct and network is accessible, your license may be: - Expired - Revoked @@ -140,7 +140,7 @@ docker compose logs lightning-asr ``` - + Check `.env` file: ```bash cat .env | grep MODEL_URL @@ -152,7 +152,7 @@ docker compose logs lightning-asr ``` - + Models require ~20-30 GB: ```bash df -h @@ -164,7 +164,7 @@ docker compose logs lightning-asr ``` - + Download model manually and use volume mount: ```bash @@ -181,7 +181,7 @@ docker compose logs lightning-asr ``` - + For slow connections, increase download timeout: ```yaml @@ -207,7 +207,7 @@ sudo netstat -tulpn | grep 7100 ``` - + If 
another service is using the port: ```bash sudo systemctl stop [service-name] @@ -219,7 +219,7 @@ sudo netstat -tulpn | grep 7100 ``` - + Modify docker-compose.yml to use different port: ```yaml api-server: @@ -230,7 +230,7 @@ sudo netstat -tulpn | grep 7100 Access API at http://localhost:8080 instead - + Old containers may still be bound: ```bash docker compose down @@ -256,7 +256,7 @@ docker inspect [container-name] | grep OOMKilled ``` - + Lightning ASR requires minimum 16 GB RAM Check current memory: @@ -265,7 +265,7 @@ docker inspect [container-name] | grep OOMKilled ``` - + Prevent one service from consuming all memory: ```yaml services: @@ -279,7 +279,7 @@ docker inspect [container-name] | grep OOMKilled ``` - + Add swap space (temporary solution): ```bash sudo fallocate -l 16G /swapfile @@ -289,7 +289,7 @@ docker inspect [container-name] | grep OOMKilled ``` - + Use smaller model or reduce batch size: ```yaml lightning-asr: @@ -314,7 +314,7 @@ docker compose logs --tail=100 [service-name] ``` - + ```bash docker inspect [container-name] --format='{{.State.ExitCode}}' ``` @@ -325,7 +325,7 @@ docker compose logs --tail=100 [service-name] - `1`: General error - + Temporarily disable restart to debug: ```yaml lightning-asr: @@ -338,7 +338,7 @@ docker compose logs --tail=100 [service-name] ``` - + Ensure required services are healthy: ```bash docker compose ps @@ -368,7 +368,7 @@ docker stats ``` - + Ensure GPU is not throttling: ```bash nvidia-smi -q -d PERFORMANCE @@ -380,7 +380,7 @@ docker stats ``` - + ```yaml lightning-asr: deploy: @@ -390,7 +390,7 @@ docker stats ``` - + For maximum performance (loses isolation): ```yaml api-server: @@ -398,7 +398,7 @@ docker stats ``` - + Use Redis with persistence disabled for speed: ```yaml redis: @@ -406,7 +406,7 @@ docker stats ``` - + Scale Lightning ASR workers: ```bash docker compose up -d --scale lightning-asr=2 @@ -644,11 +644,11 @@ Include: ## What's Next? 
- + Advanced configuration options - + Integrate with your applications diff --git a/fern/products/waves/versions/v4.0.0/on-prem/docker/prerequisites.mdx b/fern/products/waves/versions/v4.0.0/on-prem/docker/prerequisites.mdx index 51ac585..3a39900 100644 --- a/fern/products/waves/versions/v4.0.0/on-prem/docker/prerequisites.mdx +++ b/fern/products/waves/versions/v4.0.0/on-prem/docker/prerequisites.mdx @@ -12,26 +12,26 @@ Before deploying Smallest Self-Host with Docker, ensure your system meets the ha ### Minimum Specifications - + **4 cores** minimum 8+ cores recommended for production - + **16 GB** minimum 32+ GB recommended for production - + **NVIDIA GPU required** - L40s or A10 (recommended) - A10, A100, H100, T4, L4 (supported) - + **100 GB** minimum - 50 GB for models @@ -225,13 +225,13 @@ You should see your GPU information displayed. Before installation, obtain the following from Smallest.ai: - + Your unique license key for validation Contact: **support@smallest.ai** - + Credentials to pull Docker images: - Registry URL: `quay.io` - Username @@ -241,7 +241,7 @@ Before installation, obtain the following from Smallest.ai: Contact: **support@smallest.ai** - + Download URLs for ASR models Contact: **support@smallest.ai** @@ -252,16 +252,41 @@ Before installation, obtain the following from Smallest.ai: Ensure the following ports are available: -| Port | Service | Purpose | -|------|---------|---------| -| 7100 | API Server | Client API requests | -| 2233 | Lightning ASR | Internal ASR processing | -| 6699 | License Proxy | Internal license validation | -| 6379 | Redis | Internal caching | + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PortServicePurpose
7100API ServerClient API requests
2233Lightning ASRInternal ASR processing
6699License ProxyInternal license validation
6379RedisInternal caching
The License Proxy requires outbound HTTPS access to Smallest's license servers for validation. Ensure your firewall allows outbound connections to: -- `console-api.smallest.ai` (port 443) +- `api.smallest.ai` (port 443) ## Verification Checklist @@ -301,7 +326,7 @@ Before proceeding to installation, verify: Once all prerequisites are met, proceed to the quick start guide: - + Deploy Smallest Self-Host with Docker Compose diff --git a/fern/products/waves/versions/v4.0.0/on-prem/docker/quick-start.mdx b/fern/products/waves/versions/v4.0.0/on-prem/docker/quick-start.mdx index 61b06e8..99a3596 100644 --- a/fern/products/waves/versions/v4.0.0/on-prem/docker/quick-start.mdx +++ b/fern/products/waves/versions/v4.0.0/on-prem/docker/quick-start.mdx @@ -8,7 +8,7 @@ description: Deploy Smallest Self-Host with Docker Compose in under 15 minutes This guide walks you through deploying Smallest Self-Host using Docker Compose. You'll have a fully functional speech-to-text service running in under 15 minutes. - Ensure you've completed all [prerequisites](/docker/prerequisites) before + Ensure you've completed all [prerequisites](/waves/self-host/docker-setup/stt-deployment/prerequisites/hardware-requirements) before starting this guide. @@ -342,7 +342,7 @@ Expected response: `{"status": "healthy"}` ## Common Startup Issues - + **Error:** `could not select device driver "nvidia"` **Solution:** @@ -354,16 +354,16 @@ Expected response: `{"status": "healthy"}` If this fails, reinstall NVIDIA Container Toolkit. 
- + **Error:** `License validation failed` **Solution:** - Verify LICENSE_KEY in `.env` is correct - Check internet connectivity - - Ensure firewall allows HTTPS to console-api.smallest.ai + - Ensure firewall allows HTTPS to api.smallest.ai - + **Error:** `Failed to download model` **Solution:** @@ -372,7 +372,7 @@ Expected response: `{"status": "healthy"}` - Check internet connectivity - + **Error:** `port is already allocated` **Solution:** @@ -443,19 +443,19 @@ docker compose down -v ## What's Next? - + Customize your deployment with advanced configuration options - + Learn about each service component in detail - + Debug common issues and optimize performance - + Integrate with your applications using the API diff --git a/fern/products/waves/versions/v4.0.0/on-prem/docker/stt/configuration.mdx b/fern/products/waves/versions/v4.0.0/on-prem/docker/stt/configuration.mdx index 714d931..8d9b3bb 100644 --- a/fern/products/waves/versions/v4.0.0/on-prem/docker/stt/configuration.mdx +++ b/fern/products/waves/versions/v4.0.0/on-prem/docker/stt/configuration.mdx @@ -574,11 +574,11 @@ volumes: ## What's Next? 
- + Learn about each service component in detail - + Debug common issues and optimize performance diff --git a/fern/products/waves/versions/v4.0.0/on-prem/docker/stt/docker-troubleshooting.mdx b/fern/products/waves/versions/v4.0.0/on-prem/docker/stt/docker-troubleshooting.mdx index 7db499c..0059f81 100644 --- a/fern/products/waves/versions/v4.0.0/on-prem/docker/stt/docker-troubleshooting.mdx +++ b/fern/products/waves/versions/v4.0.0/on-prem/docker/stt/docker-troubleshooting.mdx @@ -19,14 +19,14 @@ docker run --rm --gpus all nvidia/cuda:11.8.0-base-ubuntu22.04 nvidia-smi ``` - + ```bash sudo systemctl restart docker docker compose up -d ``` - + ```bash sudo apt-get remove nvidia-container-toolkit sudo apt-get update @@ -36,7 +36,7 @@ docker run --rm --gpus all nvidia/cuda:11.8.0-base-ubuntu22.04 nvidia-smi ``` - + ```bash nvidia-smi ``` @@ -48,7 +48,7 @@ docker run --rm --gpus all nvidia/cuda:11.8.0-base-ubuntu22.04 nvidia-smi ``` - + Verify `/etc/docker/daemon.json` contains: ```json { @@ -83,7 +83,7 @@ docker compose logs license-proxy ``` - + Check `.env` file: ```bash cat .env | grep LICENSE_KEY @@ -100,10 +100,10 @@ docker compose logs license-proxy ``` - + Test connection to license server: ```bash - curl -v https://console-api.smallest.ai + curl -v https://api.smallest.ai ``` If this fails, check: @@ -112,7 +112,7 @@ docker compose logs license-proxy - DNS resolution - + If the key appears correct and network is accessible, your license may be: - Expired - Revoked @@ -140,7 +140,7 @@ docker compose logs lightning-asr ``` - + Check `.env` file: ```bash cat .env | grep MODEL_URL @@ -152,7 +152,7 @@ docker compose logs lightning-asr ``` - + Models require ~20-30 GB: ```bash df -h @@ -164,7 +164,7 @@ docker compose logs lightning-asr ``` - + Download model manually and use volume mount: ```bash @@ -181,7 +181,7 @@ docker compose logs lightning-asr ``` - + For slow connections, increase download timeout: ```yaml @@ -207,7 +207,7 @@ sudo netstat -tulpn | grep 7100 
``` - + If another service is using the port: ```bash sudo systemctl stop [service-name] @@ -219,7 +219,7 @@ sudo netstat -tulpn | grep 7100 ``` - + Modify docker-compose.yml to use different port: ```yaml api-server: @@ -230,7 +230,7 @@ sudo netstat -tulpn | grep 7100 Access API at http://localhost:8080 instead - + Old containers may still be bound: ```bash docker compose down @@ -256,7 +256,7 @@ docker inspect [container-name] | grep OOMKilled ``` - + Lightning ASR requires minimum 16 GB RAM Check current memory: @@ -265,7 +265,7 @@ docker inspect [container-name] | grep OOMKilled ``` - + Prevent one service from consuming all memory: ```yaml services: @@ -279,7 +279,7 @@ docker inspect [container-name] | grep OOMKilled ``` - + Add swap space (temporary solution): ```bash sudo fallocate -l 16G /swapfile @@ -289,7 +289,7 @@ docker inspect [container-name] | grep OOMKilled ``` - + Use smaller model or reduce batch size: ```yaml lightning-asr: @@ -314,7 +314,7 @@ docker compose logs --tail=100 [service-name] ``` - + ```bash docker inspect [container-name] --format='{{.State.ExitCode}}' ``` @@ -325,7 +325,7 @@ docker compose logs --tail=100 [service-name] - `1`: General error - + Temporarily disable restart to debug: ```yaml lightning-asr: @@ -338,7 +338,7 @@ docker compose logs --tail=100 [service-name] ``` - + Ensure required services are healthy: ```bash docker compose ps @@ -368,7 +368,7 @@ docker stats ``` - + Ensure GPU is not throttling: ```bash nvidia-smi -q -d PERFORMANCE @@ -380,7 +380,7 @@ docker stats ``` - + ```yaml lightning-asr: deploy: @@ -390,7 +390,7 @@ docker stats ``` - + For maximum performance (loses isolation): ```yaml api-server: @@ -398,7 +398,7 @@ docker stats ``` - + Use Redis with persistence disabled for speed: ```yaml redis: @@ -406,7 +406,7 @@ docker stats ``` - + Scale Lightning ASR workers: ```bash docker compose up -d --scale lightning-asr=2 @@ -644,11 +644,11 @@ Include: ## What's Next? 
- + Advanced configuration options - + Integrate with your applications diff --git a/fern/products/waves/versions/v4.0.0/on-prem/docker/stt/prerequisites/credentials.mdx b/fern/products/waves/versions/v4.0.0/on-prem/docker/stt/prerequisites/credentials.mdx new file mode 100644 index 0000000..48118c0 --- /dev/null +++ b/fern/products/waves/versions/v4.0.0/on-prem/docker/stt/prerequisites/credentials.mdx @@ -0,0 +1,52 @@ +--- +title: Credentials & Access +description: License keys and registry credentials for STT Docker deployment +--- + +## Required Credentials + +Before installation, obtain the following from Smallest.ai: + + + + Your unique license key for validation + + Contact: **support@smallest.ai** + + + + Credentials to pull Docker images: + - Registry URL: `quay.io` + - Username + - Password + - Email + + Contact: **support@smallest.ai** + + + + Download URLs for ASR models + + Contact: **support@smallest.ai** + + + +## Login to Container Registry + +Once you have your credentials, authenticate with the registry: + +```bash +docker login quay.io -u -p +``` + +## Environment Variables + +You'll need to set these in your deployment: + +```bash +export LICENSE_KEY="your-license-key" +export QUAY_USERNAME="your-username" +export QUAY_PASSWORD="your-password" +export MODEL_URL="your-model-url" +``` + diff --git a/fern/products/waves/versions/v4.0.0/on-prem/docker/stt/prerequisites/hardware-requirements.mdx b/fern/products/waves/versions/v4.0.0/on-prem/docker/stt/prerequisites/hardware-requirements.mdx new file mode 100644 index 0000000..14c7273 --- /dev/null +++ b/fern/products/waves/versions/v4.0.0/on-prem/docker/stt/prerequisites/hardware-requirements.mdx @@ -0,0 +1,52 @@ +--- +title: Hardware Requirements +description: Hardware specifications for deploying Speech-to-Text with Docker +--- + +## Minimum Specifications + + + + **4 cores** minimum + + 8+ cores recommended for production + + + + **16 GB** minimum + + 32+ GB recommended for production + + + + 
**NVIDIA GPU required** + + - L4 or L40s (recommended) + - A10, A100, H100, T4 (supported) + - Minimum 16GB VRAM + + + + **100 GB** minimum + + - 50 GB for models + - 50 GB for logs and data + + + +## Network Requirements + +Ensure the following ports are available: + +| Port | Service | Purpose | +|------|---------|---------| +| 7100 | API Server | Client API requests | +| 2233 | Lightning ASR | Internal ASR processing | +| 6699 | License Proxy | Internal license validation | +| 6379 | Redis | Internal caching | + + +The License Proxy requires outbound HTTPS access to Smallest's license servers for validation. Ensure your firewall allows outbound connections to: +- `api.smallest.ai` (port 443) + + diff --git a/fern/products/waves/versions/v4.0.0/on-prem/docker/stt/prerequisites/software-requirements.mdx b/fern/products/waves/versions/v4.0.0/on-prem/docker/stt/prerequisites/software-requirements.mdx new file mode 100644 index 0000000..cab73d5 --- /dev/null +++ b/fern/products/waves/versions/v4.0.0/on-prem/docker/stt/prerequisites/software-requirements.mdx @@ -0,0 +1,180 @@ +--- +title: Software Requirements +description: Software and dependencies for deploying Speech-to-Text with Docker +--- + +## Operating System + + + + ```bash + Ubuntu 20.04 LTS or later + Ubuntu 22.04 LTS (recommended) + Debian 11 or later + ``` + + + + ```bash + CentOS 8 or later + RHEL 8 or later + Rocky Linux 8 or later + ``` + + + + Most modern Linux distributions with kernel 4.15+ + + + +## Required Software + + + + Docker Engine 20.10 or later + + ```bash + docker --version + ``` + + Expected output: `Docker version 20.10.0 or higher` + + + + Docker Compose 2.0 or later + + ```bash + docker compose version + ``` + + Expected output: `Docker Compose version v2.0.0 or higher` + + + + NVIDIA Driver 525+ for newer GPUs (A10, A100, L4) + + NVIDIA Driver 470+ for older GPUs (T4, V100) + + ```bash + nvidia-smi + ``` + + Should display GPU information without errors + + + + Required for GPU 
access in containers + + ```bash + nvidia-container-cli --version + ``` + + + +## Install Docker + + + + ```bash + sudo apt-get update + sudo apt-get install -y ca-certificates curl gnupg + + sudo install -m 0755 -d /etc/apt/keyrings + curl -fsSL https://download.docker.com/linux/ubuntu/gpg | \ + sudo gpg --dearmor -o /etc/apt/keyrings/docker.gpg + sudo chmod a+r /etc/apt/keyrings/docker.gpg + + echo \ + "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] \ + https://download.docker.com/linux/ubuntu \ + $(. /etc/os-release && echo "$VERSION_CODENAME") stable" | \ + sudo tee /etc/apt/sources.list.d/docker.list > /dev/null + + sudo apt-get update + sudo apt-get install -y docker-ce docker-ce-cli containerd.io \ + docker-buildx-plugin docker-compose-plugin + + sudo usermod -aG docker $USER + newgrp docker + ``` + + + + ```bash + sudo yum install -y yum-utils + sudo yum-config-manager --add-repo \ + https://download.docker.com/linux/centos/docker-ce.repo + + sudo yum install -y docker-ce docker-ce-cli containerd.io \ + docker-buildx-plugin docker-compose-plugin + + sudo systemctl start docker + sudo systemctl enable docker + + sudo usermod -aG docker $USER + newgrp docker + ``` + + + +## Install NVIDIA Driver + + + + ```bash + sudo apt-get update + sudo apt-get install -y ubuntu-drivers-common + + sudo ubuntu-drivers autoinstall + + sudo reboot + ``` + + After reboot, verify: + ```bash + nvidia-smi + ``` + + + + ```bash + sudo yum install -y kernel-devel kernel-headers gcc make + + distribution=$(. /etc/os-release;echo $ID$VERSION_ID | sed -e 's/\.//g') + + sudo yum-config-manager --add-repo \ + http://developer.download.nvidia.com/compute/cuda/repos/$distribution/x86_64/cuda-$distribution.repo + + sudo yum clean all + sudo yum -y install nvidia-driver-latest-dkms + + sudo reboot + ``` + + + +## Install NVIDIA Container Toolkit + +```bash +distribution=$(. 
/etc/os-release;echo $ID$VERSION_ID) +curl -s -L https://nvidia.github.io/libnvidia-container/gpgkey | \ + sudo apt-key add - +curl -s -L https://nvidia.github.io/libnvidia-container/$distribution/libnvidia-container.list | \ + sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list + +sudo apt-get update +sudo apt-get install -y nvidia-container-toolkit + +sudo systemctl restart docker +``` + +## Verify GPU Access + +Test that Docker can access the GPU: + +```bash +docker run --rm --gpus all nvidia/cuda:11.8.0-base-ubuntu22.04 nvidia-smi +``` + +You should see your GPU information displayed. + diff --git a/fern/products/waves/versions/v4.0.0/on-prem/docker/stt/prerequisites/verification.mdx b/fern/products/waves/versions/v4.0.0/on-prem/docker/stt/prerequisites/verification.mdx new file mode 100644 index 0000000..6360c0a --- /dev/null +++ b/fern/products/waves/versions/v4.0.0/on-prem/docker/stt/prerequisites/verification.mdx @@ -0,0 +1,94 @@ +--- +title: Verification Checklist +description: Verify all prerequisites before deploying STT with Docker +--- + +## Pre-Deployment Checklist + +Before proceeding to installation, verify each item: + + + + ```bash + docker ps + ``` + Should execute without errors + + + + ```bash + docker compose version + ``` + Should show version 2.0 or higher + + + + ```bash + docker run --rm --gpus all nvidia/cuda:11.8.0-base-ubuntu22.04 nvidia-smi + ``` + Should display GPU information + + + + ```bash + docker login quay.io + ``` + Should show "Login Succeeded" + + + + - [ ] License key obtained + - [ ] Container registry username and password + - [ ] Model download URLs + + + + ```bash + sudo netstat -tuln | grep -E '(7100|2233|6699|6379)' + ``` + Should return no results (ports free) + + + + ```bash + df -h / + ``` + Should show at least 100 GB available + + + +## Quick Verification Script + +Run this script to check all prerequisites at once: + +```bash +#!/bin/bash +echo "=== Docker STT Prerequisites Check ===" + +echo -n 
"Docker: " +docker --version &>/dev/null && echo "OK" || echo "MISSING" + +echo -n "Docker Compose: " +docker compose version &>/dev/null && echo "OK" || echo "MISSING" + +echo -n "NVIDIA Driver: " +nvidia-smi &>/dev/null && echo "OK" || echo "MISSING" + +echo -n "NVIDIA Container Toolkit: " +docker run --rm --gpus all nvidia/cuda:11.8.0-base-ubuntu22.04 nvidia-smi &>/dev/null && echo "OK" || echo "MISSING" + +echo -n "Port 7100 (API Server): " +netstat -tuln 2>/dev/null | grep -q ':7100 ' && echo "IN USE" || echo "FREE" + +echo -n "Port 2233 (Lightning ASR): " +netstat -tuln 2>/dev/null | grep -q ':2233 ' && echo "IN USE" || echo "FREE" + +echo -n "Port 6699 (License Proxy): " +netstat -tuln 2>/dev/null | grep -q ':6699 ' && echo "IN USE" || echo "FREE" + +echo -n "Port 6379 (Redis): " +netstat -tuln 2>/dev/null | grep -q ':6379 ' && echo "IN USE" || echo "FREE" + +echo "=== Check Complete ===" +``` + diff --git a/fern/products/waves/versions/v4.0.0/on-prem/docker/stt/quick-start.mdx b/fern/products/waves/versions/v4.0.0/on-prem/docker/stt/quick-start.mdx index 1700485..a38f227 100644 --- a/fern/products/waves/versions/v4.0.0/on-prem/docker/stt/quick-start.mdx +++ b/fern/products/waves/versions/v4.0.0/on-prem/docker/stt/quick-start.mdx @@ -8,7 +8,7 @@ description: Deploy Smallest Self-Host Speech-to-Text with Docker Compose in und This guide walks you through deploying Smallest Self-Host using Docker Compose. You'll have a fully functional speech-to-text service running in under 15 minutes. - Ensure you've completed all [prerequisites](/docker/stt/prerequisites) before + Ensure you've completed all [prerequisites](/waves/self-host/docker-setup/stt-deployment/prerequisites/hardware-requirements) before starting this guide. 
@@ -201,7 +201,7 @@ Look for these success indicators: ## Common Startup Issues - + **Error:** `could not select device driver "nvidia"` **Solution:** @@ -213,16 +213,16 @@ Look for these success indicators: If this fails, reinstall NVIDIA Container Toolkit. - + **Error:** `License validation failed` **Solution:** - Verify LICENSE_KEY in `.env` is correct - Check internet connectivity - - Ensure firewall allows HTTPS to console-api.smallest.ai + - Ensure firewall allows HTTPS to api.smallest.ai - + **Error:** `Failed to download model` **Solution:** @@ -231,7 +231,7 @@ Look for these success indicators: - Check internet connectivity - + **Error:** `port is already allocated` **Solution:** @@ -302,19 +302,19 @@ docker compose down -v ## What's Next? - + Customize your deployment with advanced configuration options - + Learn about each service component in detail - + Debug common issues and optimize performance - + Integrate with your applications using the API diff --git a/fern/products/waves/versions/v4.0.0/on-prem/docker/stt/services-overview.mdx b/fern/products/waves/versions/v4.0.0/on-prem/docker/stt/services-overview.mdx index b851a14..875dd1c 100644 --- a/fern/products/waves/versions/v4.0.0/on-prem/docker/stt/services-overview.mdx +++ b/fern/products/waves/versions/v4.0.0/on-prem/docker/stt/services-overview.mdx @@ -51,11 +51,32 @@ The API Server is the main entry point for all client requests. ### Key Endpoints -| Endpoint | Method | Purpose | -|----------|--------|---------| -| `/health` | GET | Health check | -| `/v1/listen` | POST | Synchronous transcription | -| `/v1/listen/stream` | WebSocket | Streaming transcription | + + + + + + + + + + + + + + + + + + + + + + + + + +
EndpointMethodPurpose
/healthGETHealth check
/v1/listenPOSTSynchronous transcription
/v1/listen/streamWebSocketStreaming transcription
### Environment Variables @@ -112,12 +133,37 @@ The core speech recognition engine powered by GPU acceleration. Lightning ASR requires NVIDIA GPU with CUDA support: -| GPU Model | VRAM | Performance | -|-----------|------|-------------| -| A100 | 40-80 GB | Excellent | -| A10 | 24 GB | Excellent | -| L4 | 24 GB | Very Good | -| T4 | 16 GB | Good | + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
GPU ModelVRAMPerformance
A10040-80 GBExcellent
A1024 GBExcellent
L424 GBVery Good
T416 GBGood
### Environment Variables @@ -158,12 +204,32 @@ Key log messages: Typical performance metrics: -| Metric | Value | -|--------|-------| -| Real-time Factor | 0.05-0.15x | -| Cold Start | 30-60 seconds | -| Warm Inference | 50-200ms latency | -| Throughput | 100+ hours/hour (A10) | + + + + + + + + + + + + + + + + + + + + + + + + + +
MetricValue
Real-time Factor0.05-0.15x
Cold Start30-60 seconds
Warm Inference50-200ms latency
Throughput100+ hours/hour (A10)
### Dependencies @@ -208,7 +274,7 @@ LICENSE_KEY: Your license key License Proxy requires outbound HTTPS access to: -- `console-api.smallest.ai` on port 443 +- `api.smallest.ai` on port 443 Ensure your firewall allows these connections. @@ -224,12 +290,32 @@ Ensure your firewall allows these connections. License Proxy reports only metadata: -| Data Reported | Example | -|---------------|---------| -| Audio duration | 3600 seconds | -| Request count | 150 requests | -| Features used | streaming, punctuation | -| Response codes | 200, 400, 500 | + + + + + + + + + + + + + + + + + + + + + + + + + +
Data ReportedExample
Audio duration3600 seconds
Request count150 requests
Features usedstreaming, punctuation
Response codes200, 400, 500
**No audio or transcript data is transmitted** to Smallest servers. @@ -449,11 +535,11 @@ docker compose logs -f [service-name] ## What's Next? - + Customize service configuration and resource allocation - + Debug issues and optimize performance diff --git a/fern/products/waves/versions/v4.0.0/on-prem/docker/tts/configuration.mdx b/fern/products/waves/versions/v4.0.0/on-prem/docker/tts/configuration.mdx index 65051e4..056f4b5 100644 --- a/fern/products/waves/versions/v4.0.0/on-prem/docker/tts/configuration.mdx +++ b/fern/products/waves/versions/v4.0.0/on-prem/docker/tts/configuration.mdx @@ -303,11 +303,11 @@ VOICE_SPEED=1.0 ## What's Next? - + Learn about each TTS service component - + Debug configuration issues diff --git a/fern/products/waves/versions/v4.0.0/on-prem/docker/tts/docker-troubleshooting.mdx b/fern/products/waves/versions/v4.0.0/on-prem/docker/tts/docker-troubleshooting.mdx index 60d59c5..431ed26 100644 --- a/fern/products/waves/versions/v4.0.0/on-prem/docker/tts/docker-troubleshooting.mdx +++ b/fern/products/waves/versions/v4.0.0/on-prem/docker/tts/docker-troubleshooting.mdx @@ -19,14 +19,14 @@ docker run --rm --gpus all nvidia/cuda:11.8.0-base-ubuntu22.04 nvidia-smi ``` - + ```bash sudo systemctl restart docker docker compose up -d ``` - + ```bash sudo apt-get remove nvidia-container-toolkit sudo apt-get update @@ -36,7 +36,7 @@ docker run --rm --gpus all nvidia/cuda:11.8.0-base-ubuntu22.04 nvidia-smi ``` - + ```bash nvidia-smi ``` @@ -48,7 +48,7 @@ docker run --rm --gpus all nvidia/cuda:11.8.0-base-ubuntu22.04 nvidia-smi ``` - + Verify `/etc/docker/daemon.json` contains: ```json { @@ -83,7 +83,7 @@ docker compose logs license-proxy ``` - + Check `.env` file: ```bash cat .env | grep LICENSE_KEY @@ -100,10 +100,10 @@ docker compose logs license-proxy ``` - + Test connection to license server: ```bash - curl -v https://console-api.smallest.ai + curl -v https://api.smallest.ai ``` If this fails, check: @@ -112,7 +112,7 @@ docker compose logs 
license-proxy - DNS resolution - + If the key appears correct and network is accessible, your license may be: - Expired - Revoked @@ -140,7 +140,7 @@ docker compose logs lightning-tts ``` - + Verify GPU has enough VRAM: ```bash nvidia-smi @@ -149,7 +149,7 @@ docker compose logs lightning-tts Lightning TTS requires minimum 16GB VRAM. - + Models require space: ```bash df -h @@ -161,7 +161,7 @@ docker compose logs lightning-tts ``` - + Models may need more time to load: ```yaml lightning-tts: @@ -186,7 +186,7 @@ sudo netstat -tulpn | grep 7100 ``` - + If another service is using the port: ```bash sudo systemctl stop [service-name] @@ -198,7 +198,7 @@ sudo netstat -tulpn | grep 7100 ``` - + Modify docker-compose.yml to use different port: ```yaml api-server: @@ -209,7 +209,7 @@ sudo netstat -tulpn | grep 7100 Access API at http://localhost:8080 instead - + Old containers may still be bound: ```bash docker compose down @@ -235,7 +235,7 @@ docker inspect [container-name] | grep OOMKilled ``` - + Lightning TTS requires minimum 16 GB RAM Check current memory: @@ -244,7 +244,7 @@ docker inspect [container-name] | grep OOMKilled ``` - + Prevent one service from consuming all memory: ```yaml services: @@ -258,7 +258,7 @@ docker inspect [container-name] | grep OOMKilled ``` - + Add swap space (temporary solution): ```bash sudo fallocate -l 16G /swapfile @@ -289,7 +289,7 @@ docker stats ``` - + Ensure GPU is not throttling: ```bash nvidia-smi -q -d PERFORMANCE @@ -301,7 +301,7 @@ docker stats ``` - + ```yaml lightning-tts: deploy: @@ -311,7 +311,7 @@ docker stats ``` - + Use Redis with persistence disabled for speed: ```yaml redis: @@ -502,11 +502,11 @@ Include: ## What's Next? 
- + Advanced configuration options - + Integrate with your applications diff --git a/fern/products/waves/versions/v4.0.0/on-prem/docker/tts/prerequisites/credentials.mdx b/fern/products/waves/versions/v4.0.0/on-prem/docker/tts/prerequisites/credentials.mdx new file mode 100644 index 0000000..26698df --- /dev/null +++ b/fern/products/waves/versions/v4.0.0/on-prem/docker/tts/prerequisites/credentials.mdx @@ -0,0 +1,51 @@ +--- +title: Credentials & Access +description: License keys and registry credentials for TTS Docker deployment +--- + +## Required Credentials + +Before installation, obtain the following from Smallest.ai: + + + + Your unique license key for validation + + Contact: **support@smallest.ai** + + + + Credentials to pull Docker images: + - Registry URL: `quay.io` + - Username + - Password + - Email + + Contact: **support@smallest.ai** + + + + Download URLs for TTS models (if required) + + Contact: **support@smallest.ai** + + + +## Login to Container Registry + +Once you have your credentials, authenticate with the registry: + +```bash +docker login quay.io -u -p +``` + +## Environment Variables + +You'll need to set these in your deployment: + +```bash +export LICENSE_KEY="your-license-key" +export QUAY_USERNAME="your-username" +export QUAY_PASSWORD="your-password" +``` + diff --git a/fern/products/waves/versions/v4.0.0/on-prem/docker/tts/prerequisites/hardware-requirements.mdx b/fern/products/waves/versions/v4.0.0/on-prem/docker/tts/prerequisites/hardware-requirements.mdx new file mode 100644 index 0000000..6c29386 --- /dev/null +++ b/fern/products/waves/versions/v4.0.0/on-prem/docker/tts/prerequisites/hardware-requirements.mdx @@ -0,0 +1,52 @@ +--- +title: Hardware Requirements +description: Hardware specifications for deploying Text-to-Speech with Docker +--- + +## Minimum Specifications + + + + **4 cores** minimum + + 8+ cores recommended for production + + + + **16 GB** minimum + + 32+ GB recommended for production + + + + **NVIDIA GPU required** + 
+ - L40s or A10 (recommended) + - A10, A100, H100, T4, L4 (supported) + - Minimum 16GB VRAM + + + + **100 GB** minimum + + - 50 GB for models + - 50 GB for logs and data + + + +## Network Requirements + +Ensure the following ports are available: + +| Port | Service | Purpose | +|------|---------|---------| +| 7100 | API Server | Client API requests | +| 8876 | Lightning TTS | TTS service endpoint | +| 3369 | License Proxy | Internal license validation | +| 6379 | Redis | Internal caching | + + +The License Proxy requires outbound HTTPS access to Smallest's license servers for validation. Ensure your firewall allows outbound connections to: +- `api.smallest.ai` (port 443) + + diff --git a/fern/products/waves/versions/v4.0.0/on-prem/docker/tts/prerequisites/software-requirements.mdx b/fern/products/waves/versions/v4.0.0/on-prem/docker/tts/prerequisites/software-requirements.mdx new file mode 100644 index 0000000..250c4b0 --- /dev/null +++ b/fern/products/waves/versions/v4.0.0/on-prem/docker/tts/prerequisites/software-requirements.mdx @@ -0,0 +1,180 @@ +--- +title: Software Requirements +description: Software and dependencies for deploying Text-to-Speech with Docker +--- + +## Operating System + + + + ```bash + Ubuntu 20.04 LTS or later + Ubuntu 22.04 LTS (recommended) + Debian 11 or later + ``` + + + + ```bash + CentOS 8 or later + RHEL 8 or later + Rocky Linux 8 or later + ``` + + + + Most modern Linux distributions with kernel 4.15+ + + + +## Required Software + + + + Docker Engine 20.10 or later + + ```bash + docker --version + ``` + + Expected output: `Docker version 20.10.0 or higher` + + + + Docker Compose 2.0 or later + + ```bash + docker compose version + ``` + + Expected output: `Docker Compose version v2.0.0 or higher` + + + + NVIDIA Driver 525+ for newer GPUs (A10, A100, L4) + + NVIDIA Driver 470+ for older GPUs (T4, V100) + + ```bash + nvidia-smi + ``` + + Should display GPU information without errors + + + + Required for GPU access in containers + + 
```bash + nvidia-container-cli --version + ``` + + + +## Install Docker + + + + ```bash + sudo apt-get update + sudo apt-get install -y ca-certificates curl gnupg + + sudo install -m 0755 -d /etc/apt/keyrings + curl -fsSL https://download.docker.com/linux/ubuntu/gpg | \ + sudo gpg --dearmor -o /etc/apt/keyrings/docker.gpg + sudo chmod a+r /etc/apt/keyrings/docker.gpg + + echo \ + "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] \ + https://download.docker.com/linux/ubuntu \ + $(. /etc/os-release && echo "$VERSION_CODENAME") stable" | \ + sudo tee /etc/apt/sources.list.d/docker.list > /dev/null + + sudo apt-get update + sudo apt-get install -y docker-ce docker-ce-cli containerd.io \ + docker-buildx-plugin docker-compose-plugin + + sudo usermod -aG docker $USER + newgrp docker + ``` + + + + ```bash + sudo yum install -y yum-utils + sudo yum-config-manager --add-repo \ + https://download.docker.com/linux/centos/docker-ce.repo + + sudo yum install -y docker-ce docker-ce-cli containerd.io \ + docker-buildx-plugin docker-compose-plugin + + sudo systemctl start docker + sudo systemctl enable docker + + sudo usermod -aG docker $USER + newgrp docker + ``` + + + +## Install NVIDIA Driver + + + + ```bash + sudo apt-get update + sudo apt-get install -y ubuntu-drivers-common + + sudo ubuntu-drivers autoinstall + + sudo reboot + ``` + + After reboot, verify: + ```bash + nvidia-smi + ``` + + + + ```bash + sudo yum install -y kernel-devel kernel-headers gcc make + + distribution=$(. /etc/os-release;echo $ID$VERSION_ID | sed -e 's/\.//g') + + sudo yum-config-manager --add-repo \ + http://developer.download.nvidia.com/compute/cuda/repos/$distribution/x86_64/cuda-$distribution.repo + + sudo yum clean all + sudo yum -y install nvidia-driver-latest-dkms + + sudo reboot + ``` + + + +## Install NVIDIA Container Toolkit + +```bash +distribution=$(. 
/etc/os-release;echo $ID$VERSION_ID) +curl -s -L https://nvidia.github.io/libnvidia-container/gpgkey | \ + sudo apt-key add - +curl -s -L https://nvidia.github.io/libnvidia-container/$distribution/libnvidia-container.list | \ + sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list + +sudo apt-get update +sudo apt-get install -y nvidia-container-toolkit + +sudo systemctl restart docker +``` + +## Verify GPU Access + +Test that Docker can access the GPU: + +```bash +docker run --rm --gpus all nvidia/cuda:11.8.0-base-ubuntu22.04 nvidia-smi +``` + +You should see your GPU information displayed. + diff --git a/fern/products/waves/versions/v4.0.0/on-prem/docker/tts/prerequisites/verification.mdx b/fern/products/waves/versions/v4.0.0/on-prem/docker/tts/prerequisites/verification.mdx new file mode 100644 index 0000000..da2665c --- /dev/null +++ b/fern/products/waves/versions/v4.0.0/on-prem/docker/tts/prerequisites/verification.mdx @@ -0,0 +1,94 @@ +--- +title: Verification Checklist +description: Verify all prerequisites before deploying TTS with Docker +--- + +## Pre-Deployment Checklist + +Before proceeding to installation, verify each item: + + + + ```bash + docker ps + ``` + Should execute without errors + + + + ```bash + docker compose version + ``` + Should show version 2.0 or higher + + + + ```bash + docker run --rm --gpus all nvidia/cuda:11.8.0-base-ubuntu22.04 nvidia-smi + ``` + Should display GPU information + + + + ```bash + docker login quay.io + ``` + Should show "Login Succeeded" + + + + - [ ] License key obtained + - [ ] Container registry username and password + - [ ] Model download URLs (if required) + + + + ```bash + sudo netstat -tuln | grep -E '(7100|8876|3369|6379)' + ``` + Should return no results (ports free) + + + + ```bash + df -h / + ``` + Should show at least 100 GB available + + + +## Quick Verification Script + +Run this script to check all prerequisites at once: + +```bash +#!/bin/bash +echo "=== Docker TTS Prerequisites Check ===" + 
+echo -n "Docker: " +docker --version &>/dev/null && echo "OK" || echo "MISSING" + +echo -n "Docker Compose: " +docker compose version &>/dev/null && echo "OK" || echo "MISSING" + +echo -n "NVIDIA Driver: " +nvidia-smi &>/dev/null && echo "OK" || echo "MISSING" + +echo -n "NVIDIA Container Toolkit: " +docker run --rm --gpus all nvidia/cuda:11.8.0-base-ubuntu22.04 nvidia-smi &>/dev/null && echo "OK" || echo "MISSING" + +echo -n "Port 7100 (API Server): " +netstat -tuln 2>/dev/null | grep -q ':7100 ' && echo "IN USE" || echo "FREE" + +echo -n "Port 8876 (Lightning TTS): " +netstat -tuln 2>/dev/null | grep -q ':8876 ' && echo "IN USE" || echo "FREE" + +echo -n "Port 3369 (License Proxy): " +netstat -tuln 2>/dev/null | grep -q ':3369 ' && echo "IN USE" || echo "FREE" + +echo -n "Port 6379 (Redis): " +netstat -tuln 2>/dev/null | grep -q ':6379 ' && echo "IN USE" || echo "FREE" + +echo "=== Check Complete ===" +``` + diff --git a/fern/products/waves/versions/v4.0.0/on-prem/docker/tts/quick-start.mdx b/fern/products/waves/versions/v4.0.0/on-prem/docker/tts/quick-start.mdx index 854a6c6..1d3bdb1 100644 --- a/fern/products/waves/versions/v4.0.0/on-prem/docker/tts/quick-start.mdx +++ b/fern/products/waves/versions/v4.0.0/on-prem/docker/tts/quick-start.mdx @@ -8,7 +8,7 @@ description: Deploy Smallest Self-Host Text-to-Speech with Docker Compose in und This guide walks you through deploying Smallest Self-Host Text-to-Speech (TTS) using Docker Compose. You'll have a fully functional text-to-speech service running in under 15 minutes. -Ensure you've completed all [prerequisites](/docker/tts/prerequisites) before starting this guide. +Ensure you've completed all [prerequisites](/waves/self-host/docker-setup/tts-deployment/prerequisites/hardware-requirements) before starting this guide. 
## Step 1: Create Project Directory @@ -235,7 +235,7 @@ Expected response: `{"status": "healthy"}` ## Common Startup Issues - + **Error:** `could not select device driver "nvidia"` **Solution:** @@ -247,16 +247,16 @@ Expected response: `{"status": "healthy"}` If this fails, reinstall NVIDIA Container Toolkit. - + **Error:** `License validation failed` **Solution:** - Verify LICENSE_KEY in `.env` is correct - Check internet connectivity - - Ensure firewall allows HTTPS to console-api.smallest.ai + - Ensure firewall allows HTTPS to api.smallest.ai - + **Error:** `port is already allocated` **Solution:** @@ -325,19 +325,19 @@ Using `-v` flag will delete all data. Models will need to be re-downloaded on ne ## What's Next? - + Customize your TTS deployment with advanced configuration options - + Learn about each TTS service component in detail - + Debug common issues and optimize performance - + Integrate with your applications using the API diff --git a/fern/products/waves/versions/v4.0.0/on-prem/docker/tts/services-overview.mdx b/fern/products/waves/versions/v4.0.0/on-prem/docker/tts/services-overview.mdx index 5dd0272..6861ce0 100644 --- a/fern/products/waves/versions/v4.0.0/on-prem/docker/tts/services-overview.mdx +++ b/fern/products/waves/versions/v4.0.0/on-prem/docker/tts/services-overview.mdx @@ -51,11 +51,32 @@ The API Server is the main entry point for all client requests. ### Key Endpoints -| Endpoint | Method | Purpose | -|----------|--------|---------| -| `/health` | GET | Health check | -| `/v1/speak` | POST | Synchronous text-to-speech | -| `/v1/speak/stream` | WebSocket | Streaming text-to-speech | + + + + + + + + + + + + + + + + + + + + + + + + + +
EndpointMethodPurpose
/healthGETHealth check
/v1/speakPOSTSynchronous text-to-speech
/v1/speak/streamWebSocketStreaming text-to-speech
### Environment Variables @@ -112,12 +133,37 @@ The core text-to-speech engine powered by GPU acceleration. Lightning TTS requires NVIDIA GPU with CUDA support: -| GPU Model | VRAM | Performance | -|-----------|------|-------------| -| A100 | 40-80 GB | Excellent | -| A10 | 24 GB | Excellent | -| L4 | 24 GB | Very Good | -| T4 | 16 GB | Good | + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
GPU ModelVRAMPerformance
A10040-80 GBExcellent
A1024 GBExcellent
L424 GBVery Good
T416 GBGood
### Environment Variables @@ -156,12 +202,32 @@ Key log messages: Typical performance metrics: -| Metric | Value | -|--------|-------| -| Real-time Factor | 0.1-0.3x | -| Cold Start | 30-60 seconds | -| Warm Inference | 100-300ms latency | -| Throughput | 50+ hours/hour (A10) | + + + + + + + + + + + + + + + + + + + + + + + + + +
MetricValue
Real-time Factor0.1-0.3x
Cold Start30-60 seconds
Warm Inference100-300ms latency
Throughput50+ hours/hour (A10)
### Dependencies @@ -206,7 +272,7 @@ LICENSE_KEY: Your license key License Proxy requires outbound HTTPS access to: -- `console-api.smallest.ai` on port 443 +- `api.smallest.ai` on port 443 Ensure your firewall allows these connections. @@ -222,12 +288,32 @@ Ensure your firewall allows these connections. License Proxy reports only metadata: -| Data Reported | Example | -|---------------|---------| -| Audio duration | 3600 seconds | -| Request count | 150 requests | -| Features used | streaming, voice selection | -| Response codes | 200, 400, 500 | + + + + + + + + + + + + + + + + + + + + + + + + + +
Data ReportedExample
Audio duration3600 seconds
Request count150 requests
Features usedstreaming, voice selection
Response codes200, 400, 500
**No audio or transcript data is transmitted** to Smallest servers. @@ -435,11 +521,11 @@ docker compose logs -f [service-name] ## What's Next? - + Customize service configuration and resource allocation - + Debug issues and optimize performance diff --git a/fern/products/waves/versions/v4.0.0/on-prem/getting-started/architecture.mdx b/fern/products/waves/versions/v4.0.0/on-prem/getting-started/architecture.mdx new file mode 100644 index 0000000..7dfd9df --- /dev/null +++ b/fern/products/waves/versions/v4.0.0/on-prem/getting-started/architecture.mdx @@ -0,0 +1,86 @@ +--- +title: Architecture Overview +description: Understanding the components and architecture of Smallest Self-Host deployments +--- + +## System Architecture + +```mermaid +graph TB + Client[Client Applications] -->|HTTP/WebSocket| API[API Server] + API -->|STT Requests| ASR[Lightning ASR] + API -->|TTS Requests| TTS[Lightning TTS] + API -->|Validate License| LP[License Proxy] + LP -->|Report Usage| LS[Smallest License Server] + + subgraph YourInfrastructure[Your Infrastructure] + API + ASR + TTS + LP + end + + subgraph SmallestCloud[Smallest Cloud] + LS + end + + style ASR fill:#0D9373 + style TTS fill:#0D9373 + style API fill:#07C983 + style LP fill:#1E90FF + style LS fill:#FF6B6B +``` + +## Components + + + + Routes requests to Lightning ASR/TTS workers, manages WebSocket connections, and provides a unified REST API interface. + + **Resources:** 0.5-2 CPU cores, 512 MB - 2 GB RAM, no GPU + + + + GPU-accelerated speech-to-text engine with 0.05-0.15x real-time factor. Supports real-time and batch transcription. + + **Resources:** 4-8 CPU cores, 12-16 GB RAM, 1x NVIDIA GPU (16+ GB VRAM) + + + + GPU-accelerated text-to-speech engine for natural voice synthesis. Supports streaming and batch generation. + + **Resources:** 4-8 CPU cores, 12-16 GB RAM, 1x NVIDIA GPU (16+ GB VRAM) + + + + Validates license keys and reports usage metadata. Supports offline grace periods. 
+ + **Resources:** 0.25-1 CPU core, 256-512 MB RAM, no GPU + + + + Request queuing, session state, and caching. Can use embedded or external (ElastiCache). + + **Resources:** 0.5-1 CPU core, 512 MB - 2 GB RAM, no GPU + + + +## Data Flow + +1. **Client Request** — Your application sends audio (STT) or text (TTS) via HTTP or WebSocket +2. **API Server** — Routes the request to the appropriate worker and validates the license +3. **Worker Processing** — Lightning ASR or TTS processes the request on GPU +4. **Response** — Results stream back through the API server to your application + +All processing happens within your infrastructure. Only license validation metadata is sent to Smallest Cloud. + +## What's Next? + + + + License key, credentials, and infrastructure requirements + + + Benefits of self-hosting for your use case + + diff --git a/fern/products/waves/versions/v4.0.0/on-prem/getting-started/introduction.mdx b/fern/products/waves/versions/v4.0.0/on-prem/getting-started/introduction.mdx new file mode 100644 index 0000000..8c01ce5 --- /dev/null +++ b/fern/products/waves/versions/v4.0.0/on-prem/getting-started/introduction.mdx @@ -0,0 +1,43 @@ +--- +title: Introduction +description: Deploy high-performance speech-to-text and text-to-speech models in your own infrastructure +--- + +Smallest Self-Host enables you to get the same powerful TTS and STT capabilities as our cloud service while keeping your data under complete control. + +## Deployment Options + + + + Deploy speech-to-text with Docker. Best for development, testing, and small-scale production. + + + Deploy text-to-speech with Docker. Quick setup for voice synthesis workloads. + + + Production-grade STT with autoscaling and high availability on Kubernetes. + + + + + Kubernetes deployment is currently available for **STT only**. TTS Kubernetes support is coming soon. 
+ + +## Resources + + + + System components and data flow + + + Benefits of self-hosting + + + Requirements and credentials + + + +## Support + +- **Email**: [support@smallest.ai](mailto:support@smallest.ai) +- **Discord**: [Join our community](https://discord.gg/5evETqguJs) diff --git a/fern/products/waves/versions/v4.0.0/on-prem/getting-started/prerequisites.mdx b/fern/products/waves/versions/v4.0.0/on-prem/getting-started/prerequisites.mdx new file mode 100644 index 0000000..1a69168 --- /dev/null +++ b/fern/products/waves/versions/v4.0.0/on-prem/getting-started/prerequisites.mdx @@ -0,0 +1,176 @@ +--- +title: Prerequisites +description: What you need before deploying Smallest Self-Host +--- + +## Overview + +Before deploying Smallest Self-Host, you'll need credentials from Smallest.ai and infrastructure with GPU support. + +## Credentials from Smallest.ai + +Contact **support@smallest.ai** to obtain the following: + + + + Your unique license key for validation. This is required for all deployments. + + You'll add this to your configuration: + ```yaml + global: + licenseKey: "your-license-key-here" + ``` + + Or as an environment variable: + ```bash + LICENSE_KEY=your-license-key-here + ``` + + + + Credentials to pull Docker images from `quay.io`: + - **Username** + - **Password** + - **Email** + + Login to the registry: + ```bash + docker login quay.io + ``` + + For Kubernetes, you'll add these to your `values.yaml`: + ```yaml + global: + imageCredentials: + create: true + registry: quay.io + username: "your-username" + password: "your-password" + email: "your-email@example.com" + ``` + + + + Download URLs for the AI models (STT and/or TTS). 
+ + For Docker deployments, add to your `.env`: + ```bash + MODEL_URL=your-model-url-here + ``` + + For Kubernetes, add to `values.yaml`: + ```yaml + models: + asrModelUrl: "your-asr-model-url" + ttsModelUrl: "your-tts-model-url" + ``` + + + +## Infrastructure Requirements + + + + - **NVIDIA GPU** with 16+ GB VRAM + - Recommended: A10, L4, L40s, T4, or A100 + - NVIDIA Driver 525+ (for A10, A100, L4) + - NVIDIA Driver 470+ (for T4, V100) + + + + - Docker 20.10+ or Podman 4.0+ + - NVIDIA Container Toolkit + - For Kubernetes: GPU Operator or Device Plugin + + + +### Minimum Resources + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ComponentCPUMemoryGPUStorage
Lightning ASR4-8 cores12-16 GB1x NVIDIA (16+ GB VRAM)50+ GB
Lightning TTS4-8 cores12-16 GB1x NVIDIA (16+ GB VRAM)20+ GB
API Server0.5-2 cores512 MB - 2 GBNone1 GB
License Proxy0.25-1 core256-512 MBNone100 MB
Redis0.5-1 core512 MB - 2 GBNone1 GB
+ +## Network Requirements + +The License Proxy requires outbound HTTPS access to validate licenses: + + + + + + + + + + + + + + + + +
EndpointPortPurpose
api.smallest.ai443License validation and usage reporting
+ + +Ensure your firewall and network policies allow outbound HTTPS traffic to `api.smallest.ai`. + + +## Next Steps + +Choose your deployment method and follow the specific prerequisites: + + + + Setup requirements for Docker deployments including NVIDIA Container Toolkit installation. + + + + Cluster requirements, GPU node setup, and Helm configuration for Kubernetes deployments. + + + diff --git a/fern/products/waves/versions/v4.0.0/on-prem/getting-started/why-self-host.mdx b/fern/products/waves/versions/v4.0.0/on-prem/getting-started/why-self-host.mdx new file mode 100644 index 0000000..862d978 --- /dev/null +++ b/fern/products/waves/versions/v4.0.0/on-prem/getting-started/why-self-host.mdx @@ -0,0 +1,181 @@ +--- +title: Why Self-Host? +description: Understand when self-hosting our models makes sense for your organization +--- + +## Overview + +Using Smallest as a managed service has many benefits: it's fast to start developing with, requires no infrastructure setup, and eliminates all hardware, installation, configuration, backup, and maintenance-related costs. However, there are situations where a self-hosted deployment makes more sense. + +## Performance Requirements + +Certain use cases have very sensitive latency and load requirements. If you need ultra-low latency with voice AI services colocated with your other services, self-hosting can meet these requirements. 
+ + + + - **Real-time AI voicebots** requiring sub-100ms response times + - **Live transcription systems** for broadcasts or conferences + - **High-volume processing** with predictable costs + - **Edge deployments** with limited internet connectivity + + + + - Colocate speech services with your application infrastructure + - Scale independently based on your specific workload patterns + - Zero network latency to external APIs + - Consistent performance regardless of internet conditions + + + +### Zero Network Latency + +When you self-host, your speech services run within your own infrastructure—whether that's the same data center, VPC, or even the same machine as your application. This eliminates the round-trip time to external APIs entirely. + + + + + + + + + + + + + + + + + + + + + + + + + + +
ScenarioNetwork Latency
Self-hosted1-5ms
Same region20-50ms
Cross-region100-200ms
Edge/on-premises200-500ms+
+ +For real-time voice applications like AI agents, every millisecond matters. Self-hosting keeps your latency predictable and minimal, regardless of where your users are located or the state of the public internet. + +### Security & Data Privacy + +One of the most common use cases for self-hosting Smallest is to satisfy security or data privacy requirements. In a typical self-hosted deployment, no audio, transcripts, or other identifying markers of the request content are sent to Smallest servers. + + + + - **Healthcare applications** requiring HIPAA compliance + - **Financial services** with strict data governance + - **Government and defense** applications + - **Enterprise environments** with air-gapped networks + + + + - Your audio data never leaves your infrastructure + - Transcripts remain entirely within your control + - No data stored beyond the duration of the API request + - Self-hosted deployments do not persist request/response data + + + +### What Data is Reported? + + +In a typical self-hosted deployment, no audio or transcript data is sent to Smallest servers. Only usage metadata is reported to the license server for validation and billing purposes. + + +**Metadata reported:** +- Audio duration and character count +- Features requested (diarization, timestamps, etc.) +- Success/error response codes + +**Never reported:** +- Audio content +- Transcripts or synthesis output +- Personally identifiable information + +### Cost Optimization + +For high-volume or predictable workloads, self-hosting can be more cost-effective than per-request API pricing. + + + + + + + + + + + + + + + + + + + + + + +
BenefitDescription
Predictable costsInfrastructure-based pricing, not usage-based
Efficient utilizationPredictable autoscaling maximizes resource efficiency
Long-term savingsSignificant cost reduction for sustained high volumes
+ +### Reliability & Grace Periods + +Self-hosted deployments include built-in resilience against unforeseen network errors and temporary outages. The deployment won't suddenly stop working due to a transient network issue or external service disruption. + +This means: +- **Continuous operation** during network interruptions or license server maintenance +- **Protection against unforeseen errors** — your services keep running while issues are resolved +- **Time to recover** — grace periods provide a buffer to restore connectivity without impacting your users + + +The License Proxy supports **grace periods** that allow your deployment to continue operating even if connectivity to the Smallest license server is temporarily lost. + + +## Customization & Control + +Self-hosting provides complete control over your deployment: + + + + Optimize compute resources for your specific workload patterns. Allocate more GPU power during peak hours and scale down during off-peak times. + + + + Upgrade on your schedule. Test new versions in staging before production rollout. Roll back instantly if needed. + + + + Deploy in private networks, VPCs, or air-gapped environments. Full control over ingress and egress traffic. + + + + Direct integration with your monitoring, logging, and alerting infrastructure. Custom Prometheus metrics, Grafana dashboards, and alerting rules. + + + +## When to Use Managed Service Instead + +Self-hosting isn't always the right choice. Consider the managed Smallest API if: + +- You're building a prototype or MVP +- Your audio processing volume is low or unpredictable +- You don't have DevOps resources to manage infrastructure +- You need to get started quickly without infrastructure setup + +## Ready to Self-Host? 
+ + + + Return to the introduction for deployment options + + + + Deploy in 15 minutes with Docker + + + diff --git a/fern/products/waves/versions/v4.0.0/on-prem/introduction.mdx b/fern/products/waves/versions/v4.0.0/on-prem/introduction.mdx deleted file mode 100644 index cf13837..0000000 --- a/fern/products/waves/versions/v4.0.0/on-prem/introduction.mdx +++ /dev/null @@ -1,389 +0,0 @@ ---- -title: Introduction -description: Deploy high-performance speech recognition and synthesis models in your own infrastructure ---- - -## What is Smallest Self-Host? - -Smallest Self-Host enables you to deploy state-of-the-art speech-to-text (STT) models in your own infrastructure, whether in the cloud or on-premises. Built for enterprises with stringent performance, security, or compliance requirements, it provides the same powerful AI capabilities as Smallest's cloud service while keeping your data under your complete control. - -## Why Self-Host? - -Using Smallest as a managed service has many benefits: it's fast to start developing with, requires no infrastructure setup, and eliminates all hardware, installation, configuration, backup, and maintenance-related costs. However, there are situations where a self-hosted deployment makes more sense. - -### Performance Requirements - -Certain use cases have very sensitive latency and load requirements. If you need ultra-low latency with voice AI services colocated with your other services, self-hosting can meet these requirements. 
- -**Ideal for:** - -- **Real-time AI voicebots** requiring <100ms response times -- **Live transcription systems** for broadcasts or conferences -- **High-volume processing** with predictable costs -- **Edge deployments** with limited internet connectivity - -**Benefits:** - -- Colocate speech services with your application infrastructure -- Scale independently based on your specific workload patterns -- No network latency to external APIs -- Consistent performance regardless of internet conditions - -### Security & Data Privacy - -One of the most common use cases for self-hosting Smallest is to satisfy security or data privacy requirements. In a typical self-hosted deployment, no audio, transcripts, or other identifying markers of the request content are sent to Smallest servers. - -**Ideal for:** - -- **Healthcare applications** requiring HIPAA compliance -- **Financial services** with strict data governance -- **Government and defense** applications -- **Enterprise environments** with air-gapped networks - -**Data Privacy:** - -- Your audio data never leaves your infrastructure -- Transcripts remain entirely within your control -- No data stored beyond the duration of the API request -- Self-hosted deployments do not persist request/response data - -**What is reported:** - -- Only metadata such as audio duration, character count, features requested, and success response codes -- No audio content, transcripts, or personally identifiable information - - - In a typical self-hosted deployment, no audio or transcript data is sent to - Smallest servers. Only usage metadata (duration, feature flags, response - codes) is reported to the license server for validation and billing purposes. 
- - -### Cost Optimization - -For high-volume or predictable workloads, self-hosting can be more cost-effective: - -- **Predictable costs** based on infrastructure, not usage -- **No per-minute charges** for audio processing -- **Efficient resource utilization** with autoscaling -- **Long-term savings** for sustained high volumes - -### Customization & Control - -Self-hosting provides complete control over your deployment: - -- **Custom resource allocation** optimized for your workload -- **Version control** - upgrade on your schedule -- **Network isolation** - deploy in private networks -- **Integration flexibility** - direct database access, custom monitoring - -## Components - -Before you deploy Smallest, you'll need to understand the components of your system, their relationships, and the interactions between components. A well-designed architecture will meet your business needs, optimize both performance and security, and provide a strong technical foundation for future growth. - -### Architecture Diagram - -```mermaid -graph TB - Client[Client Applications] -->|HTTP/WebSocket| API[API Server] - API -->|Route Requests| ASR[Lightning ASR] - API -->|Validate License| LP[License Proxy] - LP -->|Report Usage| LS[Smallest License Server] - - subgraph YourInfrastructure[Your Infrastructure] - API - ASR - LP - end - - subgraph SmallestCloud[Smallest Cloud] - LS - end - - style ASR fill:#0D9373 - style API fill:#07C983 - style LP fill:#1E90FF - style LS fill:#FF6B6B -``` - -### Component Details - - - - **Purpose:** The API server interfaces with Lightning ASR to expose endpoints for your requests. 
- - **Key Features:** - - Routes incoming API requests to available Lightning ASR workers - - Manages WebSocket connections for streaming transcription - - Handles request queuing and load balancing across workers - - Provides unified REST API interface - - **Resource Requirements:** - - CPU: 0.5-2 cores - - Memory: 512 MB - 2 GB - - No GPU required - - -{" "} - - - **Purpose:** The Lightning ASR engine performs the computationally intensive - task of speech recognition. It manages GPU devices and responds to requests - from the API layer. **Key Features:** - GPU-accelerated speech recognition - (0.05-0.15x real-time factor) - Real-time and batch audio transcription - - Automatic model loading and optimization - Horizontal scaling support - **Resource Requirements:** - CPU: 4-8 cores - Memory: 12-16 GB RAM - **GPU: 1x - NVIDIA GPU (16+ GB VRAM required)** - Storage: 50+ GB for models **Note:** - Because Lightning ASR is decoupled from the API Server, you can scale it - independently based on your transcription load. - - -{" "} - - - **Purpose:** Components register with the Smallest License Server to verify - licensing and report usage. API and Engine containers can be configured to - connect directly to the licensing server, or to proxy their communication - through the License Proxy. **Key Features:** - License key validation on - startup - Usage metadata reporting (no audio/transcript data) - Grace period - support for offline operation - Secure communication with Smallest License - Server **Resource Requirements:** - CPU: 0.25-1 core - Memory: 256-512 MB - No - GPU required **Network:** Requires outbound HTTPS to - `https://console-api.smallest.ai` - - - - **Purpose:** Provides caching and state management for the system. - - **Key Features:** - - Request queuing and coordination between API and ASR workers - - Session state for streaming connections - - Performance optimization through caching - - Can be embedded or external (AWS ElastiCache, etc.) 
- - **Resource Requirements:** - - CPU: 0.5-1 core - - Memory: 512 MB - 2 GB - - No GPU required - - - -## Common Setup Path - -All deployments follow the same initial setup path through environment preparation. Here's what to expect: - -### 1. Choose Your Deployment Method - - - - **Best for:** Development, testing, small-scale production - - **Timeline:** 15-30 minutes - - **Complexity:** Low - - - - **Best for:** Production deployments with autoscaling - - **Timeline:** 1-2 hours - - **Complexity:** Medium-High - - - -### 2. Prepare Infrastructure - -```mermaid -graph LR - A[Start] --> B{Deployment Type?} - B -->|Docker| C[Install Docker + NVIDIA Toolkit] - B -->|Kubernetes| D[Setup K8s Cluster] - C --> E[Configure GPU Access] - D --> F[Setup GPU Nodes] - E --> G[Obtain Credentials] - F --> G - G --> H[Deploy Services] - H --> I[Test & Verify] - I --> J[Production Ready] -``` - -**Steps:** - -1. **Obtain credentials** from Smallest.ai (license key, registry access, model URLs) -2. **Prepare infrastructure** (Docker host or Kubernetes cluster) -3. **Setup GPU support** (NVIDIA drivers, device plugins) -4. **Deploy components** (API Server, Lightning ASR, License Proxy, Redis) -5. **Configure autoscaling** (optional, Kubernetes only) -6. 
**Setup monitoring** (optional, Prometheus & Grafana) - -### What You'll Need - -Before starting, ensure you have: - - - - - License key - - Container registry credentials - - Model download URLs - - Contact: **support@smallest.ai** - - - - - GPU infrastructure (NVIDIA A10, T4, or better) - - Kubernetes cluster or Docker host - - Basic DevOps knowledge - - Network connectivity for license validation - - - -## Deployment Options - -Smallest Self-Host supports two primary deployment methods, each suited for different operational requirements: - - - - Best for development, testing, or small-scale production deployments - - **Pros:** - - Fastest setup (under 15 minutes) - - Minimal infrastructure requirements - - Single-machine deployment - - Easy configuration with docker-compose - - **Use Cases:** - - Development and testing - - Proof of concept - - Small-scale production - - Edge deployments - - - - Production-grade deployment with enterprise features - - - **Available for ASR only.** TTS Kubernetes support coming soon. - - - **Pros:** - - Auto-scaling based on load - - High availability and fault tolerance - - Advanced monitoring with Grafana - - Shared model storage - - **Use Cases:** - - Production workloads - - High-traffic applications - - Multi-region deployments - - Enterprise infrastructure - - - -## Prerequisites - -Before deploying Smallest Self-Host, ensure you have: - - - - Contact **support@smallest.ai** or your Smallest representative to obtain: - - License key for validation - - Container registry credentials - - - - - Provision compute resources: - **For Docker**: Single machine with NVIDIA GPU - - **For Kubernetes**: Cluster with GPU node pool - - - - Install NVIDIA drivers and container runtime: - - NVIDIA Driver 525+ (for A10, A100, L4) - - NVIDIA Driver 470+ (for T4, V100) - - NVIDIA Container Toolkit - - - -## What's Next? 
- -Choose your deployment path based on your needs: - -### For Quick Start & Testing - - - **Fastest path to get running** (15-30 minutes) Perfect if you're: - - Evaluating Smallest Self-Host for the first time - Building a proof-of-concept - - Setting up a development environment - Running on a single GPU server [Go to - Docker Setup →](/docker/stt/prerequisites) - - -### For Production Deployment - - - - **Full-featured production setup** - - - Auto-scaling (HPA + Cluster Autoscaler) - - High availability across zones - - Grafana monitoring dashboards - - Shared model storage with EFS - - [Setup AWS EKS →](/kubernetes/aws/eks-setup) - - - - **For any Kubernetes cluster** - - - Works on GCP, Azure, on-prem - - Full autoscaling support - - Advanced monitoring - - Production-ready - - [Setup Kubernetes →](/kubernetes/prerequisites) - - - -### Quick Links by Role - - - - Start here: - 1. [Kubernetes Prerequisites](/kubernetes/prerequisites) - Check cluster requirements - 2. [AWS EKS Setup](/kubernetes/aws/eks-setup) - Create EKS cluster (if on AWS) - 3. [Quick Start](/kubernetes/quick-start) - Deploy with Helm - 4. [Autoscaling](/kubernetes/autoscaling/hpa-configuration) - Configure HPA - 5. [Monitoring](/kubernetes/autoscaling/grafana-dashboards) - Setup Grafana - - -{" "} - - - Start here: 1. [Docker Prerequisites](/docker/prerequisites) - Setup local - environment 2. [Docker Quick Start](/docker/stt/quick-start) - Get running in - 15 minutes 3. [API Reference](/api-reference/authentication) - Integrate with - your app 4. [Examples](/api-reference/examples) - See code examples - - -{" "} - - - Start here: 1. [Docker Quick Start](/docker/stt/quick-start) - Fastest way to - test 2. [API Reference](/api-reference/endpoints/transcription) - See what you - can do 3. [Common Issues](/troubleshooting/common-issues) - Get help if stuck - 4. 
Then move to [Kubernetes](/kubernetes/quick-start) for production - - - - Resources: - - [Common Issues](/troubleshooting/common-issues) - Quick fixes - - [Debugging Guide](/troubleshooting/debugging-guide) - Advanced troubleshooting - - [Logs Analysis](/troubleshooting/logs-analysis) - Interpret error messages - - **Support:** support@smallest.ai - - - - - **Recommendation:** Start with Docker to familiarize yourself with the - components and API. Once you're comfortable, move to Kubernetes for production - deployments with autoscaling and high availability. - diff --git a/fern/products/waves/versions/v4.0.0/on-prem/kubernetes/autoscaling/cluster-autoscaler.mdx b/fern/products/waves/versions/v4.0.0/on-prem/kubernetes/autoscaling/cluster-autoscaler.mdx index b41fff4..c063c6a 100644 --- a/fern/products/waves/versions/v4.0.0/on-prem/kubernetes/autoscaling/cluster-autoscaler.mdx +++ b/fern/products/waves/versions/v4.0.0/on-prem/kubernetes/autoscaling/cluster-autoscaler.mdx @@ -36,7 +36,7 @@ graph TD - Create IAM role with autoscaling permissions (see [IAM & IRSA](/kubernetes/aws/iam-irsa)) + Create IAM role with autoscaling permissions (see [IAM & IRSA](/waves/self-host/kubernetes-setup/aws/iam-irsa)) @@ -415,7 +415,7 @@ Import Cluster Autoscaler dashboard: Dashboard ID: 3831 -See [Grafana Dashboards](/kubernetes/autoscaling/grafana-dashboards) +See [Grafana Dashboards](/waves/self-host/kubernetes-setup/autoscaling/grafana-dashboards) ## Troubleshooting @@ -473,12 +473,12 @@ kubectl describe sa cluster-autoscaler -n kube-system kubectl logs -n kube-system -l app.kubernetes.io/name=aws-cluster-autoscaler | grep AccessDenied ``` -Update IAM policy if needed (see [IAM & IRSA](/kubernetes/aws/iam-irsa)) +Update IAM policy if needed (see [IAM & IRSA](/waves/self-host/kubernetes-setup/aws/iam-irsa)) ## Best Practices - + Always tag Auto Scaling Groups: ``` @@ -487,7 +487,7 @@ Update IAM policy if needed (see [IAM & IRSA](/kubernetes/aws/iam-irsa)) ``` - + Configure 
appropriate min/max for each node group: ```yaml @@ -497,7 +497,7 @@ Update IAM policy if needed (see [IAM & IRSA](/kubernetes/aws/iam-irsa)) ``` - + Protect critical workloads during scale-down: ```yaml @@ -513,13 +513,13 @@ Update IAM policy if needed (see [IAM & IRSA](/kubernetes/aws/iam-irsa)) ``` - + Track scaling decisions in Grafana Set alerts for scale failures - + Periodically test scale-up and scale-down: ```bash @@ -533,15 +533,15 @@ Update IAM policy if needed (see [IAM & IRSA](/kubernetes/aws/iam-irsa)) ## What's Next? - + Configure pod-level autoscaling - + Set up Prometheus metrics - + Visualize autoscaling behavior diff --git a/fern/products/waves/versions/v4.0.0/on-prem/kubernetes/autoscaling/grafana-dashboards.mdx b/fern/products/waves/versions/v4.0.0/on-prem/kubernetes/autoscaling/grafana-dashboards.mdx index 0b7422a..fe99d89 100644 --- a/fern/products/waves/versions/v4.0.0/on-prem/kubernetes/autoscaling/grafana-dashboards.mdx +++ b/fern/products/waves/versions/v4.0.0/on-prem/kubernetes/autoscaling/grafana-dashboards.mdx @@ -426,7 +426,7 @@ count(kube_node_info{node=~".*gpu.*"}) * 1.00 ## Best Practices - + Organize dashboards by category: - **Smallest Overview**: High-level metrics @@ -435,7 +435,7 @@ count(kube_node_info{node=~".*gpu.*"}) * 1.00 - **Autoscaling**: HPA and scaling behavior - + Default time ranges for different views: - **Real-time monitoring**: Last 15 minutes @@ -444,7 +444,7 @@ count(kube_node_info{node=~".*gpu.*"}) * 1.00 - **Trends**: Last 7 days - + Mark important events: - Deployments @@ -453,7 +453,7 @@ count(kube_node_info{node=~".*gpu.*"}) * 1.00 - Configuration changes - + Create template dashboards for: - Different environments (dev, staging, prod) @@ -461,7 +461,7 @@ count(kube_node_info{node=~".*gpu.*"}) * 1.00 - Different models - + Save dashboard JSON to git: ```bash @@ -524,16 +524,12 @@ kube-prometheus-stack: ## What's Next? 
- + Use metrics for autoscaling - + Configure Prometheus metrics - - - Configure license validation - diff --git a/fern/products/waves/versions/v4.0.0/on-prem/kubernetes/autoscaling/hpa-configuration.mdx b/fern/products/waves/versions/v4.0.0/on-prem/kubernetes/autoscaling/hpa-configuration.mdx index ebe25a0..b1173d5 100644 --- a/fern/products/waves/versions/v4.0.0/on-prem/kubernetes/autoscaling/hpa-configuration.mdx +++ b/fern/products/waves/versions/v4.0.0/on-prem/kubernetes/autoscaling/hpa-configuration.mdx @@ -341,7 +341,7 @@ maxReplicas: 10 ``` -For complete cost savings during off-hours, use [Cluster Autoscaler](/kubernetes/autoscaling/cluster-autoscaler) to scale nodes to zero. +For complete cost savings during off-hours, use [Cluster Autoscaler](/waves/self-host/kubernetes-setup/autoscaling/cluster-autoscaler) to scale nodes to zero. ## Troubleshooting @@ -432,7 +432,7 @@ Be careful: too aggressive scale-down causes flapping. ## Best Practices - + Choose `targetActiveRequests` based on your model performance: - Larger models (slower inference): Lower target (e.g., 3) @@ -441,7 +441,7 @@ Be careful: too aggressive scale-down causes flapping. Test with load to find optimal value. - + Scale up quickly, scale down slowly: ```yaml @@ -452,7 +452,7 @@ Be careful: too aggressive scale-down causes flapping. Prevents request failures during traffic fluctuations. - + Consider cluster capacity when setting `maxReplicas`: ```yaml @@ -462,16 +462,16 @@ Be careful: too aggressive scale-down causes flapping. Don't set higher than available GPU resources. - + Use Grafana to visualize: - Current vs target metrics - Pod count over time - Scale-up/down events - See [Grafana Dashboards](/kubernetes/autoscaling/grafana-dashboards) + See [Grafana Dashboards](/waves/self-host/kubernetes-setup/autoscaling/grafana-dashboards) - + Regularly load test to verify HPA behavior: ```bash @@ -484,15 +484,15 @@ Be careful: too aggressive scale-down causes flapping. ## What's Next? 
- + Scale cluster nodes automatically - + Configure Prometheus and custom metrics - + Visualize metrics and scaling behavior diff --git a/fern/products/waves/versions/v4.0.0/on-prem/kubernetes/autoscaling/metrics-setup.mdx b/fern/products/waves/versions/v4.0.0/on-prem/kubernetes/autoscaling/metrics-setup.mdx index 4b3dfa6..56f4d54 100644 --- a/fern/products/waves/versions/v4.0.0/on-prem/kubernetes/autoscaling/metrics-setup.mdx +++ b/fern/products/waves/versions/v4.0.0/on-prem/kubernetes/autoscaling/metrics-setup.mdx @@ -87,15 +87,52 @@ prometheus-adapter: Lightning ASR exposes the following metrics: -| Metric | Type | Description | -|--------|------|-------------| -| `asr_active_requests` | Gauge | Current number of active transcription requests | -| `asr_total_requests` | Counter | Total requests processed | -| `asr_failed_requests` | Counter | Total failed requests | -| `asr_request_duration_seconds` | Histogram | Request processing time | -| `asr_model_load_time_seconds` | Gauge | Time to load model on startup | -| `asr_gpu_utilization` | Gauge | GPU utilization percentage | -| `asr_gpu_memory_used_bytes` | Gauge | GPU memory used | + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
MetricTypeDescription
asr_active_requestsGaugeCurrent number of active transcription requests
asr_total_requestsCounterTotal requests processed
asr_failed_requestsCounterTotal failed requests
asr_request_duration_secondsHistogramRequest processing time
asr_model_load_time_secondsGaugeTime to load model on startup
asr_gpu_utilizationGaugeGPU utilization percentage
asr_gpu_memory_used_bytesGaugeGPU memory used
## Verify Metrics Setup @@ -468,7 +505,7 @@ kube-prometheus-stack: ## Best Practices - + Pre-compute expensive queries: ```yaml @@ -479,7 +516,7 @@ kube-prometheus-stack: Then use in HPA instead of raw query - + Balance responsiveness vs storage: - Fast autoscaling: 15s @@ -487,7 +524,7 @@ kube-prometheus-stack: - Cost-optimized: 60s - + Always persist Prometheus data: ```yaml @@ -500,7 +537,7 @@ kube-prometheus-stack: ``` - + Track Prometheus performance: - Query duration @@ -509,23 +546,23 @@ kube-prometheus-stack: - TSDB size - + Don't rely on Prometheus UI Use Grafana dashboards for ops - See [Grafana Dashboards](/kubernetes/autoscaling/grafana-dashboards) + See [Grafana Dashboards](/waves/self-host/kubernetes-setup/autoscaling/grafana-dashboards) ## What's Next? - + Use metrics for autoscaling - + Visualize metrics diff --git a/fern/products/waves/versions/v4.0.0/on-prem/kubernetes/aws/eks-setup.mdx b/fern/products/waves/versions/v4.0.0/on-prem/kubernetes/aws/eks-setup.mdx index 1c650f8..6168a12 100644 --- a/fern/products/waves/versions/v4.0.0/on-prem/kubernetes/aws/eks-setup.mdx +++ b/fern/products/waves/versions/v4.0.0/on-prem/kubernetes/aws/eks-setup.mdx @@ -163,13 +163,66 @@ Cluster creation takes 15-20 minutes. Monitor progress in the AWS CloudFormation Choose the right GPU instance type for your workload: -| Instance Type | GPU | VRAM | vCPUs | RAM | $/hour* | Recommended For | -|--------------|-----|------|-------|-----|---------|-----------------| -| g5.xlarge | 1x A10G | 24 GB | 4 | 16 GB | $1.00 | Development, testing | -| g5.2xlarge | 1x A10G | 24 GB | 8 | 32 GB | $1.21 | Small production | -| g5.4xlarge | 1x A10G | 24 GB | 16 | 64 GB | $1.63 | Medium production | -| g5.12xlarge | 4x A10G | 96 GB | 48 | 192 GB | $5.67 | High-volume production | -| p3.2xlarge | 1x V100 | 16 GB | 8 | 61 GB | $3.06 | Legacy workloads | + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Instance TypeGPUVRAMvCPUsRAM$/hour*Recommended For
g5.xlarge1x A10G24 GB416 GB$1.00Development, testing
g5.2xlarge1x A10G24 GB832 GB$1.21Small production
g5.4xlarge1x A10G24 GB1664 GB$1.63Medium production
g5.12xlarge4x A10G96 GB48192 GB$5.67High-volume production
p3.2xlarge1x V10016 GB861 GB$3.06Legacy workloads
* Approximate on-demand pricing in us-east-1, subject to change @@ -325,7 +378,7 @@ kubectl get pods -n kube-system -l app=efs-csi-controller ## Enable Cluster Autoscaler -See the [Cluster Autoscaler](/kubernetes/autoscaling/cluster-autoscaler) guide for detailed setup. +See the [Cluster Autoscaler](/waves/self-host/kubernetes-setup/autoscaling/cluster-autoscaler) guide for detailed setup. Quick setup: @@ -457,19 +510,19 @@ kubectl logs -n kube-system -l app=ebs-csi-controller ## What's Next? - + Configure IAM roles for service accounts - + Advanced GPU node configuration and optimization - + Set up shared file storage for models - + Enable automatic node scaling diff --git a/fern/products/waves/versions/v4.0.0/on-prem/kubernetes/aws/gpu-nodes.mdx b/fern/products/waves/versions/v4.0.0/on-prem/kubernetes/aws/gpu-nodes.mdx index f02e065..be7daa8 100644 --- a/fern/products/waves/versions/v4.0.0/on-prem/kubernetes/aws/gpu-nodes.mdx +++ b/fern/products/waves/versions/v4.0.0/on-prem/kubernetes/aws/gpu-nodes.mdx @@ -357,7 +357,7 @@ managedNodeGroups: ### Install Cluster Autoscaler -See [Cluster Autoscaler](/kubernetes/autoscaling/cluster-autoscaler) for full setup. +See [Cluster Autoscaler](/waves/self-host/kubernetes-setup/autoscaling/cluster-autoscaler) for full setup. 
Quick enable: @@ -545,7 +545,7 @@ kubectl exec -- nvidia-smi ## Best Practices - + Always use taints and tolerations to prevent non-GPU workloads from running on GPU nodes: ```yaml @@ -556,7 +556,7 @@ kubectl exec -- nvidia-smi ``` - + Always specify GPU resource requests and limits: ```yaml @@ -568,7 +568,7 @@ kubectl exec -- nvidia-smi ``` - + Configure auto-scaling to scale GPU nodes to zero during off-hours: ```yaml @@ -577,7 +577,7 @@ kubectl exec -- nvidia-smi ``` - + Use DCGM exporter and Grafana to monitor GPU metrics: - GPU utilization @@ -586,7 +586,7 @@ kubectl exec -- nvidia-smi - Power consumption - + Regularly test your application's response to spot interruptions: ```bash @@ -598,19 +598,19 @@ kubectl exec -- nvidia-smi ## What's Next? - + Set up shared storage for model caching - + Configure pod autoscaling based on metrics - + Enable automatic node scaling - + Set up Grafana dashboards diff --git a/fern/products/waves/versions/v4.0.0/on-prem/kubernetes/aws/iam-irsa.mdx b/fern/products/waves/versions/v4.0.0/on-prem/kubernetes/aws/iam-irsa.mdx index 5cbd487..b4d3f8b 100644 --- a/fern/products/waves/versions/v4.0.0/on-prem/kubernetes/aws/iam-irsa.mdx +++ b/fern/products/waves/versions/v4.0.0/on-prem/kubernetes/aws/iam-irsa.mdx @@ -446,19 +446,19 @@ eksctl utils associate-iam-oidc-provider \ ## Best Practices - + Grant only the minimum permissions required for each service account. Review and audit IAM policies regularly. - + Create separate IAM roles for each service account rather than sharing roles. This improves security and auditability. - + Monitor IAM role usage via CloudTrail: ```bash @@ -467,7 +467,7 @@ eksctl utils associate-iam-oidc-provider \ ``` - + Tag IAM roles and policies for easier management: ```bash @@ -556,11 +556,11 @@ chmod +x setup-irsa.sh ## What's Next? 
- + Optimize GPU node configuration - + Configure cluster autoscaling diff --git a/fern/products/waves/versions/v4.0.0/on-prem/kubernetes/k8s-troubleshooting.mdx b/fern/products/waves/versions/v4.0.0/on-prem/kubernetes/k8s-troubleshooting.mdx index 4e47acc..aa24e9e 100644 --- a/fern/products/waves/versions/v4.0.0/on-prem/kubernetes/k8s-troubleshooting.mdx +++ b/fern/products/waves/versions/v4.0.0/on-prem/kubernetes/k8s-troubleshooting.mdx @@ -47,7 +47,7 @@ lightning-asr-xxx 0/1 Pending 0 5m **Causes and Solutions**: - + **Check**: ```bash kubectl describe pod lightning-asr-xxx -n smallest @@ -62,7 +62,7 @@ lightning-asr-xxx 0/1 Pending 0 5m - Reduce requested GPUs or add more nodes - + **Check**: ```bash kubectl get nodes --show-labels @@ -75,7 +75,7 @@ lightning-asr-xxx 0/1 Pending 0 5m - Add labels to nodes: `kubectl label nodes workload=gpu` - + **Check**: ```bash kubectl describe pod lightning-asr-xxx -n smallest | grep -A5 "Tolerations" @@ -93,7 +93,7 @@ lightning-asr-xxx 0/1 Pending 0 5m ``` - + **Check**: ```bash kubectl get pvc -n smallest @@ -127,7 +127,7 @@ Look for errors in Events section. **Solutions**: - + **Error**: `unauthorized: authentication required` **Solutions**: @@ -141,7 +141,7 @@ Look for errors in Events section. ``` - + **Error**: `manifest unknown` or `not found` **Solutions**: @@ -150,7 +150,7 @@ Look for errors in Events section. 
- Contact support@smallest.ai for access - + **Error**: `rate limit exceeded` **Solutions**: @@ -178,7 +178,7 @@ kubectl describe pod lightning-asr-xxx -n smallest **Common Causes**: - + **Error**: `License validation failed` or `Invalid license key` **Solutions**: @@ -188,7 +188,7 @@ kubectl describe pod lightning-asr-xxx -n smallest - Test License Proxy: `kubectl exec -it -- curl http://license-proxy:3369/health` - + **Error**: `Failed to download model` or `Connection timeout` **Solutions**: @@ -198,7 +198,7 @@ kubectl describe pod lightning-asr-xxx -n smallest - Test URL: `kubectl run test --rm -it --image=curlimages/curl -- curl -I $MODEL_URL` - + **Error**: Pod killed, exit code 137 **Solutions**: @@ -216,7 +216,7 @@ kubectl describe pod lightning-asr-xxx -n smallest - Check node capacity: `kubectl describe node ` - + **Error**: `No CUDA-capable device` or `GPU not found` **Solutions**: @@ -245,7 +245,7 @@ kubectl get endpoints -n smallest **Solutions**: - + **Issue**: Service has no endpoints **Check**: @@ -259,7 +259,7 @@ kubectl get endpoints -n smallest - Check pods are ready: `kubectl get pods -l app=api-server -o wide` - + **Solutions**: - Verify service port: ```bash @@ -268,7 +268,7 @@ kubectl get endpoints -n smallest - Use correct port in connections (7100 for API Server) - + **Check**: ```bash kubectl get networkpolicy -n smallest @@ -300,7 +300,7 @@ kubectl get --raw "/apis/custom.metrics.k8s.io/v1beta1" | jq . **Solutions**: - + **Check**: ```bash kubectl get servicemonitor -n smallest @@ -323,7 +323,7 @@ kubectl get --raw "/apis/custom.metrics.k8s.io/v1beta1" | jq . Query: `asr_active_requests` - + **Check**: ```bash kubectl get hpa lightning-asr -n smallest @@ -340,7 +340,7 @@ kubectl get --raw "/apis/custom.metrics.k8s.io/v1beta1" | jq . ``` - + **Solutions**: - Add more nodes - Enable Cluster Autoscaler @@ -358,7 +358,7 @@ kubectl get --raw "/apis/custom.metrics.k8s.io/v1beta1" | jq . 
**Solutions**: - + **Check**: ```bash kubectl get storageclass @@ -370,7 +370,7 @@ kubectl get --raw "/apis/custom.metrics.k8s.io/v1beta1" | jq . - Create storage class - + **Check**: ```bash kubectl describe pod | grep -A10 "Events" @@ -382,7 +382,7 @@ kubectl get --raw "/apis/custom.metrics.k8s.io/v1beta1" | jq . - Verify EFS CSI driver: `kubectl get pods -n kube-system -l app=efs-csi-controller` - + **Solutions**: - Check volume permissions - Add fsGroup to pod securityContext: @@ -497,11 +497,11 @@ Include: ## What's Next? - + Platform-agnostic troubleshooting guide - + API integration documentation diff --git a/fern/products/waves/versions/v4.0.0/on-prem/kubernetes/prerequisites.mdx b/fern/products/waves/versions/v4.0.0/on-prem/kubernetes/prerequisites.mdx deleted file mode 100644 index 56b9702..0000000 --- a/fern/products/waves/versions/v4.0.0/on-prem/kubernetes/prerequisites.mdx +++ /dev/null @@ -1,371 +0,0 @@ ---- -title: Prerequisites -description: Requirements for deploying Smallest Self-Host on Kubernetes ---- - - -**Kubernetes deployment is currently only available for ASR (Speech-to-Text).** TTS (Text-to-Speech) Kubernetes support is coming soon. For TTS deployments, please use [Docker](/docker/tts/quick-start). - - -## Overview - -Before deploying Smallest Self-Host ASR on Kubernetes, ensure your cluster meets the requirements and you have the necessary tools and credentials. - -## Kubernetes Cluster Requirements - -### Minimum Cluster Specifications - - - - **v1.19 or higher** - - v1.24+ recommended - - - - **Minimum 2 nodes** - - - 1 CPU node (control plane/general) - - 1 GPU node (Lightning ASR) - - - - **Minimum cluster capacity** - - - 8 CPU cores - - 32 GB RAM - - 1 NVIDIA GPU - - - - **Persistent volume support** - - - Storage class available - - 100 GB minimum capacity - - - - - - We recommend using L4 or L40s for the best performance. 
- - -## Required Tools - -Install the following tools on your local machine: - -### Helm - -Helm 3.0 or higher is required. - - - - ```bash - brew install helm - ``` - - - - ```bash - curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash - ``` - - - - ```powershell - choco install kubernetes-helm - ``` - - - -Verify installation: -```bash -helm version -``` - -### kubectl - -Kubernetes CLI tool for cluster management. - - - - ```bash - brew install kubectl - ``` - - - - ```bash - curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl" - chmod +x kubectl - sudo mv kubectl /usr/local/bin/ - ``` - - - - ```powershell - choco install kubernetes-cli - ``` - - - -Verify installation: -```bash -kubectl version --client -``` - -## Cluster Access - -### Configure kubectl - -Ensure kubectl is configured to access your cluster: - -```bash -kubectl cluster-info -kubectl get nodes -``` - -Expected output should show your cluster nodes. - -### Test Cluster Access - -Verify you have sufficient permissions: - -```bash -kubectl auth can-i create deployments -kubectl auth can-i create services -kubectl auth can-i create secrets -``` - -All should return `yes`. - -## GPU Support - -### NVIDIA GPU Operator - -For Kubernetes clusters, install the NVIDIA GPU Operator to manage GPU resources. - - -The Smallest Self-Host Helm chart includes the GPU Operator as an optional dependency. You can enable it during installation or install it separately. - - -#### Verify GPU Nodes - -Check that GPU nodes are properly labeled: - -```bash -kubectl get nodes -l node.kubernetes.io/instance-type -``` - -Verify GPU resources are available: - -```bash -kubectl get nodes -o json | jq '.items[].status.capacity' -``` - -Look for `nvidia.com/gpu` in the capacity. 
- -## Credentials - -Obtain the following from Smallest.ai before installation: - - - - Your unique license key for validation - - **Contact**: support@smallest.ai - - You'll add this to `values.yaml`: - ```yaml - global: - licenseKey: "your-license-key-here" - ``` - - - - Credentials to pull Docker images from `quay.io`: - - Username - - Password - - Email - - **Contact**: support@smallest.ai - - You'll add these to `values.yaml`: - ```yaml - global: - imageCredentials: - username: "your-username" - password: "your-password" - email: "your-email" - ``` - - - - Download URL for ASR models - - **Contact**: support@smallest.ai - - You'll add this to `values.yaml`: - ```yaml - models: - asrModelUrl: "your-model-url" - ``` - - - -## Storage Requirements - -### Storage Class - -Verify a storage class is available: - -```bash -kubectl get storageclass -``` - -You should see at least one storage class marked as `(default)` or available. - -### For AWS Deployments - -If deploying on AWS EKS, you'll need: - -- **EBS CSI Driver** for block storage -- **EFS CSI Driver** for shared file storage (recommended for model storage) - - -See the [AWS Deployment](/kubernetes/aws/eks-setup) guide for detailed setup instructions. - - -## Network Requirements - -### Required Ports - -Ensure the following ports are accessible within the cluster: - -| Port | Service | Purpose | -|------|---------|---------| -| 7100 | API Server | Client API requests | -| 2269 | Lightning ASR | Internal ASR processing | -| 3369 | License Proxy | Internal license validation | -| 6379 | Redis | Internal caching | - -### External Access - -The License Proxy requires outbound HTTPS access to: - -- `console-api.smallest.ai` (port 443) - - -Ensure your cluster's network policies and security groups allow outbound HTTPS traffic from pods. 
- - -## Optional Components - -### Prometheus & Grafana - -For monitoring and autoscaling based on custom metrics: - -- **Prometheus Operator** (included in chart) -- **Grafana** (included in chart) -- **Prometheus Adapter** (included in chart) - -These are required for: -- Custom metrics-based autoscaling -- Advanced monitoring dashboards -- Performance visualization - -### Cluster Autoscaler - -For automatic node scaling on AWS EKS: - -- IAM role with autoscaling permissions -- IRSA (IAM Roles for Service Accounts) configured - - -See the [Cluster Autoscaler](/kubernetes/autoscaling/cluster-autoscaler) guide for setup. - - -## Namespace - -Decide on a namespace for deployment: - - - - Deploy to the default namespace: - ```bash - kubectl config set-context --current --namespace=default - ``` - - - - Create and use a dedicated namespace: - ```bash - kubectl create namespace smallest - kubectl config set-context --current --namespace=smallest - ``` - - - -## Verification Checklist - -Before proceeding, ensure: - - - - ```bash - kubectl get nodes - ``` - Shows all cluster nodes in Ready state - - - - ```bash - kubectl get nodes -o json | jq '.items[].status.capacity."nvidia.com/gpu"' - ``` - Shows GPU count for GPU nodes - - - - ```bash - helm version - ``` - Shows Helm 3.x - - - - ```bash - kubectl get storageclass - ``` - Shows at least one storage class - - - - - [ ] License key obtained - - [ ] Container registry credentials - - [ ] Model download URL - - - - ```bash - kubectl top nodes - ``` - Shows available resources for deployment - - - -## AWS-Specific Prerequisites - -If deploying on AWS EKS, see: - - - Complete guide for setting up EKS cluster with GPU support - - -## What's Next? 
- -Once all prerequisites are met, proceed to the quick start: - - - Deploy Smallest Self-Host with Helm - - diff --git a/fern/products/waves/versions/v4.0.0/on-prem/kubernetes/prerequisites/credentials.mdx b/fern/products/waves/versions/v4.0.0/on-prem/kubernetes/prerequisites/credentials.mdx new file mode 100644 index 0000000..cf7aad7 --- /dev/null +++ b/fern/products/waves/versions/v4.0.0/on-prem/kubernetes/prerequisites/credentials.mdx @@ -0,0 +1,85 @@ +--- +title: Credentials & Access +description: License keys and registry credentials for Kubernetes STT deployment +--- + +## Required Credentials + +Obtain the following from Smallest.ai before installation: + + + + Your unique license key for validation + + **Contact**: support@smallest.ai + + You'll add this to `values.yaml`: + ```yaml + global: + licenseKey: "your-license-key-here" + ``` + + + + Credentials to pull Docker images from `quay.io`: + - Username + - Password + - Email + + **Contact**: support@smallest.ai + + You'll add these to `values.yaml`: + ```yaml + global: + imageCredentials: + username: "your-username" + password: "your-password" + email: "your-email" + ``` + + + + Download URL for ASR models + + **Contact**: support@smallest.ai + + You'll add this to `values.yaml`: + ```yaml + models: + asrModelUrl: "your-model-url" + ``` + + + +## Create Kubernetes Secret + +Alternatively, create a secret for registry credentials: + +```bash +kubectl create secret docker-registry smallest-registry \ + --docker-server=quay.io \ + --docker-username= \ + --docker-password= \ + --docker-email= \ + -n smallest +``` + +## Namespace Setup + + + + Deploy to the default namespace: + ```bash + kubectl config set-context --current --namespace=default + ``` + + + + Create and use a dedicated namespace: + ```bash + kubectl create namespace smallest + kubectl config set-context --current --namespace=smallest + ``` + + + diff --git 
a/fern/products/waves/versions/v4.0.0/on-prem/kubernetes/prerequisites/hardware-requirements.mdx b/fern/products/waves/versions/v4.0.0/on-prem/kubernetes/prerequisites/hardware-requirements.mdx new file mode 100644 index 0000000..c3f8015 --- /dev/null +++ b/fern/products/waves/versions/v4.0.0/on-prem/kubernetes/prerequisites/hardware-requirements.mdx @@ -0,0 +1,84 @@ +--- +title: Hardware Requirements +description: Cluster and hardware specifications for Kubernetes STT deployment +--- + +## Cluster Requirements + + + + **v1.19 or higher** + + v1.24+ recommended + + + + **Minimum 2 nodes** + + - 1 CPU node (control plane/general) + - 1 GPU node (Lightning ASR) + + + + **Minimum cluster capacity** + + - 8 CPU cores + - 32 GB RAM + - 1 NVIDIA GPU + + + + **Persistent volume support** + + - Storage class available + - 100 GB minimum capacity + + + + +We recommend using L4 or L40s GPUs for the best performance. + + +## Network Requirements + +Ensure the following ports are accessible within the cluster: + +| Port | Service | Purpose | +|------|---------|---------| +| 7100 | API Server | Client API requests | +| 2269 | Lightning ASR | Internal ASR processing | +| 3369 | License Proxy | Internal license validation | +| 6379 | Redis | Internal caching | + +### External Access + +The License Proxy requires outbound HTTPS access to: +- `api.smallest.ai` (port 443) + + +Ensure your cluster's network policies and security groups allow outbound HTTPS traffic from pods. + + +## Storage Requirements + +### Storage Class + +Verify a storage class is available: + +```bash +kubectl get storageclass +``` + +You should see at least one storage class marked as `(default)` or available. + +### For AWS Deployments + +If deploying on AWS EKS, you'll need: + +- **EBS CSI Driver** for block storage +- **EFS CSI Driver** for shared file storage (recommended for model storage) + + +See the [AWS EKS Setup](/waves/self-host/kubernetes-setup/aws/eks-setup) guide for detailed setup instructions. 
+ + diff --git a/fern/products/waves/versions/v4.0.0/on-prem/kubernetes/prerequisites/software-requirements.mdx b/fern/products/waves/versions/v4.0.0/on-prem/kubernetes/prerequisites/software-requirements.mdx new file mode 100644 index 0000000..f351d3d --- /dev/null +++ b/fern/products/waves/versions/v4.0.0/on-prem/kubernetes/prerequisites/software-requirements.mdx @@ -0,0 +1,146 @@ +--- +title: Software Requirements +description: Tools and software for Kubernetes STT deployment +--- + +## Required Tools + +Install the following tools on your local machine. + +### Helm + +Helm 3.0 or higher is required. + + + + ```bash + brew install helm + ``` + + + + ```bash + curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash + ``` + + + + ```powershell + choco install kubernetes-helm + ``` + + + +Verify installation: +```bash +helm version +``` + +### kubectl + +Kubernetes CLI tool for cluster management. + + + + ```bash + brew install kubectl + ``` + + + + ```bash + curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl" + chmod +x kubectl + sudo mv kubectl /usr/local/bin/ + ``` + + + + ```powershell + choco install kubernetes-cli + ``` + + + +Verify installation: +```bash +kubectl version --client +``` + +## Cluster Access + +### Configure kubectl + +Ensure kubectl is configured to access your cluster: + +```bash +kubectl cluster-info +kubectl get nodes +``` + +Expected output should show your cluster nodes. + +### Test Cluster Access + +Verify you have sufficient permissions: + +```bash +kubectl auth can-i create deployments +kubectl auth can-i create services +kubectl auth can-i create secrets +``` + +All should return `yes`. + +## GPU Support + +### NVIDIA GPU Operator + +For Kubernetes clusters, install the NVIDIA GPU Operator to manage GPU resources. + + +The Smallest Self-Host Helm chart includes the GPU Operator as an optional dependency. 
You can enable it during installation or install it separately. + + +#### Verify GPU Nodes + +Check that GPU nodes are properly labeled: + +```bash +kubectl get nodes -l node.kubernetes.io/instance-type +``` + +Verify GPU resources are available: + +```bash +kubectl get nodes -o json | jq '.items[].status.capacity' +``` + +Look for `nvidia.com/gpu` in the capacity. + +## Optional Components + +### Prometheus & Grafana + +For monitoring and autoscaling based on custom metrics: + +- **Prometheus Operator** (included in chart) +- **Grafana** (included in chart) +- **Prometheus Adapter** (included in chart) + +These are required for: +- Custom metrics-based autoscaling +- Advanced monitoring dashboards +- Performance visualization + +### Cluster Autoscaler + +For automatic node scaling on AWS EKS: + +- IAM role with autoscaling permissions +- IRSA (IAM Roles for Service Accounts) configured + + +See the [Cluster Autoscaler](/waves/self-host/kubernetes-setup/autoscaling/cluster-autoscaler) guide for setup. 
+ + diff --git a/fern/products/waves/versions/v4.0.0/on-prem/kubernetes/prerequisites/verification.mdx b/fern/products/waves/versions/v4.0.0/on-prem/kubernetes/prerequisites/verification.mdx new file mode 100644 index 0000000..4677cda --- /dev/null +++ b/fern/products/waves/versions/v4.0.0/on-prem/kubernetes/prerequisites/verification.mdx @@ -0,0 +1,98 @@ +--- +title: Verification Checklist +description: Verify all prerequisites before deploying STT on Kubernetes +--- + +## Pre-Deployment Checklist + +Before proceeding, ensure each item passes: + + + + ```bash + kubectl get nodes + ``` + Shows all cluster nodes in Ready state + + + + ```bash + kubectl get nodes -o json | jq '.items[].status.capacity."nvidia.com/gpu"' + ``` + Shows GPU count for GPU nodes + + + + ```bash + helm version + ``` + Shows Helm 3.x + + + + ```bash + kubectl get storageclass + ``` + Shows at least one storage class + + + + - [ ] License key obtained + - [ ] Container registry credentials + - [ ] Model download URL + + + + ```bash + kubectl top nodes + ``` + Shows available resources for deployment + + + + ```bash + kubectl auth can-i create deployments + kubectl auth can-i create services + kubectl auth can-i create secrets + ``` + All return `yes` + + + +## Quick Verification Script + +Run this script to check all prerequisites at once: + +```bash +#!/bin/bash +echo "=== Kubernetes STT Prerequisites Check ===" + +echo -n "kubectl: " +kubectl version --client &>/dev/null && echo "OK" || echo "MISSING" + +echo -n "Helm: " +helm version &>/dev/null && echo "OK" || echo "MISSING" + +echo -n "Cluster Access: " +kubectl cluster-info &>/dev/null && echo "OK" || echo "FAILED" + +echo -n "Nodes Ready: " +kubectl get nodes | grep -q "Ready" && echo "OK" || echo "FAILED" + +echo -n "Storage Class: " +kubectl get storageclass &>/dev/null && echo "OK" || echo "MISSING" + +echo -n "GPU Resources: " +kubectl get nodes -o json | jq -e '.items[].status.capacity."nvidia.com/gpu"' &>/dev/null && echo "OK" || 
echo "NOT DETECTED" + +echo "=== Check Complete ===" +``` + +## AWS-Specific Prerequisites + +If deploying on AWS EKS, see: + + + Complete guide for setting up EKS cluster with GPU support + + diff --git a/fern/products/waves/versions/v4.0.0/on-prem/kubernetes/quick-start.mdx b/fern/products/waves/versions/v4.0.0/on-prem/kubernetes/quick-start.mdx index f8a9815..899b4b2 100644 --- a/fern/products/waves/versions/v4.0.0/on-prem/kubernetes/quick-start.mdx +++ b/fern/products/waves/versions/v4.0.0/on-prem/kubernetes/quick-start.mdx @@ -4,11 +4,11 @@ description: Deploy Smallest Self-Host on Kubernetes with Helm --- -Kubernetes deployment is currently available for **ASR (Speech-to-Text)** only. For TTS deployments, use [Docker](/docker/tts/quick-start). +Kubernetes deployment is currently available for **ASR (Speech-to-Text)** only. For TTS deployments, use [Docker](/waves/self-host/docker-setup/tts-deployment/quick-start). -Ensure you've completed all [prerequisites](/kubernetes/prerequisites) before starting. +Ensure you've completed all [prerequisites](/waves/self-host/kubernetes-setup/prerequisites/hardware-requirements) before starting. ## Add Helm Repository @@ -75,12 +75,37 @@ Monitor the deployment: kubectl get pods -w ``` -| Component | Startup Time | Ready Indicator | -|-----------|--------------|-----------------| -| Redis | ~30s | `1/1 Running` | -| License Proxy | ~1m | `1/1 Running` | -| Lightning ASR | 2-10m | `1/1 Running` (model download on first run) | -| API Server | ~30s | `1/1 Running` | + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
<tr><th>Component</th><th>Startup Time</th><th>Ready Indicator</th></tr>
<tr><td>Redis</td><td>~30s</td><td><code>1/1 Running</code></td></tr>
<tr><td>License Proxy</td><td>~1m</td><td><code>1/1 Running</code></td></tr>
<tr><td>Lightning ASR</td><td>2-10m</td><td><code>1/1 Running</code> (model download on first run)</td></tr>
<tr><td>API Server</td><td>~30s</td><td><code>1/1 Running</code></td></tr>
Model downloads are cached when using shared storage (EFS). Subsequent starts complete in under a minute. @@ -94,12 +119,37 @@ kubectl get pods,svc All pods should show `Running` status with the following services available: -| Service | Port | Description | -|---------|------|-------------| -| api-server | 7100 | REST API endpoint | -| lightning-asr-internal | 2269 | ASR inference service | -| license-proxy | 3369 | License validation | -| redis-master | 6379 | Request queue | + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
<tr><th>Service</th><th>Port</th><th>Description</th></tr>
<tr><td>api-server</td><td>7100</td><td>REST API endpoint</td></tr>
<tr><td>lightning-asr-internal</td><td>2269</td><td>ASR inference service</td></tr>
<tr><td>license-proxy</td><td>3369</td><td>License validation</td></tr>
<tr><td>redis-master</td><td>6379</td><td>Request queue</td></tr>
## Test the API @@ -125,10 +175,30 @@ scaling: This deploys HorizontalPodAutoscalers that scale based on active requests: -| Component | Metric | Default Target | Behavior | -|-----------|--------|----------------|----------| -| Lightning ASR | `asr_active_requests` | 4 per pod | Scales GPU workers based on inference queue depth | -| API Server | `lightning_asr_replica_count` | 2:1 ratio | Maintains API capacity proportional to ASR workers | + + + + + + + + + + + + + + + + + + + + + + + +
<tr><th>Component</th><th>Metric</th><th>Default Target</th><th>Behavior</th></tr>
<tr><td>Lightning ASR</td><td><code>asr_active_requests</code></td><td>4 per pod</td><td>Scales GPU workers based on inference queue depth</td></tr>
<tr><td>API Server</td><td><code>lightning_asr_replica_count</code></td><td>2:1 ratio</td><td>Maintains API capacity proportional to ASR workers</td></tr>
### How It Works @@ -193,31 +263,56 @@ helm get values smallest-self-host -n smallest ## Troubleshooting -| Issue | Cause | Resolution | -|-------|-------|------------| -| Pods `Pending` | Insufficient resources or missing GPU nodes | Check `kubectl describe pod ` for scheduling errors | -| `ImagePullBackOff` | Invalid registry credentials | Verify `imageCredentials` in values.yaml | -| `CrashLoopBackOff` | Invalid license or insufficient memory | Check logs with `kubectl logs --previous` | -| Slow model download | Large model size (~20GB) | Use shared storage (EFS) for caching | - -For detailed troubleshooting, see [Troubleshooting Guide](/kubernetes/k8s-troubleshooting). + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
<tr><th>Issue</th><th>Cause</th><th>Resolution</th></tr>
<tr><td>Pods <code>Pending</code></td><td>Insufficient resources or missing GPU nodes</td><td>Check <code>kubectl describe pod &lt;name&gt;</code> for scheduling errors</td></tr>
<tr><td><code>ImagePullBackOff</code></td><td>Invalid registry credentials</td><td>Verify <code>imageCredentials</code> in values.yaml</td></tr>
<tr><td><code>CrashLoopBackOff</code></td><td>Invalid license or insufficient memory</td><td>Check logs with <code>kubectl logs &lt;pod&gt; --previous</code></td></tr>
<tr><td>Slow model download</td><td>Large model size (~20GB)</td><td>Use shared storage (EFS) for caching</td></tr>
+ +For detailed troubleshooting, see [Troubleshooting Guide](/waves/self-host/kubernetes-setup/k8s-troubleshooting). ## Next Steps - + EKS-specific configuration - + Shared storage for faster cold starts - + Fine-tune scaling behavior and thresholds - + Grafana dashboards and alerting diff --git a/fern/products/waves/versions/v4.0.0/on-prem/kubernetes/storage/efs-configuration.mdx b/fern/products/waves/versions/v4.0.0/on-prem/kubernetes/storage/efs-configuration.mdx index 8f6830a..713084b 100644 --- a/fern/products/waves/versions/v4.0.0/on-prem/kubernetes/storage/efs-configuration.mdx +++ b/fern/products/waves/versions/v4.0.0/on-prem/kubernetes/storage/efs-configuration.mdx @@ -10,19 +10,19 @@ Amazon Elastic File System (EFS) provides shared, persistent file storage for Ku ## Benefits of EFS - + Multiple pods can read/write simultaneously (ReadWriteMany) - + Storage grows and shrinks automatically - + Models cached once, used by all pods - + Pay only for storage used, no upfront provisioning @@ -31,7 +31,7 @@ Amazon Elastic File System (EFS) provides shared, persistent file storage for Ku - Install the EFS CSI driver (see [IAM & IRSA](/kubernetes/aws/iam-irsa) guide) + Install the EFS CSI driver (see [IAM & IRSA](/waves/self-host/kubernetes-setup/aws/iam-irsa) guide) ```bash kubectl get pods -n kube-system -l app=efs-csi-controller @@ -459,11 +459,11 @@ EBS volumes can only be attached to one pod at a time. This prevents horizontal ## What's Next? 
- + Optimize model storage and caching strategies - + Enable autoscaling with shared model storage diff --git a/fern/products/waves/versions/v4.0.0/on-prem/kubernetes/storage/model-storage.mdx b/fern/products/waves/versions/v4.0.0/on-prem/kubernetes/storage/model-storage.mdx index a5da6a5..b378d45 100644 --- a/fern/products/waves/versions/v4.0.0/on-prem/kubernetes/storage/model-storage.mdx +++ b/fern/products/waves/versions/v4.0.0/on-prem/kubernetes/storage/model-storage.mdx @@ -40,7 +40,7 @@ scaling: maxReplicas: 10 ``` -See [EFS Configuration](/kubernetes/storage/efs-configuration) for setup. +See [EFS Configuration](/waves/self-host/kubernetes-setup/storage-pvc/efs-configuration) for setup. ### Strategy 2: Container Image with Baked Model @@ -343,23 +343,58 @@ lightningAsr: ## Performance Comparison -| Strategy | First Start | Subsequent Starts | Scaling Speed | Cost | -|----------|-------------|-------------------|---------------|------| -| **EFS Shared** | 5-10 min | 30-60 sec | Fast | Medium | -| **Baked Image** | 3-5 min | 3-5 min | Medium | Low | -| **EmptyDir** | 5-10 min | 5-10 min | Slow | Low | -| **S3 Init** | 2-5 min | 2-5 min | Medium | Low | + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
<tr><th>Strategy</th><th>First Start</th><th>Subsequent Starts</th><th>Scaling Speed</th><th>Cost</th></tr>
<tr><td><strong>EFS Shared</strong></td><td>5-10 min</td><td>30-60 sec</td><td>Fast</td><td>Medium</td></tr>
<tr><td><strong>Baked Image</strong></td><td>3-5 min</td><td>3-5 min</td><td>Medium</td><td>Low</td></tr>
<tr><td><strong>EmptyDir</strong></td><td>5-10 min</td><td>5-10 min</td><td>Slow</td><td>Low</td></tr>
<tr><td><strong>S3 Init</strong></td><td>2-5 min</td><td>2-5 min</td><td>Medium</td><td>Low</td></tr>
## Best Practices - + Always use shared storage (EFS) for production deployments with autoscaling. The cost savings from reduced download time and faster scaling far outweigh EFS costs. - + Watch logs during first deployment: ```bash @@ -369,7 +404,7 @@ lightningAsr: Look for download progress indicators. - + Ensure sufficient storage for models: ```yaml @@ -385,7 +420,7 @@ lightningAsr: ``` - + Test new models in separate deployment before updating production: ```bash @@ -446,15 +481,15 @@ kubectl delete pod -n smallest ## What's Next? - + Set up EFS for shared model storage - + Configure Redis data persistence - + Enable autoscaling with fast pod startup diff --git a/fern/products/waves/versions/v4.0.0/on-prem/kubernetes/storage/redis-persistence.mdx b/fern/products/waves/versions/v4.0.0/on-prem/kubernetes/storage/redis-persistence.mdx index eb7ba7a..5b5f0f7 100644 --- a/fern/products/waves/versions/v4.0.0/on-prem/kubernetes/storage/redis-persistence.mdx +++ b/fern/products/waves/versions/v4.0.0/on-prem/kubernetes/storage/redis-persistence.mdx @@ -523,7 +523,7 @@ kubectl exec -it -n smallest -- redis-cli CONFIG GET appendonly ## Best Practices - + Enable password authentication even for internal Redis: ```yaml @@ -534,7 +534,7 @@ kubectl exec -it -n smallest -- redis-cli CONFIG GET appendonly ``` - + Use AOF for maximum durability: ```yaml @@ -548,7 +548,7 @@ kubectl exec -it -n smallest -- redis-cli CONFIG GET appendonly ``` - + At least 2 replicas for high availability: ```yaml @@ -558,7 +558,7 @@ kubectl exec -it -n smallest -- redis-cli CONFIG GET appendonly ``` - + Use Redis exporter for Prometheus: ```bash @@ -567,7 +567,7 @@ kubectl exec -it -n smallest -- redis-cli CONFIG GET appendonly ``` - + Schedule automatic backups: - ElastiCache: Enable automatic backups @@ -578,11 +578,11 @@ kubectl exec -it -n smallest -- redis-cli CONFIG GET appendonly ## What's Next? 
- + Configure autoscaling for Lightning ASR - + Set up Prometheus metrics collection diff --git a/fern/products/waves/versions/v4.0.0/on-prem/troubleshooting/common-issues.mdx b/fern/products/waves/versions/v4.0.0/on-prem/troubleshooting/common-issues.mdx index 6863748..66afc48 100644 --- a/fern/products/waves/versions/v4.0.0/on-prem/troubleshooting/common-issues.mdx +++ b/fern/products/waves/versions/v4.0.0/on-prem/troubleshooting/common-issues.mdx @@ -33,7 +33,7 @@ This guide provides quick solutions to the most common issues encountered with S Contact license server directly: ```bash - curl -H "Authorization: Bearer ${LICENSE_KEY}" https://console-api.smallest.ai/validate + curl -H "Authorization: Bearer ${LICENSE_KEY}" https://api.smallest.ai/validate ```
@@ -129,7 +129,7 @@ This guide provides quick solutions to the most common issues encountered with S **Quick Fix**: - + ```bash nvidia-smi kubectl exec -it -- nvidia-smi @@ -145,7 +145,7 @@ This guide provides quick solutions to the most common issues encountered with S - Add more GPU nodes - + ```bash kubectl describe pod | grep -A5 "Limits" ``` @@ -160,7 +160,7 @@ This guide provides quick solutions to the most common issues encountered with S ``` - + ```bash sudo nvidia-smi -pm 1 ``` @@ -370,12 +370,12 @@ Response will include: 1. **Test connectivity**: ```bash - curl -v https://console-api.smallest.ai + curl -v https://api.smallest.ai ``` 2. **Check firewall rules**: - Allow outbound HTTPS (port 443) - - Whitelist `console-api.smallest.ai` + - Whitelist `api.smallest.ai` 3. **Review network policies** (Kubernetes): ```bash @@ -449,15 +449,15 @@ curl -X POST http://localhost:7100/v1/listen \ If issues persist: - + Advanced debugging techniques - + Interpret logs and error messages - + Email: **support@smallest.ai** Include: diff --git a/fern/products/waves/versions/v4.0.0/on-prem/troubleshooting/debugging-guide.mdx b/fern/products/waves/versions/v4.0.0/on-prem/troubleshooting/debugging-guide.mdx index 21bb886..e44b15c 100644 --- a/fern/products/waves/versions/v4.0.0/on-prem/troubleshooting/debugging-guide.mdx +++ b/fern/products/waves/versions/v4.0.0/on-prem/troubleshooting/debugging-guide.mdx @@ -527,11 +527,11 @@ spec: ## What's Next? - + Learn to interpret logs and errors - + Quick fixes for frequent problems diff --git a/fern/products/waves/versions/v4.0.0/on-prem/troubleshooting/logs-analysis.mdx b/fern/products/waves/versions/v4.0.0/on-prem/troubleshooting/logs-analysis.mdx index 0276bda..06c7d9e 100644 --- a/fern/products/waves/versions/v4.0.0/on-prem/troubleshooting/logs-analysis.mdx +++ b/fern/products/waves/versions/v4.0.0/on-prem/troubleshooting/logs-analysis.mdx @@ -11,13 +11,42 @@ Understanding log messages is crucial for diagnosing issues. 
This guide helps yo All components use standard log levels: -| Level | Description | Example | -|-------|-------------|---------| -| `DEBUG` | Detailed diagnostic info | Variable values, function calls | -| `INFO` | Normal operation events | Request received, model loaded | -| `WARNING` | Potential issues | Slow response, retry attempt | -| `ERROR` | Error that needs attention | Failed request, connection error | -| `CRITICAL` | Severe error | Service crash, unrecoverable error | + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
<tr><th>Level</th><th>Description</th><th>Example</th></tr>
<tr><td><code>DEBUG</code></td><td>Detailed diagnostic info</td><td>Variable values, function calls</td></tr>
<tr><td><code>INFO</code></td><td>Normal operation events</td><td>Request received, model loaded</td></tr>
<tr><td><code>WARNING</code></td><td>Potential issues</td><td>Slow response, retry attempt</td></tr>
<tr><td><code>ERROR</code></td><td>Error that needs attention</td><td>Failed request, connection error</td></tr>
<tr><td><code>CRITICAL</code></td><td>Severe error</td><td>Service crash, unrecoverable error</td></tr>
## Lightning ASR Logs @@ -48,7 +77,7 @@ INFO: Confidence: 0.95 ### Common Errors - + ```log ERROR: No CUDA-capable device detected ERROR: nvidia-smi command not found @@ -63,7 +92,7 @@ INFO: Confidence: 0.95 - Check NVIDIA Container Toolkit (Docker) - + ```log ERROR: CUDA out of memory ERROR: Tried to allocate 2.5GB but only 1.2GB available @@ -78,7 +107,7 @@ INFO: Confidence: 0.95 - Scale horizontally (more pods) - + ```log INFO: Downloading model from https://example.com/model.bin WARNING: Download attempt 1 failed: Connection timeout @@ -95,7 +124,7 @@ INFO: Confidence: 0.95 - Use shared storage (EFS) - + ```log ERROR: Failed to process audio: req_xyz789 ERROR: Unsupported audio format: audio/webm @@ -138,7 +167,7 @@ INFO: Status: 200 OK ### Common Errors - + ```log WARNING: Invalid license key from 10.0.1.5 WARNING: Missing Authorization header @@ -153,7 +182,7 @@ INFO: Status: 200 OK - Renew expired license - + ```log ERROR: No Lightning ASR workers available WARNING: Request queued: req_abc123 @@ -168,7 +197,7 @@ INFO: Status: 200 OK - Check HPA configuration - + ```log ERROR: Request timeout after 300s ERROR: Lightning ASR pod not responding: lightning-asr-0 @@ -191,7 +220,7 @@ INFO: Status: 200 OK ```log INFO: Starting License Proxy v1.0.0 INFO: License key loaded -INFO: Connecting to console-api.smallest.ai +INFO: Connecting to api.smallest.ai INFO: License validated successfully INFO: License valid until: 2025-12-31T23:59:59Z INFO: Grace period: 24 hours @@ -210,7 +239,7 @@ INFO: Usage reported successfully ### Common Errors - + ```log ERROR: License validation failed: Invalid license key ERROR: License server returned 401 Unauthorized @@ -225,9 +254,9 @@ INFO: Usage reported successfully - Contact support@smallest.ai - + ```log - WARNING: Connection to console-api.smallest.ai failed + WARNING: Connection to api.smallest.ai failed WARNING: Connection timeout after 10s INFO: Using cached validation INFO: Grace period active (23h remaining) @@ -236,12 
+265,12 @@ INFO: Usage reported successfully **Cause**: Network connectivity issue **Solution**: - - Test: `curl https://console-api.smallest.ai` + - Test: `curl https://api.smallest.ai` - Check firewall allows HTTPS - Restore connectivity before grace period expires - + ```log WARNING: Grace period expires in 1 hour WARNING: Cannot connect to license server @@ -271,7 +300,7 @@ DB 0: 1523 keys (expires: 0) ### Common Errors - + ```log WARNING: Memory usage: 95% ERROR: OOM command not allowed when used memory > 'maxmemory' @@ -283,7 +312,7 @@ DB 0: 1523 keys (expires: 0) - Clear old keys - + ```log ERROR: Failed writing the RDB file ERROR: Disk is full @@ -470,7 +499,7 @@ spec: ## Best Practices - + Prefer JSON format for easier parsing: ```json @@ -484,7 +513,7 @@ spec: ``` - + Always include relevant context in logs: - Request ID @@ -493,7 +522,7 @@ spec: - User/session info (if applicable) - + Use correct log levels: - DEBUG: Development only @@ -503,7 +532,7 @@ spec: - CRITICAL: Service-breaking issues - + Use centralized logging: - ELK Stack (Elasticsearch, Logstash, Kibana) @@ -516,11 +545,11 @@ spec: ## What's Next? - + Quick solutions to frequent problems - + Advanced debugging techniques diff --git a/fern/products/waves/versions/v4.0.0/product/projects.mdx b/fern/products/waves/versions/v4.0.0/product/projects.mdx index 1d603fe..752a469 100644 --- a/fern/products/waves/versions/v4.0.0/product/projects.mdx +++ b/fern/products/waves/versions/v4.0.0/product/projects.mdx @@ -112,7 +112,7 @@ Welcome to the official documentation for our text-to-speech (TTS) project. Our ### Installation & Setup -1. Register for an account and +1. Register for an account and log into the platform. 2. Create a new project or open an existing one. 3. Add or paste your text content to the project. 
diff --git a/fern/products/waves/versions/v4.0.0/speech-to-text-new/features/word-boosting.mdx b/fern/products/waves/versions/v4.0.0/speech-to-text-new/features/word-boosting.mdx deleted file mode 100644 index 1b23e2d..0000000 --- a/fern/products/waves/versions/v4.0.0/speech-to-text-new/features/word-boosting.mdx +++ /dev/null @@ -1,124 +0,0 @@ ---- -title: 'Word Boosting' -description: 'Improve recognition accuracy for important keywords' ---- - -Real-Time - -Word Boosting (also known as Keyword/Keyterm Prompting) allows you to improve Keyword Recall Rate (KRR) for important keywords or phrases by up to 90%. This feature helps ensure accurate transcription of industry-specific terminology, product names, company names, and specialized jargon. - -## Enabling Word Boosting - -Add `keywords` parameter as an array of strings in the format `word:weight` where weight is a positive number indicating boost intensity. - - -Word Boosting is currently only available for the Real-Time WebSocket API. - - -### Real-Time WebSocket API - -```javascript -const url = new URL("wss://waves-api.smallest.ai/api/v1/lightning/get_text"); -url.searchParams.append("language", "en"); -url.searchParams.append("encoding", "linear16"); -url.searchParams.append("sample_rate", "16000"); -url.searchParams.append("keywords", JSON.stringify(["nacho:5.0", "bacon cheeseburger:4.0"])); - -const ws = new WebSocket(url.toString(), { - headers: { - Authorization: `Bearer ${API_KEY}`, - }, -}); -``` - -## Case Sensitivity and Formatting - -Keywords preserve formatting (including case and punctuation) which helps control how proper nouns, product names, or company names are transcribed. - -Best practices for keyword formatting: - -- For proper nouns (names, brands, titles): Use appropriate capitalization (`Waves`, `iPhone`, `Dr. 
Smith`) -- For non-proper nouns: Use lowercase (`tretinoin`, `algorithm`, `protocol`) - -When smart formatting is applied to the transcript, words that start sentences may be automatically capitalized regardless of keyword formatting. - -## Expected Improvements - -Word Boosting can significantly improve recognition accuracy and confidence scores for industry-specific terminology. Typical improvements include: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
SourceBefore RecognitionAfter Recognition
nacho stack double crunch taco"macho stack""nacho stack double crunch taco"
bacon cheeseburger"bake in cheeseburger""bacon cheeseburger"
account number"a count number""account number"
billing department"building department""billing department"
technical support"tech nil call support""technical support"
- -### Best Practices for Keyword Selection - - - - Medical terms (`tretinoin:5.0`, `diagnosis:4.0`), technical jargon (`escalation:4.5`, `API:3.5`) - - - Brand names (`Waves:5.0`, `iPhone:5.0`), service names, competitor names - - - Common phrases in your domain (`account number:4.0`, `customer service:3.5`) - - - Names, brands, titles with appropriate capitalization (`Dr. Smith:5.0`) - - - Use lowercase (`algorithm:3.0`, `protocol:3.0`, `refill:4.0`) - - - -### What to Avoid - - - - Very common words that are rarely misrecognized (`the`, `and`, `is`) - don't boost these - - - Words that appear in many contexts without specific meaning - - - Focus on the most important 20-50 terms with appropriate weight values - - - Ensure capitalization matches your desired output - - - Avoid using only very high weights (10+) or very low weights (1) - use a range (1-10) to differentiate importance levels - - \ No newline at end of file diff --git a/fern/products/waves/versions/v4.0.0/speech-to-text-new/pre-recorded/features.mdx b/fern/products/waves/versions/v4.0.0/speech-to-text-new/pre-recorded/features.mdx deleted file mode 100644 index 186dcc1..0000000 --- a/fern/products/waves/versions/v4.0.0/speech-to-text-new/pre-recorded/features.mdx +++ /dev/null @@ -1,27 +0,0 @@ ---- -title: 'Features' -description: 'Available features for Pre-Recorded Lightning STT API' ---- - -The Pre-Recorded Lightning STT API supports the following features: - -## Available Features - - - - Get precise timing information for each word in the transcription - - - Automatically detect the language of the audio - - - Identify and label different speakers in the audio - - - Predict demographic attributes alongside transcription - - - Detect emotional tone in the transcribed speech - - - diff --git a/fern/products/waves/versions/v4.0.0/speech-to-text-new/realtime/features.mdx b/fern/products/waves/versions/v4.0.0/speech-to-text-new/realtime/features.mdx deleted file mode 100644 index 75d3127..0000000 
--- a/fern/products/waves/versions/v4.0.0/speech-to-text-new/realtime/features.mdx +++ /dev/null @@ -1,36 +0,0 @@ ---- -title: 'Features' -description: 'Available features for Real-Time Lightning STT WebSocket API' ---- - -The Real-Time Lightning STT WebSocket API supports the following features: - -## Available Features - - - - Get precise timing information for each word in the transcription with confidence scores - - - Automatically detect the language of the audio - - - Get sentence-level transcription segments with timing information - - - Automatically redact personally identifiable information and payment card information - - - Get cumulative transcript received up to this point in responses where is_final is true - - - Control how numbers are formatted in transcriptions (digits, words, or auto-detect) - - - Improve recognition accuracy for important words - - - Identify and label different speakers in the audio with speaker confidence scores - - - diff --git a/fern/products/waves/versions/v4.0.0/speech-to-text/asr-benchmark.mdx b/fern/products/waves/versions/v4.0.0/speech-to-text/asr-benchmark.mdx deleted file mode 100644 index 47ead0a..0000000 --- a/fern/products/waves/versions/v4.0.0/speech-to-text/asr-benchmark.mdx +++ /dev/null @@ -1,59 +0,0 @@ ---- -title: "ASR Benchmarks" -description: "Performance comparison of streaming transcription models across accuracy, latency, cost, and multilingual capabilities" -icon: "vials" ---- - -## Summary -Best Overall Model: 🏆 **smallestai_streaming** delivers the lowest average Word Error Rate (WER) across English, Hindi, and code-switched audio, while also handling noisy and disfluent speech effectively. It edges out deepgram_nova3_streaming in amost every category, making it the top choice for multilingual and mixed-context scenarios. - -Note: AssemblyAI’s streaming ASR does not support Hindi or code-switched transcription, it is English-only in streaming mode. 
- -## Test Dataset - Categories Overview - -The internal test dataset comprises a diverse set of speech samples designed to evaluate ASR performance across real-world conditions, including language variation, background noise, and natural conversation artifacts. - -- **Code-Switched**: Hindi-English mix within the same conversation. - Example: *"यह movie बहुत अच्छी थी but ending थोड़ी confusing लगी"* - - **How it was created:** Generated using our in-house TTS on curated code-switched text, ensuring natural alternation between languages in the same sentence or utterance. - -- **Hindi**: Traditional Hindi in Devanagari. - Example: *"हमने उसका जन्मदिन मनाया"* - - **How it was created:** Recorded and synthesized from native Hindi speakers using varied topics, from casual conversations to descriptive narratives. - -- **English**: Standard English in Latin script. - Example: *"jovial joggers joyfully joined jogging jaunts justifying joyful jolliness"* - - **How it was created:** Includes tongue twisters, technical/scientific terminology, and diverse domains such as technology, healthcare, and finance to evaluate robustness across vocabularies. - -- **Disfluency**: Audio containing hesitation words, repetitions, and self-corrections. - Example: *"see uh uh i i when i went i thought the food was not good"* - - **How it was created:** Real recordings of people speaking naturally, sourced from **Atoms** call recordings to capture genuine in-the-wild speech patterns. - -- **Noisy**: Audio with background interference, low quality mics, or multiple speakers. - - **How it was created:** Real recordings from **[Atoms](https://smallest.ai/voice-agents)** call data with actual customer interactions, ambient sounds, and overlapping speech, replicating real-world ASR deployment conditions. 
- - -## Model Overview - -| Model Name | Provider | Type | -|------------|----------|------| -| smallestai_streaming | Smallest AI | WebSocket Streaming | -| gpt4o_mini_streaming | OpenAI | WebSocket Streaming | -| gpt4o_streaming | OpenAI | WebSocket Streaming | -| assemblyai_streaming | Assembly AI | WebSocket Streaming | -| deepgram_nova3_streaming | Deepgram | WebSocket Streaming | - - -## Performance Benchmarks - -### Accuracy Metrics -| Rank | Model | English WER | Hindi WER | Code-Switched WER | Disfluency Terms | Noisy WER | Overall WER | -| ---- | ------------------------------ | ----------- | --------- | ----------------- | ---------------- | --------- | ----------- | -| 1 | **smallestai\_streaming** | 2.10% | 22.74% | 12.33% | 9.99% | 15.52% | **12.53%** | -| 2 | **deepgram\_nova3\_streaming** | 2.05% | 23.10% | 10.90% | 10.20% | 15.90% | 12.66% | -| 3 | **gpt4o\_streaming** | 10.19% | 9.93% | 29.58% | 12.00% | 22.06% | 16.75% | -| 4 | **gpt4o\_mini\_streaming** | 11.11% | 12.28% | 36.97% | 15.19% | 20.47% | 19.20% | -| 5 | **assemblyai\_streaming** | 3.94% | - | - | 14.01% | 14.56% | 10.83% | - - - diff --git a/fern/products/waves/versions/v4.0.0/speech-to-text/asr-best-practices.mdx b/fern/products/waves/versions/v4.0.0/speech-to-text/asr-best-practices.mdx deleted file mode 100644 index 427957d..0000000 --- a/fern/products/waves/versions/v4.0.0/speech-to-text/asr-best-practices.mdx +++ /dev/null @@ -1,182 +0,0 @@ ---- -title: "ASR Best Practices" -description: "Optimization, performance, and implementation best practices for the ASR WebSocket API" -icon: "comment-medical" ---- - -# ASR WebSocket Best Practices - -This guide covers best practices for implementing, optimizing, and troubleshooting the Waves ASR WebSocket API for production applications. 
- -## Audio Quality Optimization - -### Sample Rate Selection - -Choose the optimal sample rate for your use case: - - - - **Best for:** Speech recognition, real-time applications - - Optimal balance of quality and performance - - Lower bandwidth requirements - - Faster processing times - - - - **Best for:** Telephony applications - - Standard for phone call quality - - Minimal bandwidth usage - - Good for voice-only content - - - - **Best for:** High-fidelity audio - - Music or broadcast content - - Higher accuracy for complex audio - - Increased bandwidth and processing time - - - - **Best for:** Video/multimedia - - Professional audio production - - Maximum quality requirements - - Higher resource consumption - - - -### Audio Format Guidelines - -**Recommended Configuration:** -```javascript -const optimalConfig = { - audioEncoding: 'linear16', - audioSampleRate: '16000', - audioChannels: '1', // Mono for efficiency - addPunctuation: 'true', -}; -``` - -**Format-Specific Tips:** - - - - **16-bit Linear PCM (Recommended)** - - Uncompressed, high quality - - Predictable bandwidth usage - - Wide compatibility - - Best accuracy/performance ratio - - - - **Opus Compressed** - - Lower bandwidth (good for mobile) - - Browser-native support - - Variable bitrate compression - - Slightly higher latency - - - - **FLAC Compressed** - - Lossless compression - - Good for archival audio - - Higher CPU usage for encoding - - Larger chunks than Opus - - - - **μ-law Encoded** - - Telephony standard - - 8-bit encoding (lower quality) - - Very low bandwidth - - Best for phone call audio - - - -### Audio Preprocessing - -Implement client-side audio processing for better results: - -```javascript -// Audio preprocessing example -function preprocessAudio(audioBuffer) { - const processedBuffer = new Float32Array(audioBuffer.length); - - // 1. Normalize audio levels - const maxValue = Math.max(...audioBuffer.map(Math.abs)); - const normalizationFactor = maxValue > 0 ? 
0.8 / maxValue : 1; - - // 2. Apply normalization and basic filtering - for (let i = 0; i < audioBuffer.length; i++) { - let sample = audioBuffer[i] * normalizationFactor; - - // Simple high-pass filter to reduce low-frequency noise - if (i > 0) { - sample = sample - 0.95 * processedBuffer[i - 1]; - } - - processedBuffer[i] = sample; - } - - return processedBuffer; -} - -// Apply in audio processor -processor.onaudioprocess = (e) => { - const inputData = e.inputBuffer.getChannelData(0); - const processedData = preprocessAudio(inputData); - - // Convert to Int16 and send - const int16Data = new Int16Array(processedData.length); - for (let i = 0; i < processedData.length; i++) { - int16Data[i] = Math.max(-32768, Math.min(32767, processedData[i] * 32768)); - } - - if (ws.readyState === WebSocket.OPEN) { - ws.send(int16Data.buffer); - } -}; -``` - -## Troubleshooting Guide - -### Common Issues and Solutions - - -**Symptoms:** -- WebSocket connection fails immediately -- Connection timeout errors -- "Failed to connect" messages - -**Solutions:** -1. **Verify API Key:** Ensure your API key is valid and properly formatted -2. **Check Subscription:** Confirm you have an active Enterprise plan -3. **Network Issues:** Test connectivity and check firewall settings -4. **Rate Limiting:** Implement exponential backoff for reconnections - - - - -**Symptoms:** -- WebSocket connects but no transcription responses -- Audio sent but no text returned -- Silent failures - -**Solutions:** -1. **Audio Format:** Verify audio encoding matches parameters -2. **Audio Quality:** Ensure audio contains actual speech -3. **Chunk Size:** Check if audio chunks are appropriate size -4. **Parameters:** Validate all connection parameters - - - -**Symptoms:** -- Slow transcription responses -- Poor real-time performance - -**Solutions:** -1. **Reduce Chunk Size:** Use smaller audio chunks (0.5-1 second) -2. **Optimize Audio Processing:** Minimize client-side processing -3. 
**Network Optimization:** Use faster network connection -4. **Parameter Tuning:** Adjust `speechEndThreshold` for faster responses - - diff --git a/fern/products/waves/versions/v4.0.0/speech-to-text/asr-code-examples.mdx b/fern/products/waves/versions/v4.0.0/speech-to-text/asr-code-examples.mdx deleted file mode 100644 index 0e50179..0000000 --- a/fern/products/waves/versions/v4.0.0/speech-to-text/asr-code-examples.mdx +++ /dev/null @@ -1,486 +0,0 @@ ---- -title: "ASR Code Examples" -description: "Working code examples for implementing the ASR WebSocket API" -icon: "compact-disc" ---- - -# Non-Streaming Transcription using Python SDK - -Use the official Python SDK to transcribe complete files without streaming. - - - -```python Sync -import os -from smallestai.waves import WavesClient - -api_key = os.getenv("SMALLEST_API_KEY") -client = WavesClient(api_key) - -result = client.transcribe( - file_path="path/to/audio.wav", - language="en", - word_timestamps=False, - age_detection=False, - gender_detection=False, - emotion_detection=False -) -print(result) -``` - -```python Async -import os -import asyncio -from smallestai.waves import AsyncWavesClient - -api_key = os.getenv("SMALLEST_API_KEY") -client = AsyncWavesClient(api_key) - -result = await client.transcribe( - file_path="path/to/audio.wav", - language="en", - word_timestamps=False, - emotion_detection=False, - gender_detection=False, - age_detection=False, - model="lightning" -) - -print(result) -``` - - - - -# ASR Streaming - WebSocket Code Examples - -This guide provides complete working examples for implementing the Waves ASR WebSocket API across different platforms and use cases. 
- -## Server-Side Implementations - - - -```python python -import asyncio -import websockets -import json - -class FileStreamer: - def __init__(self, api_key, audio_file): - self.api_key = api_key - self.audio_file = audio_file - self.ws = None - self.connected = False - self.transcription_text = [] - - self.audio_data = open(self.audio_file, 'rb').read() - - async def connect(self): - base_url = "wss://waves-api.smallest.ai/api/v1/asr" - - self.params = { - "audioLanguage": "en", - "audioEncoding": "linear16", - "audioSampleRate": "16000", - "audioChannels": "1", - "addPunctuation": "true", - "api_key": self.api_key - } - - query_string = "&".join([f"{key}={value}" for key, value in self.params.items()]) - url = f"{base_url}?{query_string}" - - print("🔌 Connecting...") - - self.ws = await websockets.connect(url) - print("✅ Connected") - self.connected = True - self.listener_task = asyncio.create_task(self._listen_for_messages()) - - async def _listen_for_messages(self): - try: - async for message in self.ws: - response = json.loads(message) - if "error" in response: - print(f"❌ Error: {response}") - elif "text" in response: - end_marker = " [END]" if response.get("isEndOfTurn", False) else "" - print(f"📝 {response['text']}{end_marker}") - self.transcription_text.append(response["text"]) - except websockets.exceptions.ConnectionClosed: - print("🔌 Connection closed") - self.connected = False - - async def stream_file(self): - CHUNK_SIZE_S = 0.3 # in seconds - - chunk_size = int(16000 * 2 * CHUNK_SIZE_S) # change sample rate here - - while len(self.audio_data): - chunk, self.audio_data = self.audio_data[:chunk_size], self.audio_data[chunk_size:] - await self.ws.send(chunk) - await asyncio.sleep(CHUNK_SIZE_MS) - - print("✅ Streaming complete") - await self.ws.send(b'') - - async def get_transcription(self): - return " ".join(self.transcription_text) - - async def close(self): - if self.ws: - await self.ws.close() - -async def main(): - api_key = "SMALLEST_API_KEY" - 
audio_file = "audio_path.wav" - - streamer = FileStreamer(api_key, audio_file) - - try: - await streamer.connect() - await streamer.stream_file() - if streamer.listener_task: - await streamer.listener_task - - full_text = await streamer.get_transcription() - print(f"\n🎯 Complete Transcription:\n{full_text}") - - except Exception as error: - print(f"❌ Error: {error}") - finally: - await streamer.close() - -if __name__ == "__main__": - asyncio.run(main()) -``` - -```javascript node.js -const WebSocket = require('ws'); -const fs = require('fs'); -const url = require('url'); - -class FileStreamer { - constructor(apiKey, audioFile) { - this.apiKey = apiKey; - this.audioFile = audioFile; - this.ws = null; - this.connected = false; - this.transcriptionText = []; - this.audioData = fs.readFileSync(this.audioFile); - } - - async connect() { - const baseUrl = "wss://waves-api.smallest.ai/api/v1/asr"; - const params = { - audioLanguage: "en", - audioEncoding: "linear16", - audioSampleRate: "16000", - audioChannels: "1", - addPunctuation: "true", - api_key: this.apiKey - }; - - const query = new url.URLSearchParams(params).toString(); - const fullUrl = `${baseUrl}?${query}`; - - console.log("🔌 Connecting..."); - - this.ws = new WebSocket(fullUrl); - - return new Promise((resolve, reject) => { - this.ws.onopen = () => { - console.log("✅ Connected"); - this.connected = true; - this.ws.onmessage = (event) => this._handleMessage(event); - this.ws.onclose = () => { - console.log("🔌 Connection closed"); - this.connected = false; - }; - this.ws.onerror = (error) => { - console.log("❌ WebSocket error:", error.message); - reject(error); - }; - resolve(); - }; - }); - } - - _handleMessage(event) { - try { - const response = JSON.parse(event.data); - if (response.error) { - console.log(`❌ Error: ${JSON.stringify(response)}`); - } else if (response.text) { - const endMarker = response.isEndOfTurn ? 
" [END]" : ""; - console.log(`📝 ${response.text}${endMarker}`); - this.transcriptionText.push(response.text); - } - } catch (error) { - console.error("❌ Failed to parse message:", error); - } - } - - async streamFile() { - const CHUNK_SIZE_MS = 0.3; - const sampleRate = parseInt(this.params.audioSampleRate, 10); - const chunkSizeBytes = Math.floor(sampleRate * 2 * CHUNK_SIZE_MS); - - let offset = 0; - while (offset < this.audioData.length) { - const end = Math.min(offset + chunkSizeBytes, this.audioData.length); - const chunk = this.audioData.slice(offset, end); - this.ws.send(chunk); - offset = end; - await new Promise(resolve => setTimeout(resolve, CHUNK_SIZE_MS * 1000)); - } - - console.log("✅ Streaming complete"); - this.ws.send(''); - } - - async getTranscription() { - return this.transcriptionText.join(" "); - } - - close() { - if (this.ws) { - this.ws.close(); - } - } -} - -async function main() { - const apiKey = "SMALLEST_API_KEY"; - const audioFile = "audio_path.wav"; - - const streamer = new FileStreamer(apiKey, audioFile); - - try { - await streamer.connect(); - await streamer.streamFile(); - - // Wait for the transcription to complete - await new Promise(resolve => { - streamer.ws.onclose = () => { - resolve(); - }; - }); - - const fullText = await streamer.getTranscription(); - console.log(`\n🎯 Complete Transcription:\n${fullText}`); - } catch (error) { - console.error(`❌ Error: ${error.message}`); - } finally { - streamer.close(); - } -} - -main(); -``` - - -## JavaScript (Browser with Microphone) - -Complete example for browser-based real-time transcription: - - - -```javascript Browser ASR Implementation -// Browser-based ASR with microphone input -let ws; -let audioContext; -let processor; -let source; -let stream; - -async function startASR() { - const apiKey = 'your-api-key'; // Replace with your API key - const baseUrl = 'wss://waves-api.smallest.ai/api/v1/asr'; - - // Configure parameters - const params = new URLSearchParams({ - api_key: 
apiKey, - audioEncoding: 'linear16', - audioSampleRate: '16000', - audioChannels: '1', - addPunctuation: 'true', - speechEndpointing: '300' - }); - - const url = `${baseUrl}?${params}`; - ws = new WebSocket(url); - - ws.onopen = async () => { - console.log('✅ Connected to ASR service'); - await setupMicrophone(); - }; - - ws.onmessage = (event) => { - try { - const response = JSON.parse(event.data); - handleTranscription(response); - } catch (err) { - console.error('❌ Parse error:', err); - } - }; - - ws.onerror = (error) => { - console.error('❌ WebSocket error:', error); - }; - - ws.onclose = (event) => { - console.log(`🔌 Connection closed: ${event.code} - ${event.reason}`); - stopASR(); - }; -} - -async function setupMicrophone() { - try { - // Request microphone access - stream = await navigator.mediaDevices.getUserMedia({ - audio: { - sampleRate: 16000, - channelCount: 1, - echoCancellation: true, - noiseSuppression: true - } - }); - - // Create audio context - audioContext = new AudioContext({ sampleRate: 16000 }); - source = audioContext.createMediaStreamSource(stream); - - // Create audio processor - processor = audioContext.createScriptProcessor(4096, 1, 1); - - processor.onaudioprocess = (e) => { - if (ws.readyState === WebSocket.OPEN) { - const inputData = e.inputBuffer.getChannelData(0); - - // Convert to 16-bit PCM - const int16Data = new Int16Array(inputData.length); - for (let i = 0; i < inputData.length; i++) { - int16Data[i] = Math.max(-32768, Math.min(32767, inputData[i] * 32768)); - } - - // Send audio data - ws.send(int16Data.buffer); - } - }; - - // Connect audio nodes - source.connect(processor); - processor.connect(audioContext.destination); - - console.log('🎤 Recording started. 
Speak now...'); - - } catch (err) { - console.error('❌ Microphone error:', err); - alert('Microphone access required for ASR functionality'); - } -} - -function handleTranscription(response) { - console.log('📝 Response:', response); - - if (response.error) { - console.error('❌ API Error:', response); - return; - } - - if (response.text) { - const endOfTurn = response.isEndOfTurn ? ' [END_OF_TURN]' : ''; - console.log(`📝 ${response.text}${endOfTurn}`); - updateTranscriptionDisplay(response.text); - } -} - -function updateTranscriptionDisplay(text) { - const container = document.getElementById('transcription'); - if (!container) return; - - const finalDiv = document.createElement('div'); - finalDiv.className = 'final-transcription'; - finalDiv.textContent = text; - container.appendChild(finalDiv); -} - -function stopASR() { - if (processor) { - processor.disconnect(); - processor = null; - } - if (source) { - source.disconnect(); - source = null; - } - if (audioContext) { - audioContext.close(); - audioContext = null; - } - if (stream) { - stream.getTracks().forEach(track => track.stop()); - stream = null; - } - if (ws) { - ws.close(); - ws = null; - } - console.log('⏹️ ASR stopped'); -} - -// Usage -document.addEventListener('DOMContentLoaded', () => { - const startBtn = document.getElementById('start-asr'); - const stopBtn = document.getElementById('stop-asr'); - - startBtn?.addEventListener('click', startASR); - stopBtn?.addEventListener('click', stopASR); -}); -``` - -```html HTML Setup - - - - Waves ASR Demo - - - -

Waves ASR Demo

- - -
- - - - -``` - -
diff --git a/fern/products/waves/versions/v4.0.0/speech-to-text/asr-websocket-overview.mdx b/fern/products/waves/versions/v4.0.0/speech-to-text/asr-websocket-overview.mdx deleted file mode 100644 index 3c82f5d..0000000 --- a/fern/products/waves/versions/v4.0.0/speech-to-text/asr-websocket-overview.mdx +++ /dev/null @@ -1,105 +0,0 @@ ---- -title: "ASR WebSocket API Overview" -description: "Real-time speech-to-text transcription using WebSocket connections" -icon: "network-wired" ---- - -# Waves ASR WebSocket API - -The ASR (Automatic Speech Recognition) WebSocket API provides real-time speech-to-text transcription capabilities. This API accepts audio streams and returns transcribed text with support for multiple languages and configurable parameters. - -## Key Features - -- **Real-time Transcription**: Stream audio and receive instant transcription results -- **Multi-language Support**: English and Hindi with mixed language capabilities -- **Multiple Audio Formats**: Support for linear16, FLAC, μ-law, and Opus encoding -- **Configurable Parameters**: Customize sample rates, punctuation and more -- **Voice Activity Detection**: Optional voice activity events for enhanced control -- **Sensitive Data Redaction**: Built-in PCI, SSN, and number redaction capabilities - -## Endpoint - -**Production URL**: `wss://waves-api.smallest.ai/api/v1/asr` - -## Authentication - -For authentication details, see the [Authentication Guide](../api-references/authentication). - -## Subscription Requirements - - -ASR functionality is exclusively available to **Enterprise Monthly** or **Enterprise Yearly** subscribers. - - -## Quick Start - -1. **Obtain API Key**: Get your API key from the Waves platform -2. **Connect**: Establish WebSocket connection with authentication -3. **Configure**: Set audio parameters via query strings -4. **Stream**: Send audio data as binary messages -5. 
**Receive**: Get real-time transcription results - -## Supported Languages - -| Language | Code | Notes | -|----------|------|-------| -| English | `en` | High accuracy | -| Hindi | `hi` | Supports mixed English-Hindi | -| Spanish | `es` | - | -| French | `fr` | - | -| German | `de` | - | -| Russian | `ru` | - | -| Portuguese | `pt` | - | -| Japanese | `ja` | - | -| Italian | `it` | - | -| Dutch | `nl` | - | -| Chinese Mandarin | `zh` | Available on request | -| Chinese Cantonese | `zh-hk` | Available on request | -| Turkish | `tr` | Available on request | -| Vietnamese | `vi` | Available on request | -| Thai | `th` | Available on request | -| Indonesian | `id` | Available on request | -| Ukrainian | `uk` | Available on request | -| Tamil | `ta` | Available on request | -| Marathi | `mr` | Available on request | -| Telugu | `te` | Available on request | -| Polish | `pl` | Available on request | -| Greek | `el` | Available on request | -| Hungarian | `hu` | Available on request | -| Romanian | `ro` | Available on request | -| Czech | `cs` | Available on request | -| Swedish | `sv` | Available on request | -| Bulgarian | `bg` | Available on request | -| Danish | `da` | Available on request | -| Finnish | `fi` | Available on request | - - -## Audio Format Support - -| Format | Description | Use Case | -|----------|-------------|----------| -| linear16 | 16-bit linear PCM | High quality, recommended | -| flac | FLAC compressed | Compressed audio files | -| mulaw | μ-law encoded | Telephony applications | -| opus | Opus compressed | Browser-native formats | - -## Response Types - -The API provides three types of responses: -- **Final Results**: Complete transcriptions for speech segments -- **End of Turn**: Indicates completion of a speech turn - -## Error Handling - -The API provides detailed error messages for: -- Invalid parameters -- Authentication failures -- Audio format mismatches -- Connection timeouts -- Subscription issues - -## Pricing - -- **Default Rate**: 
$0.025 per minute -- **Billing**: Per second of audio processed -- **Custom Rates**: Available for Enterprise plans diff --git a/fern/products/waves/versions/v4.0.0/speech-to-text/benchmarks/evaluation-walkthrough.mdx b/fern/products/waves/versions/v4.0.0/speech-to-text/benchmarks/evaluation-walkthrough.mdx new file mode 100644 index 0000000..9c6242f --- /dev/null +++ b/fern/products/waves/versions/v4.0.0/speech-to-text/benchmarks/evaluation-walkthrough.mdx @@ -0,0 +1,140 @@ +--- +title: 'Evaluation Walkthrough' +description: 'Step-by-step guide to evaluate Pulse STT accuracy and performance' +--- + +Our evaluation guide outlines a repeatable process: choose representative audio, generate transcripts, compute WER/CER/latency, and document findings. Use the streamlined steps below (with ready-to-run snippets) to mirror that workflow. + +## 1. Assemble your dataset matrix + +- Collect 50–200 files per use case (support calls, meetings, media, etc.). +- Produce verified transcripts plus optional speaker labels and timestamps. +- Track metadata for accent, language, and audio quality so you can pivot metrics later. + +```python +dataset = [ + {"audio": "samples/en_agent01.wav", "reference": "Thank you for calling.", "language": "en"}, + {"audio": "samples/es_call02.wav", "reference": "Hola, ¿en qué puedo ayudarte?", "language": "es"}, +] +``` + +## 2. Install the evaluation toolkit + +```bash +pip install smallestai jiwer whisper-normalizer pandas +``` + +- `smallestai` → Pulse STT client +- `jiwer` → WER/CER computation +- `whisper-normalizer` → normalization that matches the official guidance + +## 3. 
Transcribe + normalize + +```python +import os +from jiwer import wer, cer +from whisper_normalizer.english import EnglishTextNormalizer +from smallestai.waves import WavesClient + +client = WavesClient(api_key=os.environ["SMALLEST_AI_API_KEY"]) +normalizer = EnglishTextNormalizer() + +def run_sample(sample): + response = client.transcribe( + audio_file=sample["audio"], + language=sample["language"], + word_timestamps=True, + diarize=True + ) + ref = normalizer(sample["reference"]) + hyp = normalizer(response.transcription) + return { + "path": sample["audio"], + "wer": wer(ref, hyp), + "cer": cer(ref, hyp), + "latency_ms": response.metrics["latency_ms"], + "rtf": response.metrics["real_time_factor"], + "transcription": response.transcription + } +``` + +## 4. Batch evaluation + aggregation + +```python +import pandas as pd + +results = [run_sample(s) for s in dataset] +df = pd.DataFrame(results) + +summary = { + "samples": len(df), + "avg_wer": df.wer.mean(), + "p95_wer": df.wer.quantile(0.95), + "avg_latency_ms": df.latency_ms.mean(), + "avg_rtf": df.rtf.mean() +} +``` + +### Recommended metrics to report + +- **WER / CER** per use case and language. +- **Time to first result** and **RTF** from `response.metrics`. +- **Diarization coverage**: % of `utterances` entries with `speaker`. + +## 5. Error analysis + +```python +def breakdown(df): + worst = df.sort_values("wer", ascending=False).head(5)[["path", "wer", "transcription"]] + return worst.to_dict(orient="records") + +outliers = breakdown(df) +``` + +- Classify errors into substitutions, deletions, insertions. +- Highlight audio traits (noise, accent) that correlate with higher WER. + +## 6. 
Compare configurations + +```python +configs = [ + {"language": "en", "word_timestamps": True}, + {"language": "multi", "word_timestamps": True, "diarize": True} +] + +def evaluate_config(config): + return [run_sample({**s, **config}) for s in dataset] + +for config in configs: + cfg_results = pd.DataFrame(evaluate_config(config)) + print(config, cfg_results.wer.mean()) +``` + +Use this to decide whether to enable diarization, sentence-level timestamps, or enrichment features; the official evaluation doc recommends capturing cost/latency impact alongside accuracy. + +## 7. Publish the report + +Include: + +1. Dataset description + rationale +2. Metrics table (WER/CER/TTFR/RTF, p50/p90/p95) +3. Error taxonomy with audio snippets +4. Configuration recommendation (e.g., `language=multi`, `word_timestamps=true`, `diarize=true`) +5. Follow-up experiments or model versions to track + +### Example JSON summary + +```json +{ + "dataset": "contact-center-q1", + "samples": 120, + "average_wer": 0.064, + "average_cer": 0.028, + "average_latency_ms": 61.3, + "average_rtf": 0.41, + "p95_latency_ms": 88.2, + "timestamp": "2025-01-15T10:00:00Z" +} +``` + +This completes the process of self metric evaluation. With these steps, you can identify strengths and weaknesses in any STT model. \ No newline at end of file diff --git a/fern/products/waves/versions/v4.0.0/speech-to-text/benchmarks/metrics-overview.mdx b/fern/products/waves/versions/v4.0.0/speech-to-text/benchmarks/metrics-overview.mdx new file mode 100644 index 0000000..7eb5247 --- /dev/null +++ b/fern/products/waves/versions/v4.0.0/speech-to-text/benchmarks/metrics-overview.mdx @@ -0,0 +1,92 @@ +--- +title: 'Metrics Overview' +description: 'Key Pulse STT metrics for quality and latency.' +--- + +Pulse STT evaluations revolve around four pillars: + +1. **Accuracy** – how close transcripts are to the ground truth. +2. **Latency & throughput** – how quickly and efficiently results arrive. +3. 
**Enrichment quality** – how reliable diarization, timestamps, and metadata are. + +## Accuracy metrics + +### Word Error Rate (WER) +- Formula: `WER = (Substitutions + Deletions + Insertions) / Total Words`. +- Interprets overall transcript fidelity; normalize casing/punctuation before computing. + +### Character Error Rate (CER) +- Parallel to WER but at the character level; useful for languages with compact scripts or heavy compounding. + +### Sentence accuracy +- Percentage of sentences that match ground truth exactly. +- Tracks readability for QA/customer-support recaps. + +## Latency & throughput + +### Time to First Result (TTFR) +- Measures the delay between request start and first interim token. +- For real-time agents, keep TTFR below ~30 ms to maintain natural turn-taking. + +### End-to-end latency +- Wall-clock time from submission to final transcript. +- Report p50/p90/p95 to capture outliers introduced by long files or retries. + +### Real-Time Factor (RTF) +- `RTF = Processing Time / Audio Duration`. +- Values less than 1 indicate faster-than-real-time processing; Pulse STT typically runs near 0.4 RTF on clean inputs. + +## Enrichment quality + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
| Metric | What to watch | Why it matters |
| --- | --- | --- |
| Diarization accuracy | % of words with correct `speaker_id` | Call-center QA, coaching, compliance |
| Word timestamp drift | Gap between predicted and reference timestamps | Subtitle alignment and editing |
| Sentence-level timestamps | % of audio covered by `utterances` segments | Chaptering, meeting notes |
| Emotion/age/gender precision | Confidence distribution | Routing, analytics, compliance flags |
+ +## Coverage & robustness + +- **Language detection accuracy**: share of files that land on the intended ISO 639-1 code or auto-detected language. +- **Noise robustness**: WER delta at multiple SNR levels (clean vs. +5 dB noise, etc.). +- **Accent/domain diversity**: track WER per accent or scenario (support, media, meetings) to avoid blind spots. + +## Operational metrics + +- **Requests per second / concurrent sessions**: validate you stay within quota and plan scaling needs. +- **Cost per minute**: Pulse STT bills per second at $0.025/minute list price—include enrichment toggles when modeling cost. +- **Retry volume**: differentiate infrastructure retries (HTTP 5xx) from transcription failures to spot upstream vs downstream issues. + +## Reporting checklist + +1. Describe dataset composition (language, accent, domain, duration). +2. Publish WER/CER, TTFR, and RTF with averages and percentiles. +3. Include enrichment coverage (how many segments include diarization/timestamps). +4. Summarize cost/latency impact when enabling optional features. +5. Link to reproducible scripts or notebooks for auditing. + diff --git a/fern/products/waves/versions/v4.0.0/speech-to-text/benchmarks/performance.mdx b/fern/products/waves/versions/v4.0.0/speech-to-text/benchmarks/performance.mdx new file mode 100644 index 0000000..26f442a --- /dev/null +++ b/fern/products/waves/versions/v4.0.0/speech-to-text/benchmarks/performance.mdx @@ -0,0 +1,187 @@ +--- +title: 'Performance' +description: 'Latency, accuracy, and throughput benchmarks for Pulse STT' +--- + +## Latency Metrics + +### Time-to-First-Transcript (TTFT) + +Our Pulse STT model provides State of the art TTFT latency of ~**64ms**, which is one of the least in the world. + + + +TTFT (Time to First Transcript) measures the latency between when a user stops speaking and when the model returns the complete transcript. Lower TTFT means faster response times and better user experience in real-time applications. 
+ + + + + + + + + + + + + + + + + + + + + + + + + + +
| Model | Latency (ms) |
| --- | --- |
| Smallest Pulse STT | 64 |
| Deepgram Nova 2 | 76 |
| Deepgram Nova 3 | 71 |
| Assembly AI Universal | 698 |
+ +
+ + +## Accuracy Metrics + +### Word Error Rate (WER) + +All models were evaluated on the FLEURS dataset, a standardised multilingual speech benchmark ensuring fair cross-model comparison. + + + + + + + + + + + + + + + + + + + + + + + + + + +
| Language | WER |
| --- | --- |
| English | 5.1% |
| Italian | 4.2% |
| Spanish | 5.4% |
| Hindi | 11.4% |
+ +## Throughput + +### Requests Per Second + + + + + + + + + + + + + + + + + + + + + + +
| Audio Length | HTTP POST (req/s) |
| --- | --- |
| Short (< 5s) | 50-100 |
| Medium (5-30s) | 20-50 |
| Long (30s+) | 10-20 |
+ +*Throughput varies based on audio length, format, and server load* + +## Performance by Audio Format + +### Linear16 (PCM) + +- **Latency**: Lowest (~64ms) +- **Accuracy**: Highest +- **Bandwidth**: Highest +- **Best for**: High-quality applications + +### Opus + +- **Latency**: Low (~70-80ms) +- **Accuracy**: High +- **Bandwidth**: Low +- **Best for**: Browser/mobile applications + +### FLAC + +- **Latency**: Medium (~80-90ms) +- **Accuracy**: Highest +- **Bandwidth**: Medium +- **Best for**: Archival/quality-critical use cases + +### μ-law + +- **Latency**: Low (~65-75ms) +- **Accuracy**: Good +- **Bandwidth**: Lowest +- **Best for**: Telephony applications + +## Performance by Language + +### High-Performance Languages + +- **Italian**: 4.2% WER, ~64ms latency +- **English**: 5.1% WER, ~64ms latency +- **Spanish**: 5.4% WER, ~64ms latency +- **Portuguese**: 7.1% WER, ~64ms latency +- **German**: 8.5% WER, ~64ms latency +- **French**: 9.2% WER, ~64ms latency + +### Regional Variations + +- **Indian Languages**: 10-15% WER, ~90-100ms latency +- **Eastern European**: 9-12% WER, ~85-95ms latency + +## Feature Impact on Performance + +### Diarization + +- **Latency Impact**: +10-20ms +- **Accuracy Impact**: Minimal +- **Use When**: Multiple speakers present + +### Word Timestamps + +- **Latency Impact**: +5-10ms +- **Accuracy Impact**: None +- **Use When**: Timing information needed + +### Emotion Detection + +- **Latency Impact**: +15-25ms +- **Accuracy Impact**: None +- **Use When**: Emotion analysis required + +## Optimization Tips + +- Use 16kHz sample rate for optimal balance +- Choose linear16 format for lowest latency +- Enable only needed features to reduce latency +- Batch process when latency isn't critical + +## Next Steps + +- [Metrics Overview](/waves/documentation/speech-to-text/benchmarks/metrics-overview). +- [Evaluation Walkthrough](/waves/documentation/speech-to-text/benchmarks/evaluation-walkthrough). 
+- [Best Practices](/waves/documentation/speech-to-text/pre-recorded/best-practices). + + diff --git a/fern/products/waves/versions/v4.0.0/speech-to-text/evaluate-asr.mdx b/fern/products/waves/versions/v4.0.0/speech-to-text/evaluate-asr.mdx deleted file mode 100644 index b99b66b..0000000 --- a/fern/products/waves/versions/v4.0.0/speech-to-text/evaluate-asr.mdx +++ /dev/null @@ -1,339 +0,0 @@ ---- -title: How to evaluate? -description: Transcribe audio files using the Smallest.ai WebSocket ASR API and evaluate with Word Error Rate (WER). -icon: "chart-simple" ---- - -# 🎙️ WER Evaluation - -This guide walks you through running automatic speech recognition (ASR) on a dataset of audio files using the Smallest.ai Lightning ASR API, normalizing the text output, and computing **Word Error Rate (WER)** against reference transcripts. - -The guide includes examples for both **WebSocket streaming** and **HTTP POST** methods (raw audio bytes and URL-based). - ---- - -## 📂 Input CSV Format - -Your dataset should be a **CSV file** with the following columns: - -### For Local Audio Files (Raw Audio method): - -| Column | Description | -| ------------ | -------------------------------------- | -| `audio_path` | Path to the audio file | -| `text` | Ground-truth reference text transcript | - -Example: - -```csv -audio_path,text -data/en_001.wav,Hello how are you doing today -data/en_002.wav,This is a test of speech recognition -``` - -### For Remote Audio Files (URL method): - -| Column | Description | -| ----------- | --------------------------------------------------- | -| `audio_url` | URL to the audio file (must be publicly accessible) | -| `text` | Ground-truth reference text transcript | - -Example: - -```csv -audio_url,text -https://example.com/audio/en_001.wav,Hello how are you doing today -https://example.com/audio/en_002.wav,This is a test of speech recognition -``` - ---- - -## Installation - -Make sure you install dependencies: - -```bash -pip install websockets jiwer 
whisper-normalizer -``` - ---- - -## Usage - -Save the script as `asr_eval.py` and update your API key. - - -```python streaming -import asyncio -import websockets -import json -import csv -from whisper_normalizer.english import EnglishTextNormalizer -from whisper_normalizer.basic import BasicTextNormalizer -from jiwer import wer - -english_normalizer = EnglishTextNormalizer() -other_language_normalizer = BasicTextNormalizer() - -async def transcribe_audio(api_key, audio_file): -with open(audio_file, 'rb') as f: -audio_data = f.read() - - params = { - "audioLanguage": "en", # Change to your language - "audioEncoding": "linear16", # 16-bit PCM - "audioSampleRate": "16000", # sample rate of the audio file - "audioChannels": "1", - "addPunctuation": "true", - "api_key": api_key - } - query_string = "&".join([f"{k}={v}" for k, v in params.items()]) - url = f"wss://waves-api.smallest.ai/api/v1/asr?{query_string}" - - transcription = [] - - async with websockets.connect(url) as ws: - async def listen(): - async for message in ws: - response = json.loads(message) - if "text" in response: - transcription.append(response["text"]) - - listen_task = asyncio.create_task(listen()) - - chunk_size = int(16000 * 2 * 0.3) # 16kHz × 2 bytes × 0.3s - while audio_data: - chunk, audio_data = audio_data[:chunk_size], audio_data[chunk_size:] - await ws.send(chunk) - await asyncio.sleep(0.3) - - await ws.send(b'') # End of stream - await asyncio.sleep(2) - listen_task.cancel() - - return " ".join(transcription) - -def calculate_wer(reference, hypothesis, language="en"): -if language == "en": -ref_normalized = english_normalizer(reference) -hyp_normalized = english_normalizer(hypothesis) -else: -ref_normalized = other_language_normalizer(reference) -hyp_normalized = other_language_normalizer(hypothesis) -return wer(ref_normalized, hyp_normalized) - -async def main(): -api_key = "your_api_key_here" -input_csv = "fleurs_dataset.csv" # input CSV -output_csv = 
"transcription_results_streaming.csv" - - results = [] - - with open(input_csv, 'r') as f: - reader = csv.DictReader(f) - for row in reader: - audio_file = row['audio_path'] - reference_text = row.get('text', '') - - transcript = await transcribe_audio(api_key, audio_file) - row['transcript'] = transcript - - row['wer'] = calculate_wer(reference_text, transcript) - results.append(row) - - if results: - with open(output_csv, 'w', newline='') as f: - fieldnames = list(results[0].keys()) - writer = csv.DictWriter(f, fieldnames=fieldnames) - writer.writeheader() - writer.writerows(results) - - wer_scores = [row['wer'] for row in results] - if wer_scores: - avg_wer = sum(wer_scores) / len(wer_scores) - print(f"Average WER: {avg_wer:.3f}") - -if **name** == "**main**": -asyncio.run(main()) - -```` - -```python one-shot (Raw Audio) -import csv -import requests -from whisper_normalizer.english import EnglishTextNormalizer -from whisper_normalizer.basic import BasicTextNormalizer -from jiwer import wer - -english_normalizer = EnglishTextNormalizer() -other_language_normalizer = BasicTextNormalizer() - -def transcribe_audio(api_key, audio_file): - url = "https://waves-api.smallest.ai/api/v1/lightning/get_text" - headers = { - "Authorization": f"Bearer {api_key}", - "Content-Type": "audio/wav" - } - params = { - "model": "lightning", - "language": "en", - "word_timestamps": "false" - } - - with open(audio_file, "rb") as f: - audio_data = f.read() - - response = requests.post(url, headers=headers, params=params, data=audio_data) - result = response.json() - return result['transcription'] - -def calculate_wer(reference, hypothesis, language="en"): - if language == "en": - ref_normalized = english_normalizer(reference) - hyp_normalized = english_normalizer(hypothesis) - else: - ref_normalized = other_language_normalizer(reference) - hyp_normalized = other_language_normalizer(hypothesis) - return wer(ref_normalized, hyp_normalized) - -def main(): - api_key = "your_api_key_here" - 
input_csv = "fleurs_dataset.csv" # input CSV - output_csv = "transcription_results_one_shot.csv" - - results = [] - - with open(input_csv, 'r') as f: - reader = csv.DictReader(f) - for row in reader: - audio_file = row['audio_path'] - reference_text = row.get('text', '') - - transcript = transcribe_audio(api_key, audio_file) - row['transcript'] = transcript - - row['wer'] = calculate_wer(reference_text, transcript) - results.append(row) - - if results: - with open(output_csv, 'w', newline='') as f: - fieldnames = list(results[0].keys()) - writer = csv.DictWriter(f, fieldnames=fieldnames) - writer.writeheader() - writer.writerows(results) - - wer_scores = [row['wer'] for row in results] - if wer_scores: - avg_wer = sum(wer_scores) / len(wer_scores) - print(f"Average WER: {avg_wer:.3f}") - - -if __name__ == "__main__": - main() -```` - -```python one-shot (URL) -import csv -import requests -import json -from whisper_normalizer.english import EnglishTextNormalizer -from whisper_normalizer.basic import BasicTextNormalizer -from jiwer import wer - -english_normalizer = EnglishTextNormalizer() -other_language_normalizer = BasicTextNormalizer() - -def transcribe_audio_url(api_key, audio_url): - url = "https://waves-api.smallest.ai/api/v1/lightning/get_text" - headers = { - "Authorization": f"Bearer {api_key}", - "Content-Type": "application/json" - } - params = { - "model": "lightning", - "language": "en", - "word_timestamps": "false" - } - payload = { - "url": audio_url - } - - response = requests.post(url, headers=headers, params=params, data=json.dumps(payload)) - result = response.json() - return result['transcription'] - -def calculate_wer(reference, hypothesis, language="en"): - if language == "en": - ref_normalized = english_normalizer(reference) - hyp_normalized = english_normalizer(hypothesis) - else: - ref_normalized = other_language_normalizer(reference) - hyp_normalized = other_language_normalizer(hypothesis) - return wer(ref_normalized, hyp_normalized) - -def 
main(): - api_key = "your_api_key_here" - input_csv = "fleurs_dataset_urls.csv" # CSV with audio URLs - output_csv = "transcription_results_url.csv" - - results = [] - - with open(input_csv, 'r') as f: - reader = csv.DictReader(f) - for row in reader: - audio_url = row['audio_url'] # Column should contain URLs - reference_text = row.get('text', '') - - transcript = transcribe_audio_url(api_key, audio_url) - row['transcript'] = transcript - - row['wer'] = calculate_wer(reference_text, transcript) - results.append(row) - - if results: - with open(output_csv, 'w', newline='') as f: - fieldnames = list(results[0].keys()) - writer = csv.DictWriter(f, fieldnames=fieldnames) - writer.writeheader() - writer.writerows(results) - - wer_scores = [row['wer'] for row in results] - if wer_scores: - avg_wer = sum(wer_scores) / len(wer_scores) - print(f"Average WER: {avg_wer:.3f}") - - -if __name__ == "__main__": - main() -``` - - - ---- - -## Output - -A new CSV (`transcription_results.csv`) will be generated with the following columns: - -| Column | Description | -| ------------ | ------------------------------------ | -| `audio_path` | Path to the audio file | -| `text` | Reference ground-truth transcript | -| `transcript` | ASR model output | -| `wer` | Word Error Rate (if `text` provided) | - -Example: - -```csv -audio_path,text,transcript,wer -data/en_001.wav,Hello how are you doing today,hello how are you doing today,0.000 -data/en_002.wav,This is a test of speech recognition,this is a test speech recognition,0.167 -``` - ---- - -## 📊 Metrics - -- **WER** = Word Error Rate = (Substitutions + Deletions + Insertions) ÷ Reference words -- Normalization is applied using `whisper-normalizer` before computing WER. -- For English, `EnglishTextNormalizer` is used, otherwise a more general one `BasicTextNormalizer`. 
diff --git a/fern/products/waves/versions/v4.0.0/speech-to-text/features/age-and-gender-detection.mdx b/fern/products/waves/versions/v4.0.0/speech-to-text/features/age-and-gender-detection.mdx new file mode 100644 index 0000000..b26d8b4 --- /dev/null +++ b/fern/products/waves/versions/v4.0.0/speech-to-text/features/age-and-gender-detection.mdx @@ -0,0 +1,40 @@ +--- +title: 'Age & gender detection' +description: 'Predict demographic attributes alongside every transcription' +--- + +Pre-Recorded + +## Enabling age & gender detection + +Append `age_detection=true` and/or `gender_detection=true` to your Pulse STT query string. You can toggle them independently as well. + +### Sample request + +```bash +curl --request POST \ + --url "https://api.smallest.ai/waves/v1/pulse/get_text?language=en&age_detection=true&gender_detection=true" \ + --header "Authorization: Bearer $SMALLEST_API_KEY" \ + --header "Content-Type: audio/wav" \ + --data-binary "@/path/to/audio.wav" +``` + + +Age and gender detection are currently supported only for Pre-Recorded API. Real-Time API support is coming soon. + + +## Output format & field of interest + +Responses include top-level `age` (`infant`, `teenager`, `adult`, `old`) and `gender` (`male`, `female`) fields that describe the dominant speaker in the processed segment. Store these fields next to each transcript to power demographic analytics or routing logic. 
+ +### Sample response + +```json +{ + "status": "success", + "transcription": "Hello world.", + "age": "adult", + "gender": "male" +} +``` + diff --git a/fern/products/waves/versions/v4.0.0/speech-to-text/features/diarization.mdx b/fern/products/waves/versions/v4.0.0/speech-to-text/features/diarization.mdx new file mode 100644 index 0000000..b2a30f5 --- /dev/null +++ b/fern/products/waves/versions/v4.0.0/speech-to-text/features/diarization.mdx @@ -0,0 +1,164 @@ +--- +title: 'Speaker diarization' +description: 'Label each word and utterance with turn-by-turn speaker IDs' +--- + +Pre-Recorded Real-Time + +## Enabling speaker diarization + +### Pre-Recorded API + +Pass `diarize=true` when calling the Pulse STT POST endpoint. The parameter can be combined with other enrichment options (timestamps, emotions, etc.) without changing your audio payload. + +```bash +curl --request POST \ + --url "https://api.smallest.ai/waves/v1/pulse/get_text?language=en&diarize=true" \ + --header "Authorization: Bearer $SMALLEST_API_KEY" \ + --header "Content-Type: audio/wav" \ + --data-binary "@/path/to/audio.wav" +``` + +### Real-Time WebSocket API + +Add `diarize=true` to your WebSocket connection query parameters when connecting to the Pulse STT WebSocket API. + +```javascript +const url = new URL("wss://api.smallest.ai/waves/v1/pulse/get_text"); +url.searchParams.append("language", "en"); +url.searchParams.append("encoding", "linear16"); +url.searchParams.append("sample_rate", "16000"); +url.searchParams.append("diarize", "true"); + +const ws = new WebSocket(url.toString(), { + headers: { + Authorization: `Bearer ${API_KEY}`, + }, +}); +``` + +## Output format & field of interest + +When enabled, every entry in `words` includes a `speaker` field (integer ID: `0`, `1`, …) and `speaker_confidence` field (0.0 to 1.0) for real-time API, or string labels (`speaker_0`, `speaker_1`, …) for pre-recorded API. 
The `utterances` array also carries `speaker` labels so you can reconstruct conversations, build turn-taking analytics, or display multi-speaker captions. + +### Pre-Recorded API + +## Sample request + + + +```bash +curl --request POST \ + --url "https://api.smallest.ai/waves/v1/pulse/get_text?language=en&diarize=true" \ + --header "Authorization: Bearer $SMALLEST_API_KEY" \ + --header "Content-Type: audio/wav" \ + --data-binary "@/path/to/two-speaker.wav" +``` + +## Sample response + +### Pre-Recorded API Response + +```json +{ + "transcription": "Agent: Hello world. Customer: Hi there.", + "words": [ + { "start": 0.0, "end": 0.4, "speaker": "speaker_0", "word": "Hello" }, + { "start": 0.4, "end": 0.8, "speaker": "speaker_0", "word": "world." }, + { "start": 1.0, "end": 1.2, "speaker": "speaker_1", "word": "Hi" }, + { "start": 1.2, "end": 1.6, "speaker": "speaker_1", "word": "there." } + ], + "utterances": [ + { "text": "Hello world.", "start": 0.0, "end": 0.8, "speaker": "speaker_0" }, + { "text": "Hi there.", "start": 1.0, "end": 1.6, "speaker": "speaker_1" } + ] +} +``` + +### Real-Time WebSocket API Response + +```json +{ + "session_id": "sess_12345abcde", + "transcript": "Hello world. 
Hi there.", + "is_final": true, + "is_last": false, + "language": "en", + "words": [ + { + "word": "Hello", + "start": 0.0, + "end": 0.4, + "confidence": 0.98, + "speaker": 0, + "speaker_confidence": 0.95 + }, + { + "word": "world.", + "start": 0.4, + "end": 0.8, + "confidence": 0.97, + "speaker": 0, + "speaker_confidence": 0.92 + }, + { + "word": "Hi", + "start": 1.0, + "end": 1.2, + "confidence": 0.99, + "speaker": 1, + "speaker_confidence": 0.88 + }, + { + "word": "there.", + "start": 1.2, + "end": 1.6, + "confidence": 0.96, + "speaker": 1, + "speaker_confidence": 0.91 + } + ], + "utterances": [ + { + "text": "Hello world.", + "start": 0.0, + "end": 0.8, + "speaker": 0 + }, + { + "text": "Hi there.", + "start": 1.0, + "end": 1.6, + "speaker": 1 + } + ] +} +``` + +## Response Fields + + + + + + + + + + + + + + + + + + + + + + + + +
| Field | Type | When Included | Description |
| --- | --- | --- | --- |
| `speaker` | integer (realtime) / string (pre-recorded) | `diarize=true` | Speaker label. Real-time API uses integer IDs (0, 1, ...), pre-recorded API uses string labels (speaker_0, speaker_1, ...) |
| `speaker_confidence` | number | `diarize=true` (realtime only) | Confidence score for the speaker assignment (0.0 to 1.0) |
+ diff --git a/fern/products/waves/versions/v4.0.0/speech-to-text/features/emotion-detection.mdx b/fern/products/waves/versions/v4.0.0/speech-to-text/features/emotion-detection.mdx new file mode 100644 index 0000000..a6ebe0c --- /dev/null +++ b/fern/products/waves/versions/v4.0.0/speech-to-text/features/emotion-detection.mdx @@ -0,0 +1,44 @@ +--- +title: 'Emotion detection' +description: 'Capture per-emotion confidence scores from Pulse STT responses' +--- + +Pre-Recorded + +## Enabling emotion detection + +Include `emotion_detection=true` in your Pulse STT query parameters. + +### Sample request + +```bash +curl --request POST \ + --url "https://api.smallest.ai/waves/v1/pulse/get_text?language=en&emotion_detection=true" \ + --header "Authorization: Bearer $SMALLEST_API_KEY" \ + --header "Content-Type: audio/wav" \ + --data-binary "@/path/to/audio.wav" +``` + + +Emotion detection is currently supported only for Pre-Recorded API. Real-Time API support is coming soon. + + +## Output format & field of interest + +The response adds an `emotions` object containing floating-point scores (0–1) for happiness, sadness, disgust, fear, and anger. Use these fields to monitor sentiment, trigger QA alerts, or enrich customer analytics. 
+ +### Sample response + +```json +{ + "transcription": "Hello world.", + "emotions": { + "happiness": 0.80, + "sadness": 0.15, + "disgust": 0.02, + "fear": 0.03, + "anger": 0.05 + } +} +``` + diff --git a/fern/products/waves/versions/v4.0.0/speech-to-text/features/full-transcript.mdx b/fern/products/waves/versions/v4.0.0/speech-to-text/features/full-transcript.mdx new file mode 100644 index 0000000..9378a0c --- /dev/null +++ b/fern/products/waves/versions/v4.0.0/speech-to-text/features/full-transcript.mdx @@ -0,0 +1,80 @@ +--- +title: 'Full Transcript' +description: 'Get cumulative transcript from real-time WebSocket API' +--- + +Real-Time + +The Full Transcript feature provides a cumulative transcript that accumulates all transcription text received up to the current point in the session. This is useful for maintaining a complete transcript of the entire conversation or audio stream. + +## Enabling Full Transcript + +Add `full_transcript=true` to your WebSocket connection query parameters when connecting to the Pulse STT WebSocket API. The default is `false`. + +### Real-Time WebSocket API + +```javascript +const url = new URL("wss://api.smallest.ai/waves/v1/pulse/get_text"); +url.searchParams.append("language", "en"); +url.searchParams.append("encoding", "linear16"); +url.searchParams.append("sample_rate", "16000"); +url.searchParams.append("full_transcript", "true"); + +const ws = new WebSocket(url.toString(), { + headers: { + Authorization: `Bearer ${API_KEY}`, + }, +}); +``` + +## Output Format & Field of Interest + +The `full_transcript` field contains the complete transcription text accumulated from the beginning of the session. This field is only included in responses where `full_transcript=true` query parameter is set AND `is_final=true`, ensuring you receive the complete transcript only when a segment is finalized. 
+ +## Sample Response + +```json +{ + "session_id": "sess_12345abcde", + "transcript": "How are you doing today?", + "is_final": true, + "is_last": false, + "full_transcript": "Hello, my name is John. How are you doing today?", + "language": "en", + "languages": ["en"] +} +``` + +## Response Fields + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
| Field | Type | When Included | Description |
| --- | --- | --- | --- |
| `full_transcript` | string | `full_transcript=true` AND `is_final=true` | Complete transcription text accumulated from the start of the session |
| `transcript` | string | Always | Partial or complete transcription text for the current segment |
| `is_final` | boolean | Always | Indicates if this is the final transcription for the current segment |
+ diff --git a/fern/products/waves/versions/v4.0.0/speech-to-text/features/language-detection.mdx b/fern/products/waves/versions/v4.0.0/speech-to-text/features/language-detection.mdx new file mode 100644 index 0000000..b247bd3 --- /dev/null +++ b/fern/products/waves/versions/v4.0.0/speech-to-text/features/language-detection.mdx @@ -0,0 +1,79 @@ +--- +title: 'Language detection' +description: 'Automatically detect and transcribe 30+ languages with Pulse STT' +--- + +Pre-Recorded Real-Time + +## Enabling language detection + +Set the `language` query parameter to `multi` when calling the API. It will auto-detect the spoken language across 30+ ISO 639-1 language codes. + + +View the full list of [supported languages](../overview#supported-languages). + + +### Pre-Recorded API + +```bash +curl --request POST \ + --url "https://api.smallest.ai/waves/v1/pulse/get_text?language=multi&word_timestamps=true" \ + --header "Authorization: Bearer $SMALLEST_API_KEY" \ + --header "Content-Type: audio/wav" \ + --data-binary "@/path/to/audio.wav" +``` + +### Real-Time WebSocket API + +```javascript +const url = new URL("wss://api.smallest.ai/waves/v1/pulse/get_text"); +url.searchParams.append("language", "multi"); +url.searchParams.append("encoding", "linear16"); +url.searchParams.append("sample_rate", "16000"); + +const ws = new WebSocket(url.toString(), { + headers: { + Authorization: `Bearer ${API_KEY}`, + }, +}); +``` + +## Output format & field of interest + +When language detection is enabled, the `transcription` (or `transcript` for realtime), `words`, and `utterances` arrays are emitted in the detected language. The response includes a `language` field with the detected primary language code, and a `languages` array (in realtime responses where `is_final=true`) listing all detected languages. 
Persist the detected locale in your app by storing the `language` parameter you supplied (for auditing) and by inspecting downstream metadata such as subtitles or captions that inherit the localized transcript. + +## Sample response + +### Pre-Recorded API Response + +```json +{ + "status": "success", + "transcription": "Hola mundo.", + "words": [ + { "start": 0.0, "end": 0.4, "word": "Hola" }, + { "start": 0.5, "end": 0.9, "word": "mundo." } + ], + "utterances": [ + { "text": "Hola mundo.", "start": 0.0, "end": 0.9 } + ] +} +``` + +### Real-Time WebSocket API Response + +```json +{ + "session_id": "sess_12345abcde", + "transcript": "Hola mundo.", + "is_final": true, + "is_last": false, + "language": "es", + "languages": ["es"] +} +``` + + +The `language` field is only returned when `is_final=true` in real-time API responses. The `languages` array lists all languages detected in the audio and is also only included when `is_final=true`. + + diff --git a/fern/products/waves/versions/v4.0.0/speech-to-text/features/numeric-formatting.mdx b/fern/products/waves/versions/v4.0.0/speech-to-text/features/numeric-formatting.mdx new file mode 100644 index 0000000..0f49145 --- /dev/null +++ b/fern/products/waves/versions/v4.0.0/speech-to-text/features/numeric-formatting.mdx @@ -0,0 +1,87 @@ +--- +title: 'Numeric Formatting' +description: 'Control how numbers are formatted in transcriptions' +--- + +Real-Time + +Numeric Formatting allows you to control how numbers, dates, and numerical values are represented in transcription output. You can choose between spelled-out numbers or numeric digits based on your application's requirements. + + +## Enabling Numeric Formatting + + +Numeric Formatting is currently only available for the Real-Time WebSocket API. + +Add a `numerals` parameter in the query string set to `true`, `false`, or `auto` to control numeric formatting. The default is `auto`, which enables automatic detection based on context. 
+ +### Real-Time WebSocket API + +```javascript +const url = new URL("wss://api.smallest.ai/waves/v1/pulse/get_text"); +url.searchParams.append("language", "en"); +url.searchParams.append("encoding", "linear16"); +url.searchParams.append("sample_rate", "16000"); +url.searchParams.append("numerals", "true"); // or "false" or "auto" + +const ws = new WebSocket(url.toString(), { + headers: { + Authorization: `Bearer ${API_KEY}`, + }, +}); +``` + +## Formatting Options + +### `numerals=true` (Numeric Format) + +When enabled, numbers are transcribed as digits: + +- **"25"** instead of "twenty-five" +- **"$1,234.56"** instead of "one thousand two hundred thirty-four dollars and fifty-six cents" +- **"3:45 PM"** instead of "three forty-five P M" +- **"2024"** instead of "twenty twenty-four" +- **"1.5"** instead of "one point five" + +### `numerals=false` (Spelled-Out Format) + +When disabled, numbers are transcribed as words: + +- **"twenty-five"** instead of "25" +- **"one thousand two hundred thirty-four"** instead of "1234" +- **"three forty-five"** instead of "3:45" +- **"twenty twenty-four"** instead of "2024" + +### `numerals=auto` (Automatic Detection) + +When set to `auto` (default), the system automatically detects the appropriate format based on context. This is recommended for most use cases. + +## Sample Response + +### With `numerals=true` + +```json +{ + // other fields + "transcript": "The price is $25.99 and we have 42 items in stock. Call us at 555-1234.", + "is_final": true, + "is_last": false, + "language": "en" +} +``` + +### With `numerals=false` + +```json +{ + // other fields + "transcript": "The price is twenty-five dollars and ninety-nine cents and we have forty-two items in stock. Call us at five five five one two three four.", + "is_final": true, + "is_last": false, + "language": "en" +} +``` + + +If not specified, `numerals=auto` (automatic detection) is used by default. 
+ diff --git a/fern/products/waves/versions/v4.0.0/speech-to-text/features/redaction.mdx b/fern/products/waves/versions/v4.0.0/speech-to-text/features/redaction.mdx new file mode 100644 index 0000000..edfb213 --- /dev/null +++ b/fern/products/waves/versions/v4.0.0/speech-to-text/features/redaction.mdx @@ -0,0 +1,140 @@ +--- +title: 'PII and PCI Redaction' +description: 'Automatically redact sensitive information from transcriptions' +--- + +Real-Time + +Redaction allows you to identify and mask sensitive information from transcriptions to protect privacy and comply with data protection regulations. The Pulse STT API supports two types of redaction: PII (Personally Identifiable Information) and PCI (Payment Card Information). + +## Enabling Redaction + +Add `redact_pii` and/or `redact_pci` parameters to your WebSocket connection query parameters. Both parameters default to `false`. Options: `true`, `false`. + +### Real-Time WebSocket API + +```javascript +const url = new URL("wss://api.smallest.ai/waves/v1/pulse/get_text"); +url.searchParams.append("language", "en"); +url.searchParams.append("encoding", "linear16"); +url.searchParams.append("sample_rate", "16000"); +url.searchParams.append("redact_pii", "true"); +url.searchParams.append("redact_pci", "true"); + +const ws = new WebSocket(url.toString(), { + headers: { + Authorization: `Bearer ${API_KEY}`, + }, +}); +``` + +## Redaction Types + +### PII Redaction (`redact_pii`) + +When `redact_pii=true` is enabled, the following types of personally identifiable information are automatically identified and redacted: + +- **Names**: First names and surnames +- **Addresses**: Street addresses and locations +- **Phone numbers**: Various phone number formats + +Redacted PII items are replaced with placeholder tokens like `[FIRSTNAME_1]`, `[FIRSTNAME_2]`, `[PHONENUMBER_1]`, etc. 
+ +### PCI Redaction (`redact_pci`) + +When `redact_pci=true` is enabled, the following types of payment card information are automatically identified and redacted: + +- **Credit card numbers**: 16-digit credit/debit card numbers +- **CVV codes**: Card verification values +- **ZIP codes**: Postal/ZIP codes +- **Account numbers**: Bank account numbers + +Redacted PCI items are replaced with placeholder tokens like `[CREDITCARDCVV_1]`, `[ZIPCODE_1]`, `[ACCOUNTNUMBER_1]`, etc. + +## Output Format + +When redaction is enabled, the transcription text contains placeholder tokens instead of the original sensitive information. The response also includes a `redacted_entities` array listing all the redacted entity placeholders. + +### Sample Response with Redaction + +```json +{ + "session_id": "sess_12345abcde", + "transcript": "[CREDITCARDCVV_1] and expiry [TIME_2] slash 34.", + "is_final": true, + "is_last": true, + "full_transcript": "Hi, my name is [FIRSTNAME_1] [FIRSTNAME_2] You can reach me at [PHONENUMBER_1] and I paid using my Visa card [ZIPCODE_1] [ACCOUNTNUMBER_1] with [CREDITCARDCVV_1] and expiry [TIME_1].", + "language": "en", + "languages": ["en"], + "redacted_entities": [ + "[CREDITCARDCVV_1]", + "[TIME_2]" + ] +} +``` + +## Response Fields + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeWhen IncludedDescription
`redacted_entities`array`redact_pii=true` or `redact_pci=true`List of redacted entity placeholders (e.g., `[FIRSTNAME_1]`, `[CREDITCARDCVV_1]`)
`transcript`stringAlwaysTranscription text with redacted entities replaced by placeholder tokens
`full_transcript`string`full_transcript=true` AND `is_final=true`Cumulative transcript with redacted entities (when `full_transcript=true` is enabled)
+ +## Redaction Placeholder Format + +Redacted entities are replaced with placeholder tokens following the pattern: +- `[ENTITYTYPE_N]` where `ENTITYTYPE` indicates the type of information (e.g., `FIRSTNAME`, `PHONENUMBER`, `CREDITCARDCVV`, `ZIPCODE`, `ACCOUNTNUMBER`) +- `N` is a sequential number starting from 1 to uniquely identify each instance + +Examples: +- `[FIRSTNAME_1]`, `[FIRSTNAME_2]` - First names +- `[PHONENUMBER_1]` - Phone numbers +- `[CREDITCARDCVV_1]` - Credit card CVV codes +- `[ZIPCODE_1]` - ZIP/Postal codes +- `[ACCOUNTNUMBER_1]` - Account numbers + + +For the highest level of protection and effective compliance auditing, enable both `redact_pii=true` and `redact_pci=true` flags in your request. + +Additionally, use the `redacted_entities` array in the response as an audit trail to track what data has been redacted from each transcript. + + +## Compliance and Privacy + +Redaction helps with compliance requirements for: + +- **HIPAA**: Health Insurance Portability and Accountability Act (healthcare data) +- **GDPR**: General Data Protection Regulation (EU data protection) +- **CCPA**: California Consumer Privacy Act (California data protection) +- **PCI DSS**: Payment Card Industry Data Security Standard (payment card data) +- **SOC 2**: System and Organization Controls (security and privacy) + + +Note: Redaction is a tool to help protect sensitive information, but it should be used as part of a comprehensive data protection strategy. Always consult with legal and compliance teams to ensure your implementation meets regulatory requirements. 
+ diff --git a/fern/products/waves/versions/v4.0.0/speech-to-text/features/utterances.mdx b/fern/products/waves/versions/v4.0.0/speech-to-text/features/utterances.mdx new file mode 100644 index 0000000..c938c5b --- /dev/null +++ b/fern/products/waves/versions/v4.0.0/speech-to-text/features/utterances.mdx @@ -0,0 +1,87 @@ +--- +title: 'Sentence-level timestamps' +description: 'Use the utterances array to capture longer segments with speaker labels' +--- + +Pre-Recorded Real-Time + +Sentence-level timestamps (utterances) are supported in both **Pre-Recorded** and **Real-Time** transcription APIs. The `utterances` array aggregates contiguous words into sentence-level segments, providing structured timing information for longer audio chunks. + +## Enabling sentence-level timestamps + +### Pre-Recorded API + +For the Pre-Recorded API, set `word_timestamps=true` in your query parameters. When word timestamps are enabled, the response includes both `words` and `utterances` arrays. + + +Sentence-level timestamps (utterances) are supported in both Pre-Recorded and Real-Time APIs. Use `sentence_timestamps=true` for Real-Time API. + + +```bash +curl --request POST \ + --url "https://api.smallest.ai/waves/v1/pulse/get_text?language=en&word_timestamps=true&diarize=true" \ + --header "Authorization: Bearer $SMALLEST_API_KEY" \ + --header "Content-Type: audio/wav" \ + --data-binary "@/path/to/audio.wav" +``` + +### Real-Time API (WebSocket) + +For the Real-Time WebSocket API, set `sentence_timestamps=true` as a query parameter when establishing the WebSocket connection. 
+
+```javascript
+const url = new URL("wss://api.smallest.ai/waves/v1/pulse/get_text");
+url.searchParams.append("language", "en");
+url.searchParams.append("sentence_timestamps", "true");
+
+const ws = new WebSocket(url.toString(), {
+  headers: {
+    Authorization: `Bearer ${API_KEY}`,
+  },
+});
+```
+
+## Output format
+
+Each `utterances` entry contains `text`, `start`, `end`, and optional `speaker` fields (when diarization is enabled). Use these sentence-level timestamps when you need to display readable captions, synchronize larger chunks of audio, or store structured call summaries.
+
+## Sample response
+
+### Pre-Recorded API
+
+```json
+{
+  "status": "success",
+  "transcription": "Hello world. How are you?",
+  "words": [...],
+  "utterances": [
+    { "text": "Hello world.", "start": 0.0, "end": 0.9, "speaker": "speaker_0" },
+    { "text": "How are you?", "start": 1.0, "end": 2.1, "speaker": "speaker_1" }
+  ]
+}
+```
+
+
+This response has the `speaker` field due to `diarize` being enabled in the query.
+
+
+### Real-Time API (WebSocket)
+
+```json
+{
+  "session_id": "sess_12345abcde",
+  "transcript": "Hello world. How are you?",
+  "is_final": true,
+  "is_last": false,
+  "language": "en",
+  "utterances": [
+    { "text": "Hello world.", "start": 0.0, "end": 0.9 },
+    { "text": "How are you?", "start": 1.0, "end": 2.1 }
+  ]
+}
+```
+
+
+When `diarize=true` is enabled, the `utterances` array also includes a `speaker` field (integer ID) for real-time API responses. 
For example: `{ "text": "Hello world.", "start": 0.0, "end": 0.9, "speaker": 0 }` + + diff --git a/fern/products/waves/versions/v4.0.0/speech-to-text/features/word-boosting.mdx b/fern/products/waves/versions/v4.0.0/speech-to-text/features/word-boosting.mdx new file mode 100644 index 0000000..e69de29 diff --git a/fern/products/waves/versions/v4.0.0/speech-to-text/features/word-timestamps.mdx b/fern/products/waves/versions/v4.0.0/speech-to-text/features/word-timestamps.mdx new file mode 100644 index 0000000..84cff20 --- /dev/null +++ b/fern/products/waves/versions/v4.0.0/speech-to-text/features/word-timestamps.mdx @@ -0,0 +1,166 @@ +--- +title: 'Word timestamps' +description: 'Return word-level timing metadata from Pulse STT' +--- + +Pre-Recorded Real-Time + +Word timestamps provide precise timing information for each word in the transcription, enabling you to generate captions, subtitles, and align transcripts with audio playback. Use these offsets to generate captions, subtitle tracks, or to align transcripts with downstream analytics. + +## Enabling Word Timestamps + +### Pre-Recorded API + +Add `word_timestamps=true` to your Pulse STT query parameters. This works for both raw-byte uploads (`Content-Type: audio/wav`) and JSON requests with hosted audio URLs. + +#### Sample request + +```bash +curl --request POST \ + --url "https://api.smallest.ai/waves/v1/pulse/get_text?language=en&word_timestamps=true" \ + --header "Authorization: Bearer $SMALLEST_API_KEY" \ + --header "Content-Type: audio/wav" \ + --data-binary "@/path/to/audio.wav" +``` + +### Real-Time WebSocket API + +Add `word_timestamps=true` to your WebSocket connection query parameters when connecting to the Pulse STT WebSocket API. 
+ +```javascript +const url = new URL("wss://api.smallest.ai/waves/v1/pulse/get_text"); +url.searchParams.append("language", "en"); +url.searchParams.append("encoding", "linear16"); +url.searchParams.append("sample_rate", "16000"); +url.searchParams.append("word_timestamps", "true"); + +const ws = new WebSocket(url.toString(), { + headers: { + Authorization: `Bearer ${API_KEY}`, + }, +}); +``` + +## Output format & field of interest + +Responses include a `words` array with `word`, `start`, `end`, and `confidence` fields. When diarization is enabled, the array also includes `speaker` (integer ID for realtime, string label for pre-recorded) and `speaker_confidence` (0.0 to 1.0, realtime only) fields. + +### Pre-Recorded API Response + +```json +{ + "status": "success", + "transcription": "Hello world.", + "words": [ + { "start": 0.0, "end": 0.5, "speaker": "speaker_0", "word": "Hello" }, + { "start": 0.6, "end": 0.9, "speaker": "speaker_0", "word": "world." } + ], + "utterances": [ + { "text": "Hello world.", "start": 0.0, "end": 0.9, "speaker": "speaker_0" } + ] +} +``` + +The response of Pre-Recorded API includes the utterances field, which includes sentence level timestamps. + +### Real-Time WebSocket API Response + +```json +{ + "type": "transcription", + "status": "success", + "session_id": "00000000-0000-0000-0000-000000000001", + "transcript": "Hello, how are you?", + "is_final": true, + "is_last": false, + "language": "en", + "words": [ + { + "word": "Hello", + "start": 0.0, + "end": 0.5, + "confidence": 0.98 + }, + { + "word": "how", + "start": 0.6, + "end": 0.8, + "confidence": 0.95 + }, + { + "word": "are", + "start": 0.8, + "end": 1.0, + "confidence": 0.97 + }, + { + "word": "you?", + "start": 1.0, + "end": 1.3, + "confidence": 0.99 + } + ] +} +``` + + +When `diarize=true` is enabled, the `words` array also includes `speaker` (integer ID) and `speaker_confidence` (0.0 to 1.0) fields. 
+ + +## Response Fields + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeWhen IncludedDescription
`word`string`word_timestamps=true`The transcribed word
`start`number`word_timestamps=true`Start time in seconds
`end`number`word_timestamps=true`End time in seconds
`confidence`number`word_timestamps=true` (realtime only)Confidence score for the word (0.0 to 1.0)
`speaker`integer (realtime) / string (pre-recorded)`diarize=true`Speaker label. Real-time API uses integer IDs (0, 1, ...), pre-recorded API uses string labels (speaker_0, speaker_1, ...)
`speaker_confidence`number`diarize=true` (realtime only)Confidence score for the speaker assignment (0.0 to 1.0)
+ +## Use Cases + +- **Caption generation**: Create synchronized captions for video or live streams +- **Subtitle tracks**: Generate SRT or VTT subtitle files +- **Analytics**: Align transcripts with audio playback for detailed analysis +- **Search**: Enable time-based search within audio content diff --git a/fern/products/waves/versions/v4.0.0/speech-to-text/overview.mdx b/fern/products/waves/versions/v4.0.0/speech-to-text/overview.mdx new file mode 100644 index 0000000..68184aa --- /dev/null +++ b/fern/products/waves/versions/v4.0.0/speech-to-text/overview.mdx @@ -0,0 +1,282 @@ +--- +title: 'Overview' +description: 'Convert speech to text with the Pulse API - supporting real-time streaming and pre-recorded audio transcription with industry-leading latency' +icon: 'microphone' +--- + +The Waves Speech To Text (STT) stack processes audio via `https://api.smallest.ai/waves/v1/pulse/get_text` and returns low-latency transcripts with configurable languages, formats, and pricing tiers suited for enterprise deployments. + + + Get started in minutes. Learn how to get your API key and transcribe your first audio file. + + +## Transcription Modes + +We offer two transcription modes to cover a wide range of use cases. Choose the one that best fits your needs: + + + + Transcribe audio files using synchronous HTTPS POST requests. Perfect for batch processing, archived media, and offline transcription workflows. + + + Stream audio and receive transcription results as the audio is processed. Ideal for live conversations, voice assistants, and low-latency applications. + + + +## Feature highlights + +Our models specialize in processing audio to preserve information that is often lost during conventional speech to text conversion. + + + + Support for 32+ languages with automatic language detection or ISO 639-1 codes (`en`, `hi`, etc.). Use `language=multi` to enable automatic language detection across all supported languages. 
+ + + + Get precise timing information for each word in the transcription. Enables caption generation, subtitle tracks, and time-based search within audio content. + + + + Receive sentence-level transcription segments with timing information. Perfect for displaying readable captions, synchronizing larger chunks of audio, or storing structured call summaries. + + + + Identify and separate generated text into speaker turns. Automatically label different speakers in multi-speaker audio, enabling speaker-attributed transcription. + + + + Estimate the age group and detect the gender of each speaker alongside transcription. Provides demographic insights for analytics and content analysis. + + + + Detect emotional tone in transcribed speech with strength indicators for 5 core emotion types. Analyze sentiment and emotional context in conversations. + + + + Automatically redact personally identifiable information (names, addresses, phone numbers) and payment card information (credit cards, CVV, account numbers) to protect privacy and ensure compliance. + + + + Get cumulative transcript received up to this point in responses where `is_final` is `true`. Maintain complete session transcripts for conversation logs and real-time monitoring. + + + + Streaming pipeline tuned for ~64 ms time to first transcript latency. Optimized for real-time transcription with minimal delay. + + + +## Supported languages + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
LanguageCodePre-RecordedReal-Time
ItalianitYesYes
SpanishesYesYes
EnglishenYesYes
PortugueseptYesYes
HindihiYesYes
GermandeYesYes
FrenchfrYesYes
UkrainianukYesYes
RussianruYesYes
KannadaknYesYes
MalayalammlYesYes
PolishplYesYes
MarathimrYesYes
GujaratiguYesYes
CzechcsYesYes
SlovakskYesYes
TeluguteYesYes
Oriya (Odia)orYesYes
DutchnlYesYes
BengalibnYesYes
LatvianlvYesYes
EstonianetYesYes
RomanianroYesYes
PunjabipaYesYes
FinnishfiYesYes
SwedishsvYesYes
BulgarianbgYesYes
TamiltaYesYes
HungarianhuYesYes
DanishdaYesYes
LithuanianltYesYes
MaltesemtYesYes
+ +Use `language=multi` to auto-detect across the full list or specify one of the codes above to pin the model to a single language. + +## Next steps + +- Send your first POST request in the [Pulse STT Pre-Recorded quickstart](/waves/documentation/speech-to-text/pre-recorded/quickstart). +- Start your first WebSocket connection in the [Pulse STT WebSocket quickstart](/waves/documentation/speech-to-text/realtime/quickstart). +- Review [best practices](/waves/documentation/speech-to-text/pre-recorded/best-practices) for audio preprocessing and request hygiene. +- Use the [troubleshooting guide](/waves/documentation/speech-to-text/pre-recorded/troubleshooting) when you need quick fixes. diff --git a/fern/products/waves/versions/v4.0.0/speech-to-text/pre-recorded/audio-formats.mdx b/fern/products/waves/versions/v4.0.0/speech-to-text/pre-recorded/audio-formats.mdx new file mode 100644 index 0000000..1f54c0b --- /dev/null +++ b/fern/products/waves/versions/v4.0.0/speech-to-text/pre-recorded/audio-formats.mdx @@ -0,0 +1,137 @@ +--- +title: 'Audio Specifications' +description: 'Supported formats, codecs, and recommendations for pre-recorded audio' +--- + +## Input Methods + +Our API supports two input methods for transcribing audio: + + + + + + + + + + + + + + + + + + + + + +
MethodContent TypeUse Case
Raw Bytesapplication/octet-streamUpload audio files directly from your system
Audio URLapplication/jsonProcess audio files hosted on a remote server
+ +## Supported Formats + +The Pulse STT API supports a wide range of audio formats for pre-recorded transcription. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FormatExtensionCodecNotes
WAV.wavPCM, Linear PCMRecommended for best quality
MP3.mp3MPEG Audio Layer IIIWidely compatible
FLAC.flacFree Lossless Audio CodecLossless compression
OGG.oggVorbis, OpusOpen source format
M4A.m4aAAC, ALACApple format
WebM.webmOpus, VorbisWeb-optimized
+ +## Audio Requirements + +### Sample Rate + +- **Recommended**: 16 kHz (16,000 Hz) +- **Supported range**: All frequencies +- **Optimal**: 16 kHz mono for speech recognition + +### Channels + +Currently we support only single channel transcription. We are bringing in multi-channel support soon. + +### Limits + +- **Maximum size**: No limit on file size +- **Session timeout**: 10 minutes per Session + +It is recommended to split the file into chunks and then upload them in parallel for faster processing. + +## Format Recommendations + +### Best Quality + +Use 16 kHz mono Linear PCM (`audio/wav`) for the optimal mix of accuracy and processing speed. This configuration mirrors Waves’ recommended production setup for real-time speech workloads. + +``` +Format: WAV (Linear PCM) +Sample Rate: 16 kHz +Channels: Mono +Bit Depth: 16-bit +``` + +### Balanced (Telephony & Voice) + +Use 8 kHz μ-law encoded with 8-bit encoding for low bandwidth usage. It provides standard quality for voice-only applications like phone calls. + +``` +Format: MP3 or μ-law +Sample Rate: 8 kHz +Channels: Mono +Bitrate: 64–96 kbps +``` + +### Web-Optimized / High Fidelity + +For broadcast, captioning, or multimedia scenarios, it is recommended to capture higher sample rates (44.1–48 kHz). Due to the higher quality requirements, bandwidth and processing times would be on the higher side. 
+ +``` +Format: WebM (Opus) or FLAC +Sample Rate: 44.1–48 kHz +Channels: Mono or Stereo (downmix before upload) +Bitrate: 96–160 kbps +``` \ No newline at end of file diff --git a/fern/products/waves/versions/v4.0.0/speech-to-text/pre-recorded/best-practices.mdx b/fern/products/waves/versions/v4.0.0/speech-to-text/pre-recorded/best-practices.mdx new file mode 100644 index 0000000..81f3a22 --- /dev/null +++ b/fern/products/waves/versions/v4.0.0/speech-to-text/pre-recorded/best-practices.mdx @@ -0,0 +1,52 @@ +--- +title: 'Best Practices' +description: 'Prepare audio inputs before submitting them to Pulse STT' +--- + +# Pre-recorded best practices + +Follow these recommendations to keep Pulse STT latencies low while preserving transcript fidelity. + +## Audio preprocessing workflow + +### Convert with FFmpeg + +```bash +# Convert to 16 kHz mono WAV (recommended ingest format) +ffmpeg -i input.mp3 -ar 16000 -ac 1 -sample_fmt s16 output.wav + +# Convert to MP3 with optimal speech settings +ffmpeg -i input.wav -ar 16000 -ac 1 -b:a 128k output.mp3 +``` + +### Python example + +```python +from pydub import AudioSegment + +audio = AudioSegment.from_file("input.mp3") +audio = audio.set_frame_rate(16000).set_channels(1) +audio.export("output.wav", format="wav") +``` + +### JavaScript example + +```javascript +import { createFFmpeg, fetchFile } from '@ffmpeg/ffmpeg'; + +const ffmpeg = createFFmpeg({ log: true }); +await ffmpeg.load(); + +ffmpeg.FS('writeFile', 'input.mp3', await fetchFile('input.mp3')); +await ffmpeg.run('-i', 'input.mp3', '-ar', '16000', '-ac', '1', 'output.wav'); +const data = ffmpeg.FS('readFile', 'output.wav'); +``` + +## Quality checklist + +1. **Use 16 kHz mono** whenever possible; downsample higher-fidelity recordings. +2. **Normalize audio levels** so peaks stay consistent across large batches. +3. **Remove silence** at the beginning and end to avoid wasted compute. +4. 
**Handle multiple speakers** by enabling diarization when agents and customers share a channel. +5. **Test with a sample clip** before launching full backfills to validate accuracy and metadata. + diff --git a/fern/products/waves/versions/v4.0.0/speech-to-text/pre-recorded/code-examples.mdx b/fern/products/waves/versions/v4.0.0/speech-to-text/pre-recorded/code-examples.mdx new file mode 100644 index 0000000..56c0001 --- /dev/null +++ b/fern/products/waves/versions/v4.0.0/speech-to-text/pre-recorded/code-examples.mdx @@ -0,0 +1,132 @@ +--- +title: 'Code Examples' +description: 'Complete code samples for transcribing pre-recorded audio with Pulse STT' +--- + +Below is a complete Python example demonstrating audio preprocessing, transcription with age/gender detection, emotion detection, and sentence-level timestamps (utterances). + +
+ +```python +import os +from pydub import AudioSegment +from smallestai.waves import WavesClient + +client = WavesClient(api_key=os.getenv("SMALLEST_API_KEY")) + +def preprocess_audio(input_path, output_path): + """ + Preprocess audio file to optimal format for Pulse STT: + - Convert to 16 kHz mono WAV + - Normalize audio levels + - Remove leading/trailing silence + """ + audio = AudioSegment.from_file(input_path) + audio = audio.set_frame_rate(16000).set_channels(1) + audio = audio.normalize() + audio = audio.strip_silence(silence_len=100, silence_thresh=-40) + audio.export(output_path, format="wav") + print(f"Preprocessed audio saved to: {output_path}") + return output_path + +def transcribe_with_features(audio_path): + """ + Transcribe audio with age detection, emotion detection, and utterances. + """ + response = client.transcribe( + file_path=audio_path, + model="pulse", + language="en", + word_timestamps=True, + age_detection=True, + gender_detection=True, + emotion_detection=True, + diarize=True + ) + + return response + +def process_results(response): + """ + Extract and display transcription results. 
+ """ + print("=" * 60) + print("TRANSCRIPTION RESULTS") + print("=" * 60) + + print(f"\nTranscription: {response.get('transcription', 'N/A')}") + + if 'age' in response: + print(f"\nAge: {response['age']}") + if 'gender' in response: + print(f"Gender: {response['gender']}") + + if 'emotions' in response: + print("\nEmotion Scores:") + emotions = response['emotions'] + for emotion, score in emotions.items(): + print(f" {emotion.capitalize()}: {score:.2f}") + + if 'utterances' in response: + print("\nUtterances (Sentence-level timestamps):") + for i, utterance in enumerate(response['utterances'], 1): + speaker = utterance.get('speaker', 'unknown') + start = utterance.get('start', 0) + end = utterance.get('end', 0) + text = utterance.get('text', '') + print(f"\n [{i}] Speaker: {speaker}") + print(f" Time: {start:.2f}s - {end:.2f}s") + print(f" Text: {text}") + + if 'words' in response: + print(f"\nWord-level timestamps: {len(response['words'])} words") + +if __name__ == "__main__": + input_audio = "input_audio.mp3" + preprocessed_audio = "preprocessed_audio.wav" + + try: + print("Preprocessing audio...") + preprocess_audio(input_audio, preprocessed_audio) + + print("\nTranscribing audio with age, emotion, and utterance detection...") + result = transcribe_with_features(preprocessed_audio) + + process_results(result) + + if os.path.exists(preprocessed_audio): + os.remove(preprocessed_audio) + print("\nCleaned up temporary preprocessed file.") + + except FileNotFoundError: + print(f"Error: Audio file '{input_audio}' not found.") + except Exception as e: + print(f"Error: {str(e)}") +``` + +
+ +## Prerequisites + +Install required dependencies: + +```bash +pip install smallestai pydub +``` + +## Key Features Demonstrated + +1. **Audio Preprocessing**: Converts audio to 16 kHz mono WAV, normalizes levels, and removes silence +2. **Age & Gender Detection**: Enables demographic analysis +3. **Emotion Detection**: Captures emotional tone with confidence scores +4. **Utterances**: Retrieves sentence-level timestamps with speaker labels +5. **Diarization**: Separates speakers for multi-speaker audio + +## Expected Output + +The script will output: +- Full transcription text +- Age and gender predictions +- Emotion scores (happiness, sadness, disgust, fear, anger) +- Sentence-level utterances with timestamps and speaker IDs + diff --git a/fern/products/waves/versions/v4.0.0/speech-to-text/pre-recorded/features.mdx b/fern/products/waves/versions/v4.0.0/speech-to-text/pre-recorded/features.mdx new file mode 100644 index 0000000..7929efa --- /dev/null +++ b/fern/products/waves/versions/v4.0.0/speech-to-text/pre-recorded/features.mdx @@ -0,0 +1,27 @@ +--- +title: 'Features' +description: 'Available features for Pre-Recorded Pulse STT API' +--- + +The Pre-Recorded Pulse STT API supports the following features: + +## Available Features + + + + Get precise timing information for each word in the transcription + + + Automatically detect the language of the audio + + + Identify and label different speakers in the audio + + + Predict demographic attributes alongside transcription + + + Detect emotional tone in the transcribed speech + + + diff --git a/fern/products/waves/versions/v4.0.0/speech-to-text/pre-recorded/quickstart.mdx b/fern/products/waves/versions/v4.0.0/speech-to-text/pre-recorded/quickstart.mdx new file mode 100644 index 0000000..af068d8 --- /dev/null +++ b/fern/products/waves/versions/v4.0.0/speech-to-text/pre-recorded/quickstart.mdx @@ -0,0 +1,198 @@ +--- +title: 'Quickstart' +description: 'Get started with transcribing pre-recorded audio files using 
the Waves STT API' +--- + +This guide shows you how to convert an audio file into text using Smallest AI's Pulse STT model. + +# Pre-Recorded Audio + +> Transcribe pre-recorded audio files using synchronous HTTPS POST requests. Perfect for batch processing, archived media, and offline transcription workflows. + +The Pre-Recorded API allows you to upload audio files and receive complete transcripts in a single request. It can process an audio file uploaded as raw bytes or take a URL to retrieve one from a remote server. + +## When to Use Pre-Recorded Transcription + +- **Batch processing**: Transcribe multiple audio files at once +- **Archived media**: Process existing recordings, podcasts, or videos +- **Offline workflows**: Upload files that are already stored locally or in cloud storage +- **Complete transcripts**: When you need the full transcription before proceeding + +## Endpoint + +``` +POST https://api.smallest.ai/waves/v1/pulse/get_text +``` + +## Authentication + +Head over to the [smallest console](https://app.smallest.ai/dashboard/settings/apikeys) to generate an API key, if not done previously. Also look at [Authentication guide](/waves/documentation/getting-started/authentication) for more information about API keys and their usage. + + +Include your API key in the Authorization header: + +```http +Authorization: Bearer SMALLEST_API_KEY +``` + +## Example Request + +The API supports two input methods: **Raw Audio Bytes** and **Audio URL**. For details on both methods, see the [Audio Specifications](/waves/documentation/speech-to-text/pre-recorded/audio-formats) guide. 
+ +### Method 1: Raw Audio Bytes + +Upload audio files directly by sending raw audio data: + + +```bash cURL +curl --request POST \ + --url "https://api.smallest.ai/waves/v1/pulse/get_text?language=en&word_timestamps=true" \ + --header "Authorization: Bearer $SMALLEST_API_KEY" \ + --header "Content-Type: audio/wav" \ + --data-binary "@/path/to/audio.wav" +``` + +```python Python +import os +import requests + +API_KEY = os.environ["SMALLEST_API_KEY"] +endpoint = "https://api.smallest.ai/waves/v1/pulse/get_text" +params = { + "language": "en", + "word_timestamps": "true", +} +headers = { + "Authorization": f"Bearer {API_KEY}", + "Content-Type": "audio/wav", +} + +with open("sample.wav", "rb") as audio: + response = requests.post(endpoint, params=params, headers=headers, data=audio.read(), timeout=120) + +response.raise_for_status() +result = response.json() +print(result["transcription"]) +``` + +```javascript JavaScript +import fetch from "node-fetch"; +import fs from "fs"; + +const endpoint = "https://api.smallest.ai/waves/v1/pulse/get_text"; +const params = new URLSearchParams({ + language: "en", + word_timestamps: "true", +}); + +const audioBuffer = fs.readFileSync("sample.wav"); + +const response = await fetch(`${endpoint}?${params}`, { + method: "POST", + headers: { + Authorization: `Bearer ${process.env.SMALLEST_API_KEY}`, + "Content-Type": "audio/wav", + }, + body: audioBuffer, +}); + +if (!response.ok) throw new Error(await response.text()); +const data = await response.json(); +console.log(data.transcription); +``` + + +### Method 2: Audio URL + +Provide a URL to an audio file hosted remotely. This is useful when your audio files are stored in cloud storage (S3, Google Cloud Storage, etc.) 
or accessible via HTTP/HTTPS: + + +```bash cURL +curl --request POST \ + --url "https://api.smallest.ai/waves/v1/pulse/get_text?language=en&word_timestamps=true" \ + --header "Authorization: Bearer $SMALLEST_API_KEY" \ + --header "Content-Type: application/json" \ + --data '{ + "url": "https://example.com/audio.mp3" + }' +``` + +```python Python +import os +import requests + +API_KEY = os.environ["SMALLEST_API_KEY"] +endpoint = "https://api.smallest.ai/waves/v1/pulse/get_text" +params = { + "language": "en", + "word_timestamps": "true", +} +headers = { + "Authorization": f"Bearer {API_KEY}", + "Content-Type": "application/json", +} +body = { + "url": "https://example.com/audio.mp3" +} + +response = requests.post(endpoint, params=params, headers=headers, json=body, timeout=120) +response.raise_for_status() +result = response.json() +print(result["transcription"]) +``` + +```javascript JavaScript +import fetch from "node-fetch"; + +const endpoint = "https://api.smallest.ai/waves/v1/pulse/get_text"; +const params = new URLSearchParams({ + language: "en", + word_timestamps: "true", +}); + +const response = await fetch(`${endpoint}?${params}`, { + method: "POST", + headers: { + Authorization: `Bearer ${process.env.SMALLEST_API_KEY}`, + "Content-Type": "application/json", + }, + body: JSON.stringify({ + url: "https://example.com/audio.mp3" + }), +}); + +if (!response.ok) throw new Error(await response.text()); +const data = await response.json(); +console.log(data.transcription); +``` + + +## Example Response + +A successful request returns a JSON object with the transcription: + +```json +{ + "status": "success", + "transcription": "Hello, this is a test transcription.", + "words": [ + {"start": 0.48, "end": 1.12, "word": "Hello,"}, + {"start": 1.12, "end": 1.28, "word": "this"}, + {"start": 1.28, "end": 1.44, "word": "is"}, + {"start": 1.44, "end": 2.16, "word": "a"}, + {"start": 2.16, "end": 2.96, "word": "test"}, + {"start": 2.96, "end": 3.76, "word": 
"transcription."} + ], + "utterances": [ + {"start": 0.48, "end": 3.76, "text": "Hello, this is a test transcription."} + ] +} +``` + +## Next Steps + +- Learn about [supported audio formats](/waves/documentation/speech-to-text/pre-recorded/audio-formats). +- Decide which enrichment options to enable in the [features guide](/waves/documentation/speech-to-text/pre-recorded/features). +- Configure asynchronous callbacks with [webhooks](/waves/documentation/speech-to-text/pre-recorded/webhooks). +- Review a full [code example](/waves/documentation/speech-to-text/pre-recorded/code-examples) here. + diff --git a/fern/products/waves/versions/v4.0.0/speech-to-text/pre-recorded/troubleshooting.mdx b/fern/products/waves/versions/v4.0.0/speech-to-text/pre-recorded/troubleshooting.mdx new file mode 100644 index 0000000..44dc9ce --- /dev/null +++ b/fern/products/waves/versions/v4.0.0/speech-to-text/pre-recorded/troubleshooting.mdx @@ -0,0 +1,46 @@ +--- +title: 'Troubleshooting' +description: 'Resolve common issues when uploading pre-recorded audio to Pulse STT' +--- + +# Troubleshooting pre-recorded uploads + +Go through this guide to learn about common bugs and issues that might occur when transcribing audio with Pulse STT. + +## Low-quality transcripts + +**Symptoms:** transcripts miss words or contain heavy noise. + +**Fixes:** +- Resample audio to at least 16 kHz and keep it mono. +- Prefer lossless codecs (WAV, FLAC) whenever you control the capture pipeline. +- Maintain 128 kbps or higher bitrate for compressed formats. +- Normalize and denoise audio before uploading (see [best practices](/waves/documentation/speech-to-text/pre-recorded/best-practices)). + +## Large file sizes + +**Symptoms:** uploads stall or take too long to complete. + +**Fixes:** +- Use compressed formats such as MP3 or OGG if WAV is oversized. +- Downsample anything above 16 kHz; speech rarely needs more. +- Split recordings longer than 10 minutes into smaller chunks. 
+- Keep bitrates near 128 kbps unless fidelity is critical. + +## Unsupported format errors + +**Symptoms:** API responses return format/codec errors immediately. + +**Fixes:** +- Convert the file with FFmpeg to a supported format (WAV, MP3, FLAC, OGG, WebM). +- Make sure the file extension matches the actual codec/container. +- Validate codec compatibility—Linear PCM is the safest fallback. + +## Still stuck? + +- Re-run uploads with `?word_timestamps=true` to confirm the request reaches the model. +- Capture the failing `request_id` and reach out to us: + - Email: [support@smallest.ai](mailto:support@smallest.ai) + - Discord: [Join our community](https://discord.gg/5evETqguJs) + + diff --git a/fern/products/waves/versions/v4.0.0/speech-to-text/pre-recorded/webhooks.mdx b/fern/products/waves/versions/v4.0.0/speech-to-text/pre-recorded/webhooks.mdx new file mode 100644 index 0000000..c1b8135 --- /dev/null +++ b/fern/products/waves/versions/v4.0.0/speech-to-text/pre-recorded/webhooks.mdx @@ -0,0 +1,75 @@ +--- +title: 'Webhooks' +description: 'Receive asynchronous Pulse STT results without polling' +--- + +# Webhooks for pre-recorded uploads + +Use webhooks to process Pulse STT jobs asynchronously—ideal for long recordings or high-volume backfills. When a transcription finishes, Waves sends a POST request to your callback with the final payload. + +## Steps + + + + Route an HTTPS URL (e.g., `https://api.example.com/webhooks/stt`) that accepts POST requests. Implement signature checks or HMAC validation inside this handler to guard against spoofed traffic. + + + Include `webhook_url` and optional `webhook_extra` query parameters when calling `POST /waves/v1/pulse/get_text`. `webhook_extra` accepts comma-separated `key:value` pairs that return verbatim in the webhook payload for correlation. + + + Make your transcription request as usual (raw bytes or audio URL). Pulse STT queues the job, streams to the model, and emits the webhook once `status=success` (or `failed`). 
+ + + Parse the JSON payload, verify any signatures you added, and store the transcript, timestamps, and metadata in your system of record. Respond with `2xx` to acknowledge receipt; send `5xx` to trigger a retry. + + + +## Sample request + +```bash +curl --request POST \ + --url "https://api.smallest.ai/waves/v1/pulse/get_text?language=en&word_timestamps=true&webhook_url=https://api.example.com/webhooks/stt&webhook_extra=case_id:42,region:us-east" \ + --header "Authorization: Bearer $SMALLEST_API_KEY" \ + --header "Content-Type: audio/wav" \ + --data-binary "@/path/to/audio.wav" +``` + +## Sample webhook payload + +```json +{ + "status": "success", + "transcription": "Hello world.", + "words": [ + { "start": 0.0, "end": 0.5, "speaker": "speaker_0", "word": "Hello" }, + { "start": 0.6, "end": 0.9, "speaker": "speaker_0", "word": "world." } + ], + "utterances": [ + { "text": "Hello world.", "start": 0.0, "end": 0.9, "speaker": "speaker_0" } + ], + "language": "en", + "age": "adult", + "gender": "male", + "emotions": { + "happiness": 0.8, + "sadness": 0.15, + "disgust": 0.02, + "fear": 0.03, + "anger": 0.05 + } +} +``` + +## Implementation tips + +- **Security**: terminate TLS, validate signatures/HMACs, and reject unsigned requests. +- **Retries**: respond with `5xx` to trigger Waves' retry logic; use idempotent handlers. Waves does `10 retries` before giving up on delivery. +- **Rate limits**: add lightweight rate limiting or queueing to absorb bursts. +- **Local development**: tunnel your local server with `ngrok http 3000` to test callbacks. + +## Testing checklist + +1. Trigger a transcription with `webhook_url` pointing to your dev endpoint. +2. Inspect the webhook payload, store the `request_id`, and ensure metadata flows through. +3. Simulate failures by returning `500` to confirm retries work as expected. 
+ diff --git a/fern/products/waves/versions/v4.0.0/speech-to-text/quickstart.mdx b/fern/products/waves/versions/v4.0.0/speech-to-text/quickstart.mdx new file mode 100644 index 0000000..20aa7b5 --- /dev/null +++ b/fern/products/waves/versions/v4.0.0/speech-to-text/quickstart.mdx @@ -0,0 +1,71 @@ +--- +title: "Quickstart" +description: "This guide will help you get started quickly with transcribing your first audio using Python and the Smallest AI API." +icon: "rocket" +--- + +## Step 1: Sign Up & Get Your API Key + +1. Visit the [platform](https://app.smallest.ai/dashboard/settings/apikeys?utm_source=documentation&utm_medium=speech-to-text) and sign up for an account or log in if you already have an account. +2. Navigate to `API Key` tab in your account dashboard (Manage Plan). +3. Create a new API Key and copy it. +4. Export the API Key in your environment with the name `SMALLEST_API_KEY`, ensuring that your application can access it securely for authentication. + +## Step 2: Install Dependencies + +To install the required library: + +```bash +pip install requests +``` + +## Step 3: Make Your First API Call + +Here is a basic example of how to use Python to transcribe an audio file: + + +```python Python +import os +import requests + +API_KEY = os.environ.get("SMALLEST_API_KEY") + +response = requests.post( + "https://api.smallest.ai/waves/v1/pulse/get_text", + params={"language": "en"}, + headers={ + "Authorization": f"Bearer {API_KEY}", + "Content-Type": "audio/wav", + }, + data=open("audio.wav", "rb").read(), + timeout=120 +) + +result = response.json() +print(result["transcription"]) +``` + +```bash cURL +curl --request POST \ + --url "https://api.smallest.ai/waves/v1/pulse/get_text?language=en" \ + --header "Authorization: Bearer $SMALLEST_API_KEY" \ + --header "Content-Type: audio/wav" \ + --data-binary "@audio.wav" +``` + + +Replace `YOUR_API_KEY` with the API key you obtained in Step 1. 
+ +## Step 4: Explore More Features + +- **[Real-Time Transcription](/waves/documentation/speech-to-text/realtime/quickstart):** Stream audio via WebSocket for live transcription. +- **[Speaker Diarization](/waves/documentation/speech-to-text/features/diarization):** Identify and label different speakers in multi-speaker audio. +- **[Word Timestamps](/waves/documentation/speech-to-text/features/word-timestamps):** Get precise timing information for each word. +- **[Emotion Detection](/waves/documentation/speech-to-text/features/emotion-detection):** Analyze emotional tone in transcribed speech. + +For detailed documentation on all available features and endpoints, visit our [API Reference](/waves/documentation/api-references/pulse-stt). + +### Need Help? + +If you have any questions or need assistance, please contact our support team at [support@smallest.ai](mailto:support@smallest.ai). + diff --git a/fern/products/waves/versions/v4.0.0/speech-to-text/realtime/audio-formats.mdx b/fern/products/waves/versions/v4.0.0/speech-to-text/realtime/audio-formats.mdx new file mode 100644 index 0000000..c3897ee --- /dev/null +++ b/fern/products/waves/versions/v4.0.0/speech-to-text/realtime/audio-formats.mdx @@ -0,0 +1,163 @@ +--- +title: 'Audio Specifications' +description: 'Supported audio encoding formats and requirements for real-time WebSocket transcription' +--- + +## Supported Encoding Formats + +The Pulse STT WebSocket API supports the following audio encoding formats for real-time streaming: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
EncodingDescriptionUse Case
linear1616-bit linear PCMRecommended for best quality
linear3232-bit linear PCMHigh-fidelity audio
alawA-law encodingTelephony systems
mulawμ-law encodingTelephony systems (North America)
opusOpus compressed audioLow bandwidth, high quality
ogg_opusOgg Opus containerOgg container with Opus codec
+ +## Supported Sample Rates + +Sample rate is the number of times the audio signal is measured per second. A higher sample rate naturally implies audio of better detail and higher quality. However it increases the size of the audio file. + +The WebSocket API supports the following sample rates: + +- **8000 Hz** +- **16000 Hz** +- **22050 Hz** +- **24000 Hz** +- **44100 Hz** +- **48000 Hz** + +## Audio Requirements + +### Chunk Size + +The recommended size is `4096 bytes` per chunk. + +Sending audio in consistent 4096-byte chunks helps maintain optimal latency and processing efficiency. It minimizes the tradeoff between processing latency and network latency, finding the right fit between number of requests and the size of each request. + +### Channels + +Currently, we support only single-channel (mono) transcription. Multi-channel support is coming soon. + +### Streaming Rate + +For optimal real-time performance: +- Stream chunks at regular intervals (e.g., every 50-100ms) +- Maintain consistent chunk sizes when possible +- Avoid sending chunks too rapidly or too slowly + +## Format Recommendations + +### Best Quality (Default) + +Use 16 kHz mono Linear PCM (`linear16`) for the optimal mix of accuracy and processing speed: + +``` +Encoding: linear16 +Sample Rate: 16000 Hz +Channels: Mono +Chunk Size: 4096 bytes +``` + +### Telephony Quality + +Use 8 kHz μ-law or A-law encoding for low bandwidth usage: + +``` +Encoding: mulaw or alaw +Sample Rate: 8000 Hz +Channels: Mono +Chunk Size: 4096 bytes +``` + +### High Fidelity + +For broadcast or high-quality scenarios, use higher sample rates: + +``` +Encoding: linear16 or linear32 +Sample Rate: 44100 or 48000 Hz +Channels: Mono +Chunk Size: 4096 bytes +``` + +## Audio Preprocessing + +Before streaming audio to the WebSocket API, ensure your audio is: + +1. **Converted to the correct format**: Use the specified encoding (linear16, linear32, alaw, mulaw, opus, or ogg_opus) +2. 
**Set to the correct sample rate**: Match the `sample_rate` parameter in your WebSocket URL +3. **Mono channel**: Downmix stereo or multi-channel audio to mono +4. **Properly chunked**: Split audio into 4096-byte chunks for streaming + +### Example: Converting Audio for Streaming + +```python +import numpy as np +import soundfile as sf + +# Read audio file +audio, sample_rate = sf.read('input.wav') + +# Convert to mono if stereo +if len(audio.shape) > 1: + audio = np.mean(audio, axis=1) + +# Resample to 16 kHz if needed +if sample_rate != 16000: + from scipy import signal + audio = signal.resample(audio, int(len(audio) * 16000 / sample_rate)) + +# Convert to 16-bit PCM +audio_int16 = (audio * 32767).astype(np.int16) + +# Split into 4096-byte chunks +chunk_size = 4096 +chunks = [audio_int16[i:i+chunk_size//2] for i in range(0, len(audio_int16), chunk_size//2)] +``` + +## Query Parameters + +Specify encoding and sample rate in the WebSocket connection URL: + +```javascript +const url = new URL("wss://api.smallest.ai/waves/v1/pulse/get_text"); +url.searchParams.append("encoding", "linear16"); +url.searchParams.append("sample_rate", "16000"); +``` + diff --git a/fern/products/waves/versions/v4.0.0/speech-to-text/realtime/best-practices.mdx b/fern/products/waves/versions/v4.0.0/speech-to-text/realtime/best-practices.mdx new file mode 100644 index 0000000..14a92a4 --- /dev/null +++ b/fern/products/waves/versions/v4.0.0/speech-to-text/realtime/best-practices.mdx @@ -0,0 +1,224 @@ +--- +title: 'Best Practices' +description: 'Optimize your real-time WebSocket transcription for low latency and high accuracy' +--- + +# Real-time streaming best practices + +Follow these recommendations to keep Pulse STT latencies low while preserving transcript fidelity in real-time scenarios. 
+ +## Chunk Size and Streaming Rate + +### Recommended Chunk Size + +- **Optimal**: 4096 bytes per chunk +- **Range**: 1024 to 8192 bytes +- **Consistency**: Maintain consistent chunk sizes when possible + +Sending audio in 4096-byte chunks provides the best balance between latency and processing efficiency. + +### Streaming Rate + +- **Interval**: Send chunks every 50-100ms +- **Avoid**: Sending chunks too rapidly (< 20ms) or too slowly (> 200ms) +- **Consistency**: Maintain regular intervals for predictable latency + +```javascript +// Good: Consistent 50ms intervals +setTimeout(sendChunk, 50); + +// Avoid: Variable or very short intervals +setTimeout(sendChunk, Math.random() * 10); // Too fast and inconsistent +``` + +## Handling Partial vs Final Transcripts + +The API sends two types of transcripts: + +### Partial Transcripts (`is_final: false`) + +- **Purpose**: Show interim results for immediate user feedback +- **Behavior**: May change as more audio is processed +- **Use case**: Display "live" transcription as the user speaks + +```javascript +if (!message.is_final) { + // Show partial transcript with visual indicator (e.g., grayed out) + displayPartialTranscript(message.transcript); +} +``` + +### Final Transcripts (`is_final: true`) + +- **Purpose**: Confirmed transcription for a segment +- **Behavior**: Stable and won't change +- **Use case**: Store in database, display as confirmed text + +```javascript +if (message.is_final) { + // Store final transcript + saveTranscript(message.full_transcript); + // Update UI with confirmed text + displayFinalTranscript(message.full_transcript); +} +``` + +## Audio Preprocessing + +### Before Streaming + +1. **Convert to correct format**: Ensure audio matches the `encoding` parameter (linear16, linear32, alaw, mulaw, opus, ogg_opus) +2. **Set sample rate**: Match the `sample_rate` parameter in your WebSocket URL +3. **Mono channel**: Downmix stereo/multi-channel to mono +4. 
**Normalize levels**: Prevent clipping and ensure consistent volume + +### Example Preprocessing + +```python +import numpy as np +import soundfile as sf + +def preprocess_audio(input_path, target_sample_rate=16000): + """Preprocess audio for WebSocket streaming""" + audio, sample_rate = sf.read(input_path) + + # Convert to mono + if len(audio.shape) > 1: + audio = np.mean(audio, axis=1) + + # Resample if needed + if sample_rate != target_sample_rate: + from scipy import signal + audio = signal.resample(audio, int(len(audio) * target_sample_rate / sample_rate)) + + # Normalize to prevent clipping + max_val = np.abs(audio).max() + if max_val > 0: + audio = audio / max_val * 0.95 + + # Convert to 16-bit PCM + audio_int16 = (audio * 32767).astype(np.int16) + + return audio_int16, target_sample_rate +``` + +## Error Handling and Reconnection + +### Connection Errors + +Implement robust error handling for network issues: + +```javascript +let reconnectAttempts = 0; +const maxReconnectAttempts = 5; + +function connect() { + const ws = new WebSocket(url.toString()); + + ws.onerror = (error) => { + console.error("WebSocket error:", error); + }; + + ws.onclose = (event) => { + if (event.code !== 1000 && reconnectAttempts < maxReconnectAttempts) { + reconnectAttempts++; + const delay = Math.min(1000 * Math.pow(2, reconnectAttempts), 30000); + console.log(`Reconnecting in ${delay}ms...`); + setTimeout(connect, delay); + } + }; + + ws.onopen = () => { + reconnectAttempts = 0; // Reset on successful connection + }; + + return ws; +} +``` + +### Handling Connection Drops + +- **Detect drops**: Monitor connection state and implement heartbeat/ping +- **Buffer audio**: Store audio chunks during disconnection +- **Resume streaming**: Continue from where you left off after reconnection + +## Session Management + +### Session Lifecycle + +1. **Establish connection**: Create WebSocket with proper authentication +2. **Stream audio**: Send chunks at regular intervals +3. 
**Handle responses**: Process partial and final transcripts +4. **End session**: Send `{"type": "finalize"}` when done +5. **Close connection**: Gracefully close the WebSocket + +### Graceful Shutdown + +To properly close a session, send the finalize token and wait for the server to respond with `is_last=true` before closing the WebSocket connection: + +```javascript +function endTranscription(ws) { + // Send end signal + ws.send(JSON.stringify({ type: "finalize" })); + + // Wait for is_last=true response before closing + ws.onmessage = (event) => { + const message = JSON.parse(event.data); + if (message.is_last === true) { + ws.close(1000, "Transcription complete"); + } + }; +} +``` + + +Do not close the WebSocket immediately after sending the finalize token. Always wait for the `is_last=true` response to ensure all audio has been processed and final transcripts are received. + + +## Latency Optimization + +### Minimize Processing Delays + +- **Preprocess offline**: Convert audio format before streaming +- **Use optimal encoding**: `linear16` at 16 kHz for best latency/quality balance +- **Consistent chunking**: Avoid variable chunk sizes that cause processing delays + +### Network Optimization + +- **Stable connection**: Use reliable network connections +- **Monitor bandwidth**: Ensure sufficient bandwidth for audio streaming +- **Reduce overhead**: Minimize unnecessary data in WebSocket messages + +## Quality Checklist + +1. **Use 16 kHz mono linear16** whenever possible for optimal latency +2. **Stream in 4096-byte chunks** at 50-100ms intervals +3. **Handle partial transcripts** for immediate user feedback +4. **Store final transcripts** for accuracy and persistence +5. **Implement reconnection logic** for production reliability +6. **Monitor session state** to detect and handle errors gracefully +7. 
**Test with real audio** to validate latency and accuracy
+
+## Performance Tips
+
+### For Low Latency
+
+- Use `linear16` encoding at 16 kHz
+- Stream chunks every 50ms
+- Process responses asynchronously
+- Avoid blocking operations in message handlers
+
+### For High Accuracy
+
+- Use higher sample rates (44.1 kHz or 48 kHz) when latency allows
+- Enable `word_timestamps` for precise timing
+- Wait for `is_final=true` before committing transcripts
+- Use `full_transcript` for complete session text
+
+### For Production
+
+- Implement connection pooling for multiple sessions
+- Add rate limiting to prevent overwhelming the API
+- Log session IDs for debugging and support
+- Monitor transcription quality and latency metrics
+
diff --git a/fern/products/waves/versions/v4.0.0/speech-to-text/realtime/code-examples.mdx b/fern/products/waves/versions/v4.0.0/speech-to-text/realtime/code-examples.mdx
new file mode 100644
index 0000000..4e4ede6
--- /dev/null
+++ b/fern/products/waves/versions/v4.0.0/speech-to-text/realtime/code-examples.mdx
@@ -0,0 +1,391 @@
+---
+title: 'Code Examples'
+description: 'Complete code examples for real-time WebSocket transcription in Python, Node.js, and Browser JavaScript'
+---
+
+This guide contains complete examples demonstrating real-time audio transcription for various use cases in different programming languages.
+
+- [Python Example](#python-example): Shows how to use the `websockets` library to transcribe a pre-recorded file in chunks.
+- [Node.js Example](#nodejs-example): Simulates real WebSocket streaming by chunking a pre-recorded file in Node.js.
+- [JavaScript Example](#browser-javascript-example): Shows a browser-based example using JavaScript.
+- [Streaming from Microphone](#streaming-from-microphone): Shows real-time transcription from microphone audio.
+ + +## Prerequisites + +### Python +```bash +pip install websockets +``` + +### Node.js +```bash +npm install ws +``` + + +## Python Example + +This example shows how to stream audio from a file and receive real-time transcriptions: + +
+ +```python +import asyncio +import websockets +import json +import os +import pathlib +from urllib.parse import urlencode + +BASE_WS_URL = "wss://api.smallest.ai/waves/v1/pulse/get_text" +params = { + "language": "en", + "encoding": "linear16", + "sample_rate": "16000", + "word_timestamps": "true" +} +WS_URL = f"{BASE_WS_URL}?{urlencode(params)}" + +API_KEY = os.getenv("SMALLEST_API_KEY") +AUDIO_FILE = "path/to/audio.wav" + +async def stream_audio(): + headers = { + "Authorization": f"Bearer {API_KEY}" + } + + async with websockets.connect(WS_URL, additional_headers=headers) as ws: + print("Connected to STT WebSocket") + + audio_bytes = pathlib.Path(AUDIO_FILE).read_bytes() + chunk_size = 4096 + offset = 0 + + print(f"Streaming {len(audio_bytes)} bytes from {os.path.basename(AUDIO_FILE)}") + + async def send_chunks(): + nonlocal offset + while offset < len(audio_bytes): + chunk = audio_bytes[offset: offset + chunk_size] + await ws.send(chunk) + offset += chunk_size + await asyncio.sleep(0.05) # 50ms delay between chunks + + print("Finished sending audio, sending end signal...") + await ws.send(json.dumps({"type": "finalize"})) + + sender = asyncio.create_task(send_chunks()) + + try: + async for message in ws: + try: + data = json.loads(message) + print("Received:", json.dumps(data, indent=2)) + + # Handle partial transcripts + if not data.get("is_final"): + print(f"Partial: {data.get('transcript')}") + else: + print(f"Final: {data.get('transcript')}") + print(f"Full transcript: {data.get('full_transcript')}") + + if data.get("is_last"): + print("Transcription complete!") + break + except json.JSONDecodeError: + print("Received raw:", message) + except websockets.ConnectionClosed as e: + print(f"Connection closed: {e.code} - {e.reason}") + + await sender + +if __name__ == "__main__": + asyncio.run(stream_audio()) +``` + +
+ +## Node.js Example + +This example demonstrates real-time transcription using the `ws` library: + +
+ +```javascript +const WebSocket = require("ws"); +const fs = require("fs"); + +const API_KEY = process.env.SMALLEST_API_KEY; +const AUDIO_FILE = "path/to/audio.wav"; + +const url = new URL("wss://api.smallest.ai/waves/v1/pulse/get_text"); +url.searchParams.append("language", "en"); +url.searchParams.append("encoding", "linear16"); +url.searchParams.append("sample_rate", "16000"); +url.searchParams.append("word_timestamps", "true"); + +const ws = new WebSocket(url.toString(), { + headers: { + Authorization: `Bearer ${API_KEY}`, + }, +}); + +ws.on("open", () => { + console.log("Connected to STT WebSocket"); + + const audioBuffer = fs.readFileSync(AUDIO_FILE); + const chunkSize = 4096; + let offset = 0; + + const sendChunk = () => { + if (offset >= audioBuffer.length) { + console.log("Finished sending audio, sending end signal..."); + ws.send(JSON.stringify({ type: "finalize" })); + return; + } + + const chunk = audioBuffer.slice(offset, offset + chunkSize); + ws.send(chunk); + offset += chunkSize; + + setTimeout(sendChunk, 50); // 50ms delay between chunks + }; + + sendChunk(); +}); + +ws.on("message", (data) => { + try { + const message = JSON.parse(data.toString()); + console.log("Received:", JSON.stringify(message, null, 2)); + + // Handle partial transcripts + if (!message.is_final) { + console.log(`Partial: ${message.transcript}`); + } else { + console.log(`Final: ${message.transcript}`); + console.log(`Full transcript: ${message.full_transcript}`); + + if (message.is_last) { + console.log("Transcription complete!"); + ws.close(); + } + } + } catch (error) { + console.error("Error parsing message:", error); + } +}); + +ws.on("error", (error) => { + console.error("WebSocket error:", error.message); +}); + +ws.on("close", (code, reason) => { + console.log(`Connection closed: ${code} - ${reason.toString()}`); +}); +``` + +
+ +## Browser JavaScript Example + +This example shows how to stream audio from a file input in the browser: + +
+ +```javascript +const API_KEY = "SMALLEST_API_KEY"; + +async function transcribeAudio(audioFile) { + const url = new URL("wss://api.smallest.ai/waves/v1/pulse/get_text"); + url.searchParams.append("language", "en"); + url.searchParams.append("encoding", "linear16"); + url.searchParams.append("sample_rate", "16000"); + url.searchParams.append("word_timestamps", "true"); + + const ws = new WebSocket(url.toString()); + + ws.onopen = async () => { + console.log("Connected to STT WebSocket"); + + const arrayBuffer = await audioFile.arrayBuffer(); + const chunkSize = 4096; + let offset = 0; + + const sendChunk = () => { + if (offset >= arrayBuffer.byteLength) { + console.log("Finished sending audio"); + ws.send(JSON.stringify({ type: "finalize" })); + return; + } + + const chunk = arrayBuffer.slice(offset, offset + chunkSize); + ws.send(chunk); + offset += chunkSize; + + setTimeout(sendChunk, 50); // 50ms delay between chunks + }; + + sendChunk(); + }; + + ws.onmessage = (event) => { + try { + const message = JSON.parse(event.data); + console.log("Received:", message); + + // Update UI with transcript + if (message.is_final) { + updateTranscript(message.full_transcript); + } else { + updatePartialTranscript(message.transcript); + } + + if (message.is_last) { + console.log("Transcription complete!"); + ws.close(); + } + } catch (error) { + console.error("Error parsing message:", error); + } + }; + + ws.onerror = (error) => { + console.error("WebSocket error:", error); + }; + + ws.onclose = (event) => { + console.log(`Connection closed: ${event.code}`); + }; +} + +// Example usage with file input +const fileInput = document.getElementById("audioFile"); +fileInput.addEventListener("change", (e) => { + const file = e.target.files[0]; + if (file) { + transcribeAudio(file); + } +}); +``` + +
+ + +## Streaming from Microphone + +Here's an example of streaming live audio from a microphone in the browser: + +
+ +```javascript +const API_KEY = "SMALLEST_API_KEY"; + +async function streamMicrophone() { + // Get microphone access + const stream = await navigator.mediaDevices.getUserMedia({ audio: true }); + const audioContext = new AudioContext({ sampleRate: 16000 }); + const source = audioContext.createMediaStreamSource(stream); + + // Create script processor for audio chunks + const processor = audioContext.createScriptProcessor(4096, 1, 1); + + const url = new URL("wss://api.smallest.ai/waves/v1/pulse/get_text"); + url.searchParams.append("language", "en"); + url.searchParams.append("encoding", "linear16"); + url.searchParams.append("sample_rate", "16000"); + + const ws = new WebSocket(url.toString()); + + ws.onopen = () => { + console.log("Connected, starting microphone stream"); + + processor.onaudioprocess = (e) => { + const inputData = e.inputBuffer.getChannelData(0); + // Convert Float32Array to Int16Array + const int16Data = new Int16Array(inputData.length); + for (let i = 0; i < inputData.length; i++) { + int16Data[i] = Math.max(-32768, Math.min(32767, inputData[i] * 32768)); + } + ws.send(int16Data.buffer); + }; + + source.connect(processor); + processor.connect(audioContext.destination); + }; + + ws.onmessage = (event) => { + const message = JSON.parse(event.data); + if (message.is_final) { + console.log("Transcript:", message.full_transcript); + } + }; + + // Stop streaming after 30 seconds (example) + setTimeout(() => { + processor.disconnect(); + source.disconnect(); + stream.getTracks().forEach(track => track.stop()); + ws.send(JSON.stringify({ type: "finalize" })); + ws.close(); + }, 30000); +} + +// Start streaming +streamMicrophone().catch(console.error); +``` + +
+ +## Handling Responses + +The WebSocket API sends JSON messages with the following structure: + +```json +{ + "session_id": "sess_12345abcde", + "transcript": "Hello, how are you?", + "full_transcript": "Hello, how are you?", + "is_final": true, + "is_last": false, + "language": "en", + "word_timestamps": [ + { + "word": "Hello", + "start": 0.0, + "end": 0.5 + } + ] +} +``` + +### Key Response Fields + +- **`is_final`**: `false` indicates a partial/interim transcript; `true` indicates a final transcript +- **`is_last`**: `true` when the session is complete +- **`transcript`**: Current segment text +- **`full_transcript`**: Accumulated text from the entire session +- **`word_timestamps`**: Only included when `word_timestamps=true` in query params + +### Browser +No additional dependencies required - uses native WebSocket API. + +## Error Handling + +Always implement proper error handling for production use: + +```javascript +ws.onerror = (error) => { + console.error("WebSocket error:", error); + // Implement retry logic or user notification +}; + +ws.onclose = (event) => { + if (event.code !== 1000) { // Not a normal closure + console.error(`Unexpected closure: ${event.code} - ${event.reason}`); + // Implement reconnection logic + } +}; +``` + diff --git a/fern/products/waves/versions/v4.0.0/speech-to-text/realtime/features.mdx b/fern/products/waves/versions/v4.0.0/speech-to-text/realtime/features.mdx new file mode 100644 index 0000000..4f22d82 --- /dev/null +++ b/fern/products/waves/versions/v4.0.0/speech-to-text/realtime/features.mdx @@ -0,0 +1,33 @@ +--- +title: 'Features' +description: 'Available features for Real-Time Pulse STT WebSocket API' +--- + +The Real-Time Pulse STT WebSocket API supports the following features: + +## Available Features + + + + Get precise timing information for each word in the transcription with confidence scores + + + Automatically detect the language of the audio + + + Get sentence-level transcription segments with timing 
information + + + Automatically redact personally identifiable information and payment card information + + + Get cumulative transcript received up to this point in responses where is_final is true + + + Control how numbers are formatted in transcriptions (digits, words, or auto-detect) + + + Identify and label different speakers in the audio with speaker confidence scores + + + diff --git a/fern/products/waves/versions/v4.0.0/speech-to-text/realtime/quickstart.mdx b/fern/products/waves/versions/v4.0.0/speech-to-text/realtime/quickstart.mdx new file mode 100644 index 0000000..252e869 --- /dev/null +++ b/fern/products/waves/versions/v4.0.0/speech-to-text/realtime/quickstart.mdx @@ -0,0 +1,144 @@ +--- +title: 'Quickstart' +description: 'Get started with real-time transcription using the Pulse STT WebSocket API' +--- + +This guide shows you how to transcribe streaming audio using Smallest AI's Pulse STT model via the WebSocket API. The Pulse model provides state-of-the-art low latencies (64ms) for TTFT (Time to First Transcript), making it an ideal choice for speech-to-text conversion during live conversations. + +# Real-Time Audio Transcription + +The Real-Time API allows you to stream audio data and receive transcription results as the audio is processed. This is ideal for live conversations, voice assistants, and scenarios where you need immediate transcription feedback. For these scenarios, where minimizing latency is critical, stream audio in chunks of a few kilobytes over a live connection. + +## When to Use Real-Time Transcription + +- **Live conversations**: Transcribe phone calls, video conferences, or live events. +- **Voice assistants**: Build interactive voice applications that respond immediately. +- **Streaming workflows**: Process audio as it is being captured or generated. +- **Low-latency requirements**: When you need transcription results with minimal delay. 
+ +## Endpoint + +``` +WSS wss://api.smallest.ai/waves/v1/pulse/get_text +``` + +## Authentication + +Head over to the [smallest console](https://app.smallest.ai/dashboard/settings/apikeys?utm_source=documentation&utm_medium=speech-to-text) to generate an API key if not done previously. Also look at [Authentication guide](/waves/documentation/getting-started/authentication) for more information about API keys and their usage. + +Include your API key in the Authorization header when establishing the WebSocket connection: + +```http +Authorization: Bearer SMALLEST_API_KEY +``` + +## Example Connection + + +```javascript JavaScript +const API_KEY = "SMALLEST_API_KEY"; + +const url = new URL("wss://api.smallest.ai/waves/v1/pulse/get_text"); +url.searchParams.append("language", "en"); +url.searchParams.append("encoding", "linear16"); +url.searchParams.append("sample_rate", "16000"); +url.searchParams.append("word_timestamps", "true"); + +const ws = new WebSocket(url.toString(), { + headers: { + Authorization: `Bearer ${API_KEY}`, + }, +}); + +ws.onopen = () => { + console.log("Connected to STT WebSocket"); + // Start streaming audio chunks +}; + +ws.onmessage = (event) => { + const data = JSON.parse(event.data); + console.log("Transcript:", data.transcript); + console.log("Full transcript:", data.full_transcript); + console.log("Is final:", data.is_final); +}; +``` + +```python Python +import asyncio +import websockets +import json +from urllib.parse import urlencode + +BASE_WS_URL = "wss://api.smallest.ai/waves/v1/pulse/get_text" +params = { + "language": "en", + "encoding": "linear16", + "sample_rate": "16000", + "word_timestamps": "true" +} +WS_URL = f"{BASE_WS_URL}?{urlencode(params)}" + +API_KEY = "SMALLEST_API_KEY" + +async def connect(): + headers = { + "Authorization": f"Bearer {API_KEY}" + } + + async with websockets.connect(WS_URL, additional_headers=headers) as ws: + print("Connected to STT WebSocket") + + # Send audio chunks + # audio_chunk = b"..." 
+ # await ws.send(audio_chunk) + + # Listen for transcriptions + async for message in ws: + data = json.loads(message) + print(f"Transcript: {data.get('transcript')}") + print(f"Is final: {data.get('is_final')}") + +asyncio.run(connect()) +``` + + +## Example Response + +The server responds with JSON messages containing transcription results: + +```json +{ + "session_id": "sess_12345abcde", + "transcript": "Hello, how are you?", + "is_final": true, + "is_last": false, + "language": "en" +} +``` + +For detailed information about response fields, see the [response format documentation](/waves/documentation/speech-to-text/realtime/response-format). + +## Streaming Audio + +Send raw audio bytes as binary WebSocket messages. The recommended chunk size is 4096 bytes: + +```javascript +const audioChunk = new Uint8Array(4096); +ws.send(audioChunk); +``` + +When you're done streaming, send an end signal: + +```json +{ + "type": "finalize" +} +``` + +## Next Steps + +- Learn about [supported audio formats](/waves/documentation/speech-to-text/realtime/audio-formats) for WebSocket streaming. +- Review complete [code examples](/waves/documentation/speech-to-text/realtime/code-examples) for Python, Node.js, and Browser JavaScript. +- Follow [best practices](/waves/documentation/speech-to-text/realtime/best-practices) for optimal streaming performance. +- Troubleshoot common issues in the [troubleshooting guide](/waves/documentation/speech-to-text/realtime/troubleshooting). 
+ diff --git a/fern/products/waves/versions/v4.0.0/speech-to-text/realtime/response-format.mdx b/fern/products/waves/versions/v4.0.0/speech-to-text/realtime/response-format.mdx new file mode 100644 index 0000000..2455c15 --- /dev/null +++ b/fern/products/waves/versions/v4.0.0/speech-to-text/realtime/response-format.mdx @@ -0,0 +1,109 @@ +--- +title: 'Response Format' +description: 'Understanding the structure and fields of real-time transcription responses' +--- + +For every chunk sent on the WebSocket, the server responds with a JSON message. Users can structure response handling according to their needs. Users can choose to read quick responses with lower accuracy or wait until the server sends larger responses that are highly accurate. + +## Example Response + +```json +{ + "type": "transcription", + "status": "success", + "session_id": "00000000-0000-0000-0000-000000000000", + "transcript": "Hello, how are you?", + "is_final": false, + "is_last": false +} +``` + +## Response Fields + +- **`type`**: Message type identifier, set to `"transcription"` for transcription results. +- **`status`**: Status of the transcription request, typically `"success"` for valid responses. +- **`session_id`**: Unique identifier for the transcription session. +- **`transcript`**: Partial or complete transcription text for the current segment. +- **`is_final`**: Indicates if this is the final transcription for the current segment. `false` indicates a partial/interim transcript; `true` indicates a final transcript. +- **`is_last`**: Indicates if this is the last transcription in the session. `true` when the session is complete. + +### Optional Fields + +The following fields may be included in responses under certain conditions: + +- **`full_transcript`**: Complete transcription text accumulated so far. Only included when `full_transcript=true` query parameter is set AND `is_final=true`. +- **`language`**: Detected primary language code. Only returned when `is_final=true`. 
+- **`languages`**: Array of language codes detected in the audio. Only returned when `is_final=true`. +- **`words`**: Array of word-level timestamps (only included when `word_timestamps=true` in query parameters). Each word object contains `word`, `start`, `end`, and `confidence` fields. When `diarize=true`, also includes `speaker` (integer ID) and `speaker_confidence` (0.0 to 1.0) fields. +- **`utterances`**: Array of sentence-level timestamps (only included when `sentence_timestamps=true` in query parameters). Each utterance object contains `text`, `start`, and `end` fields. When `diarize=true`, also includes `speaker` (integer ID) field. +- **`redacted_entities`**: Array of redacted entity placeholders (only included when `redact_pii=true` or `redact_pci=true`). Examples: `[FIRSTNAME_1]`, `[CREDITCARDCVV_1]`. + +## Handling Responses + +We maintain an internal server-side buffer that collects chunked audio sent by the user. Once this buffer reaches a specific size, the server sends a special response with the `is_final` parameter set to `true` that contains the transcription of user audio collected since the last such response. + +### `is_final = true` + +We recommend processing responses of this kind for optimal transcription accuracy. The internal buffer size is calibrated to optimize response times and accuracy. + +```json +{ + "type": "transcription", + "status": "success", + "session_id": "00000000-0000-0000-0000-000000000000", + "transcript": "Should I do it? ", + "is_final": true, + "is_last": false, + "full_transcript": "Hello. Should I do it?", + "language": "en", + "languages": ["en"] +} +``` + +- Additionally, the `language` field is set to the specified language, or the detected language if the language parameter is set to `multi`. Other responses will not include the `language` field. +- The `full_transcript` is non-empty if the user sends the finalize token `{"type":"finalize"}` to signal end of session. 
+ +### `is_final = false` + +These are interim transcript responses sent for each chunk. They provide quick feedback for low latency use cases. + +```json +{ + "type": "transcription", + "status": "success", + "session_id": "00000000-0000-0000-0000-000000000001", + "transcript": "Yeah.", + "is_final": false, + "is_last": false +} +``` + +- These responses may provide inaccurate results for the most recent words. This occurs when the audio for these words is not fully sent to the server in the respective chunk. + + +The `full_transcript` field is a feature that requires the `full_transcript` query parameter to be set to `true`. Learn more about the [Full Transcript feature](/waves/documentation/speech-to-text/features/full-transcript). + + +### `is_last = true` + +This response is similar to an `is_final=true` response, but it is the final response received after the user sends the finalize token `{"type":"finalize"}`. When `is_last=true`, the server has finished processing all audio and the session is complete. + +```json +{ + "type": "transcription", + "status": "success", + "session_id": "00000000-0000-0000-0000-000000000000", + "transcript": "Goodbye!", + "is_final": true, + "is_last": true, + "full_transcript": "Hello. Should I do it? Goodbye!", + "language": "en", + "languages": ["en"] +} +``` + +- This is the last response of the live transcription session and contains all the fields of the `is_final=true` response. + + +Do not close the WebSocket connection immediately after sending the finalize token. Wait for this `is_last=true` response to ensure all audio has been processed and you receive the complete transcript. 
+ diff --git a/fern/products/waves/versions/v4.0.0/speech-to-text/realtime/troubleshooting.mdx b/fern/products/waves/versions/v4.0.0/speech-to-text/realtime/troubleshooting.mdx new file mode 100644 index 0000000..4bdd358 --- /dev/null +++ b/fern/products/waves/versions/v4.0.0/speech-to-text/realtime/troubleshooting.mdx @@ -0,0 +1,344 @@ +--- +title: 'Troubleshooting' +description: 'Common issues and solutions for real-time WebSocket transcription' +--- + +## Connection Issues + +### Cannot Establish WebSocket Connection + +**Symptoms**: Connection fails immediately or times out + +**Possible Causes**: +- Invalid API key +- Network/firewall blocking WebSocket connections +- Incorrect WebSocket URL + +**Solutions**: + +1. **Verify API Key**: + ```javascript + // Ensure Authorization header is correct + headers: { + Authorization: `Bearer ${API_KEY}` // Not "Token" or "Key" + } + ``` + +2. **Check WebSocket URL**: + ```javascript + // Correct URL format + const url = new URL("wss://api.smallest.ai/waves/v1/pulse/get_text"); + // Not "ws://" or "https://" + ``` + +3. **Test Network Connectivity**: + ```bash + # Test WebSocket endpoint + curl -i -N -H "Connection: Upgrade" \ + -H "Upgrade: websocket" \ + -H "Sec-WebSocket-Version: 13" \ + -H "Sec-WebSocket-Key: test" \ + https://api.smallest.ai/waves/v1/pulse/get_text + ``` + +### Connection Drops Unexpectedly + +**Symptoms**: Connection closes without sending end signal + +**Possible Causes**: +- Network instability +- Server timeout +- Invalid audio data + +**Solutions**: + +1. **Implement Reconnection Logic**: + ```javascript + let ws; + let reconnectAttempts = 0; + + function connect() { + ws = new WebSocket(url.toString(), { headers }); + + ws.onclose = (event) => { + if (event.code !== 1000 && reconnectAttempts < 5) { + setTimeout(connect, 1000 * Math.pow(2, reconnectAttempts++)); + } + }; + } + ``` + +2. 
**Monitor Connection State**: + ```javascript + setInterval(() => { + if (ws.readyState !== WebSocket.OPEN) { + console.warn("Connection not open, state:", ws.readyState); + } + }, 5000); + ``` + +## Audio Encoding Problems + +### No Transcription Received + +**Symptoms**: Connection established but no transcript messages + +**Possible Causes**: +- Incorrect audio encoding +- Mismatched sample rate +- Invalid audio format + +**Solutions**: + +1. **Verify Encoding Parameters**: + ```javascript + // Ensure encoding matches your audio format + url.searchParams.append("encoding", "linear16"); // or linear32, alaw, mulaw, opus, ogg_opus + url.searchParams.append("sample_rate", "16000"); // Must match audio + ``` + +2. **Check Audio Format**: + ```python + import soundfile as sf + + audio, sample_rate = sf.read("audio.wav") + print(f"Sample rate: {sample_rate}") + print(f"Channels: {audio.shape}") + print(f"Format: {audio.dtype}") + # Should be: 16000 Hz, mono, int16 + ``` + +3. **Validate Chunk Format**: + ```javascript + // Ensure chunks are binary (not base64 or text) + ws.send(audioChunk); // Uint8Array or ArrayBuffer + // Not: ws.send(JSON.stringify(audioChunk)) + ``` + +### Poor Transcription Quality + +**Symptoms**: Incorrect or garbled transcriptions + +**Possible Causes**: +- Wrong sample rate +- Incorrect encoding +- Audio quality issues + +**Solutions**: + +1. **Match Sample Rate**: + ```javascript + // Audio must match sample_rate parameter + const audioContext = new AudioContext({ sampleRate: 16000 }); + url.searchParams.append("sample_rate", "16000"); + ``` + +2. **Use Recommended Encoding**: + ```javascript + // Use linear16 for best quality + url.searchParams.append("encoding", "linear16"); + ``` + +3. 
**Preprocess Audio**: + ```python + # Normalize and clean audio before streaming + audio = audio / np.abs(audio).max() * 0.95 # Normalize + audio = audio.astype(np.int16) # Convert to int16 + ``` + +## Response Handling Issues + +### Partial Transcripts Not Updating + +**Symptoms**: Only final transcripts received, no partial updates + +**Possible Causes**: +- Not checking `is_final` field +- Filtering out partial messages + +**Solutions**: + +1. **Handle Both Types**: + ```javascript + ws.onmessage = (event) => { + const message = JSON.parse(event.data); + + if (!message.is_final) { + // Show partial transcript + displayPartial(message.transcript); + } else { + // Show final transcript + displayFinal(message.full_transcript); + } + }; + ``` + +## Performance Issues + +### High Latency + +**Symptoms**: Long delay between speaking and transcription + +**Possible Causes**: +- Chunk size too large +- Streaming rate too slow +- Network latency + +**Solutions**: + +1. **Optimize Chunk Size**: + ```javascript + // Use recommended 4096 bytes + const chunkSize = 4096; + ``` + +2. **Increase Streaming Rate**: + ```javascript + // Send chunks every 50ms + setTimeout(sendChunk, 50); // Not 200ms or longer + ``` + +3. **Check Network**: + ```bash + # Test latency to API + ping api.smallest.ai + ``` + +### Memory Issues + +**Symptoms**: Application crashes or slows down during streaming + +**Possible Causes**: +- Buffering too much audio +- Not cleaning up resources + +**Solutions**: + +1. **Stream Directly**: + ```javascript + // Stream chunks immediately, don't buffer entire file + const stream = fs.createReadStream("audio.wav", { highWaterMark: 4096 }); + stream.on("data", (chunk) => ws.send(chunk)); + ``` + +2. 
**Clean Up Resources**: + ```javascript + ws.onclose = () => { + // Clean up audio resources + if (audioContext) audioContext.close(); + if (stream) stream.destroy(); + }; + ``` + +## Browser-Specific Issues + +### CORS Errors + +**Symptoms**: WebSocket connection blocked in browser + +**Solutions**: + +1. **Use WSS (Secure WebSocket)**: + ```javascript + // Always use wss:// in browsers + const url = new URL("wss://api.smallest.ai/waves/v1/pulse/get_text"); + ``` + +2. **Handle Authentication**: + ```javascript + // Note: Browsers may not support custom headers in WebSocket + // Consider using query parameter for API key (if supported) + // Or use a proxy server for authentication + ``` + +### Microphone Access Denied + +**Symptoms**: Cannot access microphone in browser + +**Solutions**: + +1. **Request Permissions**: + ```javascript + navigator.mediaDevices.getUserMedia({ audio: true }) + .then(stream => { + // Use stream + }) + .catch(error => { + console.error("Microphone access denied:", error); + // Show user instructions + }); + ``` + +2. 
**Handle HTTPS Requirement**: + - Microphone access requires HTTPS (or localhost) + - Ensure your page is served over HTTPS + +## Debugging Tips + +### Enable Verbose Logging + +```javascript +ws.onopen = () => console.log("✓ Connected"); +ws.onmessage = (event) => { + console.log("📨 Received:", JSON.parse(event.data)); +}; +ws.onerror = (error) => console.error("✗ Error:", error); +ws.onclose = (event) => console.log("✗ Closed:", event.code, event.reason); +``` + +### Validate Audio Before Sending + +```javascript +function validateAudioChunk(chunk) { + if (!(chunk instanceof ArrayBuffer || chunk instanceof Uint8Array)) { + throw new Error("Audio chunk must be ArrayBuffer or Uint8Array"); + } + if (chunk.byteLength === 0) { + throw new Error("Audio chunk cannot be empty"); + } + if (chunk.byteLength > 8192) { + console.warn("Chunk size exceeds recommended 4096 bytes"); + } + return true; +} +``` + +### Monitor Session State + +```javascript +let sessionStats = { + chunksSent: 0, + messagesReceived: 0, + partialCount: 0, + finalCount: 0 +}; + +ws.onmessage = (event) => { + sessionStats.messagesReceived++; + const message = JSON.parse(event.data); + if (message.is_final) { + sessionStats.finalCount++; + } else { + sessionStats.partialCount++; + } + console.log("Session stats:", sessionStats); +}; +``` + +## Getting Help + +If you continue to experience issues: + +1. **Check API Status**: Verify the API is operational +2. **Review Documentation**: Ensure you're following the correct API version +3. **Test with Sample Code**: Use the provided examples as a baseline +4. **Note down**: Reach out with: + - Session ID (if available) + - Error messages + - Code snippets (sanitized of API keys) + - Network conditions (if applicable) + +5. 
**Reach out to us:** + - Email: [support@smallest.ai](mailto:support@smallest.ai) + - Discord: [Join our community](https://discord.gg/5evETqguJs) + diff --git a/fern/products/waves/versions/v4.0.0/text-to-speech/get-voice-models-langs.mdx b/fern/products/waves/versions/v4.0.0/text-to-speech/get-voice-models-langs.mdx index 742c613..da640a4 100644 --- a/fern/products/waves/versions/v4.0.0/text-to-speech/get-voice-models-langs.mdx +++ b/fern/products/waves/versions/v4.0.0/text-to-speech/get-voice-models-langs.mdx @@ -11,7 +11,7 @@ In this tutorial, you will learn how to use the Smallest AI platform to fetch th Before you begin, ensure you have the following: - Python (3.9 or higher) installed on your machine. -- An API key from the Smallest AI platform (sign up [here](https://waves.smallest.ai)). +- An API key from the Smallest AI [platform](https://app.smallest.ai/dashboard/settings/apikeys?utm_source=documentation&utm_medium=text-to-speech). - The Smallest AI Python SDK installed. If you haven't installed it yet, follow the instructions below: ### Install the SDK @@ -39,9 +39,9 @@ def main(): languages = client.get_languages() print(f"Available Languages: {languages}") - # Get available voices for the "lightning" model, alternatively `lightning-large` - voices = client.get_voices(model="lightning") - print(f"Available Voices (Model: 'lightning'): {voices}") + # Get available voices for the "lightning-v3.1" model + voices = client.get_voices(model="lightning-v3.1") + print(f"Available Voices (Model: 'lightning-v3.1'): {voices}") # Get user-specific cloned voices cloned_voices = client.get_cloned_voices() @@ -58,10 +58,10 @@ if __name__ == "__main__": ## Explanation of Functions -- `get_languages()`: Retrieves the list of supported languages for Text-to-Speech. -- `get_voices(model="model_name")`: Retrieves the voices available for a specific model (e.g., "lightning"). -- `get_cloned_voices()`: Fetches all user-specific cloned voices. 
-- `get_models()`: Retrieves the TTS models on the platform available through API. +- `get_languages()`: Retrieves the list of supported languages for Text-to-Speech. +- `get_voices(model="model_name")`: Retrieves the voices available for a specific model (e.g., "lightning-v3.1"). +- `get_cloned_voices()`: Fetches all user-specific cloned voices. +- `get_models()`: Retrieves the TTS models on the platform available through API. ## Need Help? diff --git a/fern/products/waves/versions/v4.0.0/text-to-speech/how-to-tts.mdx b/fern/products/waves/versions/v4.0.0/text-to-speech/how-to-tts.mdx index e92efe1..6c07159 100644 --- a/fern/products/waves/versions/v4.0.0/text-to-speech/how-to-tts.mdx +++ b/fern/products/waves/versions/v4.0.0/text-to-speech/how-to-tts.mdx @@ -1,5 +1,5 @@ --- -title: "How to use Text to Speech" +title: "How to use Text to Speech (TTS)" description: "Learn how to synthesize your text using the Smallest AI API." icon: "wave-square" --- @@ -13,7 +13,7 @@ You can access the source code for the Python SDK on our [GitHub repository](htt Before you begin, ensure you have the following: - Python (3.9 or higher) installed on your machine. -- An API key from the Smallest AI platform (sign up [here](https://waves.smallest.ai)). +- An API key from the Smallest AI [platform](https://app.smallest.ai/dashboard/settings/apikeys?utm_source=documentation&utm_medium=text-to-speech). ## Setup @@ -29,10 +29,8 @@ export SMALLEST_API_KEY=YOUR_API_KEY ## Synchronous Text to Speech -Here is an example of how to synthesize text to speech synchronously: ' - -If you are using a `voice_id` corresponding to a voice clone, you should explicitly set the `model` parameter to `"lightning-large"` in the `Smallest` client or payload. 
- +Here is an example of how to synthesize text to speech synchronously: + ```python python from smallestai.waves import WavesClient @@ -52,10 +50,8 @@ if __name__ == "__main__": ## Asynchronous Text to Speech -Here is an example of how to synthesize text to speech asynchronously: - -If you are using a `voice_id` corresponding to a voice clone, you should explicitly set the `model` parameter to `"lightning-large"` in the `Smallest` client or payload. - +Here is an example of how to synthesize text to speech asynchronously: + ```python python import asyncio @@ -77,14 +73,11 @@ if __name__ == "__main__": ## Parameters - `api_key` (str): Your API key (can be set via SMALLEST_API_KEY environment variable) -- `model` (str): TTS model to use (default: `lightning`, available: `lightning`, `lightning-large`) +- `model` (str): TTS model to use (default: `lightning-v3.1`, available: `lightning-v2`, `lightning-v3.1`) - `sample_rate` (int): Audio sample rate (default: 24000) - `voice_id` (str): Voice ID (default: "emily") - `speed` (float): Speech speed multiplier (default: 1.0) -- `consistency` (float): Controls word repetition and skipping. Decrease it to prevent skipped words, and increase it to prevent repetition. Only supported in `lightning-large` model. (default: 0.5) -- `similarity` (float): Controls the similarity between the synthesized audio and the reference audio. Increase it to make the speech more similar to the reference audio. Only supported in `lightning-large` model. (default: 0) -- `enhancement` (boolean): Enhances speech quality at the cost of increased latency. Only supported in `lightning-large` model. (default: False) -- `language` (str): Language code, available languages can be found [here](/v4.0.0/content/getting-started/models) (default: "en") +- `language` (str): Language code for text normalization (e.g., how numbers, dates, and abbreviations are spelled out). Set to "auto" for automatic language detection, or specify a language code. 
Available languages can be found [here](/waves/documentation/getting-started/models) (default: "auto") - `output_format` (str): The format of the output audio. Available options: `pcm`, `mp3`, `wav`, `mulaw` (default: "wav") These parameters are part of the `Smallest` and `AsyncSmallest` instance. They can be set when creating the instance (as shown above). However, the `synthesize` function also accepts `kwargs`, allowing you to override any of these parameters on a per-request basis. diff --git a/fern/products/waves/versions/v4.0.0/text-to-speech/overview.mdx b/fern/products/waves/versions/v4.0.0/text-to-speech/overview.mdx new file mode 100644 index 0000000..6905d12 --- /dev/null +++ b/fern/products/waves/versions/v4.0.0/text-to-speech/overview.mdx @@ -0,0 +1,191 @@ +--- +title: 'Overview' +description: 'Convert text to natural-sounding speech with the Waves TTS API - ultra-low latency synthesis with voice cloning and streaming support' +icon: 'waveform-lines' +--- + +The Waves Text-to-Speech (TTS) API converts text into natural, expressive audio via `https://api.smallest.ai/waves/v1`. With low latencies and support for 16+ languages, it's built for real-time applications like voice assistants, interactive bots, and live content generation. + + + Get started in minutes. Learn how to get your API key and generate your first audio. + + +## Synthesis Modes + +Choose the synthesis mode that best fits your application's needs: + + + + Generate complete audio files with a single HTTP request. Ideal for pre-rendering content, batch processing, and applications where immediate streaming isn't required. + + + Receive audio chunks as they're generated via WebSocket. Perfect for real-time voice assistants, live narration, and low-latency conversational AI. + + + +## Available Models + + + + High-quality multilingual TTS with 100ms TTFB. Supports 16+ languages including English, Hindi, and European languages. Includes voice cloning support. 
+ + + Our most natural-sounding model with 44 kHz audio output. Ultra-low latency with expressive, human-like speech. Supports English, Hindi, Tamil, and Spanish with voice cloning. + + + +## Feature Highlights + + + + Optimized streaming pipeline delivers sub-100ms time-to-first-byte (TTFB) for real-time applications. Lightning v3.1 achieves even faster response times for conversational AI. + + + + Create custom voice profiles by uploading audio samples. Instant voice cloning works with just a few seconds of audio, while professional voice cloning delivers studio-quality results. + + + + Comprehensive language support including English, Hindi, Tamil, Kannada, Malayalam, Telugu, Gujarati, Bengali, Marathi, German, French, Spanish, Italian, Polish, Dutch, and Russian. + + + + Choose from PCM, WAV, MP3, or μ-law encoding. Configurable sample rates from 8kHz to 44kHz to match your application's requirements. + + + + Adjust speech rate with a simple multiplier. Slow down for clarity or speed up for faster content delivery without pitch distortion. + + + + Define custom pronunciations for brand names, technical terms, and acronyms. Ensure consistent, accurate pronunciation across all synthesized audio. + + + + Lightning v3.1 produces 44 kHz audio with natural prosody and expressiveness. Perfect for audiobooks, podcasts, and premium voice experiences. + + + + Persistent connections for continuous audio streaming. Ideal for voice bots and interactive applications where latency is critical. + + + +## Supported Languages + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+| Language  | Code | Lightning v2 | Lightning v3.1 |
+| --------- | ---- | ------------ | -------------- |
+| English   | en   | Yes          | Yes            |
+| Hindi     | hi   | Yes          | Yes            |
+| Tamil     | ta   | Yes          | Yes            |
+| Kannada   | kn   | Yes          |                |
+| Malayalam | ml   | Yes          |                |
+| Telugu    | te   | Yes          |                |
+| Gujarati  | gu   | Yes          |                |
+| Bengali   | bn   | Yes          |                |
+| Marathi   | mr   | Yes          |                |
+| German    | de   | Yes          |                |
+| French    | fr   | Yes          |                |
+| Spanish   | es   | Yes          | Yes            |
+| Italian   | it   | Yes          |                |
+| Polish    | pl   | Yes          |                |
+| Dutch     | nl   | Yes          |                |
+| Russian   | ru   | Yes          |                |
+ +## Next Steps + +- Generate your first audio in the [TTS Quickstart](/waves/documentation/text-to-speech/quickstart) +- Learn synchronous and async synthesis in [How to use TTS](/waves/documentation/text-to-speech/how-to-tts) +- Set up real-time streaming with [WebSocket TTS](/waves/documentation/text-to-speech/stream-tts) +- Clone a voice with our [Voice Cloning guide](/waves/documentation/voice-cloning/how-to-vc) +- Add custom pronunciations with [Pronunciation Dictionaries](/waves/documentation/text-to-speech/pronunciation-dictionaries) + diff --git a/fern/products/waves/versions/v4.0.0/text-to-speech/pronunciation-dictionaries.mdx b/fern/products/waves/versions/v4.0.0/text-to-speech/pronunciation-dictionaries.mdx index 156f4a8..ee534f7 100644 --- a/fern/products/waves/versions/v4.0.0/text-to-speech/pronunciation-dictionaries.mdx +++ b/fern/products/waves/versions/v4.0.0/text-to-speech/pronunciation-dictionaries.mdx @@ -28,7 +28,7 @@ A pronunciation dictionary is a collection of word-pronunciation pairs that you First, create a pronunciation dictionary with your custom word-pronunciation pairs: ```bash -curl -X POST "https://api.waves.com/api/v1/pronunciation-dicts" \ +curl -X POST "https://api.smallest.ai/waves/v1/pronunciation-dicts" \ -H "Authorization: Bearer YOUR_API_KEY" \ -H "Content-Type: application/json" \ -d '{ @@ -87,7 +87,7 @@ const dictionaryId = "64f1234567890abcdef12345"; // Save this! 
Retrieve all your pronunciation dictionaries: ```bash -curl -X GET "https://api.waves.com/api/v1/pronunciation-dicts" \ +curl -X GET "https://api.smallest.ai/waves/v1/pronunciation-dicts" \ -H "Authorization: Bearer YOUR_API_KEY" ``` @@ -96,7 +96,7 @@ curl -X GET "https://api.waves.com/api/v1/pronunciation-dicts" \ Modify an existing pronunciation dictionary: ```bash -curl -X PUT "https://api.waves.com/api/v1/pronunciation-dicts" \ +curl -X PUT "https://api.smallest.ai/waves/v1/pronunciation-dicts" \ -H "Authorization: Bearer YOUR_API_KEY" \ -H "Content-Type: application/json" \ -d '{ @@ -115,7 +115,7 @@ curl -X PUT "https://api.waves.com/api/v1/pronunciation-dicts" \ Remove a pronunciation dictionary: ```bash -curl -X DELETE "https://api.waves.com/api/v1/pronunciation-dicts" \ +curl -X DELETE "https://api.smallest.ai/waves/v1/pronunciation-dicts" \ -H "Authorization: Bearer YOUR_API_KEY" \ -H "Content-Type: application/json" \ -d '{ @@ -127,10 +127,10 @@ curl -X DELETE "https://api.waves.com/api/v1/pronunciation-dicts" \ Once you have created a pronunciation dictionary and obtained its ID, you can use it in your TTS requests by including the `pronunciation_dicts` parameter. 
This parameter accepts an array of dictionary IDs, allowing you to use multiple pronunciation dictionaries in a single request: -### Lightning Model Example +### Example ```bash -curl -X POST "https://api.waves.com/api/v1/lightning" \ +curl -X POST "https://api.smallest.ai/waves/v1/lightning-v3.1/get_speech" \ -H "Authorization: Bearer YOUR_API_KEY" \ -H "Content-Type: application/json" \ -d '{ @@ -139,28 +139,7 @@ curl -X POST "https://api.waves.com/api/v1/lightning" \ "pronunciation_dicts": ["64f1234567890abcdef12345"], "sample_rate": 24000, "speed": 1.0, - "language": "en", - "output_format": "wav" - }' -``` - -### Lightning Large Model Example - -```bash -curl -X POST "https://api.waves.com/api/v1/lightning-large" \ - -H "Authorization: Bearer YOUR_API_KEY" \ - -H "Content-Type: application/json" \ - -d '{ - "text": "The Waves API makes TTS integration simple.", - "voice_id": "your_voice_id", - "pronunciation_dicts": ["64f1234567890abcdef12345"], - "sample_rate": 24000, - "speed": 1.0, - "consistency": 0.5, - "similarity": 0.0, - "enhancement": 1, - "language": "en", - "output_format": "wav" + "language": "en" }' ``` @@ -169,7 +148,7 @@ curl -X POST "https://api.waves.com/api/v1/lightning-large" \ You can also use multiple pronunciation dictionaries in a single request by providing an array of dictionary IDs: ```bash -curl -X POST "https://api.waves.com/api/v1/lightning" \ +curl -X POST "https://api.smallest.ai/waves/v1/lightning-v3.1/get_speech" \ -H "Authorization: Bearer YOUR_API_KEY" \ -H "Content-Type: application/json" \ -d '{ @@ -196,7 +175,7 @@ import json # Your API configuration API_KEY = "your_api_key_here" -BASE_URL = "https://api.waves.com/api/v1" +BASE_URL = "https://api.smallest.ai/waves/v1" headers = { "Authorization": f"Bearer {API_KEY}", "Content-Type": "application/json" @@ -236,7 +215,7 @@ tts_request = { # Generate speech with custom pronunciations audio_response = requests.post( - f"{BASE_URL}/lightning", + 
f"{BASE_URL}/lightning-v3.1/get_speech", headers=headers, json=tts_request ) @@ -329,6 +308,6 @@ The API will return specific error messages for common issues: ## Next Steps -- Explore the [API Reference](/v4.0.0/content/api-references/pronunciations-dict/create) for detailed parameter information -- Check out [TTS Best Practices](/v4.0.0/content/best-practices/tts-best-practices) for optimization tips -- Learn about [Voice Cloning](/v4.0.0/content/voice-cloning/how-to-vc) to create custom voices +- Explore the [API Reference](/waves/documentation/api-references/pronunciations-dict/create) for detailed parameter information +- Check out [TTS Best Practices](/waves/documentation/best-practices/tts-best-practices) for optimization tips +- Learn about [Voice Cloning](/waves/documentation/voice-cloning/how-to-vc) to create custom voices diff --git a/fern/products/waves/versions/v4.0.0/text-to-speech/quickstart.mdx b/fern/products/waves/versions/v4.0.0/text-to-speech/quickstart.mdx new file mode 100644 index 0000000..0e611a2 --- /dev/null +++ b/fern/products/waves/versions/v4.0.0/text-to-speech/quickstart.mdx @@ -0,0 +1,57 @@ +--- +title: "Quickstart" +description: "Welcome to the Smallest AI platform! This guide will help you get started quickly with generating your first text-to-speech using Python and Smallest AI API. " +icon: "rocket" +--- + +## Step 1: Sign Up & get the API Key + +1. Visit the [platform](https://app.smallest.ai/dashboard/settings/apikeys?utm_source=documentation&utm_medium=text-to-speech) and sign up for an account or log in if you already have an account. +2. Navigate to `API Key` tab in your account dashboard (Manage Plan section). +3. Create a new API Key and copy it. +4. Export the API Key in your environment with the name `SMALLEST_API_KEY`, ensuring that your application can access it securely for authentication. 
+ +## Step 2: Install the SDK + +To install the latest version available: +```bash +pip install smallestai +``` +When using an SDK in your application, make sure to pin to at least the major version (e.g., `==1.*`). This helps ensure your application remains stable and avoids potential issues from breaking changes in future updates. + +## Step 3: Make Your First API Call +Here is a basic example of how to use the Python SDK to convert text to speech: + + +```python python +from smallestai.waves import WavesClient + +def main(): + client = WavesClient(api_key="YOUR_API_KEY") + audio = client.synthesize( + "Modern problems require modern solutions.", + sample_rate=24000, + speed=1.0 + ) + with open("output.wav", "wb") as f: + f.write(audio) + +if __name__ == "__main__": + main() +``` + + +Replace `YOUR_API_KEY` with the API key you obtained in Step 1. + +## Step 4: Explore More Features + +- **Voice Cloning:** Create custom voice profiles by uploading voice samples. +- **Advanced Configurations:** Customize speech synthesis with different voices, speeds, and sample rates. + +For detailed documentation on all available features and endpoints, visit our API Reference. + +### Need Help? +If you have any questions or need assistance, please contact our support team at [support@smallest.ai](mailto:support@smallest.ai). + + + diff --git a/fern/products/waves/versions/v4.0.0/text-to-speech/stream-tts.mdx b/fern/products/waves/versions/v4.0.0/text-to-speech/stream-tts.mdx index f3f3498..79b5b4e 100644 --- a/fern/products/waves/versions/v4.0.0/text-to-speech/stream-tts.mdx +++ b/fern/products/waves/versions/v4.0.0/text-to-speech/stream-tts.mdx @@ -1,5 +1,5 @@ --- -title: "How to use Streaming TTS with websockets" +title: "How to use Streaming Text to Speech (TTS) with websockets" description: "Learn how to convert text to speech with real-time streaming synthesis." 
icon: "bars-staggered" --- @@ -78,7 +78,7 @@ save_audio_chunks_to_wav(audio_chunks, "speech_output.wav") - **`voice_id`**: Voice identifier (e.g., "aditi", "male-1", "female-2") - **`api_key`**: Your Smallest AI API key -- **`language`**: Language code for synthesis (default: "en") +- **`language`**: Language code for text normalization (e.g., how numbers, dates, and abbreviations are spelled out). Set to "auto" for automatic language detection, or specify a language code (default: "auto") - **`sample_rate`**: Audio sample rate in Hz (default: 24000) - **`speed`**: Speech speed multiplier (default: 1.0 - normal speed, 0.5 = half speed, 2.0 = double speed) - **`consistency`**: Voice consistency parameter (default: 0.5, range: 0.0-1.0) @@ -95,4 +95,4 @@ The streaming TTS returns raw PCM audio data as bytes objects. Each chunk repres - Streamed over network protocols - Processed with additional audio effects -The raw format ensures minimal latency and maximum flexibility for real-time applications where immediate audio feedback is essential. \ No newline at end of file +The raw format ensures minimal latency and maximum flexibility for real-time applications where immediate audio feedback is essential. diff --git a/fern/products/waves/versions/v4.0.0/voice-cloning/how-to-delete-vc.mdx b/fern/products/waves/versions/v4.0.0/voice-cloning/how-to-delete-vc.mdx index aaacb19..8674576 100644 --- a/fern/products/waves/versions/v4.0.0/voice-cloning/how-to-delete-vc.mdx +++ b/fern/products/waves/versions/v4.0.0/voice-cloning/how-to-delete-vc.mdx @@ -12,7 +12,7 @@ You can access the source code for the Python SDK on our [GitHub repository](htt Before you begin, ensure you have the following: - Python (3.9 or higher) installed on your machine. -- An API key from the Smallest AI platform (sign up [here](https://waves.smallest.ai)). +- An API key from the Smallest AI [platform](https://app.smallest.ai/dashboard/settings/apikeys?utm_source=documentation&utm_medium=voice-cloning). 
## Setup diff --git a/fern/products/waves/versions/v4.0.0/voice-cloning/how-to-pvc.mdx b/fern/products/waves/versions/v4.0.0/voice-cloning/how-to-pvc.mdx index ddb07ff..403029b 100644 --- a/fern/products/waves/versions/v4.0.0/voice-cloning/how-to-pvc.mdx +++ b/fern/products/waves/versions/v4.0.0/voice-cloning/how-to-pvc.mdx @@ -9,9 +9,9 @@ In this tutorial, you will learn how to use the Smallest AI platform to create a # Creating a Professional Voice Clone 1. **Go to the Smallest AI Platform** - Navigate to [smallest.ai](https://waves.smallest.ai/voice-clone) and click on **Create New**. In the modal that appears, select **Professional Voice Clone**. This will direct you to the setup page: + Navigate to the [platform](https://app.smallest.ai/waves/voice-cloning?utm_source=documentation&utm_medium=voice-cloning) and click on **Create New**. In the modal that appears, select **Professional Voice Clone**. This will direct you to the setup page: - ![Voice Clone Setup](../../../images/pvc_page.png) + ![Voice Clone Setup](/images/pvc_page.png) 2. **Upload Your Audio File** Follow the instructions provided on the page to upload your audio file. Ensure that the recording is clear for the best results. diff --git a/fern/products/waves/versions/v4.0.0/voice-cloning/how-to-vc-ui.mdx b/fern/products/waves/versions/v4.0.0/voice-cloning/how-to-vc-ui.mdx index e6e6f10..c138378 100644 --- a/fern/products/waves/versions/v4.0.0/voice-cloning/how-to-vc-ui.mdx +++ b/fern/products/waves/versions/v4.0.0/voice-cloning/how-to-vc-ui.mdx @@ -8,29 +8,29 @@ In this tutorial, you will learn how to use the **Smallest AI** platform to crea # Creating an Instant Voice Clone 1️. **Go to the Smallest AI Platform** - - Navigate to **[smallest.ai](https://waves.smallest.ai/voice-clone)** and click on **Create New**. + - Navigate to the **[platform](https://app.smallest.ai/waves/voice-cloning?utm_source=documentation&utm_medium=voice-cloning)** and click on **Create New**. 
- In the modal that appears, select **Instant Voice Clone**. - ![Voice Clone Setup](../../../images/ivc-image-1.png) + ![Voice Clone Setup](/images/ivc-image-1.png) 2️. **Upload Your Clean Reference Audio** - Select a **short, high-quality** audio clip (5-15 seconds). - Ensure the recording is **clear and noise-free** for the best results. - Follow the recommended **[best practices](/v3.0.1/content/best-practices/vc-best-practices)** to maximize quality. - ![Upload your clean reference audio](../../../images/ivc-image-2.png) + ![Upload your clean reference audio](/images/ivc-image-2.png) 3️. **Review Generated Testing Examples** - The platform will process your reference audio and generate **sample outputs**. - Listen to the test clips to verify the voice match. - ![Testing Examples](../../../images/ivc-image-3.png) + ![Testing Examples](/images/ivc-image-3.png) 4️. **Customize & Save Your Voice Clone** - Fill in details like **Name, Tags, and Description** for your voice. - Click **Generate** to store your cloned voice. - ![Create Voice](../../../images/ivc-image-4.png) + ![Create Voice](/images/ivc-image-4.png) If you have any questions or run into any issues, our community is here to help! diff --git a/fern/products/waves/versions/v4.0.0/voice-cloning/how-to-vc.mdx b/fern/products/waves/versions/v4.0.0/voice-cloning/how-to-vc.mdx index 27c95ea..57bd2fa 100644 --- a/fern/products/waves/versions/v4.0.0/voice-cloning/how-to-vc.mdx +++ b/fern/products/waves/versions/v4.0.0/voice-cloning/how-to-vc.mdx @@ -12,7 +12,7 @@ You can access the source code for the Python SDK on our [GitHub repository](htt Before you begin, ensure you have the following: - Python (3.9 or higher) installed on your machine. -- An API key from the Smallest AI platform (sign up [here](https://waves.smallest.ai)). +- An API key from the Smallest AI [platform](https://app.smallest.ai/dashboard/settings/apikeys?utm_source=documentation&utm_medium=voice-cloning). ## Setup