Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions config.example.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
"msteams": [],
"slack": [],
"telegram": [],
"voice": [],
"whatsapp": [],
"email": []
}
Expand Down Expand Up @@ -157,6 +158,25 @@
"ackReaction": "👀",
"mediaMaxMb": 20
},
"voice": {
"enabled": false,
"provider": "twilio",
"twilio": {
"accountSid": "",
"authToken": "",
"fromNumber": ""
},
"relay": {
"ttsProvider": "default",
"voice": "",
"transcriptionProvider": "default",
"language": "en-US",
"interruptible": true,
"welcomeGreeting": "Hello! How can I help you today?"
},
"webhookPath": "/voice",
"maxConcurrentCalls": 8
},
"imessage": {
"enabled": false,
"backend": "local",
Expand Down
4 changes: 4 additions & 0 deletions src/channels/channel-registry.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,14 @@ import {
SYSTEM_CAPABILITIES,
TELEGRAM_CAPABILITIES,
TUI_CAPABILITIES,
VOICE_CAPABILITIES,
WHATSAPP_CAPABILITIES,
} from './channel.js';
import { isEmailAddress } from './email/allowlist.js';
import { isIMessageHandle } from './imessage/handle.js';
import { isSlackChannelTarget } from './slack/target.js';
import { isTelegramChannelId } from './telegram/target.js';
import { isVoiceChannelId } from './voice/channel-id.js';
import { isWhatsAppJid } from './whatsapp/phone.js';

const DISCORD_SNOWFLAKE_RE = /^\d{16,22}$/;
Expand All @@ -31,6 +33,7 @@ const CHANNEL_CAPABILITIES: Record<ChannelKind, ChannelInfo['capabilities']> = {
slack: SLACK_CAPABILITIES,
telegram: TELEGRAM_CAPABILITIES,
tui: TUI_CAPABILITIES,
voice: VOICE_CAPABILITIES,
whatsapp: WHATSAPP_CAPABILITIES,
};

Expand Down Expand Up @@ -107,6 +110,7 @@ function inferChannelKind(channelId?: string | null): ChannelKind | undefined {
return 'msteams';
}
if (isWhatsAppJid(normalized)) return 'whatsapp';
if (isVoiceChannelId(normalized)) return 'voice';
if (isIMessageHandle(normalized)) return 'imessage';
if (isSlackChannelTarget(normalized)) return 'slack';
if (isTelegramChannelId(normalized)) return 'telegram';
Expand Down
12 changes: 12 additions & 0 deletions src/channels/channel.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ export type ChannelKind =
| 'slack'
| 'telegram'
| 'tui'
| 'voice'
| 'whatsapp';

export const SKILL_CONFIG_CHANNEL_KINDS = [
Expand All @@ -16,6 +17,7 @@ export const SKILL_CONFIG_CHANNEL_KINDS = [
'msteams',
'slack',
'telegram',
'voice',
'whatsapp',
] as const satisfies readonly ChannelKind[];

Expand Down Expand Up @@ -60,6 +62,16 @@ export const DISCORD_CAPABILITIES: ChannelCapabilities = Object.freeze({

export const TUI_CAPABILITIES: ChannelCapabilities = SYSTEM_CAPABILITIES;

/**
 * Capability flags for the voice channel.
 *
 * Every interactive feature (typing indicator, reactions, threads, embeds,
 * attachments, message editing) is switched off, and a single message is
 * capped at 4,000 characters. The object is frozen, so it cannot be mutated
 * at runtime.
 */
export const VOICE_CAPABILITIES: ChannelCapabilities = Object.freeze({
typing: false,
reactions: false,
threads: false,
embeds: false,
attachments: false,
messageEditing: false,
maxMessageLength: 4_000,
});

export const WHATSAPP_CAPABILITIES: ChannelCapabilities = Object.freeze({
typing: true,
reactions: true,
Expand Down
25 changes: 25 additions & 0 deletions src/channels/voice/channel-id.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
const VOICE_CHANNEL_PREFIX = 'voice:';

/**
 * Builds the channel id for a voice call by prefixing the call SID.
 *
 * @param callSid - Call SID; surrounding whitespace is removed.
 * @returns The prefixed channel id, e.g. `voice:<callSid>`.
 * @throws Error when the SID is missing or blank after trimming.
 */
export function buildVoiceChannelId(callSid: string): string {
  const sid = String(callSid || '').trim();
  if (sid.length === 0) {
    throw new Error('Voice call SID is required.');
  }
  return VOICE_CHANNEL_PREFIX + sid;
}

/**
 * Reports whether a value, once trimmed, starts with the voice channel
 * prefix. `null`, `undefined` and empty strings yield `false`.
 *
 * Note: a bare prefix with no call SID after it still counts as a match.
 */
export function isVoiceChannelId(value?: string | null): boolean {
  return String(value || '')
    .trim()
    .startsWith(VOICE_CHANNEL_PREFIX);
}

/**
 * Extracts the call SID from a voice channel id.
 *
 * @param value - Candidate channel id; may be null or undefined.
 * @returns The trimmed text after the voice prefix, or `null` when the value
 *   is not a voice channel id or nothing but whitespace follows the prefix.
 */
export function parseVoiceChannelId(value?: string | null): string | null {
  if (isVoiceChannelId(value)) {
    const trimmed = String(value || '').trim();
    const sid = trimmed.slice(VOICE_CHANNEL_PREFIX.length).trim();
    return sid.length > 0 ? sid : null;
  }
  return null;
}
251 changes: 251 additions & 0 deletions src/channels/voice/conversation-relay.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,251 @@
import type WebSocket from 'ws';

/** Type guard: true for any non-null object that is not an array. */
function isRecord(value: unknown): value is Record<string, unknown> {
  if (value === null || Array.isArray(value)) {
    return false;
  }
  return typeof value === 'object';
}

/** Returns the value unchanged when it is a string, otherwise `''`. */
function normalizeString(value: unknown): string {
  if (typeof value !== 'string') {
    return '';
  }
  return value;
}

/** Returns the value when it is a boolean, otherwise the supplied fallback. */
function normalizeBoolean(value: unknown, fallback: boolean): boolean {
  if (typeof value === 'boolean') {
    return value;
  }
  return fallback;
}

/** Returns the value when it is a finite number, otherwise `undefined`. */
function normalizeNumber(value: unknown): number | undefined {
  if (typeof value !== 'number') {
    return undefined;
  }
  if (!Number.isFinite(value)) {
    return undefined;
  }
  return value;
}

/**
 * Decodes a raw WebSocket payload as UTF-8 text.
 *
 * Strings pass through untouched. A single Buffer is used as-is, an array of
 * Buffers is concatenated, and anything else is wrapped with `Buffer.from`
 * before decoding.
 */
function rawDataToString(raw: WebSocket.Data): string {
  if (typeof raw === 'string') {
    return raw;
  }
  let bytes: Buffer;
  if (Buffer.isBuffer(raw)) {
    bytes = raw;
  } else if (Array.isArray(raw)) {
    bytes = Buffer.concat(raw);
  } else {
    bytes = Buffer.from(raw);
  }
  return bytes.toString('utf8');
}

/**
 * Parsed form of an inbound message whose `type` is `'setup'`.
 *
 * As produced by `parseConversationRelayMessage`: required string fields are
 * `''` when the payload omits them or supplies a non-string, and optional
 * fields are `undefined` when absent or empty.
 */
export interface ConversationRelaySetupMessage {
type: 'setup';
sessionId: string;
accountSid: string;
parentCallSid?: string;
callSid: string;
from: string;
to: string;
forwardedFrom?: string;
callType?: string;
callerName?: string;
direction?: string;
callStatus?: string;
// Only set when the payload carries an object here; the parser replaces any
// non-string value with ''.
customParameters?: Record<string, string>;
}

/**
 * Parsed form of an inbound message whose `type` is `'prompt'`.
 *
 * NOTE(review): `voicePrompt` presumably carries transcribed caller speech —
 * confirm against the relay protocol documentation.
 */
export interface ConversationRelayPromptMessage {
type: 'prompt';
voicePrompt: string;
lang?: string;
// The parser defaults this to true when the payload has no boolean `last`.
last: boolean;
}

/**
 * Parsed form of an inbound message whose `type` is `'dtmf'`.
 * `digit` is `''` when the payload omits it or supplies a non-string.
 */
export interface ConversationRelayDtmfMessage {
type: 'dtmf';
digit: string;
}

/**
 * Parsed form of an inbound message whose `type` is `'interrupt'`.
 * Both detail fields are optional.
 */
export interface ConversationRelayInterruptMessage {
type: 'interrupt';
utteranceUntilInterrupt?: string;
// Kept by the parser only when the payload value is a finite number.
durationUntilInterruptMs?: number;
}

/**
 * Parsed form of an inbound message whose `type` is `'error'`.
 * `description` is `''` when the payload omits it or supplies a non-string.
 */
export interface ConversationRelayErrorMessage {
type: 'error';
description: string;
}

/**
 * Union of every inbound message shape, discriminated by the `type` field.
 * This is the return type of `parseConversationRelayMessage`.
 */
export type ConversationRelayInboundMessage =
| ConversationRelaySetupMessage
| ConversationRelayPromptMessage
| ConversationRelayDtmfMessage
| ConversationRelayInterruptMessage
| ConversationRelayErrorMessage;

/**
 * Combines previously accumulated prompt text with a newly received fragment.
 *
 * Rules, applied in order:
 * - if either side is empty, the other side is returned;
 * - if the fragment begins with the accumulated text, the fragment replaces it;
 * - if the accumulated text already ends with the fragment, it is kept as-is;
 * - otherwise the two are concatenated, with a single space inserted only when
 *   neither side supplies whitespace at the join and the fragment starts with
 *   an ASCII letter or digit.
 *
 * @param existing - Text accumulated so far.
 * @param fragment - Newly received text.
 * @returns The merged text.
 */
export function mergePromptFragment(
  existing: string,
  fragment: string,
): string {
  const head = String(existing || '');
  const tail = String(fragment || '');
  if (!head || !tail) {
    return head || tail;
  }
  if (tail.startsWith(head)) {
    return tail;
  }
  if (head.endsWith(tail)) {
    return head;
  }

  const headEndsWithSpace = /\s$/.test(head);
  const tailStartsWithSpace = /^\s/.test(tail);
  const tailStartsWithWord = /^[A-Za-z0-9]/.test(tail);
  const separator =
    !headEndsWithSpace && !tailStartsWithSpace && tailStartsWithWord
      ? ' '
      : '';
  return head + separator + tail;
}

/**
 * Decodes and validates one inbound ConversationRelay WebSocket frame.
 *
 * The frame is decoded as UTF-8, trimmed, parsed as JSON and dispatched on its
 * `type` field. Fields are normalised rather than rejected: a required string
 * that is missing or not a string becomes `''`, an optional string that is
 * missing or empty becomes `undefined`, and `prompt.last` defaults to `true`.
 *
 * @param raw - Raw WebSocket payload.
 * @returns The typed message for `setup`, `prompt`, `dtmf`, `interrupt` or
 *   `error` payloads.
 * @throws Error when the frame is empty, is not valid JSON, is not a JSON
 *   object, or carries an unsupported `type`.
 */
export function parseConversationRelayMessage(
  raw: WebSocket.Data,
): ConversationRelayInboundMessage {
  const text = rawDataToString(raw).trim();
  if (text.length === 0) {
    throw new Error('ConversationRelay message was empty.');
  }

  let payload: unknown;
  try {
    payload = JSON.parse(text) as unknown;
  } catch {
    throw new Error('ConversationRelay message was not valid JSON.');
  }
  if (!isRecord(payload)) {
    throw new Error('ConversationRelay message must be a JSON object.');
  }
  const message: Record<string, unknown> = payload;

  // Optional string fields collapse missing, non-string and empty to undefined.
  const optionalString = (value: unknown): string | undefined =>
    normalizeString(value) || undefined;

  const type = normalizeString(message.type);
  switch (type) {
    case 'setup': {
      const rawParameters = message.customParameters;
      let customParameters: Record<string, string> | undefined;
      if (isRecord(rawParameters)) {
        const entries = Object.entries(rawParameters).map(([name, value]) => [
          name,
          normalizeString(value),
        ]);
        customParameters = Object.fromEntries(entries);
      }
      return {
        type,
        sessionId: normalizeString(message.sessionId),
        accountSid: normalizeString(message.accountSid),
        parentCallSid: optionalString(message.parentCallSid),
        callSid: normalizeString(message.callSid),
        from: normalizeString(message.from),
        to: normalizeString(message.to),
        forwardedFrom: optionalString(message.forwardedFrom),
        callType: optionalString(message.callType),
        callerName: optionalString(message.callerName),
        direction: optionalString(message.direction),
        callStatus: optionalString(message.callStatus),
        customParameters,
      };
    }
    case 'prompt':
      return {
        type,
        voicePrompt: normalizeString(message.voicePrompt),
        lang: optionalString(message.lang),
        last: normalizeBoolean(message.last, true),
      };
    case 'dtmf':
      return {
        type,
        digit: normalizeString(message.digit),
      };
    case 'interrupt':
      return {
        type,
        utteranceUntilInterrupt: optionalString(
          message.utteranceUntilInterrupt,
        ),
        durationUntilInterruptMs: normalizeNumber(
          message.durationUntilInterruptMs,
        ),
      };
    case 'error':
      return {
        type,
        description: normalizeString(message.description),
      };
    default:
      throw new Error(
        `Unsupported ConversationRelay message type: ${type || 'unknown'}`,
      );
  }
}

type SendFn = (payload: Record<string, unknown>) => Promise<void>;

/**
 * Streams reply text to a ConversationRelay peer as `text` messages.
 *
 * The stream keeps the most recently pushed token in a one-slot buffer and
 * only sends it once the next token arrives (with `last: false`) or the
 * stream is finished (with `last: true`). That look-behind is what lets the
 * final token be flagged as last without knowing in advance how many tokens
 * will be pushed.
 *
 * Each buffered token remembers the language it was pushed with, so a
 * per-token `language` is attached to that token and not to whichever call
 * happens to flush it.
 *
 * Once `finish`, `reply` or `endSession` has completed, every further call is
 * a no-op.
 */
export class ConversationRelayResponseStream {
  private closed = false;
  private pendingToken: string | null = null;
  /** Language supplied together with `pendingToken`, if any. */
  private pendingLanguage: string | undefined = undefined;
  private emittedText = false;

  /**
   * @param send - Delivers one outbound payload to the peer.
   * @param options - `interruptible` and `language` are stamped onto every
   *   `text` message (`language` being the default when none is given per
   *   token). `onFirstToken` fires once, just before the first `text` message
   *   is sent. `onFinished` fires when the stream closes.
   */
  constructor(
    private readonly send: SendFn,
    private readonly options: {
      interruptible: boolean;
      language: string;
      onFirstToken?: () => void;
      onFinished?: () => void;
    },
  ) {}

  /** True once the stream has been closed by `finish` or `endSession`. */
  get finished(): boolean {
    return this.closed;
  }

  /** True when text has been sent or a token is buffered awaiting send. */
  get hasEmittedText(): boolean {
    return this.emittedText || Boolean(this.pendingToken);
  }

  /**
   * Queues a token. Any previously buffered token is sent first, marked
   * `last: false`, using the language it was pushed with.
   *
   * Empty tokens and calls on a closed stream are ignored.
   *
   * @param token - Text to queue.
   * @param opts - Optional language for this token; defaults to the stream
   *   language when omitted.
   */
  async push(token: string, opts?: { language?: string }): Promise<void> {
    if (this.closed) return;
    const normalized = String(token || '');
    if (!normalized) return;
    if (this.pendingToken !== null) {
      // Flush the older token with ITS language, not the incoming token's.
      await this.sendText(this.pendingToken, false, this.pendingLanguage);
    }
    this.pendingToken = normalized;
    this.pendingLanguage = opts?.language;
  }

  /** Pushes `text` and immediately finishes the stream. */
  async reply(text: string, opts?: { language?: string }): Promise<void> {
    if (this.closed) return;
    await this.push(text, opts);
    await this.finish(opts);
  }

  /**
   * Sends the buffered token (if any) marked `last: true`, closes the stream
   * and invokes `onFinished`.
   *
   * @param opts - A language given here takes precedence for the final token;
   *   otherwise the language the token was pushed with is used.
   */
  async finish(opts?: { language?: string }): Promise<void> {
    if (this.closed) return;
    const finalToken = this.pendingToken;
    const finalLanguage = opts?.language || this.pendingLanguage;
    this.pendingToken = null;
    this.pendingLanguage = undefined;
    if (finalToken) {
      await this.sendText(finalToken, true, finalLanguage);
    }
    this.closed = true;
    this.options.onFinished?.();
  }

  /**
   * Closes the stream and sends an `end` message, then invokes `onFinished`.
   *
   * Note: a token still sitting in the buffer is not sent by this method.
   *
   * @param handoffData - Included in the `end` message when non-empty.
   */
  async endSession(handoffData?: string): Promise<void> {
    if (this.closed) return;
    // Mark closed before awaiting so calls made during the send are ignored.
    this.closed = true;
    await this.send({
      type: 'end',
      ...(handoffData ? { handoffData } : {}),
    });
    this.options.onFinished?.();
  }

  /** Sends one `text` message, firing `onFirstToken` before the first one. */
  private async sendText(
    token: string,
    last: boolean,
    language?: string,
  ): Promise<void> {
    if (!this.emittedText) {
      this.emittedText = true;
      this.options.onFirstToken?.();
    }
    await this.send({
      type: 'text',
      token,
      last,
      lang: language || this.options.language,
      interruptible: this.options.interruptible,
      preemptible: false,
    });
  }
}
Loading
Loading