Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
144 changes: 113 additions & 31 deletions Libs/pollilib/src/audio.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,45 +3,109 @@ import { BinaryData, arrayBufferFrom, base64FromArrayBuffer } from './binary.js'
import { raiseForStatus } from './errors.js';

/**
 * Request synthesized speech for `text` and return the response body as binary data.
 *
 * The text is validated and trimmed by `normalizeText`, every option except
 * `timeoutMs` is converted to query parameters by `buildTtsParams`, and the
 * request is issued with `client.get` against `${client.textBase}/<encoded text>`.
 *
 * @param {string} text - Text to speak; `normalizeText` rejects non-strings and blank strings.
 * @param {object} [options] - TTS options. `timeoutMs` is forwarded to `client.get`;
 *   all remaining keys go through `buildTtsParams`. `null` is treated as `{}`.
 * @param {object} [client] - Client exposing `textBase` and `get(url, { params, timeoutMs })`.
 * @returns {Promise<BinaryData>} Result of `BinaryData.fromResponse(response)`.
 * @throws {Error} When `normalizeText` rejects the text, or when `raiseForStatus`
 *   rejects the response (presumably on a non-success status — confirm in errors.js).
 */
export async function tts(text, options = {}, client = getDefaultClient()) {
  const normalizedText = normalizeText(text);
  // timeoutMs is transport configuration, not a query parameter.
  const { timeoutMs, ...rest } = options ?? {};
  const params = buildTtsParams(rest);
  const url = `${client.textBase}/${encodeURIComponent(normalizedText)}`;

  const response = await client.get(url, { params, timeoutMs });
  await raiseForStatus(response, 'tts');
  return await BinaryData.fromResponse(response);
}

export async function stt({ file, data, arrayBuffer, buffer, path, format, question, model = 'openai-audio', timeoutMs } = {}, client = getDefaultClient()) {
let bytes = null;
if (file) bytes = await arrayBufferFrom(file);
else if (data) bytes = await arrayBufferFrom(data);
else if (arrayBuffer) bytes = await arrayBufferFrom(arrayBuffer);
else if (buffer) bytes = await arrayBufferFrom(buffer);
else if (path) bytes = await readFileArrayBuffer(path);
if (!bytes) throw new Error("stt() requires 'file', 'data', 'arrayBuffer', 'buffer', or 'path'");
/**
 * Produce a URL for a text-to-speech request without performing the request.
 *
 * The text is validated and trimmed by `normalizeText`, the options are turned
 * into query parameters by `buildTtsParams`, and the result is whatever
 * `client.getSignedUrl` resolves to for that endpoint (called with
 * `includeToken: true`).
 *
 * @param {string} text - Text to speak.
 * @param {object} [options] - TTS options; `null` is treated as `{}`.
 * @param {object} [client] - Client exposing `textBase` and `getSignedUrl(url, opts)`.
 * @returns {Promise<*>} The value resolved by `client.getSignedUrl`.
 */
export async function ttsUrl(text, options = {}, client = getDefaultClient()) {
  const spokenText = normalizeText(text);
  const query = buildTtsParams(options ?? {});
  const endpoint = `${client.textBase}/${encodeURIComponent(spokenText)}`;
  const signingOptions = { params: query, includeToken: true };
  const signed = await client.getSignedUrl(endpoint, signingOptions);
  return signed;
}

let fmt = format ?? guessFormat({ file, path });
if (!fmt) throw new Error("Audio 'format' is required (e.g., 'mp3' or 'wav')");
/**
 * Send audio to the `${client.textBase}/openai` endpoint and return the parsed JSON reply.
 *
 * The request body is assembled by `buildSttPayload` from the audio source and
 * prompt options; `options.timeoutMs` is forwarded to `client.postJson`.
 *
 * @param {object} [options] - Audio source and request options (see `buildSttPayload`),
 *   plus an optional `timeoutMs`. `null` is treated as `{}`, matching `tts()`/`ttsUrl()`,
 *   so a missing audio source is reported by the payload builder instead of
 *   surfacing as a TypeError on property access.
 * @param {object} [client] - Client exposing `textBase` and `postJson(url, body, opts)`.
 * @returns {Promise<*>} The value resolved by `response.json()`.
 * @throws {Error} When the payload cannot be built, or when `raiseForStatus`
 *   rejects the response.
 */
export async function stt(options = {}, client = getDefaultClient()) {
  const opts = options ?? {};
  const payload = await buildSttPayload(opts);
  const response = await client.postJson(`${client.textBase}/openai`, payload, {
    timeoutMs: opts.timeoutMs,
  });
  await raiseForStatus(response, 'stt');
  return await response.json();
}

/**
 * Build the JSON request body for a speech-to-text call.
 *
 * Resolves the audio bytes through `resolveAudioBytes`, determines the audio
 * format, base64-encodes the bytes and wraps them, together with a text
 * instruction, in a single user message.
 *
 * @param {object} [options]
 * @param {*} [options.file] - Audio source candidates, passed to `resolveAudioBytes`
 *   along with `data`, `arrayBuffer`, `buffer` and `path`.
 * @param {string} [options.question] - Instruction text; falls back to `prompt`,
 *   then to 'Transcribe this audio'.
 * @param {string} [options.prompt] - Alternative name for `question`.
 * @param {string} [options.model='openai-audio'] - Model name placed in the payload.
 * @param {string} [options.format] - Audio format; when absent, `guessFormat` is asked,
 *   with `options.mimeType` passed as its `explicit` hint.
 * @param {string} [options.language] - Added to the payload only when truthy.
 * @param {number} [options.temperature] - Added to the payload when not null/undefined
 *   (so `0` is kept).
 * @returns {Promise<object>} `{ model, messages, language?, temperature? }`.
 * @throws {Error} When no audio source is given or no format can be determined.
 */
async function buildSttPayload(options = {}) {
  const {
    file,
    data,
    arrayBuffer,
    buffer,
    path,
    question,
    prompt,
    model = 'openai-audio',
    format,
    language,
    temperature,
  } = options;

  const bytes = await resolveAudioBytes({ file, data, arrayBuffer, buffer, path });
  const mime = format ?? guessFormat({ file, path, explicit: options.mimeType });
  if (!mime) {
    throw new Error("stt() requires an audio format (e.g. 'mp3' or 'wav')");
  }

  const b64 = base64FromArrayBuffer(bytes);
  const userQuestion = question ?? prompt ?? 'Transcribe this audio';

  const message = {
    role: 'user',
    content: [
      { type: 'text', text: userQuestion },
      { type: 'input_audio', input_audio: { data: b64, format: mime } },
    ],
  };

  const payload = { model, messages: [message] };
  if (language) payload.language = language;
  // `!= null` rather than truthiness so an explicit temperature of 0 is sent.
  if (temperature != null) payload.temperature = temperature;

  return payload;
}

/**
 * Pick the first usable audio source and resolve it to bytes.
 *
 * In-memory sources are tried in the order `file`, `data`, `arrayBuffer`,
 * `buffer` and converted with `arrayBufferFrom`; if none is truthy, `path` is
 * read with `readFileArrayBuffer`.
 *
 * @param {object} sources - `{ file, data, arrayBuffer, buffer, path }`.
 * @returns {Promise<*>} Whatever the chosen converter resolves to.
 * @throws {Error} When every source is falsy.
 */
async function resolveAudioBytes({ file, data, arrayBuffer, buffer, path }) {
  const inMemorySource = [file, data, arrayBuffer, buffer].find(Boolean);
  if (inMemorySource) {
    return await arrayBufferFrom(inMemorySource);
  }
  if (!path) {
    throw new Error("stt() requires 'file', 'data', 'arrayBuffer', 'buffer', or 'path'");
  }
  return await readFileArrayBuffer(path);
}

/**
 * Convert TTS options into a flat query-parameter object.
 *
 * Known keys (`model`, `voice`, `format`, `language`, `referrer`) are copied via
 * `assignIfPresent`, which skips `undefined`, `null` and `''`. `model` defaults
 * to 'openai-audio' when null/undefined. `timeoutMs` is never emitted. Any other
 * key is passed through unless its value is null/undefined.
 *
 * @param {object} [options] - Caller-supplied options; not mutated.
 * @returns {object} Query parameters for the TTS endpoint.
 */
function buildTtsParams(options) {
  const params = {};
  const extras = { ...options };

  assignIfPresent(params, 'model', extras.model ?? 'openai-audio');
  for (const key of ['voice', 'format', 'language', 'referrer']) {
    assignIfPresent(params, key, extras[key]);
  }

  // Remove every handled key unconditionally. Previously `referrer` was only
  // removed when truthy, so an empty-string referrer bypassed the guard and was
  // re-added by the pass-through loop below.
  for (const key of ['model', 'voice', 'format', 'language', 'referrer', 'timeoutMs']) {
    delete extras[key];
  }

  for (const [key, value] of Object.entries(extras)) {
    if (value === undefined || value === null) continue;
    params[key] = value;
  }

  return params;
}

async function readFileArrayBuffer(path) {
Expand All @@ -56,7 +120,8 @@ async function readFileArrayBuffer(path) {
return buf.buffer.slice(buf.byteOffset, buf.byteOffset + buf.byteLength);
}

function guessFormat({ file, path }) {
function guessFormat({ file, path, explicit }) {
if (explicit) return explicit;
if (file?.type?.startsWith?.('audio/')) {
return file.type.split('/')[1];
}
Expand All @@ -66,3 +131,20 @@ function guessFormat({ file, path }) {
}
return null;
}

/**
 * Validate the text passed to the TTS helpers and strip surrounding whitespace.
 *
 * @param {*} text - Candidate text.
 * @returns {string} The trimmed text.
 * @throws {Error} When `text` is not a string, or is empty after trimming.
 */
function normalizeText(text) {
  const isString = typeof text === 'string';
  if (!isString) {
    throw new Error('tts() expects the text to be a string');
  }

  const candidate = text.trim();
  if (candidate.length === 0) {
    throw new Error('tts() requires a non-empty text string');
  }

  return candidate;
}

/**
 * Set `target[key]` to `value` unless the value is `undefined`, `null` or the
 * empty string. Other falsy values (`0`, `false`) are assigned.
 *
 * @param {object} target - Object to mutate.
 * @param {string} key - Property name to set.
 * @param {*} value - Candidate value.
 */
function assignIfPresent(target, key, value) {
  // `== null` covers both null and undefined.
  const isAbsent = value == null || value === '';
  if (isAbsent) {
    return;
  }
  target[key] = value;
}
84 changes: 68 additions & 16 deletions Libs/pollilib/src/binary.js
Original file line number Diff line number Diff line change
@@ -1,18 +1,29 @@
const hasBuffer = typeof Buffer !== 'undefined' && typeof Buffer.from === 'function';
const hasBlob = typeof Blob !== 'undefined';
const hasReadableStream = typeof ReadableStream !== 'undefined';

export class BinaryData {
constructor(arrayBuffer, mimeType = 'application/octet-stream') {
if (!(arrayBuffer instanceof ArrayBuffer)) {
constructor(buffer, mimeType = 'application/octet-stream') {
if (!(buffer instanceof ArrayBuffer)) {
throw new TypeError('BinaryData expects an ArrayBuffer');
}
this._buffer = arrayBuffer;
this._buffer = buffer;
this.mimeType = mimeType || 'application/octet-stream';
this._view = null;
this._objectUrl = null;
}

static async fromResponse(response) {
const buffer = await response.arrayBuffer();
const arrayBuffer = await response.arrayBuffer();
const mimeType = response.headers?.get?.('content-type') ?? undefined;
return new BinaryData(arrayBuffer, mimeType);
}

static async from(input, mimeType) {
if (input instanceof BinaryData) {
return new BinaryData(input.arrayBuffer(), mimeType ?? input.mimeType);
}
const buffer = await arrayBufferFrom(input);
return new BinaryData(buffer, mimeType);
}

Expand All @@ -25,7 +36,7 @@ export class BinaryData {
}

uint8Array() {
return this._view ??= new Uint8Array(this._buffer);
return (this._view ??= new Uint8Array(this._buffer));
}

toBase64() {
Expand All @@ -37,20 +48,20 @@ export class BinaryData {
}

blob() {
if (typeof Blob === 'undefined') {
throw new Error('Blob constructor is not available in this environment');
if (!hasBlob) {
throw new Error('Blob is not available in this environment');
}
return new Blob([this._buffer], { type: this.mimeType });
}

stream() {
if (typeof ReadableStream === 'undefined') {
if (!hasReadableStream) {
throw new Error('ReadableStream is not available in this environment');
}
const bytes = this.uint8Array();
const chunk = this.uint8Array();
return new ReadableStream({
start(controller) {
controller.enqueue(bytes);
controller.enqueue(chunk);
controller.close();
},
});
Expand All @@ -62,26 +73,50 @@ export class BinaryData {
}
return Buffer.from(this._buffer);
}

toObjectUrl() {
if (!hasBlob || typeof URL === 'undefined' || typeof URL.createObjectURL !== 'function') {
throw new Error('Object URLs are not supported in this environment');
}
if (!this._objectUrl) {
this._objectUrl = URL.createObjectURL(this.blob());
}
return this._objectUrl;
}

revokeObjectUrl() {
if (this._objectUrl && typeof URL?.revokeObjectURL === 'function') {
URL.revokeObjectURL(this._objectUrl);
this._objectUrl = null;
}
}
}

export async function arrayBufferFrom(input) {
if (input == null) throw new Error('No binary data provided');
if (input instanceof ArrayBuffer) return input.slice(0);
if (input == null) {
throw new Error('No binary data provided');
}
if (input instanceof ArrayBuffer) {
return input.slice(0);
}
if (ArrayBuffer.isView(input)) {
return input.buffer.slice(input.byteOffset, input.byteOffset + input.byteLength);
}
if (typeof Blob !== 'undefined' && input instanceof Blob) {
if (typeof input === 'string') {
return await arrayBufferFromString(input);
}
if (hasBlob && input instanceof Blob) {
return await input.arrayBuffer();
}
if (typeof File !== 'undefined' && input instanceof File) {
return await input.arrayBuffer();
}
if (typeof input === 'object' && typeof input.arrayBuffer === 'function') {
const ab = await input.arrayBuffer();
if (!(ab instanceof ArrayBuffer)) {
const buffer = await input.arrayBuffer();
if (!(buffer instanceof ArrayBuffer)) {
throw new Error('arrayBuffer() did not return an ArrayBuffer');
}
return ab;
return buffer;
}
if (hasBuffer && Buffer.isBuffer?.(input)) {
return input.buffer.slice(input.byteOffset, input.byteOffset + input.byteLength);
Expand All @@ -105,3 +140,20 @@ export function base64FromArrayBuffer(buffer) {
}
throw new Error('Base64 conversion is not supported in this environment');
}

/**
 * Encode a string into an ArrayBuffer.
 *
 * Uses `TextEncoder` when the global exists; otherwise falls back to
 * `Buffer.from` when `hasBuffer` is set, copying just the bytes that belong to
 * the resulting Buffer.
 *
 * @param {string} value - Text to encode.
 * @returns {Promise<ArrayBuffer>} The encoded bytes.
 * @throws {Error} When neither `TextEncoder` nor `Buffer` is available.
 */
async function arrayBufferFromString(value) {
  const canUseTextEncoder = typeof TextEncoder !== 'undefined';
  if (canUseTextEncoder) {
    const encoder = new TextEncoder();
    return encoder.encode(value).buffer;
  }

  if (!hasBuffer) {
    throw new Error('String to ArrayBuffer conversion is not supported in this environment');
  }

  const nodeBuffer = Buffer.from(String(value));
  const start = nodeBuffer.byteOffset;
  const end = start + nodeBuffer.byteLength;
  return nodeBuffer.buffer.slice(start, end);
}

// Optional disposal hook: only installed when the runtime defines the
// well-known `Symbol.dispose` symbol, so older environments are unaffected.
// Disposing a BinaryData instance revokes any object URL it has created.
if (typeof Symbol === 'function' && typeof Symbol.dispose === 'symbol') {
BinaryData.prototype[Symbol.dispose] = function disposeBinaryData() {
this.revokeObjectUrl();
};
}
Loading