Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ export type TranscriptionAvailability = {
hasFal: boolean;
hasAnyProvider: boolean;
geminiModelId: string;
effectiveEnv: Env;
};

export async function resolveTranscriptionAvailability({
Expand Down Expand Up @@ -58,7 +59,7 @@ export async function resolveTranscriptionAvailability({
? isOnnxCliConfigured(preferredOnnxModel, effectiveEnv)
: false;

const hasLocalWhisper = await isWhisperCppReady();
const hasLocalWhisper = await isWhisperCppReady(effectiveEnv);
const hasGroq = Boolean(effective.groqApiKey);
const hasAssemblyAi = Boolean(effective.assemblyaiApiKey);
const hasGemini = Boolean(effective.geminiApiKey);
Expand All @@ -78,6 +79,7 @@ export async function resolveTranscriptionAvailability({
hasFal,
hasAnyProvider,
geminiModelId: effective.geminiModel ?? resolveGeminiTranscriptionModel(effectiveEnv),
effectiveEnv,
};
}

Expand Down Expand Up @@ -124,7 +126,7 @@ export async function resolveTranscriptionStartInfo({
? `onnx/${availability.preferredOnnxModel}`
: "onnx"
: providerHint === "cpp"
? ((await resolveWhisperCppModelNameForDisplay()) ?? "whisper.cpp")
? ((await resolveWhisperCppModelNameForDisplay(availability.effectiveEnv)) ?? "whisper.cpp")
: resolveCloudModelId(availability);

return { availability, providerHint, modelId };
Expand Down
13 changes: 11 additions & 2 deletions packages/core/src/transcription/whisper/core.ts
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ export async function transcribeMediaWithWhisper({
filename,
totalDurationSeconds,
onProgress,
env,
notes,
});
if (local) return local;
Expand Down Expand Up @@ -192,6 +193,7 @@ export async function transcribeMediaFileWithWhisper({
mediaType,
totalDurationSeconds,
onProgress,
env,
notes,
});
if (local) return local;
Expand Down Expand Up @@ -470,16 +472,18 @@ async function transcribeWithLocalWhisperBytes({
filename,
totalDurationSeconds,
onProgress,
env,
notes,
}: {
bytes: Uint8Array;
mediaType: string;
filename: string | null;
totalDurationSeconds: number | null;
onProgress?: ((event: WhisperProgressEvent) => void) | null;
env: Env;
notes: string[];
}): Promise<WhisperTranscriptionResult | null> {
const localReady = await isWhisperCppReady();
const localReady = await isWhisperCppReady(env);
if (!localReady) return null;
const nameHint = filename?.trim() ? basename(filename.trim()) : "media";
const tempFile = join(
Expand All @@ -493,6 +497,7 @@ async function transcribeWithLocalWhisperBytes({
mediaType,
totalDurationSeconds,
onProgress,
env,
});
if (result.text) {
if (result.notes.length > 0) notes.push(...result.notes);
Expand All @@ -513,15 +518,17 @@ async function transcribeWithLocalWhisperFile({
mediaType,
totalDurationSeconds,
onProgress,
env,
notes,
}: {
filePath: string;
mediaType: string;
totalDurationSeconds: number | null;
onProgress?: ((event: WhisperProgressEvent) => void) | null;
env: Env;
notes: string[];
}): Promise<WhisperTranscriptionResult | null> {
const localReady = await isWhisperCppReady();
const localReady = await isWhisperCppReady(env);
if (!localReady) return null;
onProgress?.({
partIndex: null,
Expand All @@ -534,6 +541,7 @@ async function transcribeWithLocalWhisperFile({
mediaType,
totalDurationSeconds,
onProgress,
env,
});
if (result.text) {
if (result.notes.length > 0) notes.push(...result.notes);
Expand All @@ -551,6 +559,7 @@ async function safeTranscribeWithWhisperCppFile(args: {
mediaType: string;
totalDurationSeconds: number | null;
onProgress?: ((event: WhisperProgressEvent) => void) | null;
env: Env;
}): Promise<WhisperTranscriptionResult> {
try {
return await transcribeWithWhisperCppFile(args);
Expand Down
43 changes: 26 additions & 17 deletions packages/core/src/transcription/whisper/whisper-cpp.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,17 @@ import {
import type { WhisperProgressEvent, WhisperTranscriptionResult } from "./types.js";
import { wrapError } from "./utils.js";

export async function isWhisperCppReady(): Promise<boolean> {
if (!isWhisperCppEnabled()) return false;
if (!(await isWhisperCliAvailable())) return false;
const model = await resolveWhisperCppModelPath();
export async function isWhisperCppReady(env?: Record<string, string | undefined>): Promise<boolean> {
if (!isWhisperCppEnabled(env)) return false;
if (!(await isWhisperCliAvailable(env))) return false;
const model = await resolveWhisperCppModelPath(env);
return Boolean(model);
}

export async function resolveWhisperCppModelNameForDisplay(): Promise<string | null> {
const modelPath = await resolveWhisperCppModelPath();
export async function resolveWhisperCppModelNameForDisplay(
env?: Record<string, string | undefined>,
): Promise<string | null> {
const modelPath = await resolveWhisperCppModelPath(env);
return modelPath ? resolveWhisperCppModelLabelFromPath(modelPath) : null;
}

Expand All @@ -33,14 +35,16 @@ export async function transcribeWithWhisperCppFile({
mediaType,
totalDurationSeconds,
onProgress,
env,
}: {
filePath: string;
mediaType: string;
totalDurationSeconds: number | null;
onProgress?: ((event: WhisperProgressEvent) => void) | null;
env?: Record<string, string | undefined>;
}): Promise<WhisperTranscriptionResult> {
const notes: string[] = [];
const modelPath = await resolveWhisperCppModelPath();
const modelPath = await resolveWhisperCppModelPath(env);
if (!modelPath) {
return {
text: null,
Expand Down Expand Up @@ -116,7 +120,7 @@ export async function transcribeWithWhisperCppFile({

try {
await new Promise<void>((resolve, reject) => {
const { proc, handle } = spawnTracked(resolveWhisperCppBinary(), args, {
const { proc, handle } = spawnTracked(resolveWhisperCppBinary(env), args, {
stdio: ["ignore", "ignore", "pipe"],
label: "whisper.cpp",
kind: "whisper.cpp",
Expand Down Expand Up @@ -189,12 +193,13 @@ export async function transcribeWithWhisperCppFile({
}
}

function isWhisperCppEnabled(): boolean {
return (process.env[DISABLE_LOCAL_WHISPER_CPP_ENV] ?? "").trim() !== "1";
function isWhisperCppEnabled(env?: Record<string, string | undefined>): boolean {
const source = env ?? process.env;
return (source[DISABLE_LOCAL_WHISPER_CPP_ENV] ?? "").trim() !== "1";
}

async function isWhisperCliAvailable(): Promise<boolean> {
const bin = resolveWhisperCppBinary();
async function isWhisperCliAvailable(env?: Record<string, string | undefined>): Promise<boolean> {
const bin = resolveWhisperCppBinary(env);
return new Promise((resolve) => {
const { proc } = spawnTracked(bin, ["--help"], {
stdio: ["ignore", "ignore", "ignore"],
Expand All @@ -207,13 +212,17 @@ async function isWhisperCliAvailable(): Promise<boolean> {
});
}

function resolveWhisperCppBinary(): string {
const override = (process.env[WHISPER_CPP_BINARY_ENV] ?? "").trim();
function resolveWhisperCppBinary(env?: Record<string, string | undefined>): string {
const source = env ?? process.env;
const override = (source[WHISPER_CPP_BINARY_ENV] ?? "").trim();
return override.length > 0 ? override : "whisper-cli";
}

async function resolveWhisperCppModelPath(): Promise<string | null> {
const override = (process.env[WHISPER_CPP_MODEL_PATH_ENV] ?? "").trim();
async function resolveWhisperCppModelPath(
env?: Record<string, string | undefined>,
): Promise<string | null> {
const source = env ?? process.env;
const override = (source[WHISPER_CPP_MODEL_PATH_ENV] ?? "").trim();
if (override) {
try {
const stat = await fs.stat(override);
Expand All @@ -223,7 +232,7 @@ async function resolveWhisperCppModelPath(): Promise<string | null> {
}
}

const home = (process.env.HOME ?? process.env.USERPROFILE ?? "").trim();
const home = (source.HOME ?? source.USERPROFILE ?? "").trim();
const cacheCandidate = home
? join(home, ".summarize", "cache", "whisper-cpp", "models", "ggml-base.bin")
: null;
Expand Down