diff --git a/README.md b/README.md index 07f1dc1..573bd2b 100644 --- a/README.md +++ b/README.md @@ -28,12 +28,17 @@ If you need to obtain an OpenAI API key, follow the steps below: - Visit the [OpenAI website](https://platform.openai.com) and sign up for an account. - Visit the [Groq website](https://console.groq.com/) to get an account there. - Visit the [Deepgram website](https://console.deepgram.com/signup) to create an account. + - Visit the [Salad Portal](https://portal.salad.com/) to create an account. + + - Visit [Perplexity](https://www.perplexity.ai/) to create an account. 2. **Access API Keys**: - Log in to your account. - **OpenAI**: Click on the ⚙️ in the top right corner and select "API Keys" from the dropdown menu. - **Groq**: Click "API Keys" on the left sidebar. - **Deepgram**: Click the "Free API key button in the top right. + - **Salad**: Go to "API Access" in the menu. + - **Perplexity**: Click on the profile icon and select "API". 3. **Create a New Key**: - On the API Keys page, click "Create new secret key." 
diff --git a/package-lock.json b/package-lock.json index 0e15e93..051b64d 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "neurovox", - "version": "1.0.3", + "version": "1.0.4", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "neurovox", - "version": "1.0.3", + "version": "1.0.4", "license": "MIT", "dependencies": { "@deepgram/sdk": "^4.2.0", diff --git a/src/adapters/AIAdapter.ts b/src/adapters/AIAdapter.ts index 4797b84..b9d725a 100644 --- a/src/adapters/AIAdapter.ts +++ b/src/adapters/AIAdapter.ts @@ -5,6 +5,8 @@ export enum AIProvider { OpenAI = 'openai', Groq = 'groq', Deepgram = 'deepgram', + Salad = 'salad', + Perplexity = 'perplexity', } export interface AIModel { @@ -41,6 +43,14 @@ export const AIModels: Record = { { id: 'nova-2', name: 'Nova-2', category: 'transcription' }, { id: 'nova-3', name: 'Nova-3', category: 'transcription' }, ], + [AIProvider.Salad]: [ + { id: 'transcribe', name: 'Salad Transcription', category: 'transcription' }, + { id: 'transcription-lite', name: 'Transcription Lite', category: 'transcription' }, + ], + [AIProvider.Perplexity]: [ + { id: 'sonar', name: 'Sonar', category: 'language', maxTokens: 127072 }, + { id: 'sonar-pro', name: 'Sonar Pro', category: 'language', maxTokens: 127072 }, + ], }; export function getModelInfo(modelId: string): AIModel | undefined { diff --git a/src/adapters/PerplexityAdapter.ts b/src/adapters/PerplexityAdapter.ts new file mode 100644 index 0000000..f279367 --- /dev/null +++ b/src/adapters/PerplexityAdapter.ts @@ -0,0 +1,65 @@ +import { AIAdapter, AIProvider } from './AIAdapter'; +import { NeuroVoxSettings } from '../settings/Settings'; + +export class PerplexityAdapter extends AIAdapter { + private apiKey: string = ''; + + constructor(settings: NeuroVoxSettings) { + super(settings, AIProvider.Perplexity); + } + + getApiKey(): string { + return this.apiKey; + } + + protected setApiKeyInternal(key: string): void { + this.apiKey = key; + } + + 
protected getApiBaseUrl(): string { + return 'https://api.perplexity.ai'; + } + + protected getTextGenerationEndpoint(): string { + return '/chat/completions'; + } + + protected getTranscriptionEndpoint(): string { + throw new Error('Transcription not supported by Perplexity'); + } + + protected async validateApiKeyImpl(): Promise { + if (!this.apiKey) { + return false; + } + + try { + await this.makeAPIRequest( + `${this.getApiBaseUrl()}/chat/completions`, + 'POST', + { + 'Content-Type': 'application/json' + }, + JSON.stringify({ + model: 'sonar', + messages: [{ role: 'user', content: 'test' }], + max_tokens: 1 + }) + ); + return true; + } catch (error) { + return false; + } + } + + protected parseTextGenerationResponse(response: any): string { + if (response?.choices?.[0]?.message?.content) { + return response.choices[0].message.content; + } + throw new Error('Invalid response format from Perplexity'); + } + + protected parseTranscriptionResponse(response: any): string { + throw new Error('Transcription not supported by Perplexity'); + } +} diff --git a/src/adapters/SaladAdapter.ts b/src/adapters/SaladAdapter.ts new file mode 100644 index 0000000..8eed3df --- /dev/null +++ b/src/adapters/SaladAdapter.ts @@ -0,0 +1,246 @@ +import { requestUrl } from 'obsidian'; +import { AIAdapter, AIProvider } from './AIAdapter'; +import { NeuroVoxSettings } from '../settings/Settings'; + +export class SaladAdapter extends AIAdapter { + private apiKey: string = ''; + private organization: string = ''; + + constructor(settings: NeuroVoxSettings) { + super(settings, AIProvider.Salad); + } + + getApiKey(): string { + return this.apiKey; + } + + getOrganization(): string { + return this.organization; + } + + setOrganization(org: string): void { + this.organization = org; + } + + protected setApiKeyInternal(key: string): void { + this.apiKey = key; + } + + protected getApiBaseUrl(): string { + return 'https://api.salad.com/api/public'; + } + + protected getStorageBaseUrl(): string { + 
return 'https://storage-api.salad.com'; + } + + protected getTextGenerationEndpoint(): string { + return ''; + } + + protected getTranscriptionEndpoint(): string { + return `/organizations/${this.organization}/inference-endpoints`; + } + + protected async validateApiKeyImpl(): Promise { + if (!this.apiKey || !this.organization) { + return false; + } + + try { + const response = await this.makeAPIRequest( + `${this.getStorageBaseUrl()}/organizations/${this.organization}/files`, + 'GET', + {}, + null + ); + return response && Array.isArray(response.files); + } catch (error) { + return false; + } + } + + protected parseTextGenerationResponse(response: any): string { + throw new Error('Text generation not supported by Salad'); + } + + protected parseTranscriptionResponse(response: any): string { + if (response?.output?.text) { + return response.output.text; + } + throw new Error('Invalid transcription response format from Salad'); + } + + public async transcribeAudio(audioArrayBuffer: ArrayBuffer, model: string): Promise { + try { + if (!this.organization) { + throw new Error('Salad organization name is not configured'); + } + + const audioUrl = await this.uploadToS4Storage(audioArrayBuffer); + + const jobId = await this.submitTranscriptionJob(audioUrl, model); + + const result = await this.pollForResult(jobId, model); + + await this.deleteFromS4Storage(audioUrl); + + return this.parseTranscriptionResponse(result); + } catch (error) { + const message = this.getErrorMessage(error); + throw new Error(`Failed to transcribe audio with Salad: ${message}`); + } + } + + private async uploadToS4Storage(audioArrayBuffer: ArrayBuffer): Promise { + const fileName = `audio/neurovox_${Date.now()}.wav`; + const uploadUrl = `${this.getStorageBaseUrl()}/organizations/${this.organization}/files/${fileName}`; + + const boundary = 'saladuploadboundary'; + const encoder = new TextEncoder(); + + const parts: Uint8Array[] = []; + + parts.push(encoder.encode(`--${boundary}\r\n`)); + 
parts.push(encoder.encode('Content-Disposition: form-data; name="mimeType"\r\n\r\n')); + parts.push(encoder.encode('audio/wav')); + parts.push(encoder.encode('\r\n')); + + parts.push(encoder.encode(`--${boundary}\r\n`)); + parts.push(encoder.encode('Content-Disposition: form-data; name="sign"\r\n\r\n')); + parts.push(encoder.encode('true')); + parts.push(encoder.encode('\r\n')); + + parts.push(encoder.encode(`--${boundary}\r\n`)); + parts.push(encoder.encode('Content-Disposition: form-data; name="signatureExp"\r\n\r\n')); + parts.push(encoder.encode('86400')); + parts.push(encoder.encode('\r\n')); + + parts.push(encoder.encode(`--${boundary}\r\n`)); + parts.push(encoder.encode(`Content-Disposition: form-data; name="file"; filename="audio.wav"\r\n`)); + parts.push(encoder.encode('Content-Type: audio/wav\r\n\r\n')); + parts.push(new Uint8Array(audioArrayBuffer)); + parts.push(encoder.encode('\r\n')); + + parts.push(encoder.encode(`--${boundary}--\r\n`)); + + const totalLength = parts.reduce((acc, part) => acc + part.length, 0); + const finalBuffer = new Uint8Array(totalLength); + let offset = 0; + + for (const part of parts) { + finalBuffer.set(part, offset); + offset += part.length; + } + + const response = await requestUrl({ + url: uploadUrl, + method: 'PUT', + headers: { + 'Salad-Api-Key': this.apiKey, + 'Content-Type': `multipart/form-data; boundary=${boundary}` + }, + body: finalBuffer.buffer, + throw: true + }); + + if (!response.json?.url) { + throw new Error('Failed to get signed URL from S4 storage'); + } + + return response.json.url; + } + + private async submitTranscriptionJob(audioUrl: string, model: string): Promise { + const endpoint = `${this.getApiBaseUrl()}/organizations/${this.organization}/inference-endpoints/${model}/jobs`; + + const body = { + input: { + url: audioUrl, + language_code: 'auto', + return_as_file: false, + sentence_level_timestamps: false, + word_level_timestamps: false, + diarization: false, + srt: false + } + }; + + const response 
= await this.makeAPIRequest( + endpoint, + 'POST', + { 'Content-Type': 'application/json' }, + JSON.stringify(body) + ); + + if (!response?.id) { + throw new Error('Failed to submit transcription job'); + } + + return response.id; + } + + private async pollForResult(jobId: string, model: string, maxAttempts: number = 120, intervalMs: number = 2000): Promise { + const endpoint = `${this.getApiBaseUrl()}/organizations/${this.organization}/inference-endpoints/${model}/jobs/${jobId}`; + + for (let attempt = 0; attempt < maxAttempts; attempt++) { + const response = await this.makeAPIRequest(endpoint, 'GET', {}, null); + + if (response?.status === 'succeeded') { + return response; + } else if (response?.status === 'failed') { + throw new Error(`Transcription job failed: ${response?.error || 'Unknown error'}`); + } + + await new Promise(resolve => setTimeout(resolve, intervalMs)); + } + + throw new Error('Transcription job timed out'); + } + + private async deleteFromS4Storage(signedUrl: string): Promise { + try { + const urlWithoutToken = signedUrl.split('?')[0]; + + await requestUrl({ + url: urlWithoutToken, + method: 'DELETE', + headers: { + 'Salad-Api-Key': this.apiKey + }, + throw: false + }); + } catch (error) { + } + } + + protected async makeAPIRequest( + endpoint: string, + method: string, + headers: Record, + body: string | ArrayBuffer | null + ): Promise { + try { + const requestHeaders: Record = { + 'Salad-Api-Key': this.apiKey, + ...headers + }; + + const response = await requestUrl({ + url: endpoint, + method, + headers: requestHeaders, + body: body || undefined, + throw: true + }); + + if (!response.json) { + throw new Error('Invalid response format'); + } + + return response.json; + } catch (error: any) { + throw error; + } + } +} diff --git a/src/main.ts b/src/main.ts index 0b4077d..412224f 100644 --- a/src/main.ts +++ b/src/main.ts @@ -20,6 +20,8 @@ import { TimerModal } from './modals/TimerModal'; import { OpenAIAdapter } from 
'./adapters/OpenAIAdapter'; import { GroqAdapter } from './adapters/GroqAdapter'; import { DeepgramAdapter } from './adapters/DeepgramAdapter'; +import { SaladAdapter } from './adapters/SaladAdapter'; +import { PerplexityAdapter } from './adapters/PerplexityAdapter'; import { AIProvider, AIAdapter } from './adapters/AIAdapter'; import { PluginData } from './types'; import { RecordingProcessor } from './utils/RecordingProcessor'; @@ -189,6 +191,10 @@ export default class NeuroVoxPlugin extends Plugin { public async saveSettings(): Promise { try { await this.saveData(this.settings); + + // Validate API keys after settings change to ensure adapters are ready + await this.validateApiKeys(); + this.initializeUI(); // Trigger the floating button setting changed event to ensure UI is in sync @@ -219,6 +225,21 @@ export default class NeuroVoxPlugin extends Plugin { await deepgramAdapter.validateApiKey(); } + const saladAdapter = this.aiAdapters.get(AIProvider.Salad); + if (saladAdapter) { + saladAdapter.setApiKey(this.settings.saladApiKey); + if ('setOrganization' in saladAdapter) { + (saladAdapter as SaladAdapter).setOrganization(this.settings.saladOrganization); + } + await saladAdapter.validateApiKey(); + } + + const perplexityAdapter = this.aiAdapters.get(AIProvider.Perplexity); + if (perplexityAdapter) { + perplexityAdapter.setApiKey(this.settings.perplexityApiKey); + await perplexityAdapter.validateApiKey(); + } + // Only show notice if validation fails if (openaiAdapter && !openaiAdapter.isReady() && this.settings.openaiApiKey) { new Notice('❌ OpenAI API key validation failed'); @@ -229,15 +250,26 @@ export default class NeuroVoxPlugin extends Plugin { if (deepgramAdapter && !deepgramAdapter.isReady() && this.settings.deepgramApiKey) { new Notice('❌ Deepgram API key validation failed'); } + if (saladAdapter && !saladAdapter.isReady() && this.settings.saladApiKey) { + new Notice('❌ Salad API key validation failed'); + } + if (perplexityAdapter && 
!perplexityAdapter.isReady() && this.settings.perplexityApiKey) { + new Notice('❌ Perplexity API key validation failed'); + } } catch (error) { // Silent fail for API key validation } }public initializeAIAdapters(): void { try { + const saladAdapter = new SaladAdapter(this.settings); + saladAdapter.setOrganization(this.settings.saladOrganization); + const adapters: Array<[AIProvider, AIAdapter]> = [ [AIProvider.OpenAI, new OpenAIAdapter(this.settings)], [AIProvider.Groq, new GroqAdapter(this.settings)], - [AIProvider.Deepgram, new DeepgramAdapter(this.settings)] + [AIProvider.Deepgram, new DeepgramAdapter(this.settings)], + [AIProvider.Salad, saladAdapter], + [AIProvider.Perplexity, new PerplexityAdapter(this.settings)] ]; this.aiAdapters = new Map(adapters); @@ -509,14 +541,23 @@ export default class NeuroVoxPlugin extends Plugin { if (this.modalInstance) return; this.modalInstance = new TimerModal(this); - this.modalInstance.onStop = async (result: Blob | string) => { + this.modalInstance.onStop = async (result: Blob | string, audioBlob?: Blob) => { try { if (typeof result === 'string') { // Streaming mode - transcription already done + // If we have audio blob, save it first + let audioFilePath: string | undefined; + if (audioBlob) { + const AudioFileManager = (await import('./utils/audio/AudioFileManager')).AudioFileManager; + const audioFileManager = new AudioFileManager(this); + audioFilePath = await audioFileManager.saveAudioFile(audioBlob); + } + await this.recordingProcessor.processStreamingResult( result, activeFile, - activeView.editor.getCursor() + activeView.editor.getCursor(), + audioFilePath ); } else { // Legacy mode - need to transcribe diff --git a/src/modals/TimerModal.ts b/src/modals/TimerModal.ts index 7f659bc..ce21fbb 100644 --- a/src/modals/TimerModal.ts +++ b/src/modals/TimerModal.ts @@ -32,7 +32,7 @@ export class TimerModal extends Modal { private readonly CONFIG: TimerConfig; - public onStop: (result: Blob | string) => void; + public 
onStop: (result: Blob | string, audioBlob?: Blob) => void; constructor(private plugin: NeuroVoxPlugin) { super(plugin.app); @@ -300,31 +300,64 @@ export class TimerModal extends Modal { try { const finalBlob = await this.recordingManager.stop(); - let result: Blob | string; - if (this.useStreaming && this.streamingService) { - // Streaming mode - get transcription result + // Streaming mode - keep modal open and show processing status + const audioBlob = finalBlob || undefined; + + // Update UI to show processing state + this.ui?.updateState('stopped'); + this.ui?.updateTimer('Processing...'); + this.ui?.hideDebugInfo(); + + // Get stats before processing + const stats = this.streamingService.getStats(); + + // Show notice with chunk status if debug mode is enabled + if (this.plugin.settings.debugMode) { + new Notice(`Processing ${stats.queueStats.queueSize} remaining chunks...`); + } + + // Continue processing with modal still open new Notice('Finishing transcription...'); - result = await this.streamingService.finishProcessing(); - if (!result || result.trim().length === 0) { - throw new Error('No transcription result received'); + try { + const result = await this.streamingService.finishProcessing(); + + if (!result || result.trim().length === 0) { + throw new Error('No transcription result received'); + } + + // Now close modal after successful processing + this.cleanup(); + super.close(); + + // Call onStop handler with results + if (this.onStop) { + await this.onStop(result, audioBlob); + } + } catch (processingError) { + // Close modal on error too + this.cleanup(); + super.close(); + + const errorMsg = processingError instanceof Error ? 
processingError.message : 'Unknown error'; + new Notice(`❌ Transcription failed: ${errorMsg}`); + throw processingError; } } else { // Legacy mode - return audio blob if (!finalBlob) { throw new Error('No audio data received from recorder'); } - result = finalBlob; - } - - // Close recording modal first - this.cleanup(); - super.close(); + + // Close recording modal first + this.cleanup(); + super.close(); - // Always save the recording - if (this.onStop) { - await this.onStop(result); + // Always save the recording + if (this.onStop) { + await this.onStop(finalBlob); + } } } catch (error) { const errorMessage = error instanceof Error ? error.message : 'Unknown error'; @@ -354,11 +387,20 @@ export class TimerModal extends Modal { * Updates the timer display */ private updateTimerDisplay(): void { + // Update timer this.ui.updateTimer( this.seconds, this.CONFIG.maxDuration, this.CONFIG.warningThreshold ); + + // In debug mode with streaming, show chunk queue status on separate line + if (this.plugin.settings.debugMode && this.useStreaming && this.streamingService) { + const stats = this.streamingService.getStats(); + this.ui.updateDebugInfo(stats.queueStats.queueSize, stats.processedChunks); + } else { + this.ui.hideDebugInfo(); + } } /** diff --git a/src/settings/Settings.ts b/src/settings/Settings.ts index 18a1282..3544d1a 100644 --- a/src/settings/Settings.ts +++ b/src/settings/Settings.ts @@ -13,6 +13,9 @@ export type NeuroVoxSettings = { openaiApiKey: string; groqApiKey: string; deepgramApiKey: string; + saladApiKey: string; + saladOrganization: string; + perplexityApiKey: string; // Recording audioQuality: AudioQuality; @@ -38,6 +41,9 @@ export type NeuroVoxSettings = { postProcessingTemperature: number; postProcessingCalloutFormat: string; + // Debug + debugMode: boolean; + // Current Provider currentProvider: AIProvider; @@ -54,6 +60,9 @@ export const DEFAULT_SETTINGS: NeuroVoxSettings = { openaiApiKey: '', groqApiKey: '', deepgramApiKey: '', + saladApiKey: 
'', + saladOrganization: '', + perplexityApiKey: '', // Recording audioQuality: AudioQuality.Medium, @@ -78,6 +87,9 @@ export const DEFAULT_SETTINGS: NeuroVoxSettings = { postProcessingProvider: AIProvider.OpenAI, postProcessingTemperature: 0.7, postProcessingCalloutFormat: '>[!note]- Post-Processing\n>{postProcessing}', + + // Debug + debugMode: false, // Current Provider currentProvider: AIProvider.OpenAI, diff --git a/src/settings/accordions/ModelHookupAccordion.ts b/src/settings/accordions/ModelHookupAccordion.ts index ab4d403..946f598 100644 --- a/src/settings/accordions/ModelHookupAccordion.ts +++ b/src/settings/accordions/ModelHookupAccordion.ts @@ -139,5 +139,101 @@ export class ModelHookupAccordion extends BaseAccordion { } }); }); + + // Salad Organization Name + new Setting(this.contentEl) + .setName("Salad Organization") + .setDesc("Enter your SaladCloud organization name") + .addText(text => { + text + .setPlaceholder("my-organization") + .setValue(this.settings.saladOrganization); + text.onChange(async (value: string) => { + const trimmedValue = value.trim(); + this.settings.saladOrganization = trimmedValue; + await this.plugin.saveSettings(); + + const adapter = this.getAdapter(AIProvider.Salad); + if (adapter && 'setOrganization' in adapter) { + (adapter as any).setOrganization(trimmedValue); + } + }); + }); + + // Salad API Key + const saladSetting = new Setting(this.contentEl) + .setName("Salad API Key") + .setDesc("Enter your SaladCloud API key") + .addText(text => { + text + .setPlaceholder("Enter your Salad API key...") + .setValue(this.settings.saladApiKey); + text.inputEl.type = "password"; + text.onChange(async (value: string) => { + const trimmedValue = value.trim(); + this.settings.saladApiKey = trimmedValue; + await this.plugin.saveSettings(); + + const adapter = this.getAdapter(AIProvider.Salad); + if (!adapter) { + return; + } + + adapter.setApiKey(trimmedValue); + + // Also set organization if available + if ('setOrganization' in 
adapter) { + (adapter as any).setOrganization(this.settings.saladOrganization); + } + + const isValid = await adapter.validateApiKey(); + + if (isValid) { + saladSetting.setDesc("✅ API key validated successfully"); + try { + await this.refreshAccordions(); + } catch (error) { + saladSetting.setDesc("✅ API key valid, but failed to update model lists"); + } + } else { + saladSetting.setDesc("❌ Invalid API key or organization. Please check your credentials."); + } + }); + }); + + // Perplexity API Key + const perplexitySetting = new Setting(this.contentEl) + .setName("Perplexity API Key") + .setDesc("Enter your Perplexity API key") + .addText(text => { + text + .setPlaceholder("pplx-...") + .setValue(this.settings.perplexityApiKey); + text.inputEl.type = "password"; + text.onChange(async (value: string) => { + const trimmedValue = value.trim(); + this.settings.perplexityApiKey = trimmedValue; + await this.plugin.saveSettings(); + + const adapter = this.getAdapter(AIProvider.Perplexity); + if (!adapter) { + return; + } + + adapter.setApiKey(trimmedValue); + const isValid = await adapter.validateApiKey(); + + if (isValid) { + perplexitySetting.setDesc("✅ API key validated successfully"); + try { + await this.refreshAccordions(); + } catch (error) { + perplexitySetting.setDesc("✅ API key valid, but failed to update model lists"); + } + } else { + perplexitySetting.setDesc("❌ Invalid API key. 
Please check your credentials."); + } + }); + }); } } diff --git a/src/settings/accordions/PostProcessingAccordion.ts b/src/settings/accordions/PostProcessingAccordion.ts index 5f36c04..539d192 100644 --- a/src/settings/accordions/PostProcessingAccordion.ts +++ b/src/settings/accordions/PostProcessingAccordion.ts @@ -95,7 +95,7 @@ export class PostProcessingAccordion extends BaseAccordion { dropdown.selectEl.empty(); let hasValidProvider = false; - for (const provider of [AIProvider.OpenAI, AIProvider.Groq]) { + for (const provider of [AIProvider.OpenAI, AIProvider.Groq, AIProvider.Perplexity]) { const apiKey = this.settings[`${provider}ApiKey` as keyof NeuroVoxSettings]; if (apiKey) { const models = AIModels[provider].filter(model => model.category === 'language'); diff --git a/src/settings/accordions/RecordingAccordion.ts b/src/settings/accordions/RecordingAccordion.ts index 0eded05..6e9d412 100644 --- a/src/settings/accordions/RecordingAccordion.ts +++ b/src/settings/accordions/RecordingAccordion.ts @@ -38,6 +38,9 @@ export class RecordingAccordion extends BaseAccordion { // Mic Button Color this.createMicButtonColorSetting(); + // Debug Mode Toggle + this.createDebugModeSetting(); + // Add this before createTranscriptionModelSetting this.createTranscriptionFormatSetting(); @@ -164,6 +167,20 @@ export class RecordingAccordion extends BaseAccordion { }); } + private createDebugModeSetting(): void { + new Setting(this.contentEl) + .setName("Debug mode") + .setDesc("Enable detailed logging of operations (chunks, API calls, file operations, timing). 
Debug info will be added to transcription notes.") + .addToggle(toggle => { + toggle + .setValue(this.settings.debugMode) + .onChange(async (value) => { + this.settings.debugMode = value; + await this.plugin.saveSettings(); + }); + }); + } + public createTranscriptionFormatSetting(): void { new Setting(this.contentEl) .setName("Transcription format") @@ -195,9 +212,17 @@ export class RecordingAccordion extends BaseAccordion { dropdown.onChange(async (value) => { this.settings.transcriptionModel = value; const provider = this.getProviderFromModel(value); + console.log('NeuroVox Debug - Model changed:', { + model: value, + detectedProvider: provider + }); if (provider) { this.settings.transcriptionProvider = provider; await this.plugin.saveSettings(); + console.log('NeuroVox Debug - Settings saved:', { + transcriptionProvider: this.settings.transcriptionProvider, + transcriptionModel: this.settings.transcriptionModel + }); } }); }); @@ -205,7 +230,7 @@ export class RecordingAccordion extends BaseAccordion { dropdown.selectEl.empty(); let hasValidProvider = false; - for (const provider of [AIProvider.OpenAI, AIProvider.Groq, AIProvider.Deepgram]) { + for (const provider of [AIProvider.OpenAI, AIProvider.Groq, AIProvider.Deepgram, AIProvider.Salad]) { const apiKey = this.settings[`${provider}ApiKey` as keyof NeuroVoxSettings]; if (apiKey) { const adapter = this.getAdapter(provider); diff --git a/src/ui/RecordingUI.ts b/src/ui/RecordingUI.ts index cec669a..d6a3782 100644 --- a/src/ui/RecordingUI.ts +++ b/src/ui/RecordingUI.ts @@ -14,6 +14,7 @@ export interface RecordingUIHandlers { */ export class RecordingUI { private timerText: HTMLElement; + private debugText: HTMLElement | null = null; private pauseButton: TouchableButton; private stopButton: TouchableButton; private waveContainer: HTMLElement; @@ -70,6 +71,13 @@ export class RecordingUI { cls: 'neurovox-timer-display', text: '00:00' }); + + // Create debug text element (hidden by default) + this.debugText = 
this.container.createDiv({ + cls: 'neurovox-debug-display', + text: '' + }); + this.debugText.style.display = 'none'; } private createControls(): void { @@ -110,14 +118,35 @@ export class RecordingUI { } } - public updateTimer(seconds: number, maxDuration: number, warningThreshold: number): void { + public updateTimer(seconds: number | string, maxDuration?: number, warningThreshold?: number): void { + if (typeof seconds === 'string') { + // Allow custom text (e.g., "Processing...") + this.timerText.setText(seconds); + return; + } + const minutes = Math.floor(seconds / 60).toString().padStart(2, '0'); const remainingSeconds = (seconds % 60).toString().padStart(2, '0'); this.timerText.setText(`${minutes}:${remainingSeconds}`); - const timeLeft = maxDuration - seconds; - this.timerText.toggleClass('is-warning', timeLeft <= warningThreshold); + if (maxDuration !== undefined && warningThreshold !== undefined) { + const timeLeft = maxDuration - seconds; + this.timerText.toggleClass('is-warning', timeLeft <= warningThreshold); + } + } + + public updateDebugInfo(queueSize: number, processedCount: number): void { + if (this.debugText) { + this.debugText.setText(`Queue: ${queueSize} | Processed: ${processedCount}`); + this.debugText.style.display = 'block'; + } + } + + public hideDebugInfo(): void { + if (this.debugText) { + this.debugText.style.display = 'none'; + } } public updateState(state: RecordingState): void { diff --git a/src/ui/ToolbarButton.ts b/src/ui/ToolbarButton.ts index 3811ecb..ea6361c 100644 --- a/src/ui/ToolbarButton.ts +++ b/src/ui/ToolbarButton.ts @@ -47,11 +47,19 @@ export class ToolbarButton { const cursorPosition = editor.getCursor(); const modal = new TimerModal(this.plugin); - modal.onStop = async (result: Blob | string) => { + modal.onStop = async (result: Blob | string, audioBlob?: Blob) => { // Handle both streaming (string) and legacy (Blob) results if (typeof result === 'string') { // Streaming mode - transcription already done - await 
this.plugin.recordingProcessor.processStreamingResult(result, activeFile, cursorPosition); + // If we have audio blob, save it first + let audioFilePath: string | undefined; + if (audioBlob) { + const { AudioFileManager } = await import('../utils/audio/AudioFileManager'); + const audioFileManager = new AudioFileManager(this.plugin); + audioFilePath = await audioFileManager.saveAudioFile(audioBlob); + } + + await this.plugin.recordingProcessor.processStreamingResult(result, activeFile, cursorPosition, audioFilePath); } else { // Legacy mode - need to transcribe await this.plugin.recordingProcessor.processRecording(result, activeFile, cursorPosition); diff --git a/src/utils/DebugLogger.ts b/src/utils/DebugLogger.ts new file mode 100644 index 0000000..dd6f309 --- /dev/null +++ b/src/utils/DebugLogger.ts @@ -0,0 +1,133 @@ +import NeuroVoxPlugin from '../main'; + +export interface DebugLogEntry { + timestamp: number; + category: 'audio' | 'api' | 'file' | 'chunk' | 'general'; + operation: string; + details: any; + duration?: number; +} + +export class DebugLogger { + private logs: DebugLogEntry[] = []; + private operationTimers: Map = new Map(); + + constructor(private plugin: NeuroVoxPlugin) {} + + isEnabled(): boolean { + return this.plugin.settings.debugMode; + } + + log(category: DebugLogEntry['category'], operation: string, details: any): void { + if (!this.isEnabled()) return; + + const entry: DebugLogEntry = { + timestamp: Date.now(), + category, + operation, + details + }; + + this.logs.push(entry); + console.log(`[NeuroVox Debug] ${category.toUpperCase()} - ${operation}:`, details); + } + + startTimer(operationId: string): void { + if (!this.isEnabled()) return; + this.operationTimers.set(operationId, Date.now()); + } + + endTimer(operationId: string, category: DebugLogEntry['category'], operation: string, details: any = {}): void { + if (!this.isEnabled()) return; + + const startTime = this.operationTimers.get(operationId); + if (startTime) { + const duration 
= Date.now() - startTime; + this.operationTimers.delete(operationId); + + const entry: DebugLogEntry = { + timestamp: Date.now(), + category, + operation, + details, + duration + }; + + this.logs.push(entry); + console.log(`[NeuroVox Debug] ${category.toUpperCase()} - ${operation} (${duration}ms):`, details); + } + } + + getLogs(): DebugLogEntry[] { + return [...this.logs]; + } + + getFormattedLogs(): string { + if (this.logs.length === 0) { + return 'No debug logs available.'; + } + + const lines: string[] = ['## 🐛 Debug Log\n']; + + // Group by category + const categories = ['audio', 'chunk', 'api', 'file', 'general'] as const; + + for (const category of categories) { + const categoryLogs = this.logs.filter(log => log.category === category); + if (categoryLogs.length === 0) continue; + + lines.push(`### ${this.getCategoryIcon(category)} ${category.toUpperCase()}\n`); + + for (const log of categoryLogs) { + const time = new Date(log.timestamp).toISOString().split('T')[1].split('.')[0]; + const durationStr = log.duration ? 
` (${log.duration}ms)` : ''; + lines.push(`- **${time}** - ${log.operation}${durationStr}`); + + // Format details + if (log.details && Object.keys(log.details).length > 0) { + const detailsStr = Object.entries(log.details) + .map(([key, value]) => { + if (typeof value === 'object') { + return ` - ${key}: ${JSON.stringify(value, null, 2)}`; + } + return ` - ${key}: ${value}`; + }) + .join('\n'); + lines.push(detailsStr); + } + } + lines.push(''); + } + + // Add summary + lines.push('### 📊 Summary\n'); + const totalDuration = this.logs + .filter(log => log.duration) + .reduce((sum, log) => sum + (log.duration || 0), 0); + + lines.push(`- Total operations: ${this.logs.length}`); + lines.push(`- Total time: ${totalDuration}ms (${(totalDuration / 1000).toFixed(2)}s)`); + lines.push(`- Audio operations: ${this.logs.filter(l => l.category === 'audio').length}`); + lines.push(`- API calls: ${this.logs.filter(l => l.category === 'api').length}`); + lines.push(`- File operations: ${this.logs.filter(l => l.category === 'file').length}`); + lines.push(`- Chunks processed: ${this.logs.filter(l => l.category === 'chunk').length}`); + + return lines.join('\n'); + } + + private getCategoryIcon(category: DebugLogEntry['category']): string { + const icons = { + audio: '🎵', + api: '🌐', + file: '📁', + chunk: '🧩', + general: '📝' + }; + return icons[category] || '📝'; + } + + clear(): void { + this.logs = []; + this.operationTimers.clear(); + } +} diff --git a/src/utils/DeviceDetection.ts b/src/utils/DeviceDetection.ts index 85bad17..e4e92ea 100644 --- a/src/utils/DeviceDetection.ts +++ b/src/utils/DeviceDetection.ts @@ -64,19 +64,19 @@ export class DeviceDetection { if (isMobile || availableMemory < 1024 * 1024 * 1024) { // Mobile or < 1GB return { - chunkDuration: 5, // 5 second chunks - maxQueueSize: 3, // Max 3 chunks in memory + chunkDuration: 10, // 10 second chunks + maxQueueSize: 20, // Max 20 chunks in memory bitrate: 16000, // 16kbps processingMode: 'streaming', - memoryLimit: 
100 // 100MB limit + memoryLimit: 950 // 250MB limit }; } else { return { chunkDuration: 10, // 10 second chunks - maxQueueSize: 5, // Max 5 chunks in memory + maxQueueSize: 20, // Max 10 chunks in memory bitrate: 48000, // 48kbps processingMode: 'streaming', - memoryLimit: 300 // 300MB limit + memoryLimit: 1400 // 400MB limit }; } } diff --git a/src/utils/RecordingProcessor.ts b/src/utils/RecordingProcessor.ts index f8b4711..e582639 100644 --- a/src/utils/RecordingProcessor.ts +++ b/src/utils/RecordingProcessor.ts @@ -4,6 +4,7 @@ import { AudioProcessor } from './audio/AudioProcessor'; import { TranscriptionService, TranscriptionResult } from './transcription/TranscriptionService'; import { DocumentInserter } from './document/DocumentInserter'; import { ProcessingState } from './state/ProcessingState'; +import { DebugLogger } from './DebugLogger'; /** * Configuration for the processing pipeline @@ -31,12 +32,15 @@ export class RecordingProcessor { maxRetries: 3, retryDelay: 1000 }; + + private debugLogger: DebugLogger; private constructor(private plugin: NeuroVoxPlugin) { this.processingState = new ProcessingState(); this.audioProcessor = new AudioProcessor(plugin); this.transcriptionService = new TranscriptionService(plugin); this.documentInserter = new DocumentInserter(plugin); + this.debugLogger = new DebugLogger(plugin); } public static getInstance(plugin: NeuroVoxPlugin): RecordingProcessor { @@ -87,7 +91,11 @@ export class RecordingProcessor { { transcription: result.transcription, postProcessing: result.postProcessing, - audioFilePath: audioResult.finalPath + audioFilePath: audioResult.finalPath, + transcriptionProvider: this.plugin.settings.transcriptionProvider, + transcriptionModel: this.plugin.settings.transcriptionModel, + postProcessingProvider: this.plugin.settings.postProcessingProvider, + postProcessingModel: this.plugin.settings.postProcessingModel }, activeFile, cursorPosition @@ -109,7 +117,8 @@ export class RecordingProcessor { public async 
processStreamingResult( transcriptionResult: string, activeFile: TFile, - cursorPosition: EditorPosition + cursorPosition: EditorPosition, + audioFilePath?: string ): Promise { if (this.processingState.getIsProcessing()) { throw new Error('Recording is already in progress.'); @@ -118,6 +127,12 @@ export class RecordingProcessor { try { this.processingState.setIsProcessing(true); this.processingState.reset(); + this.debugLogger.clear(); + + this.debugLogger.log('general', 'Streaming result processing started', { + transcriptionLength: transcriptionResult.length, + hasAudioFile: !!audioFilePath + }); // Skip audio processing since we already have the transcription this.processingState.startStep('Content Processing'); @@ -126,9 +141,15 @@ export class RecordingProcessor { let postProcessing: string | undefined; if (this.plugin.settings.generatePostProcessing) { this.processingState.startStep('Post-processing'); + this.debugLogger.startTimer('post-processing'); postProcessing = await this.executeWithRetry(() => this.generatePostProcessing(transcriptionResult) ); + this.debugLogger.endTimer('post-processing', 'api', 'Post-processing generation', { + provider: this.plugin.settings.postProcessingProvider, + model: this.plugin.settings.postProcessingModel, + resultLength: postProcessing?.length || 0 + }); this.processingState.completeStep(); } @@ -137,8 +158,13 @@ export class RecordingProcessor { await this.documentInserter.insertContent( { transcription: transcriptionResult, - postProcessing - // No audioFilePath for streaming mode + postProcessing, + audioFilePath, + transcriptionProvider: this.plugin.settings.transcriptionProvider, + transcriptionModel: this.plugin.settings.transcriptionModel, + postProcessingProvider: this.plugin.settings.postProcessingProvider, + postProcessingModel: this.plugin.settings.postProcessingModel, + debugLogs: this.plugin.settings.debugMode ? 
this.debugLogger.getFormattedLogs() : undefined }, activeFile, cursorPosition @@ -146,6 +172,9 @@ export class RecordingProcessor { this.processingState.completeStep(); } catch (error) { + this.debugLogger.log('general', 'Processing error', { + error: error instanceof Error ? error.message : 'Unknown error' + }); this.handleError('Processing failed', error); this.processingState.setError(error as Error); throw error; diff --git a/src/utils/document/DocumentInserter.ts b/src/utils/document/DocumentInserter.ts index 1101876..f6d81e6 100644 --- a/src/utils/document/DocumentInserter.ts +++ b/src/utils/document/DocumentInserter.ts @@ -8,6 +8,11 @@ export interface InsertContent { transcription: string; postProcessing?: string; audioFilePath?: string; + transcriptionProvider?: string; + transcriptionModel?: string; + postProcessingProvider?: string; + postProcessingModel?: string; + debugLogs?: string; } /** @@ -74,10 +79,15 @@ export class DocumentInserter { .replace('{audioPath}', ''); // Fallback for any other format } + // Add model info to transcription + const transcriptionWithModelInfo = content.transcriptionProvider && content.transcriptionModel + ? `*Transcribed with ${content.transcriptionProvider} (${content.transcriptionModel})*\n\n${content.transcription}` + : content.transcription; + // Format transcription content let formattedContent = format .replace('{audioPath}', content.audioFilePath || '') - .replace('{transcription}', content.transcription); + .replace('{transcription}', transcriptionWithModelInfo); // Only use callout formatting if the format includes callout syntax const useTranscriptionCallout = this.isCalloutFormat(format); @@ -88,13 +98,23 @@ export class DocumentInserter { const postFormat = this.plugin.settings.postProcessingCalloutFormat; const usePostCallout = this.isCalloutFormat(postFormat); + // Add model info to post-processing + const postProcessingWithModelInfo = content.postProcessingProvider && content.postProcessingModel + ? 
`*Generated with ${content.postProcessingProvider} (${content.postProcessingModel})*\n\n${content.postProcessing}` + : content.postProcessing; + let postContent = postFormat - .replace('{postProcessing}', content.postProcessing); + .replace('{postProcessing}', postProcessingWithModelInfo); postContent = this.formatLines(postContent, usePostCallout); formattedContent += '\n---\n' + postContent + '\n\n'; } + // Add debug logs if available + if (content.debugLogs) { + formattedContent += '\n---\n' + content.debugLogs + '\n\n'; + } + return formattedContent + '\n'; } diff --git a/src/utils/transcription/StreamingTranscriptionService.ts b/src/utils/transcription/StreamingTranscriptionService.ts index 4d7bb87..cfacbda 100644 --- a/src/utils/transcription/StreamingTranscriptionService.ts +++ b/src/utils/transcription/StreamingTranscriptionService.ts @@ -114,37 +114,53 @@ export class StreamingTranscriptionService { } async finishProcessing(): Promise { - // Stop accepting new chunks - this.isProcessing = false; + const initialQueueSize = this.chunkQueue.size(); + + if (this.plugin.settings.debugMode) { + console.log(`[NeuroVox Debug] Starting finishProcessing with ${initialQueueSize} chunks in queue, ${this.processedChunks.size} already processed`); + } - // Wait for queue to be processed + // Keep processing running until queue is empty let attempts = 0; - const maxAttempts = 300; // 30 seconds timeout + const maxAttempts = 600; // 60 seconds timeout while (this.chunkQueue.size() > 0 && attempts < maxAttempts) { + if (this.plugin.settings.debugMode && attempts % 10 === 0) { + console.log(`[NeuroVox Debug] Waiting for chunks: ${this.chunkQueue.size()} remaining, ${this.processedChunks.size} processed`); + } await this.sleep(100); attempts++; } - // Abort if still processing after timeout - if (this.abortController) { - this.abortController.abort(); + // Now stop accepting new chunks and stop processing + this.isProcessing = false; + + if (this.plugin.settings.debugMode) { 
+ console.log(`[NeuroVox Debug] Queue processing complete. Processed: ${this.processedChunks.size}, Remaining: ${this.chunkQueue.size()}`); } - // Wait for processing to complete + // Wait for processing loop to complete if (this.processingPromise) { try { await this.processingPromise; } catch (error) { - // Silent fail + if (this.plugin.settings.debugMode) { + console.error('[NeuroVox Debug] Error in processing promise:', error); + } } } // Get final result - return this.resultCompiler.getFinalResult( + const result = this.resultCompiler.getFinalResult( this.plugin.settings.includeTimestamps || false, true // Include metadata ); + + if (this.plugin.settings.debugMode) { + console.log(`[NeuroVox Debug] Final result length: ${result.length} characters, segments: ${this.resultCompiler.getSegmentCount()}`); + } + + return result; } getPartialResult(): string { diff --git a/src/utils/transcription/TranscriptionService.ts b/src/utils/transcription/TranscriptionService.ts index ff68a53..3a56dae 100644 --- a/src/utils/transcription/TranscriptionService.ts +++ b/src/utils/transcription/TranscriptionService.ts @@ -45,6 +45,11 @@ export class TranscriptionService { * Transcribes audio using the configured AI adapter */ private async transcribeAudio(audioBuffer: ArrayBuffer): Promise { + console.log('NeuroVox Debug - Transcription Settings:', { + provider: this.plugin.settings.transcriptionProvider, + model: this.plugin.settings.transcriptionModel + }); + const adapter = this.getAdapter( this.plugin.settings.transcriptionProvider, 'transcription' @@ -86,6 +91,13 @@ export class TranscriptionService { throw new Error(`${provider} adapter not found`); } + console.log('NeuroVox Debug - Adapter check:', { + provider, + category, + isReady: adapter.isReady(category), + apiKey: adapter.getApiKey() ? 'present' : 'missing' + }); + if (!adapter.isReady(category)) { const apiKey = adapter.getApiKey(); if (!apiKey) {