Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 17 additions & 2 deletions Package.swift
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,14 @@ let package = Package(
.macOS(.v14)
],
products: [
.executable(name: "MiniWhisper", targets: ["MiniWhisper"])
.executable(name: "MiniWhisper", targets: ["MiniWhisper"]),
.executable(name: "MiniWhisperDebug", targets: ["MiniWhisperDebug"])
],
dependencies: [
.package(url: "https://github.com/FluidInference/FluidAudio.git", from: "0.9.1")
.package(
url: "https://github.com/FluidInference/FluidAudio.git",
.upToNextMinor(from: "0.12.6")
)
],
targets: [
.executableTarget(
Expand All @@ -26,6 +30,17 @@ let package = Package(
.enableExperimentalFeature("StrictConcurrency")
]
),
.executableTarget(
name: "MiniWhisperDebug",
dependencies: [
"FluidAudio",
"whisper"
],
path: "Sources/MiniWhisperDebug",
swiftSettings: [
.enableExperimentalFeature("StrictConcurrency")
]
),
.binaryTarget(
name: "whisper",
url: "https://github.com/andyhtran/MiniWhisper/releases/download/whisper-xcframework-1.0/whisper.xcframework.zip",
Expand Down
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,16 @@ just dev
just dev # Build, package, and launch
just build # Debug build only
just release # Release build + .app bundle
just debug-tool # Build local debug transcription CLI
just clean # Remove build artifacts
```

Run the local debug CLI on an existing audio file or recording directory:

```bash
just debug-transcribe ~/Code/debug-stt/whisper_cpp.wav --engine whisper --preset current-app
```

## Release

Signing, notarization, and publishing require [`asc`](https://github.com/rudrankriyam/App-Store-Connect-CLI):
Expand Down
2 changes: 1 addition & 1 deletion Scripts/build-app.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ fi

echo "Building $APP_NAME ($BUILD_CONFIG)..."

swift build -c "$BUILD_CONFIG"
swift build -c "$BUILD_CONFIG" --product "$APP_NAME"

echo "Creating app bundle..."
rm -rf "$APP_BUNDLE"
Expand Down
149 changes: 146 additions & 3 deletions Sources/MiniWhisper/AppState.swift
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import Foundation
import Observation
import AppKit
import ServiceManagement
import UserNotifications

@Observable
Expand Down Expand Up @@ -37,6 +38,8 @@ final class AppState: Sendable {

/// Mirrors `whisper.isDownloading`.
var isModelDownloading: Bool {
    return whisper.isDownloading
}
/// Mirrors `whisper.downloadProgress`.
var modelDownloadProgress: Double {
    return whisper.downloadProgress
}
/// `true` when the main-app login item currently reports the `.enabled` status.
var launchAtLoginEnabled: Bool {
    let loginItemStatus = SMAppService.mainApp.status
    return loginItemStatus == .enabled
}
/// `false` only when the main-app login item reports `.notFound`; any other status counts as supported.
var launchAtLoginSupported: Bool {
    let loginItemStatus = SMAppService.mainApp.status
    return loginItemStatus != .notFound
}

// MARK: - Initialization

Expand Down Expand Up @@ -154,8 +157,80 @@ final class AppState: Sendable {
guard recorder.state.isRecording else { return }
stopDurationChecks()
onRecordingEnded?()
recorder.cancelRecording()

let duration = recorder.currentDuration
let sampleRate = recorder.actualSampleRate
let recordingId = currentRecordingId ?? Recording.generateId()
currentRecordingId = nil

guard let audioURL = recorder.stopRecording() else {
recorder.reset()
return
}
recorder.reset()

let fileSize = (try? FileManager.default.attributesOfItem(atPath: audioURL.path)[.size] as? Int64) ?? 0
let recording = Recording(
id: recordingId,
createdAt: Date(),
recording: RecordingInfo(
duration: duration,
sampleRate: sampleRate,
channels: 1,
fileSize: fileSize,
inputDevice: recorder.systemDefaultDeviceName
),
transcription: nil,
configuration: RecordingConfiguration(
voiceModel: transcriptionMode == .english ? "Parakeet" : "Whisper",
language: "en"
),
status: .cancelled
)

do {
try recordingStore.saveWithExistingAudio(recording)
} catch {
toast.showError(title: "Cancel Save Failed", message: error.localizedDescription)
}
}

/// Kicks off re-transcription of a previously stored recording.
///
/// Shows an error toast and does nothing when the recorder is not idle or when
/// `recording.canRetranscribe` is `false`. Otherwise the recorder is switched to
/// `.processing` and the transcription runs in a new `Task`.
///
/// - Parameter recording: The stored recording to transcribe again.
func retranscribe(_ recording: Recording) {
    if !recorder.state.isIdle {
        toast.showError(title: "Busy", message: "Wait for the current recording/transcription to finish.")
        return
    }

    if !recording.canRetranscribe {
        toast.showError(title: "Cannot Re-transcribe", message: "Audio file is no longer available.")
        return
    }

    // Flip the state synchronously, before the task body gets a chance to run.
    recorder.state = .processing

    Task { await retranscribeCancelledRecording(recording) }
}

/// Registers or unregisters the main app as a login item.
///
/// When the service status is `.notFound` an error toast is shown and nothing is
/// changed. Errors thrown by `register()` / `unregister()` are surfaced as a toast
/// rather than propagated.
///
/// - Parameter enabled: `true` to register the login item, `false` to unregister it.
func setLaunchAtLogin(_ enabled: Bool) {
    let loginItem = SMAppService.mainApp

    if loginItem.status == .notFound {
        toast.showError(
            title: "Start on Login Unavailable",
            message: "This is available only when running the bundled app."
        )
        return
    }

    do {
        try enabled ? loginItem.register() : loginItem.unregister()
    } catch {
        toast.showError(title: "Start on Login Failed", message: error.localizedDescription)
    }
}

// MARK: - Transcription
Expand Down Expand Up @@ -213,7 +288,8 @@ final class AppState: Sendable {
configuration: RecordingConfiguration(
voiceModel: result.model,
language: result.language
)
),
status: .completed
)

try recordingStore.saveWithExistingAudio(recording)
Expand Down Expand Up @@ -243,13 +319,80 @@ final class AppState: Sendable {
configuration: RecordingConfiguration(
voiceModel: transcriptionMode == .english ? "Parakeet" : "Whisper",
language: "en"
)
),
status: .failed
)
// Use saveFailedRecording (metadata only) for failed recordings since audio may not exist
try? recordingStore.saveFailedRecording(recording)
}
}

/// Transcribes the audio of an already-stored recording and, on success, pastes the
/// text, overwrites the stored recording with a `.completed` copy, and records analytics.
///
/// The engine is chosen from the current `transcriptionMode`, not from the
/// recording's own configuration. All failures are reported through `toast`;
/// nothing is thrown to the caller.
///
/// - Parameter recording: The stored recording whose `audioURL` is transcribed.
private func retranscribeCancelledRecording(_ recording: Recording) async {
do {
// Pick the engine from the current mode: Parakeet for English, Whisper for multilingual.
let result: TranscriptionResult
switch transcriptionMode {
case .english:
result = try await parakeet.transcribe(audioURL: recording.audioURL)
case .multilingual:
result = try await whisper.transcribe(audioURL: recording.audioURL)
}

// The recorder state may have changed while awaiting; if so, drop the result
// without resetting or notifying (presumably something else took over — confirm).
guard recorder.state == .processing else { return }

guard !result.text.isEmpty else {
recorder.reset()
toast.showError(title: "Empty Transcription", message: "No speech detected in recording.")
return
}

// Apply the enabled replacement rules only when replacements are switched on.
let finalText: String
if replacementSettings.enabled {
let processor = ReplacementProcessor(rules: replacementSettings.enabledRules)
finalText = processor.apply(to: result.text)
} else {
finalText = result.text
}

// The text is handed to the pasteboard before the recording is saved.
pasteboard.copyAndPaste(finalText)

// File size is re-read from disk; falls back to 0 if the attributes cannot be read.
let fileSize = (try? FileManager.default.attributesOfItem(atPath: recording.audioURL.path)[.size] as? Int64) ?? 0
// Same id and creation date as the input, so the store entry is replaced rather than duplicated
// (assumes the store de-duplicates by id — verify against RecordingStore).
let updatedRecording = Recording(
id: recording.id,
createdAt: recording.createdAt,
recording: RecordingInfo(
duration: recording.recording.duration,
sampleRate: recording.recording.sampleRate,
channels: recording.recording.channels,
fileSize: fileSize,
inputDevice: recording.recording.inputDevice
),
transcription: RecordingTranscription(
text: finalText,
segments: result.segments,
language: result.language,
model: result.model,
transcriptionDuration: result.duration
),
configuration: RecordingConfiguration(
voiceModel: result.model,
language: result.language
),
status: .completed
)

try recordingStore.saveWithExistingAudio(updatedRecording)
// Word count is taken from the raw engine text (before replacements), split on single spaces.
analyticsStore.record(
duration: recording.recording.duration,
wordCount: result.text.split(separator: " ").count
)
recorder.reset()
} catch {
// Reached for transcription errors and also for a failed save, which happens
// after the text has already been passed to the pasteboard.
guard recorder.state == .processing else { return }
recorder.reset()
toast.showError(title: "Re-transcription Failed", message: error.localizedDescription)
}
}

// MARK: - Duration Monitoring

private func startDurationChecks() {
Expand Down
47 changes: 47 additions & 0 deletions Sources/MiniWhisper/Models/Recording.swift
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
import Foundation

/// Outcome of a recording, stored as its case name in string form.
enum RecordingStatus: String, Codable, Equatable, Hashable, Sendable {
    case completed = "completed"
    case failed = "failed"
    case cancelled = "cancelled"
}

struct RecordingInfo: Codable, Equatable, Hashable, Sendable {
let duration: TimeInterval
let sampleRate: Double
Expand Down Expand Up @@ -27,6 +33,43 @@ struct Recording: Codable, Identifiable, Equatable, Hashable, Sendable {
let recording: RecordingInfo
var transcription: RecordingTranscription?
let configuration: RecordingConfiguration
var status: RecordingStatus

/// Creates a recording from its individual parts.
///
/// - Parameters:
///   - id: Identifier of the recording.
///   - createdAt: Creation timestamp.
///   - recording: Audio-level information about the capture.
///   - transcription: Transcription result, or `nil` when there is none.
///   - configuration: Configuration associated with the recording.
///   - status: Outcome of the recording; `.completed` when omitted.
init(
    id: String,
    createdAt: Date,
    recording: RecordingInfo,
    transcription: RecordingTranscription?,
    configuration: RecordingConfiguration,
    status: RecordingStatus = .completed
) {
    // Plain memberwise assignment; no validation or derivation happens here.
    self.status = status
    self.configuration = configuration
    self.transcription = transcription
    self.recording = recording
    self.createdAt = createdAt
    self.id = id
}

/// Keys used when encoding and decoding; each key's string value equals its case name.
enum CodingKeys: String, CodingKey {
    case id, createdAt, recording, transcription, configuration, status
}

/// Decodes a recording, tolerating payloads that lack a `status` key.
///
/// When `status` is absent it is derived from the transcription: `.failed` if there
/// is no transcription, `.completed` otherwise.
///
/// - Throws: Any decoding error raised for the individual fields.
init(from decoder: any Decoder) throws {
    let values = try decoder.container(keyedBy: CodingKeys.self)

    id = try values.decode(String.self, forKey: .id)
    createdAt = try values.decode(Date.self, forKey: .createdAt)
    recording = try values.decode(RecordingInfo.self, forKey: .recording)

    let decodedTranscription = try values.decodeIfPresent(RecordingTranscription.self, forKey: .transcription)
    transcription = decodedTranscription
    configuration = try values.decode(RecordingConfiguration.self, forKey: .configuration)

    if let storedStatus = try values.decodeIfPresent(RecordingStatus.self, forKey: .status) {
        status = storedStatus
    } else if decodedTranscription == nil {
        status = .failed
    } else {
        status = .completed
    }
}

var audioURL: URL {
storageDirectory.appendingPathComponent("audio.wav")
Expand All @@ -40,6 +83,10 @@ struct Recording: Codable, Identifiable, Equatable, Hashable, Sendable {
FileManager.default.fileExists(atPath: audioURL.path)
}

/// `true` only for a cancelled recording that has no transcription and whose audio file exists.
var canRetranscribe: Bool {
    // Check the in-memory conditions first; the file lookup runs only if both hold.
    guard status == .cancelled, transcription == nil else { return false }
    return hasAudioFile
}

static var baseDirectory: URL {
let docs = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask).first!
return docs.appendingPathComponent("MiniWhisper/recordings")
Expand Down
9 changes: 9 additions & 0 deletions Sources/MiniWhisper/Services/RecordingStore.swift
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,14 @@ final class RecordingStore: Sendable {
try saveMetadata(recording)
try saveTranscriptionFiles(recording)

recordings.removeAll { $0.id == recording.id }
recordings.insert(recording, at: 0)
performRetention()
}

func saveFailedRecording(_ recording: Recording) throws {
try saveMetadata(recording)
recordings.removeAll { $0.id == recording.id }
recordings.insert(recording, at: 0)
performRetention()
}
Expand Down Expand Up @@ -77,6 +79,13 @@ final class RecordingStore: Sendable {
Array(recordings.prefix(3))
}

/// The first three recordings, in stored order, that either have a transcription or are cancelled.
var recentHistoryItems: [Recording] {
    let limit = 3
    var items: [Recording] = []
    for entry in recordings where entry.transcription != nil || entry.status == .cancelled {
        items.append(entry)
        if items.count == limit { break }
    }
    return items
}

// MARK: - Retention

func performRetention() {
Expand Down
23 changes: 21 additions & 2 deletions Sources/MiniWhisper/Services/WhisperProvider.swift
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,23 @@ import Foundation
@preconcurrency import AVFoundation
import whisper

/// Language selection for a Whisper transcription: either no language is supplied
/// (`auto`) or a specific language string is passed through (`fixed`).
enum WhisperLanguageMode: Sendable {
    case auto, fixed(String)
}

struct WhisperTranscriptionOptions: Sendable {
let language: WhisperLanguageMode
let detectLanguage: Bool
let noTimestamps: Bool
let singleSegment: Bool
let threadCount: Int32

static func `default`() -> WhisperTranscriptionOptions {
WhisperTranscriptionOptions(
language: .fixed("en"),
detectLanguage: false,
noTimestamps: true,
noTimestamps: false,
singleSegment: false,
threadCount: max(1, Int32(ProcessInfo.processInfo.activeProcessorCount - 2))
)
Expand Down Expand Up @@ -51,7 +58,19 @@ final class WhisperContext: @unchecked Sendable {
func transcribe(samples: [Float]) -> (text: String, language: String) {
let options = Self.transcriptionOptions()
var params = whisper_full_default_params(WHISPER_SAMPLING_GREEDY)
params.language = nil
var languageCString: UnsafeMutablePointer<CChar>?
switch options.language {
case .auto:
params.language = nil
case .fixed(let language):
languageCString = strdup(language)
params.language = languageCString.map { UnsafePointer($0) }
}
defer {
if let languageCString {
free(languageCString)
}
}
params.detect_language = options.detectLanguage
params.print_special = false
params.print_progress = false
Expand Down
Loading
Loading