wispr-flow/config.py at main · Xaid-vfx/wispr-flow · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
from dataclasses import dataclass, field


@dataclass
class AudioConfig:
    """Audio capture format plus tuning knobs for the energy-based VAD.

    The ``*_chunks`` fields are counted in chunks of ``chunk_size`` samples.
    With the defaults (512 samples at 16 kHz) one chunk is roughly 32 ms,
    which is where the millisecond figures in the comments below come from.
    """

    # --- Capture format ---
    sample_rate: int = 16_000
    channels: int = 1
    # Samples per callback (~32ms at 16kHz).
    chunk_size: int = 512

    # --- Energy-based VAD thresholds ---
    # RMS threshold — tune this for your mic/environment.
    energy_threshold: float = 0.015
    # Consecutive loud chunks needed to declare speech start.
    speech_start_chunks: int = 4
    # Consecutive quiet chunks needed to declare speech end (~800ms).
    speech_end_chunks: int = 25
    # Minimum chunks for a valid utterance (~380ms).
    min_speech_chunks: int = 12
    # Force-flush after this many seconds.
    max_speech_duration: float = 30.0


@dataclass
class WhisperConfig:
    """Settings for the Whisper transcription model."""

    # Model name: tiny.en | base.en | small.en | medium.en | large-v3
    model: str = "medium.en"
    language: str = "en"
    # Biases Whisper vocabulary/style; an empty string means no prompt.
    initial_prompt: str = ""


@dataclass
class LLMConfig:
    """Settings for the optional LLM rewrite stage."""

    model: str = "dolphin3"
    base_url: str = "http://localhost:11434"
    # Off by default, i.e. raw output; a "delta," prefix or
    # "hey dictation rewrite mode" enables the LLM.
    enabled: bool = False
    temperature: float = 0.3
    max_tokens: int = 300


@dataclass
class ContextConfig:
    """Settings for the context component.

    NOTE(review): ``max_sentences`` presumably caps how many sentences are
    kept as context — confirm against the code that consumes it.
    """

    max_sentences: int = 5


@dataclass
class HotkeyConfig:
    """Settings for the hold-to-speak hotkey."""

    # Key to hold while speaking.
    # Options: right_option | right_cmd | right_ctrl | right_shift | f13 | f14
    key: str = "right_option"
    # Accidental taps shorter than this many seconds are discarded.
    min_duration: float = 0.3


@dataclass
class Config:
    """Top-level configuration that groups every per-component section.

    Each section uses ``default_factory``, so every ``Config`` instance gets
    its own freshly constructed sub-config objects instead of sharing one.
    """

    # --- Per-component sections ---
    audio: AudioConfig = field(default_factory=AudioConfig)
    whisper: WhisperConfig = field(default_factory=WhisperConfig)
    llm: LLMConfig = field(default_factory=LLMConfig)
    context: ContextConfig = field(default_factory=ContextConfig)
    hotkey: HotkeyConfig = field(default_factory=HotkeyConfig)

    # --- Diagnostics ---
    # Show the raw transcript before the LLM rewrite.
    debug: bool = False