diff --git a/src/listenr/build_dataset.py b/src/listenr/build_dataset.py
index fc80002..e7e7201 100644
--- a/src/listenr/build_dataset.py
+++ b/src/listenr/build_dataset.py
@@ -26,23 +26,33 @@
 import csv
 import json
 import logging
-import os
 import random
 import sys
 from pathlib import Path
 
-import listenr.config_manager as cfg
+from listenr.constants import (
+    DATASET_FORMAT,
+    DATASET_MIN_CHARS,
+    DATASET_MIN_DURATION,
+    DATASET_OUTPUT,
+    DATASET_SEED,
+    DATASET_SPLIT,
+    STORAGE_BASE,
+)
 
 logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
 logger = logging.getLogger("listenr.build_dataset")
 
 # ---------------------------------------------------------------------------
-# Defaults
+# Defaults (sourced from constants, which read from config at import time)
 # ---------------------------------------------------------------------------
-DEFAULT_OUTPUT = Path("~/listenr_dataset").expanduser()
-DEFAULT_SPLIT = "80/10/10"
-DEFAULT_MIN_DURATION = 0.3  # seconds
-DEFAULT_MIN_CHARS = 2  # minimum non-whitespace chars in transcription
+
+DEFAULT_OUTPUT       = DATASET_OUTPUT
+DEFAULT_SPLIT        = DATASET_SPLIT
+DEFAULT_MIN_DURATION = DATASET_MIN_DURATION
+DEFAULT_MIN_CHARS    = DATASET_MIN_CHARS
+DEFAULT_SEED         = DATASET_SEED
+DEFAULT_FORMAT       = DATASET_FORMAT
 
 CSV_COLUMNS = [
     "uuid",
@@ -65,8 +75,7 @@
 
 def _manifest_path() -> Path:
     """Return the manifest.jsonl path from config."""
-    clips_path = cfg.get_setting("Storage", "audio_clips_path", "~/.listenr/audio_clips")
-    return Path(clips_path).expanduser() / "manifest.jsonl"
+    return STORAGE_BASE / "manifest.jsonl"
 
 
 def load_manifest(manifest_path: Path) -> list[dict]:
@@ -244,36 +253,36 @@ def main() -> None:
         "--output",
         type=Path,
         default=DEFAULT_OUTPUT,
-        help=f"Output directory for dataset files (default: {DEFAULT_OUTPUT})",
+        help=f"Output directory for dataset files (default: from config, currently {DEFAULT_OUTPUT})",
     )
     parser.add_argument(
         "--split",
         default=DEFAULT_SPLIT,
-        help=f"Train/dev/test split percentages, e.g. 80/10/10 (default: {DEFAULT_SPLIT})",
+        help=f"Train/dev/test split percentages, e.g. 80/10/10 (default: from config, currently {DEFAULT_SPLIT})",
     )
     parser.add_argument(
         "--min-duration",
         type=float,
         default=DEFAULT_MIN_DURATION,
-        help=f"Minimum clip duration in seconds (default: {DEFAULT_MIN_DURATION})",
+        help=f"Minimum clip duration in seconds (default: from config, currently {DEFAULT_MIN_DURATION})",
     )
     parser.add_argument(
         "--min-chars",
         type=int,
         default=DEFAULT_MIN_CHARS,
-        help=f"Minimum non-whitespace chars in transcription (default: {DEFAULT_MIN_CHARS})",
+        help=f"Minimum non-whitespace chars in transcription (default: from config, currently {DEFAULT_MIN_CHARS})",
     )
     parser.add_argument(
         "--seed",
         type=int,
-        default=42,
-        help="Random seed for reproducible splits (default: 42)",
+        default=DEFAULT_SEED,
+        help=f"Random seed for reproducible splits (default: from config, currently {DEFAULT_SEED})",
     )
     parser.add_argument(
         "--format",
         choices=["csv", "hf", "both"],
-        default="csv",
-        help="Output format: csv, hf (HuggingFace datasets), or both (default: csv)",
+        default=DEFAULT_FORMAT,
+        help=f"Output format: csv, hf (HuggingFace datasets), or both (default: from config, currently {DEFAULT_FORMAT})",
     )
     parser.add_argument(
         "--dry-run",
diff --git a/src/listenr/cli.py b/src/listenr/cli.py
index f395ba5..ec55d68 100644
--- a/src/listenr/cli.py
+++ b/src/listenr/cli.py
@@ -23,47 +23,38 @@
 from collections import deque
 from math import gcd
 from scipy.signal import resample_poly
-from pathlib import Path
 
 from listenr.unified_asr import LemonadeUnifiedASR
 from listenr.llm_processor import lemonade_llm_correct, lemonade_load_model, lemonade_unload_models
 from listenr.transcript_utils import is_hallucination, strip_noise_tags
 from listenr.storage import save_recording
-import listenr.config_manager as cfg
+from listenr.constants import (
+    ASR_RATE,
+    CAPTURE_RATE,
+    CHANNELS,
+    CHUNK_SIZE,
+    INPUT_DEVICE,
+    LLM_CONTEXT_WINDOW,
+    LLM_ENABLED as USE_LLM,
+    LLM_MODEL,
+    STORAGE_BASE,
+    WHISPER_MODEL,
+)
 
 logging.basicConfig(level=logging.WARNING, format='%(levelname)s: %(message)s')
 log = logging.getLogger('listenr.cli')
 
-# Audio settings from config
-CAPTURE_RATE = cfg.get_int_setting('Audio', 'sample_rate', 16000)
-ASR_RATE = 16000  # Lemonade /realtime always requires 16kHz PCM16
-CHUNK_SIZE = cfg.get_int_setting('Audio', 'blocksize', 1360)
-CHANNELS = cfg.get_int_setting('Audio', 'channels', 1)
-INPUT_DEVICE = cfg.get_setting('Audio', 'input_device', 'default') or None
-if INPUT_DEVICE == 'default':
-    INPUT_DEVICE = None
-
 # Compute resample ratio once (e.g. 48000→16000 = up 1, down 3)
 _gcd = gcd(CAPTURE_RATE, ASR_RATE)
 _RESAMPLE_UP = ASR_RATE // _gcd
 _RESAMPLE_DOWN = CAPTURE_RATE // _gcd
 _NEED_RESAMPLE = (CAPTURE_RATE != ASR_RATE)
 
-# Storage
-STORAGE_BASE = Path(
-    cfg.get_setting('Storage', 'audio_clips_path', '~/listenr_recordings') or '~/listenr_recordings'
-).expanduser()
-
-# LLM settings
-USE_LLM = cfg.get_bool_setting('LLM', 'enabled', False)
-LLM_MODEL = cfg.get_setting('LLM', 'model', 'gpt-oss-20b-mxfp4-GGUF') or 'gpt-oss-20b-mxfp4-GGUF'
-WHISPER_MODEL = cfg.get_setting('Whisper', 'model', 'Whisper-Large-v3-Turbo') or 'Whisper-Large-v3-Turbo'
-
 
 def get_lemonade_ws_url() -> str:
     """Discover Lemonade WebSocket URL from /api/v1/health."""
-    api_base = cfg.get_setting('LLM', 'api_base', 'http://localhost:8000/api/v1') or 'http://localhost:8000/api/v1'
-    health_url = api_base.rstrip('/').replace('/api/v1', '') + '/api/v1/health'
+    from listenr.constants import LLM_API_BASE
+    health_url = LLM_API_BASE.rstrip('/').replace('/api/v1', '') + '/api/v1/health'
     try:
         resp = requests.get(health_url, timeout=2)
         resp.raise_for_status()
@@ -165,8 +156,7 @@ async def _run(save: bool, show_raw: bool, debug: bool):
     asr = LemonadeUnifiedASR(use_llm=False)  # LLM correction handled here for saving
     pcm_buffer: list = []
     # Rolling window of (raw, corrected) pairs passed as context to the LLM
-    _context_size = cfg.get_int_setting('LLM', 'context_window', 3)
-    llm_context: deque[tuple[str, str]] = deque(maxlen=_context_size)
+    llm_context: deque[tuple[str, str]] = deque(maxlen=LLM_CONTEXT_WINDOW)
 
     async for result in asr.stream_transcribe(
         mic_stream(pcm_buffer, debug=debug),
diff --git a/src/listenr/config_manager.py b/src/listenr/config_manager.py
index c3d9edc..0d93610 100644
--- a/src/listenr/config_manager.py
+++ b/src/listenr/config_manager.py
@@ -54,10 +54,18 @@
         'timeout': '30',
         'context_window': '10',  # Number of preceding segments passed as context to the LLM
     },
+    'Dataset': {
+        'output_path': '~/listenr_dataset',   # Where build_dataset writes CSV/HF output
+        'split': '80/10/10',                  # Train/dev/test split percentages
+        'min_duration': '0.3',                # Minimum clip duration in seconds
+        'min_chars': '2',                     # Minimum non-whitespace chars in transcription
+        'seed': '42',                         # Random seed for reproducible splits
+        'format': 'csv',                      # Output format: csv, hf, or both
+    },
     'Output': {
         'file': '~/transcripts_raw.txt',
         'llm_file': '~/transcripts_clean.txt',
-        'format': '[{timestamp}] {text}',
+        'line_format': '[{timestamp}] {text}',
         'timestamp_format': '%%Y-%%m-%%d %%H:%%M:%%S',  # Double %% for configparser escaping
         'show_raw': 'false',
     },
diff --git a/src/listenr/constants.py b/src/listenr/constants.py
new file mode 100644
index 0000000..ed79e54
--- /dev/null
+++ b/src/listenr/constants.py
@@ -0,0 +1,179 @@
+"""
+constants.py — Typed, config-backed constants for the listenr package.
+
+All values are read **once** at import time from ``~/.config/listenr/config.ini``
+(via :mod:`listenr.config_manager`).
+
+Downstream modules should import individual names::
+
+    from listenr.constants import CAPTURE_RATE, LLM_MODEL, WHISPER_MODEL
+
+If you need to refresh constants at runtime (e.g. tests that patch config),
+call :func:`reload`; names already bound via ``from ... import`` stay stale.
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import listenr.config_manager as cfg
+
+# ---------------------------------------------------------------------------
+# Lemonade
+# ---------------------------------------------------------------------------
+
+LEMONADE_API_BASE: str = (
+    cfg.get_setting("Lemonade", "api_base", "http://localhost:8000/api/v1")
+    or "http://localhost:8000/api/v1"
+)
+
+# ---------------------------------------------------------------------------
+# Whisper
+# ---------------------------------------------------------------------------
+
+WHISPER_MODEL: str = (
+    cfg.get_setting("Whisper", "model", "Whisper-Tiny") or "Whisper-Tiny"
+)
+
+# ---------------------------------------------------------------------------
+# Audio
+# ---------------------------------------------------------------------------
+
+CAPTURE_RATE: int = cfg.get_int_setting("Audio", "sample_rate", 48000)
+CHANNELS: int = cfg.get_int_setting("Audio", "channels", 1)
+CHUNK_SIZE: int = cfg.get_int_setting("Audio", "blocksize", 4096)
+INPUT_DEVICE: str | None = (
+    cfg.get_setting("Audio", "input_device", "pipewire") or None
+)
+if INPUT_DEVICE == "default":
+    INPUT_DEVICE = None
+
+# Lemonade /realtime always requires 16 kHz PCM-16 — this is not configurable.
+ASR_RATE: int = 16000
+
+# ---------------------------------------------------------------------------
+# Storage
+# ---------------------------------------------------------------------------
+
+STORAGE_BASE: Path = Path(
+    cfg.get_setting("Storage", "audio_clips_path", "~/.listenr/audio_clips")
+    or "~/.listenr/audio_clips"
+).expanduser()
+
+STORAGE_CLIPS_ENABLED: bool = cfg.get_bool_setting(
+    "Storage", "audio_clips_enabled", True
+)
+STORAGE_RETENTION_DAYS: int = cfg.get_int_setting("Storage", "retention_days", 90)
+STORAGE_MAX_GB: float = cfg.get_float_setting("Storage", "max_storage_gb", 10.0)
+
+# ---------------------------------------------------------------------------
+# VAD
+# ---------------------------------------------------------------------------
+
+VAD_THRESHOLD: float = cfg.get_float_setting("VAD", "threshold", 0.05)
+VAD_SILENCE_MS: int = cfg.get_int_setting("VAD", "silence_duration_ms", 800)
+VAD_PREFIX_PADDING_MS: int = cfg.get_int_setting("VAD", "prefix_padding_ms", 250)
+
+# ---------------------------------------------------------------------------
+# LLM
+# ---------------------------------------------------------------------------
+
+LLM_ENABLED: bool = cfg.get_bool_setting("LLM", "enabled", True)
+LLM_MODEL: str = (
+    cfg.get_setting("LLM", "model", "gpt-oss-20b-mxfp4-GGUF")
+    or "gpt-oss-20b-mxfp4-GGUF"
+)
+LLM_API_BASE: str = (
+    cfg.get_setting("LLM", "api_base", "http://localhost:8000/api/v1")
+    or "http://localhost:8000/api/v1"
+)
+LLM_TEMPERATURE: float = cfg.get_float_setting("LLM", "temperature", 0.3)
+LLM_MAX_TOKENS: int = cfg.get_int_setting("LLM", "max_tokens", 1500)
+LLM_TIMEOUT: int = cfg.get_int_setting("LLM", "timeout", 30)
+LLM_CONTEXT_WINDOW: int = cfg.get_int_setting("LLM", "context_window", 10)
+
+# ---------------------------------------------------------------------------
+# Dataset
+# ---------------------------------------------------------------------------
+
+DATASET_OUTPUT: Path = Path(
+    cfg.get_setting("Dataset", "output_path", "~/listenr_dataset")
+    or "~/listenr_dataset"
+).expanduser()
+DATASET_SPLIT: str = cfg.get_setting("Dataset", "split", "80/10/10") or "80/10/10"
+DATASET_MIN_DURATION: float = cfg.get_float_setting("Dataset", "min_duration", 0.3)
+DATASET_MIN_CHARS: int = cfg.get_int_setting("Dataset", "min_chars", 2)
+DATASET_SEED: int = cfg.get_int_setting("Dataset", "seed", 42)
+
+_VALID_DATASET_FORMATS: frozenset[str] = frozenset({"csv", "hf", "both"})
+_raw_dataset_format: str = cfg.get_setting("Dataset", "format", "csv") or "csv"
+if _raw_dataset_format not in _VALID_DATASET_FORMATS:
+    import warnings
+    warnings.warn(
+        f"Config [Dataset] format={_raw_dataset_format!r} is not a recognised value "
+        f"({', '.join(sorted(_VALID_DATASET_FORMATS))}); falling back to 'csv'.",
+        UserWarning,
+        stacklevel=2,
+    )
+    _raw_dataset_format = "csv"
+DATASET_FORMAT: str = _raw_dataset_format
+
+# ---------------------------------------------------------------------------
+# Output / transcript files
+# ---------------------------------------------------------------------------
+
+OUTPUT_FILE: Path | None = (
+    Path(v).expanduser()
+    if (v := cfg.get_setting("Output", "file", ""))
+    else None
+)
+OUTPUT_LLM_FILE: Path | None = (
+    Path(v).expanduser()
+    if (v := cfg.get_setting("Output", "llm_file", ""))
+    else None
+)
+OUTPUT_LINE_FORMAT: str = (
+    cfg.get_setting("Output", "line_format", "[{timestamp}] {text}")
+    or "[{timestamp}] {text}"
+)
+OUTPUT_TIMESTAMP_FORMAT: str = (
+    cfg.get_setting("Output", "timestamp_format", "%Y-%m-%d %H:%M:%S")
+    or "%Y-%m-%d %H:%M:%S"
+)
+OUTPUT_SHOW_RAW: bool = cfg.get_bool_setting("Output", "show_raw", False)
+
+# ---------------------------------------------------------------------------
+# Logging
+# ---------------------------------------------------------------------------
+
+LOG_LEVEL: str = cfg.get_setting("Logging", "level", "INFO") or "INFO"
+LOG_FILE: Path | None = (
+    Path(v).expanduser()
+    if (v := cfg.get_setting("Logging", "file", ""))
+    else None
+)
+
+
+# ---------------------------------------------------------------------------
+# Reload helper (used by tests and advanced callers)
+# ---------------------------------------------------------------------------
+
+def reload() -> None:
+    """Refresh every constant by re-executing this module against live config.
+
+    Intended for tests and advanced callers that change
+    :mod:`listenr.config_manager` state after this module was imported::
+
+        cfg.update_setting('LLM', 'model', 'my-test-model')
+        import listenr.constants as C
+        C.reload()
+        assert C.LLM_MODEL == 'my-test-model'
+
+    The module object is reused, so ``constants.X`` attribute access observes
+    the new values.  Names copied earlier with ``from listenr.constants import
+    X`` in other modules keep their old bindings and are not refreshed.
+    """
+    from importlib import reload as _reexecute
+    from sys import modules as _loaded_modules
+
+    _reexecute(_loaded_modules[__name__])
diff --git a/src/listenr/llm_processor.py b/src/listenr/llm_processor.py
index 649f20c..0213a29 100644
--- a/src/listenr/llm_processor.py
+++ b/src/listenr/llm_processor.py
@@ -7,8 +7,14 @@
 import re
 import requests
 import listenr.config_manager as cfg
-
-_DEFAULT_API_BASE = "http://localhost:8000/api/v1"
+from listenr.constants import (
+    LLM_API_BASE as _DEFAULT_API_BASE,
+    LLM_MAX_TOKENS,
+    LLM_MODEL,
+    LLM_TEMPERATURE,
+    LLM_TIMEOUT,
+    WHISPER_MODEL,
+)
 
 # System prompt for transcription post-processing.
 # The model must return ONLY a JSON object — no prose, no markdown fences.
@@ -139,11 +145,11 @@ def lemonade_llm_correct(
     Never raises — on failure returns the original text with is_improved=False.
     """
     if model is None:
-        model = cfg.get_setting('LLM', 'model', 'gpt-oss-20b-mxfp4-GGUF')
+        model = LLM_MODEL
 
-    temperature = cfg.get_float_setting('LLM', 'temperature', 0.1)
-    max_tokens = cfg.get_int_setting('LLM', 'max_tokens', 1500)
-    timeout = cfg.get_int_setting('LLM', 'timeout', 30)
+    temperature = LLM_TEMPERATURE
+    max_tokens = LLM_MAX_TOKENS
+    timeout = LLM_TIMEOUT
 
     # Build message list: system + interleaved context turns + current segment
     messages: list[dict] = [{"role": "system", "content": _CORRECTION_SYSTEM_PROMPT}]
@@ -184,7 +190,7 @@ def lemonade_transcribe_audio(audio_path, model=None):
     Use Lemonade's HTTP transcription endpoint for audio files.
     """
     if model is None:
-        model = cfg.get_setting('Whisper', 'model', 'Whisper-Tiny')
+        model = WHISPER_MODEL
     with open(audio_path, "rb") as f:
         resp = requests.post(
             f"{_api_base()}/audio/transcriptions",
diff --git a/src/listenr/unified_asr.py b/src/listenr/unified_asr.py
index 67d56ba..5577c1f 100644
--- a/src/listenr/unified_asr.py
+++ b/src/listenr/unified_asr.py
@@ -20,8 +20,14 @@
 import websockets
 import asyncio
 
-import listenr.config_manager as cfg
 from listenr.llm_processor import lemonade_llm_correct, lemonade_transcribe_audio
+from listenr.constants import (
+    LLM_API_BASE,
+    VAD_THRESHOLD,
+    VAD_SILENCE_MS,
+    VAD_PREFIX_PADDING_MS,
+    WHISPER_MODEL as _DEFAULT_WHISPER_MODEL,
+)
 
 logger = logging.getLogger('unified_asr')
 
@@ -72,11 +78,10 @@ async def stream_transcribe(self, audio_stream, whisper_model=None, on_result=No
         import base64
 
         if whisper_model is None:
-            whisper_model = cfg.get_setting('Whisper', 'model', 'Whisper-Large-v3-Turbo')
+            whisper_model = _DEFAULT_WHISPER_MODEL
         if lemonade_ws_url is None:
-            api_base = cfg.get_setting('LLM', 'api_base', 'http://localhost:8000/api/v1') or 'http://localhost:8000/api/v1'
             try:
-                resp = requests.get(f"{api_base}/health", timeout=5)
+                resp = requests.get(f"{LLM_API_BASE}/health", timeout=5)
                 ws_port = resp.json().get('websocket_port', 8001)
             except Exception:
                 ws_port = 8001
@@ -87,9 +92,9 @@ async def stream_transcribe(self, audio_stream, whisper_model=None, on_result=No
             "session": {
                 "model": whisper_model,
                 "turn_detection": {
-                    "threshold": cfg.get_float_setting('VAD', 'threshold', 0.01),
-                    "silence_duration_ms": cfg.get_int_setting('VAD', 'silence_duration_ms', 800),
-                    "prefix_padding_ms": cfg.get_int_setting('VAD', 'prefix_padding_ms', 250),
+                    "threshold": VAD_THRESHOLD,
+                    "silence_duration_ms": VAD_SILENCE_MS,
+                    "prefix_padding_ms": VAD_PREFIX_PADDING_MS,
                 },
             },
         }
diff --git a/tests/test_constants.py b/tests/test_constants.py
new file mode 100644
index 0000000..3402e77
--- /dev/null
+++ b/tests/test_constants.py
@@ -0,0 +1,344 @@
+"""
+Unit tests for listenr.constants.
+
+Verifies:
+  - Every public constant exists and has the expected Python type.
+  - Path constants are absolute (already expanded).
+  - Numeric constants are within sane ranges.
+  - reload() correctly picks up config changes made at runtime.
+  - Constants are consumed by the modules that use them (smoke-import checks).
+"""
+
+import importlib
+from pathlib import Path
+
+import pytest
+
+import listenr.constants as C
+
+
+# ---------------------------------------------------------------------------
+# Type checks
+# ---------------------------------------------------------------------------
+
+class TestConstantTypes:  # one isinstance check per public constant in listenr.constants
+    def test_lemonade_api_base_is_str(self):
+        assert isinstance(C.LEMONADE_API_BASE, str)
+
+    def test_whisper_model_is_str(self):
+        assert isinstance(C.WHISPER_MODEL, str)
+
+    def test_capture_rate_is_int(self):
+        assert isinstance(C.CAPTURE_RATE, int)
+
+    def test_asr_rate_is_int(self):
+        assert isinstance(C.ASR_RATE, int)
+
+    def test_channels_is_int(self):
+        assert isinstance(C.CHANNELS, int)
+
+    def test_chunk_size_is_int(self):
+        assert isinstance(C.CHUNK_SIZE, int)
+
+    def test_input_device_is_str_or_none(self):
+        assert C.INPUT_DEVICE is None or isinstance(C.INPUT_DEVICE, str)  # None when empty or "default"
+
+    def test_storage_base_is_path(self):
+        assert isinstance(C.STORAGE_BASE, Path)
+
+    def test_storage_clips_enabled_is_bool(self):
+        assert isinstance(C.STORAGE_CLIPS_ENABLED, bool)
+
+    def test_storage_retention_days_is_int(self):
+        assert isinstance(C.STORAGE_RETENTION_DAYS, int)
+
+    def test_storage_max_gb_is_float(self):
+        assert isinstance(C.STORAGE_MAX_GB, float)
+
+    def test_vad_threshold_is_float(self):
+        assert isinstance(C.VAD_THRESHOLD, float)
+
+    def test_vad_silence_ms_is_int(self):
+        assert isinstance(C.VAD_SILENCE_MS, int)
+
+    def test_vad_prefix_padding_ms_is_int(self):
+        assert isinstance(C.VAD_PREFIX_PADDING_MS, int)
+
+    def test_llm_enabled_is_bool(self):
+        assert isinstance(C.LLM_ENABLED, bool)
+
+    def test_llm_model_is_str(self):
+        assert isinstance(C.LLM_MODEL, str)
+
+    def test_llm_api_base_is_str(self):
+        assert isinstance(C.LLM_API_BASE, str)
+
+    def test_llm_temperature_is_float(self):
+        assert isinstance(C.LLM_TEMPERATURE, float)
+
+    def test_llm_max_tokens_is_int(self):
+        assert isinstance(C.LLM_MAX_TOKENS, int)
+
+    def test_llm_timeout_is_int(self):
+        assert isinstance(C.LLM_TIMEOUT, int)
+
+    def test_llm_context_window_is_int(self):
+        assert isinstance(C.LLM_CONTEXT_WINDOW, int)
+
+    def test_dataset_output_is_path(self):
+        assert isinstance(C.DATASET_OUTPUT, Path)
+
+    def test_dataset_split_is_str(self):
+        assert isinstance(C.DATASET_SPLIT, str)
+
+    def test_dataset_min_duration_is_float(self):
+        assert isinstance(C.DATASET_MIN_DURATION, float)
+
+    def test_dataset_min_chars_is_int(self):
+        assert isinstance(C.DATASET_MIN_CHARS, int)
+
+    def test_dataset_seed_is_int(self):
+        assert isinstance(C.DATASET_SEED, int)
+
+    def test_dataset_format_is_str(self):
+        assert isinstance(C.DATASET_FORMAT, str)
+
+    def test_output_file_is_path_or_none(self):
+        assert C.OUTPUT_FILE is None or isinstance(C.OUTPUT_FILE, Path)  # None when the setting is empty
+
+    def test_output_llm_file_is_path_or_none(self):
+        assert C.OUTPUT_LLM_FILE is None or isinstance(C.OUTPUT_LLM_FILE, Path)  # None when the setting is empty
+
+    def test_output_line_format_is_str(self):
+        assert isinstance(C.OUTPUT_LINE_FORMAT, str)
+
+    def test_output_timestamp_format_is_str(self):
+        assert isinstance(C.OUTPUT_TIMESTAMP_FORMAT, str)
+
+    def test_output_show_raw_is_bool(self):
+        assert isinstance(C.OUTPUT_SHOW_RAW, bool)
+
+    def test_log_level_is_str(self):
+        assert isinstance(C.LOG_LEVEL, str)
+
+    def test_log_file_is_path_or_none(self):
+        assert C.LOG_FILE is None or isinstance(C.LOG_FILE, Path)  # None when the setting is empty
+
+
+# ---------------------------------------------------------------------------
+# Value sanity checks
+# ---------------------------------------------------------------------------
+
+class TestConstantValues:
+    def test_asr_rate_always_16000(self):
+        """ASR_RATE is fixed — Lemonade /realtime always requires 16 kHz."""
+        assert C.ASR_RATE == 16000
+
+    def test_capture_rate_positive(self):
+        assert C.CAPTURE_RATE > 0
+
+    def test_chunk_size_positive(self):
+        assert C.CHUNK_SIZE > 0
+
+    def test_channels_at_least_one(self):
+        assert C.CHANNELS >= 1
+
+    def test_vad_threshold_between_0_and_1(self):
+        assert 0.0 < C.VAD_THRESHOLD < 1.0
+
+    def test_vad_silence_ms_positive(self):
+        assert C.VAD_SILENCE_MS > 0
+
+    def test_vad_prefix_padding_ms_non_negative(self):
+        assert C.VAD_PREFIX_PADDING_MS >= 0
+
+    def test_llm_temperature_in_range(self):
+        assert 0.0 <= C.LLM_TEMPERATURE <= 2.0
+
+    def test_llm_max_tokens_positive(self):
+        assert C.LLM_MAX_TOKENS > 0
+
+    def test_llm_timeout_positive(self):
+        assert C.LLM_TIMEOUT > 0
+
+    def test_llm_context_window_positive(self):
+        assert C.LLM_CONTEXT_WINDOW > 0
+
+    def test_dataset_split_parses_as_three_ints(self):
+        parts = C.DATASET_SPLIT.split('/')
+        assert len(parts) == 3, f"Expected 3 parts, got: {C.DATASET_SPLIT!r}"
+        assert all(p.strip().isdigit() for p in parts)
+
+    def test_dataset_split_sums_to_100(self):
+        parts = [int(p) for p in C.DATASET_SPLIT.split('/')]
+        assert sum(parts) == 100, f"Split must sum to 100, got {sum(parts)}"
+
+    def test_dataset_min_duration_non_negative(self):
+        assert C.DATASET_MIN_DURATION >= 0.0
+
+    def test_dataset_min_chars_non_negative(self):
+        assert C.DATASET_MIN_CHARS >= 0
+
+    def test_dataset_format_valid(self):
+        assert C.DATASET_FORMAT in {'csv', 'hf', 'both'}
+
+    def test_lemonade_api_base_starts_with_http(self):
+        assert C.LEMONADE_API_BASE.startswith('http')
+
+    def test_llm_api_base_starts_with_http(self):
+        assert C.LLM_API_BASE.startswith('http')
+
+    def test_storage_base_is_absolute(self):
+        assert C.STORAGE_BASE.is_absolute()
+
+    def test_dataset_output_is_absolute(self):
+        assert C.DATASET_OUTPUT.is_absolute()
+
+    def test_whisper_model_non_empty(self):
+        assert C.WHISPER_MODEL.strip() != ''
+
+    def test_llm_model_non_empty(self):
+        assert C.LLM_MODEL.strip() != ''
+
+    def test_log_level_is_valid(self):
+        """LOG_LEVEL (case-insensitive) must be one of the five standard level names."""
+        assert C.LOG_LEVEL.upper() in {'DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'}
+
+    def test_output_line_format_contains_text_placeholder(self):
+        assert '{text}' in C.OUTPUT_LINE_FORMAT
+
+    def test_retention_days_positive(self):
+        assert C.STORAGE_RETENTION_DAYS > 0
+
+    def test_storage_max_gb_positive(self):
+        assert C.STORAGE_MAX_GB > 0
+
+
+# ---------------------------------------------------------------------------
+# reload() smoke test — checks the mechanism works without real config change
+# ---------------------------------------------------------------------------
+
+class TestReload:
+    @pytest.fixture(autouse=True)
+    def _restore_constants(self, monkeypatch):
+        """Re-read real config after each test so patched values never leak."""
+        yield
+        monkeypatch.undo()
+        C.reload()
+
+    def test_reload_returns_same_types(self):
+        """reload() should succeed and constants should still have correct types."""
+        C.reload()
+        # reload() updates the module object in place, so ``C`` is already current.
+        assert isinstance(C.CAPTURE_RATE, int)
+        assert isinstance(C.LLM_MODEL, str)
+        assert isinstance(C.STORAGE_BASE, Path)
+        assert isinstance(C.DATASET_MIN_DURATION, float)
+
+    def test_reload_preserves_asr_rate(self):
+        """ASR_RATE must remain 16000 regardless of any reload."""
+        C.reload()
+        assert C.ASR_RATE == 16000
+
+    def test_reload_with_patched_config(self, monkeypatch):
+        """Constants module attribute reflects patched config after reload()."""
+        import listenr.config_manager as cfg_mod
+        monkeypatch.setattr(
+            cfg_mod, 'get_int_setting',
+            lambda section, key, fallback=0: 9999 if (section, key) == ('LLM', 'max_tokens') else fallback,
+        )
+        C.reload()
+        assert C.LLM_MAX_TOKENS == 9999
+
+    def test_reload_restores_after_patch(self, monkeypatch):
+        """After undoing the patch + reload, value returns to real config."""
+        import listenr.config_manager as cfg_mod
+        original_fn = cfg_mod.get_int_setting
+        monkeypatch.setattr(
+            cfg_mod, 'get_int_setting',
+            lambda section, key, fallback=0: 1 if (section, key) == ('LLM', 'context_window') else original_fn(section, key, fallback),
+        )
+        C.reload()
+        assert C.LLM_CONTEXT_WINDOW == 1
+
+        # Explicit undo + reload must bring back the real configured value.
+        monkeypatch.undo()
+        C.reload()
+        assert C.LLM_CONTEXT_WINDOW == original_fn('LLM', 'context_window', 10)
+
+
+# ---------------------------------------------------------------------------
+# Smoke imports — ensure migrated modules still import cleanly
+# ---------------------------------------------------------------------------
+
+class TestModuleImports:
+    def test_cli_imports_constants(self):
+        importlib.import_module('listenr.cli')  # passes if no ImportError is raised
+
+    def test_llm_processor_imports_constants(self):
+        importlib.import_module('listenr.llm_processor')
+
+    def test_unified_asr_imports_constants(self):
+        importlib.import_module('listenr.unified_asr')
+
+    def test_build_dataset_imports_constants(self):
+        importlib.import_module('listenr.build_dataset')
+
+    def test_build_dataset_defaults_match_constants(self):
+        """Each build_dataset DEFAULT_* alias equals its DATASET_* constant."""
+        bd = importlib.import_module('listenr.build_dataset')
+        # DEFAULT_<suffix> in build_dataset mirrors DATASET_<suffix> in constants,
+        # so one loop over the shared suffixes covers every alias.
+        suffixes = ('OUTPUT', 'SPLIT', 'MIN_DURATION', 'MIN_CHARS', 'SEED', 'FORMAT')
+        for suffix in suffixes:
+            assert getattr(bd, f'DEFAULT_{suffix}') == getattr(C, f'DATASET_{suffix}')
+
+
+# ---------------------------------------------------------------------------
+# No stale cfg.get_* calls in migrated modules (grep-based AST-free check)
+# ---------------------------------------------------------------------------
+
+class TestNoCfgCallsInMigratedModules:
+    """
+    Ensure migrated modules do not contain inline cfg.get_setting() /
+    cfg.get_*_setting() calls that duplicate what constants.py exposes.  The
+    only exception is _api_base() in llm_processor (URL may be overridden at runtime).
+    """
+
+    # Matches cfg.get_setting( as well as typed cfg.get_<type>_setting( variants.
+    _CFG_CALL = r'cfg\.get_(?:\w+_)?setting\('
+
+    def _source(self, module_name: str) -> str:
+        import inspect
+        return inspect.getsource(importlib.import_module(module_name))
+
+    def _cfg_calls(self, module_name: str) -> list:
+        """Return every cfg getter call found in the module's source text."""
+        import re
+        return re.findall(self._CFG_CALL, self._source(module_name))
+
+    def test_cli_has_no_inline_cfg_get_calls(self):
+        # cli.py should contain no cfg.get_*setting() calls at all
+        calls = self._cfg_calls('listenr.cli')
+        assert calls == [], f"cli.py still has inline cfg calls: {calls}"
+
+    def test_build_dataset_has_no_inline_cfg_get_calls(self):
+        calls = self._cfg_calls('listenr.build_dataset')
+        assert calls == [], f"build_dataset.py still has inline cfg calls: {calls}"
+
+    def test_unified_asr_has_no_inline_cfg_get_calls(self):
+        calls = self._cfg_calls('listenr.unified_asr')
+        assert calls == [], f"unified_asr.py still has inline cfg calls: {calls}"
+
+    def test_llm_processor_cfg_calls_only_in_api_base(self):
+        """Line scan: a cfg getter call may only follow the ``def _api_base`` header."""
+        import re
+        enclosing = None  # name of the most recent ``def`` seen while scanning
+        lines = self._source('listenr.llm_processor').splitlines()
+        for lineno, line in enumerate(lines, start=1):
+            if (m := re.match(r'\s*(?:async\s+)?def\s+(\w+)', line)):
+                enclosing = m.group(1)
+            if re.search(self._CFG_CALL, line):
+                assert enclosing == '_api_base', (
+                    f"llm_processor.py has unexpected cfg call outside _api_base at line ~{lineno}: {line!r}"
+                )