From fad6ecc089917fad06e61f135bd0c5c688f691ab Mon Sep 17 00:00:00 2001 From: z3ro-2 Date: Tue, 3 Feb 2026 14:11:50 -0600 Subject: [PATCH 01/45] YouTube downloads now honor an access-gated cookie fallback --- CHANGELOG.md | 10 ++ config/config_sample.json | 7 ++ engine/core.py | 20 ++++ engine/job_queue.py | 188 +++++++++++++++++++++++++++++++++++--- 4 files changed, 214 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ca72120..c1566cd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,11 +2,21 @@ All notable changes to this project will be documented here. +## [v0.9.3] – YouTube Cookie Fallback (post-release) + +Fixed: +- Added an optional, fallback-only YouTube cookies.txt path that retries only access-gated failures once with cookies before marking video jobs as permanently failed; music mode and existing anonymous behavior continue to run unchanged. + ## [v0.9.1] – Runtime Stability & Direct URL Fixes This release focuses on restoring and hardening runtime stability after refactors since yt-archiver v1.2.0. Primary goals were correctness, predictability, and eliminating regressions in downloads, scheduling, and search flows. +## [v0.9.3] – YouTube Cookie Fallback (post-release) + +Fixed: +- Added an optional, fallback-only YouTube cookies.txt path that retries only access-gated failures once with cookies before marking video jobs as permanently failed; music mode and existing anonymous behavior continue to run unchanged. + Fixed: - Restored reliable Direct URL downloads for video and audio (mp3/m4a/etc). - Corrected yt-dlp invocation for audio formats (uses extract-audio instead of merge-output-format). 
diff --git a/config/config_sample.json b/config/config_sample.json index e1cdb56..455ebf0 100644 --- a/config/config_sample.json +++ b/config/config_sample.json @@ -46,6 +46,13 @@ ], "yt_dlp_cookies": "", "yt_dlp_opts": {}, + "youtube": { + "cookies": { + "enabled": false, + "path": "tokens/youtube_cookies.txt", + "fallback_only": true + } + }, "filename_template": "%(title)s - %(uploader)s - %(upload_date)s.%(ext)s", "music_filename_template": "%(artist)s/%(album)s/%(track_number)s - %(track)s.%(ext)s", "music_metadata": { diff --git a/engine/core.py b/engine/core.py index 482ba4e..1fa75c1 100644 --- a/engine/core.py +++ b/engine/core.py @@ -281,6 +281,26 @@ def validate_config(config): if cookie_file is not None and not isinstance(cookie_file, str): errors.append("yt_dlp_cookies must be a string") + youtube_cfg = config.get("youtube") + if youtube_cfg is not None: + if not isinstance(youtube_cfg, dict): + errors.append("youtube must be an object") + else: + cookies_cfg = youtube_cfg.get("cookies") + if cookies_cfg is not None: + if not isinstance(cookies_cfg, dict): + errors.append("youtube.cookies must be an object") + else: + enabled = cookies_cfg.get("enabled") + if enabled is not None and not isinstance(enabled, bool): + errors.append("youtube.cookies.enabled must be true/false") + fallback_only = cookies_cfg.get("fallback_only") + if fallback_only is not None and not isinstance(fallback_only, bool): + errors.append("youtube.cookies.fallback_only must be true/false") + path = cookies_cfg.get("path") + if path is not None and not isinstance(path, str): + errors.append("youtube.cookies.path must be a string") + filename_template = config.get("filename_template") if filename_template is not None and not isinstance(filename_template, str): errors.append("filename_template must be a string") diff --git a/engine/job_queue.py b/engine/job_queue.py index b32fda9..7b75d9f 100644 --- a/engine/job_queue.py +++ b/engine/job_queue.py @@ -47,6 +47,10 @@ class 
CancelledError(Exception): class PostprocessingError(Exception): pass + +class CookieFallbackError(RuntimeError): + """Raised when the optional YouTube cookie fallback fails.""" + _FORMAT_VIDEO = ( "bestvideo[ext=webm][height<=1080]+bestaudio[ext=webm]/" "bestvideo[ext=webm][height<=720]+bestaudio[ext=webm]/" @@ -1154,6 +1158,69 @@ def resolve_cookie_file(config): return resolved +def resolve_youtube_cookie_fallback_file(config): + youtube_cfg = (config or {}).get("youtube") + if not isinstance(youtube_cfg, dict): + return None + cookies_cfg = youtube_cfg.get("cookies") + if not isinstance(cookies_cfg, dict): + return None + if not cookies_cfg.get("enabled"): + return None + if not cookies_cfg.get("fallback_only"): + return None + path = cookies_cfg.get("path") + if not isinstance(path, str) or not path.strip(): + return None + try: + resolved = resolve_dir(path, TOKENS_DIR) + except ValueError as exc: + logging.error("Invalid youtube cookies path: %s", exc) + return None + if not os.path.exists(resolved): + logging.warning("youtube cookies file not found: %s", resolved) + return None + return resolved + + +def _is_youtube_access_gate(message: str | None) -> bool: + if not message: + return False + lower_msg = message.lower() + triggers = [ + "this video is not available", + "sign in to confirm your age", + "login required", + "access denied", + "age restricted", + "age-restricted", + "age restriction", + ] + blockers = [ + "timed out", + "timeout", + "connection reset", + "temporary failure", + "network error", + "couldn't download webpage", + "unable to download webpage", + "http error 403", + "http error 404", + "geo-restricted", + "geoblocked", + "geo blocked", + "country", + "region", + "format not available", + "private", + "removed", + ] + if not any(trigger in lower_msg for trigger in triggers): + return False + if any(blocker in lower_msg for blocker in blockers): + return False + return True + def resolve_media_type(config, *, playlist_entry=None, 
url=None): media_type = None if isinstance(playlist_entry, dict): @@ -1548,6 +1615,28 @@ def _format_summary(info): } +def _select_youtube_cookie_fallback( + config, + url, + stderr_text, + opts, + media_type, +): + fallback_cookie = resolve_youtube_cookie_fallback_file(config) + if not fallback_cookie: + return None + if opts.get("cookiefile"): + return None + if is_music_media_type(media_type): + return None + source = resolve_source(url) + if source not in {"youtube", "youtube_music"}: + return None + if not _is_youtube_access_gate(stderr_text): + return None + return fallback_cookie + + def download_with_ytdlp( url, temp_dir, @@ -1684,7 +1773,13 @@ def _is_empty_download_error(e: Exception) -> bool: cmd_log = _argv_to_redacted_cli(cmd_argv) try: - subprocess.run(cmd_argv, check=True, stdout=DEVNULL, stderr=DEVNULL) + subprocess.run( + cmd_argv, + check=True, + stdout=DEVNULL, + stderr=subprocess.PIPE, + text=True, + ) # Log AFTER the command has been executed, per requirement. _log_event( logging.INFO, @@ -1694,7 +1789,71 @@ def _is_empty_download_error(e: Exception) -> bool: cli=cmd_log, ) except CalledProcessError as exc: - # If a cookiefile is present and yt-dlp produced no completed file in temp_dir, retry once WITHOUT cookies. 
+ stderr_output = (exc.stderr or "").strip() + fallback_cookie = _select_youtube_cookie_fallback( + config=config, + url=url, + stderr_text=stderr_output, + opts=opts_for_run, + media_type=media_type, + ) + if fallback_cookie: + _log_event( + logging.INFO, + "YTDLP_YOUTUBE_COOKIE_FALLBACK_ATTEMPT", + job_id=job_id, + url=url, + origin=origin, + media_type=media_type, + media_intent=media_intent, + error=stderr_output, + ) + retry_opts = dict(opts_for_run) + retry_opts["cookiefile"] = fallback_cookie + cmd_retry_argv = _render_ytdlp_cli_argv(retry_opts, url) + cmd_retry_log = _argv_to_redacted_cli(cmd_retry_argv) + try: + subprocess.run( + cmd_retry_argv, + check=True, + stdout=DEVNULL, + stderr=subprocess.PIPE, + text=True, + ) + _log_event( + logging.INFO, + "YTDLP_YOUTUBE_COOKIE_FALLBACK_SUCCEEDED", + job_id=job_id, + url=url, + origin=origin, + media_type=media_type, + media_intent=media_intent, + ) + _log_event( + logging.INFO, + "YTDLP_CLI_EQUIVALENT", + job_id=job_id, + url=url, + cli=cmd_retry_log, + ) + if (stop_event and stop_event.is_set()) or ( + callable(cancel_check) and cancel_check() + ): + raise CancelledError(cancel_reason or "Cancelled by user") + return info, _select_download_output(temp_dir, info, audio_mode) + except CalledProcessError as fallback_exc: + fallback_message = (fallback_exc.stderr or "").strip() + _log_event( + logging.ERROR, + "YTDLP_YOUTUBE_COOKIE_FALLBACK_FAILED", + job_id=job_id, + url=url, + origin=origin, + media_type=media_type, + media_intent=media_intent, + error=fallback_message, + ) + raise CookieFallbackError(f"yt_dlp_cookie_fallback_failed: {fallback_exc}") if opts.get("cookiefile"): found = False for entry in os.listdir(temp_dir): @@ -1789,7 +1948,10 @@ def _is_empty_download_error(e: Exception) -> bool: if (stop_event and stop_event.is_set()) or (callable(cancel_check) and cancel_check()): raise CancelledError(cancel_reason or "Cancelled by user") + return info, _select_download_output(temp_dir, info, audio_mode) + 
+def _select_download_output(temp_dir, info, audio_mode): local_path = None if isinstance(info, dict): local_path = info.get("_filename") @@ -1799,15 +1961,12 @@ def _is_empty_download_error(e: Exception) -> bool: if local_path: break - # If yt-dlp reported a concrete output file, use it if local_path and os.path.exists(local_path) and os.path.getsize(local_path) > 0: - return info, local_path + return local_path - # Otherwise, scan temp_dir for completed artifacts candidates = [] audio_candidates = [] for entry in os.listdir(temp_dir): - # Ignore yt-dlp temporary/partial artifacts if entry.endswith((".part", ".ytdl", ".temp")): continue candidate = os.path.join(temp_dir, entry) @@ -1820,22 +1979,27 @@ def _is_empty_download_error(e: Exception) -> bool: if size <= 0: continue candidates.append((size, candidate)) - if os.path.splitext(candidate)[1].lower() in {".m4a", ".webm", ".opus", ".aac", ".mp3", ".flac"}: + if os.path.splitext(candidate)[1].lower() in { + ".m4a", + ".webm", + ".opus", + ".aac", + ".mp3", + ".flac", + }: audio_candidates.append((size, candidate)) - # In audio_mode, we MUST have an audio-capable artifact if audio_mode: if not audio_candidates: raise PostprocessingError( "No audio stream resolved (video-only format selected)" ) audio_candidates.sort(reverse=True) - return info, audio_candidates[0][1] + return audio_candidates[0][1] - # Video mode fallback: pick the largest completed artifact if candidates: candidates.sort(reverse=True) - return info, candidates[0][1] + return candidates[0][1] raise RuntimeError("yt_dlp_no_output") @@ -2313,6 +2477,8 @@ def is_retryable_error(error): return False if isinstance(error, PostprocessingError): return False + if isinstance(error, CookieFallbackError): + return False if isinstance(error, (DownloadError, ExtractorError)): message = str(error).lower() else: From e0f4e87dbad56b1b8a2ff4a08cc44643955b3d8d Mon Sep 17 00:00:00 2001 From: z3ro-2 Date: Mon, 9 Feb 2026 09:52:02 -0600 Subject: [PATCH 02/45] v0.9.3: 
send Telegram run summary on scheduled and watcher completion --- api/main.py | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/api/main.py b/api/main.py index 610b205..cb06ec6 100644 --- a/api/main.py +++ b/api/main.py @@ -156,6 +156,40 @@ def _sanitize_non_http_urls(obj): return [_sanitize_non_http_urls(v) for v in obj] return obj +def notify_run_summary(config, *, run_type: str, status, started_at, finished_at): + if run_type not in {"scheduled", "watcher"}: + return + + successes = int(getattr(status, "run_successes", 0) or 0) + failures = int(getattr(status, "run_failures", 0) or 0) + attempted = successes + failures + + if attempted <= 0: + return + + duration_label = "unknown" + if started_at and finished_at: + start_dt = _parse_iso(started_at) + finish_dt = _parse_iso(finished_at) + if start_dt is not None and finish_dt is not None: + duration_sec = int((finish_dt - start_dt).total_seconds()) + m, s = divmod(max(0, duration_sec), 60) + duration_label = f"{m}m {s}s" if m else f"{s}s" + + msg = ( + "Retreivr Run Summary\n" + f"Run type: {run_type}\n" + f"Attempted: {attempted}\n" + f"Succeeded: {successes}\n" + f"Failed: {failures}\n" + f"Duration: {duration_label}" + ) + + try: + telegram_notify(config, msg) + except Exception: + logging.exception("Telegram notify failed (run_type=%s)", run_type) + def normalize_search_payload(payload: dict | None, *, default_sources: list[str]) -> dict: if payload is None: @@ -1862,6 +1896,14 @@ async def _runner(): logging.info("State reset to idle") elif app.state.state in {"running", "completed"}: app.state.state = "idle" + + notify_run_summary( + config, + run_type=run_source, + status=status, + started_at=app.state.started_at, + finished_at=app.state.finished_at, + ) app.state.run_task = asyncio.create_task(_runner()) From d035a69d65bcc803d23717aab586a9ded4ba353b Mon Sep 17 00:00:00 2001 From: z3ro-2 Date: Mon, 9 Feb 2026 11:03:41 -0600 Subject: [PATCH 03/45] corrected 
changelog for 0.9.x notes --- CHANGELOG.md | 70 ++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 60 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c1566cd..321caad 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,16 +7,6 @@ All notable changes to this project will be documented here. Fixed: - Added an optional, fallback-only YouTube cookies.txt path that retries only access-gated failures once with cookies before marking video jobs as permanently failed; music mode and existing anonymous behavior continue to run unchanged. -## [v0.9.1] – Runtime Stability & Direct URL Fixes - -This release focuses on restoring and hardening runtime stability after refactors since yt-archiver v1.2.0. -Primary goals were correctness, predictability, and eliminating regressions in downloads, scheduling, and search flows. - -## [v0.9.3] – YouTube Cookie Fallback (post-release) - -Fixed: -- Added an optional, fallback-only YouTube cookies.txt path that retries only access-gated failures once with cookies before marking video jobs as permanently failed; music mode and existing anonymous behavior continue to run unchanged. - Fixed: - Restored reliable Direct URL downloads for video and audio (mp3/m4a/etc). - Corrected yt-dlp invocation for audio formats (uses extract-audio instead of merge-output-format). @@ -42,6 +32,66 @@ Notes: - Kill-download button is not guaranteed during active runs and remains experimental. - Watcher functionality is present but considered beta and may change in later releases. +## [v0.9.2] – Search Engine Dialed In // Home Page UI Update + +Highlights + +This release hardens the download pipeline (especially audio-only MP3), improves observability, and simplifies the Home UI ahead of broader feature work. Video downloads remain stable and unchanged. + +⸻ + +🚀 Improvements • Reliable MP3 audio-only downloads • Audio mode now uses a robust bestaudio[acodec!=none]/bestaudio/best selector. 
• Prevents unnecessary video downloads when targeting MP3. • Matches known-working yt-dlp CLI behavior. • Works consistently for direct URLs and queued jobs. • Safer yt-dlp option handling • Avoids forced merge/remux unless explicitly required. • Reduces ffmpeg post-processing failures. • Audio and video paths are now clearly separated and predictable. • yt-dlp CLI observability • Job workers now log the exact yt-dlp CLI command executed (with secrets redacted). • Makes debugging format, cookie, and extractor issues significantly easier. + +⸻ + +🧠 Behavior Fixes • Post-processing failures are now terminal • ffmpeg / post-processing errors correctly mark jobs as FAILED. • Prevents silent re-queue loops and misleading “Queued” states in the UI. • Video pipeline preserved • Default video behavior (bestvideo+bestaudio/best) remains unchanged. • MP4 / MKV / WebM downloads continue to work as before. + +⸻ + +🎧 Music & Metadata • Music metadata enrichment remains optional • Failed or low-confidence enrichment no longer blocks successful downloads. • Clear logging when metadata is skipped due to confidence thresholds. + +⸻ + +🖥 UI / UX • Home page cleanup • Reorganized source filters and advanced options into a single compact row. • Reduced visual noise without removing functionality. • Improved spacing and alignment for music mode, format, and destination controls. • Advanced Search remains available • Advanced functionality is still accessible via the dedicated Advanced Search page. + +⸻ + +🧹 Internal / Maintenance • Improved internal option auditing logs. • Better separation between search, enqueue, and execution logic. • No schema or config migrations required. + +⸻ + +⚠️ Known Notes • Client-side (“download to this device”) delivery is still being refined and may be disabled or hidden in some UI paths. + +## [v0.9.1] – Runtime Stability & Direct URL Fixes + +This release focuses on restoring and hardening runtime stability after refactors since yt-archiver v1.2.0. 
Primary goals were correctness, predictability, and eliminating regressions in downloads, scheduling, and search flows. + +Fixed: + +Restored reliable Direct URL downloads for video and audio (mp3/m4a/etc). +Corrected yt-dlp invocation for audio formats (uses extract-audio instead of merge-output-format). +Fixed Direct URL runs appearing permanently queued in the Home UI. +Prevented empty or zero-byte output files from being recorded as completed. +Fixed scheduler playlist downloads producing incorrect formats or audio-only output. +Ensured scheduler and direct downloads can run concurrently without interference. +Fixed missing database schema initialization for search-related tables. +Normalized all filesystem paths via paths.py and environment variables (Docker-safe). +Fixed Advanced Search “Failed to load requests” error caused by search DB store calling service-only logic. +Fixed Home screen results remaining stuck in “Queued” by restoring reliable search request status hydration. +Unified search job database usage to a single canonical path to prevent schema and state mismatches. +Changed: + +Direct URL playlist links are now explicitly rejected with a clear user-facing error message. +Direct URL runs bypass the job queue but still report progress and completion via run status. +Search-only results can now be downloaded individually via the Home results UI. +Default video downloads respect configured format preferences (e.g., webm/mp4). +Metadata enrichment failures no longer block or corrupt completed downloads. +Notes: + +Playlist URLs must be added via Scheduler / Playlist configuration, not Direct URL mode. +Kill-download button is not guaranteed during active runs and remains experimental. +Watcher functionality is present but considered beta and may change in later releases. 
+ ## [v0.9.0] – Retreivr Rebrand Release // Music Mode and Metadata - Project renamed to Retreivr - Repository migrated to new namespace From 9e523a8700b03e7a1ce3e074ddc3cd4819d4602c Mon Sep 17 00:00:00 2001 From: z3ro-2 Date: Mon, 16 Feb 2026 10:21:05 -0600 Subject: [PATCH 04/45] feat(spotify): add playlist snapshot storage, diffing, and watch job scaffolding add SQLite migration helpers for playlist_snapshots and playlist_snapshot_items with unique constraints and indexes add snapshot persistence layer with fast-path skip when snapshot_id is unchanged add Spotify playlist client method get_playlist_items(playlist_id) returning (snapshot_id, normalized ordered items) add duplicate-aware diff_playlist(prev, curr) utility with added/removed/moved output add scheduler job spotify_playlist_watch.py to compute snapshot+diff and enqueue only new tracks add unit tests for diff behavior and snapshot persistence fast-path/ordering --- db/__init__.py | 5 + db/migrations.py | 59 ++++++++ db/playlist_snapshots.py | 178 +++++++++++++++++++++++ scheduler/__init__.py | 2 + scheduler/jobs/__init__.py | 2 + scheduler/jobs/spotify_playlist_watch.py | 67 +++++++++ spotify/__init__.py | 6 + spotify/client.py | 122 ++++++++++++++++ spotify/diff.py | 47 ++++++ tests/test_playlist_diff.py | 35 +++++ tests/test_playlist_snapshot_store.py | 101 +++++++++++++ 11 files changed, 624 insertions(+) create mode 100644 db/__init__.py create mode 100644 db/migrations.py create mode 100644 db/playlist_snapshots.py create mode 100644 scheduler/__init__.py create mode 100644 scheduler/jobs/__init__.py create mode 100644 scheduler/jobs/spotify_playlist_watch.py create mode 100644 spotify/__init__.py create mode 100644 spotify/client.py create mode 100644 spotify/diff.py create mode 100644 tests/test_playlist_diff.py create mode 100644 tests/test_playlist_snapshot_store.py diff --git a/db/__init__.py b/db/__init__.py new file mode 100644 index 0000000..2a2223a --- /dev/null +++ b/db/__init__.py @@ 
-0,0 +1,5 @@ +"""Database helpers for Retreivr.""" + +from db.playlist_snapshots import PlaylistSnapshotStore, SnapshotWriteResult + +__all__ = ["PlaylistSnapshotStore", "SnapshotWriteResult"] diff --git a/db/migrations.py b/db/migrations.py new file mode 100644 index 0000000..81244f7 --- /dev/null +++ b/db/migrations.py @@ -0,0 +1,59 @@ +"""SQLite migrations for playlist snapshot storage.""" + +from __future__ import annotations + +import sqlite3 + + +def ensure_playlist_snapshot_tables(conn: sqlite3.Connection) -> None: + """Ensure playlist snapshot tables and indexes exist.""" + cur = conn.cursor() + cur.execute( + """ + CREATE TABLE IF NOT EXISTS playlist_snapshots ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + source TEXT NOT NULL, + playlist_id TEXT NOT NULL, + snapshot_id TEXT NOT NULL, + fetched_at TEXT NOT NULL, + track_count INTEGER NOT NULL, + raw_json TEXT, + UNIQUE (source, playlist_id, snapshot_id) + ) + """ + ) + cur.execute( + "CREATE INDEX IF NOT EXISTS idx_playlist_snapshots_lookup " + "ON playlist_snapshots (source, playlist_id, id DESC)" + ) + cur.execute( + "CREATE INDEX IF NOT EXISTS idx_playlist_snapshots_fetched_at " + "ON playlist_snapshots (fetched_at)" + ) + cur.execute( + """ + CREATE TABLE IF NOT EXISTS playlist_snapshot_items ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + snapshot_db_id INTEGER NOT NULL, + position INTEGER NOT NULL, + track_uri TEXT, + track_id TEXT, + added_at TEXT, + added_by TEXT, + is_local INTEGER NOT NULL DEFAULT 0, + name TEXT, + FOREIGN KEY (snapshot_db_id) REFERENCES playlist_snapshots(id) ON DELETE CASCADE, + UNIQUE (snapshot_db_id, position) + ) + """ + ) + cur.execute( + "CREATE INDEX IF NOT EXISTS idx_playlist_snapshot_items_snapshot_position " + "ON playlist_snapshot_items (snapshot_db_id, position)" + ) + cur.execute( + "CREATE INDEX IF NOT EXISTS idx_playlist_snapshot_items_track_uri " + "ON playlist_snapshot_items (track_uri)" + ) + conn.commit() + diff --git a/db/playlist_snapshots.py 
b/db/playlist_snapshots.py new file mode 100644 index 0000000..0a0c640 --- /dev/null +++ b/db/playlist_snapshots.py @@ -0,0 +1,178 @@ +"""Persistence for playlist snapshots and normalized snapshot items.""" + +from __future__ import annotations + +import sqlite3 +from dataclasses import dataclass +from datetime import datetime, timezone +from typing import Any + +from db.migrations import ensure_playlist_snapshot_tables + + +@dataclass(frozen=True) +class SnapshotWriteResult: + """Result payload for snapshot writes.""" + + inserted: bool + snapshot_db_id: int | None + reason: str | None = None + + +class PlaylistSnapshotStore: + """SQLite-backed playlist snapshot store.""" + + def __init__(self, db_path: str) -> None: + self.db_path = db_path + + def _connect(self) -> sqlite3.Connection: + conn = sqlite3.connect(self.db_path, check_same_thread=False, timeout=30) + conn.row_factory = sqlite3.Row + conn.execute("PRAGMA foreign_keys = ON") + return conn + + def ensure_schema(self) -> None: + """Create required snapshot tables if missing.""" + conn = self._connect() + try: + ensure_playlist_snapshot_tables(conn) + finally: + conn.close() + + def get_latest_snapshot(self, source: str, playlist_id: str) -> dict[str, Any] | None: + """Return latest snapshot and items for a playlist.""" + conn = self._connect() + try: + ensure_playlist_snapshot_tables(conn) + cur = conn.cursor() + cur.execute( + """ + SELECT id, source, playlist_id, snapshot_id, fetched_at, track_count, raw_json + FROM playlist_snapshots + WHERE source=? AND playlist_id=? 
+ ORDER BY id DESC + LIMIT 1 + """, + (source, playlist_id), + ) + row = cur.fetchone() + if not row: + return None + snapshot = dict(row) + snapshot["items"] = self._get_snapshot_items(cur, int(row["id"])) + return snapshot + finally: + conn.close() + + def get_latest_track_uris(self, source: str, playlist_id: str) -> list[str]: + """Return ordered track URIs for the latest snapshot.""" + snapshot = self.get_latest_snapshot(source, playlist_id) + if not snapshot: + return [] + return [item["track_uri"] for item in snapshot["items"] if item.get("track_uri")] + + def insert_snapshot( + self, + *, + source: str, + playlist_id: str, + snapshot_id: str, + items: list[dict[str, Any]], + fetched_at: str | None = None, + raw_json: str | None = None, + ) -> SnapshotWriteResult: + """Insert a new snapshot and its normalized item rows.""" + now = fetched_at or datetime.now(timezone.utc).replace(microsecond=0).isoformat() + conn = self._connect() + try: + ensure_playlist_snapshot_tables(conn) + cur = conn.cursor() + cur.execute("BEGIN IMMEDIATE") + cur.execute( + """ + SELECT id, snapshot_id + FROM playlist_snapshots + WHERE source=? AND playlist_id=? + ORDER BY id DESC + LIMIT 1 + """, + (source, playlist_id), + ) + previous = cur.fetchone() + if previous and previous["snapshot_id"] == snapshot_id: + conn.commit() + return SnapshotWriteResult( + inserted=False, + snapshot_db_id=int(previous["id"]), + reason="snapshot_unchanged", + ) + + try: + cur.execute( + """ + INSERT INTO playlist_snapshots ( + source, playlist_id, snapshot_id, fetched_at, track_count, raw_json + ) VALUES (?, ?, ?, ?, ?, ?) + """, + (source, playlist_id, snapshot_id, now, len(items), raw_json), + ) + except sqlite3.IntegrityError: + cur.execute( + """ + SELECT id + FROM playlist_snapshots + WHERE source=? AND playlist_id=? AND snapshot_id=? 
+ ORDER BY id DESC + LIMIT 1 + """, + (source, playlist_id, snapshot_id), + ) + existing = cur.fetchone() + conn.commit() + return SnapshotWriteResult( + inserted=False, + snapshot_db_id=int(existing["id"]) if existing else None, + reason="snapshot_exists", + ) + + snapshot_db_id = int(cur.lastrowid) + cur.executemany( + """ + INSERT INTO playlist_snapshot_items ( + snapshot_db_id, position, track_uri, track_id, added_at, added_by, is_local, name + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?) + """, + [ + ( + snapshot_db_id, + int(item.get("position", index)), + item.get("uri"), + item.get("track_id"), + item.get("added_at"), + item.get("added_by"), + 1 if bool(item.get("is_local")) else 0, + item.get("name"), + ) + for index, item in enumerate(items) + ], + ) + conn.commit() + return SnapshotWriteResult(inserted=True, snapshot_db_id=snapshot_db_id) + finally: + conn.close() + + def _get_snapshot_items(self, cur: sqlite3.Cursor, snapshot_db_id: int) -> list[dict[str, Any]]: + cur.execute( + """ + SELECT + id, snapshot_db_id, position, track_uri, track_id, added_at, added_by, is_local, name + FROM playlist_snapshot_items + WHERE snapshot_db_id=? 
+ ORDER BY position ASC + """, + (snapshot_db_id,), + ) + items = [dict(row) for row in cur.fetchall()] + for item in items: + item["is_local"] = bool(item.get("is_local")) + return items diff --git a/scheduler/__init__.py b/scheduler/__init__.py new file mode 100644 index 0000000..693093e --- /dev/null +++ b/scheduler/__init__.py @@ -0,0 +1,2 @@ +"""Scheduler integration package.""" + diff --git a/scheduler/jobs/__init__.py b/scheduler/jobs/__init__.py new file mode 100644 index 0000000..959bffd --- /dev/null +++ b/scheduler/jobs/__init__.py @@ -0,0 +1,2 @@ +"""Scheduler job implementations.""" + diff --git a/scheduler/jobs/spotify_playlist_watch.py b/scheduler/jobs/spotify_playlist_watch.py new file mode 100644 index 0000000..e309136 --- /dev/null +++ b/scheduler/jobs/spotify_playlist_watch.py @@ -0,0 +1,67 @@ +"""Scheduler job for Spotify playlist change detection via snapshots.""" + +from __future__ import annotations + +from collections import Counter +from datetime import datetime, timezone +from typing import Any, Callable + +from db.playlist_snapshots import PlaylistSnapshotStore +from spotify.client import SpotifyPlaylistClient +from spotify.diff import diff_playlist + + +def run_spotify_playlist_watch_job( + *, + playlist_id: str, + spotify_client: SpotifyPlaylistClient, + snapshot_store: PlaylistSnapshotStore, + enqueue_track: Callable[[dict[str, Any]], None], + source: str = "spotify", +) -> dict[str, Any]: + """Run one playlist watch cycle and enqueue newly added tracks only.""" + previous_uris = snapshot_store.get_latest_track_uris(source, playlist_id) + + snapshot_id, items = spotify_client.get_playlist_items(playlist_id) + fetched_at = datetime.now(timezone.utc).replace(microsecond=0).isoformat() + write_result = snapshot_store.insert_snapshot( + source=source, + playlist_id=playlist_id, + snapshot_id=snapshot_id, + items=items, + fetched_at=fetched_at, + ) + if not write_result.inserted: + return { + "status": "unchanged", + "playlist_id": 
playlist_id, + "snapshot_id": snapshot_id, + "snapshot_db_id": write_result.snapshot_db_id, + "enqueued": 0, + "diff": {"added": [], "removed": [], "moved": []}, + } + + current_uris = [item.get("uri") for item in items if item.get("uri")] + changes = diff_playlist(previous_uris, current_uris) + + prev_counts = Counter(previous_uris) + observed_counts = Counter() + enqueued = 0 + for item in items: + uri = item.get("uri") + if not uri: + continue + observed_counts[uri] += 1 + if observed_counts[uri] <= prev_counts.get(uri, 0): + continue + enqueue_track(item) + enqueued += 1 + + return { + "status": "updated", + "playlist_id": playlist_id, + "snapshot_id": snapshot_id, + "snapshot_db_id": write_result.snapshot_db_id, + "enqueued": enqueued, + "diff": changes, + } diff --git a/spotify/__init__.py b/spotify/__init__.py new file mode 100644 index 0000000..aef6b28 --- /dev/null +++ b/spotify/__init__.py @@ -0,0 +1,6 @@ +"""Spotify integration modules.""" + +from spotify.client import SpotifyPlaylistClient +from spotify.diff import diff_playlist + +__all__ = ["SpotifyPlaylistClient", "diff_playlist"] diff --git a/spotify/client.py b/spotify/client.py new file mode 100644 index 0000000..474d273 --- /dev/null +++ b/spotify/client.py @@ -0,0 +1,122 @@ +"""Spotify API client for playlist snapshot reads.""" + +from __future__ import annotations + +import base64 +import os +import time +import urllib.parse +from typing import Any + +import requests + + +class SpotifyPlaylistClient: + """Client for reading playlist snapshots and playlist items from Spotify.""" + + _TOKEN_URL = "https://accounts.spotify.com/api/token" + _PLAYLIST_URL = "https://api.spotify.com/v1/playlists/{playlist_id}" + _PLAYLIST_ITEMS_URL = "https://api.spotify.com/v1/playlists/{playlist_id}/tracks" + + def __init__( + self, + *, + client_id: str | None = None, + client_secret: str | None = None, + timeout_sec: int = 20, + ) -> None: + self.client_id = client_id or os.environ.get("SPOTIFY_CLIENT_ID") + 
self.client_secret = client_secret or os.environ.get("SPOTIFY_CLIENT_SECRET") + self.timeout_sec = timeout_sec + self._access_token: str | None = None + self._access_token_expire_at: float = 0.0 + + def _get_access_token(self) -> str: + if not self.client_id or not self.client_secret: + raise RuntimeError("Spotify credentials are required") + + now = time.time() + if self._access_token and now < self._access_token_expire_at: + return self._access_token + + auth_payload = f"{self.client_id}:{self.client_secret}".encode("utf-8") + auth_header = base64.b64encode(auth_payload).decode("ascii") + response = requests.post( + self._TOKEN_URL, + data={"grant_type": "client_credentials"}, + headers={"Authorization": f"Basic {auth_header}"}, + timeout=self.timeout_sec, + ) + if response.status_code != 200: + raise RuntimeError(f"Spotify token request failed ({response.status_code})") + + payload = response.json() + token = payload.get("access_token") + if not token: + raise RuntimeError("Spotify token response missing access_token") + + expires_in = int(payload.get("expires_in") or 0) + self._access_token = token + self._access_token_expire_at = now + max(0, expires_in - 30) + return token + + def _request_json(self, url: str, params: dict[str, Any] | None = None) -> dict[str, Any]: + token = self._get_access_token() + headers = {"Authorization": f"Bearer {token}"} + response = requests.get(url, params=params, headers=headers, timeout=self.timeout_sec) + if response.status_code == 401: + self._access_token = None + token = self._get_access_token() + headers = {"Authorization": f"Bearer {token}"} + response = requests.get(url, params=params, headers=headers, timeout=self.timeout_sec) + if response.status_code != 200: + raise RuntimeError(f"Spotify request failed ({response.status_code})") + return response.json() + + def get_playlist_items(self, playlist_id: str) -> tuple[str, list[dict[str, Any]]]: + """Fetch a playlist snapshot id and ordered normalized item records.""" + 
playlist_id = (playlist_id or "").strip() + if not playlist_id: + raise ValueError("playlist_id is required") + + encoded_id = urllib.parse.quote(playlist_id, safe="") + metadata = self._request_json( + self._PLAYLIST_URL.format(playlist_id=encoded_id), + params={"fields": "snapshot_id"}, + ) + snapshot_id = metadata.get("snapshot_id") + if not snapshot_id: + raise RuntimeError("Spotify playlist response missing snapshot_id") + + items: list[dict[str, Any]] = [] + offset = 0 + limit = 100 + while True: + payload = self._request_json( + self._PLAYLIST_ITEMS_URL.format(playlist_id=encoded_id), + params={ + "offset": offset, + "limit": limit, + "fields": "items(added_at,added_by(id),is_local,track(id,uri,name)),total,next", + }, + ) + raw_items = payload.get("items") or [] + for raw in raw_items: + track = raw.get("track") or {} + items.append( + { + "uri": track.get("uri"), + "track_id": track.get("id"), + "added_at": raw.get("added_at"), + "added_by": (raw.get("added_by") or {}).get("id"), + "is_local": bool(raw.get("is_local")), + "name": track.get("name"), + } + ) + + if not payload.get("next"): + break + offset += len(raw_items) + + return str(snapshot_id), items + diff --git a/spotify/diff.py b/spotify/diff.py new file mode 100644 index 0000000..f3df00c --- /dev/null +++ b/spotify/diff.py @@ -0,0 +1,47 @@ +"""Deterministic playlist diff utilities.""" + +from __future__ import annotations + +from collections import Counter, defaultdict, deque + + +def diff_playlist(prev: list[str], curr: list[str]) -> dict[str, list]: + """Return duplicate-aware added/removed/moved changes from prev to curr order.""" + prev_list = [value for value in prev if value] + curr_list = [value for value in curr if value] + + prev_counts = Counter(prev_list) + curr_counts = Counter(curr_list) + + added: list[str] = [] + running_curr = Counter() + for uri in curr_list: + running_curr[uri] += 1 + if running_curr[uri] > prev_counts.get(uri, 0): + added.append(uri) + + removed: list[str] = [] 
+ running_prev = Counter() + for uri in prev_list: + running_prev[uri] += 1 + if running_prev[uri] > curr_counts.get(uri, 0): + removed.append(uri) + + prev_positions: dict[str, deque[int]] = defaultdict(deque) + curr_positions: dict[str, deque[int]] = defaultdict(deque) + for idx, uri in enumerate(prev_list): + prev_positions[uri].append(idx) + for idx, uri in enumerate(curr_list): + curr_positions[uri].append(idx) + + moved: list[dict[str, int | str]] = [] + for uri in sorted(set(prev_positions).intersection(curr_positions)): + retained = min(len(prev_positions[uri]), len(curr_positions[uri])) + for _ in range(retained): + old_pos = prev_positions[uri].popleft() + new_pos = curr_positions[uri].popleft() + if old_pos != new_pos: + moved.append({"uri": uri, "from": old_pos, "to": new_pos}) + + return {"added": added, "removed": removed, "moved": moved} + diff --git a/tests/test_playlist_diff.py b/tests/test_playlist_diff.py new file mode 100644 index 0000000..c18048f --- /dev/null +++ b/tests/test_playlist_diff.py @@ -0,0 +1,35 @@ +from spotify.diff import diff_playlist + + +def test_diff_playlist_added_removed_and_moved() -> None: + prev = ["a", "b", "c"] + curr = ["b", "a", "d"] + + changes = diff_playlist(prev, curr) + + assert changes["added"] == ["d"] + assert changes["removed"] == ["c"] + assert changes["moved"] == [ + {"uri": "a", "from": 0, "to": 1}, + {"uri": "b", "from": 1, "to": 0}, + ] + + +def test_diff_playlist_honors_duplicates() -> None: + prev = ["x", "y", "x"] + curr = ["x", "x", "y", "x"] + + changes = diff_playlist(prev, curr) + + assert changes["added"] == ["x"] + assert changes["removed"] == [] + assert changes["moved"] == [ + {"uri": "x", "from": 2, "to": 1}, + {"uri": "y", "from": 1, "to": 2}, + ] + + +def test_diff_playlist_handles_empty_lists() -> None: + changes = diff_playlist([], []) + + assert changes == {"added": [], "removed": [], "moved": []} diff --git a/tests/test_playlist_snapshot_store.py b/tests/test_playlist_snapshot_store.py 
new file mode 100644 index 0000000..ee46b0a --- /dev/null +++ b/tests/test_playlist_snapshot_store.py @@ -0,0 +1,101 @@ +import sqlite3 + +from db.playlist_snapshots import PlaylistSnapshotStore + + +def _sample_items() -> list[dict[str, object]]: + return [ + { + "uri": "spotify:track:1", + "track_id": "1", + "added_at": "2026-02-09T00:00:00+00:00", + "added_by": "user_a", + "is_local": False, + "name": "Track One", + }, + { + "uri": "spotify:track:2", + "track_id": "2", + "added_at": "2026-02-09T00:01:00+00:00", + "added_by": "user_b", + "is_local": False, + "name": "Track Two", + }, + ] + + +def test_snapshot_store_inserts_snapshot_and_items(tmp_path) -> None: + db_path = tmp_path / "snapshots.sqlite" + store = PlaylistSnapshotStore(str(db_path)) + + result = store.insert_snapshot( + source="spotify", + playlist_id="playlist-1", + snapshot_id="snap-1", + items=_sample_items(), + ) + + assert result.inserted is True + latest = store.get_latest_snapshot("spotify", "playlist-1") + assert latest is not None + assert latest["snapshot_id"] == "snap-1" + assert latest["track_count"] == 2 + assert [item["track_uri"] for item in latest["items"]] == [ + "spotify:track:1", + "spotify:track:2", + ] + + +def test_snapshot_store_fast_path_for_same_snapshot_id(tmp_path) -> None: + db_path = tmp_path / "snapshots.sqlite" + store = PlaylistSnapshotStore(str(db_path)) + store.insert_snapshot( + source="spotify", + playlist_id="playlist-1", + snapshot_id="snap-1", + items=_sample_items(), + ) + + second = store.insert_snapshot( + source="spotify", + playlist_id="playlist-1", + snapshot_id="snap-1", + items=_sample_items(), + ) + + assert second.inserted is False + assert second.reason == "snapshot_unchanged" + + with sqlite3.connect(db_path) as conn: + count = conn.execute("SELECT COUNT(*) FROM playlist_snapshots").fetchone()[0] + assert count == 1 + + +def test_snapshot_store_tracks_latest_snapshot_uris(tmp_path) -> None: + db_path = tmp_path / "snapshots.sqlite" + store = 
PlaylistSnapshotStore(str(db_path)) + store.insert_snapshot( + source="spotify", + playlist_id="playlist-2", + snapshot_id="snap-1", + items=_sample_items(), + ) + updated_items = _sample_items() + [ + { + "uri": "spotify:track:3", + "track_id": "3", + "added_at": "2026-02-09T00:02:00+00:00", + "added_by": "user_c", + "is_local": False, + "name": "Track Three", + } + ] + store.insert_snapshot( + source="spotify", + playlist_id="playlist-2", + snapshot_id="snap-2", + items=updated_items, + ) + + latest_uris = store.get_latest_track_uris("spotify", "playlist-2") + assert latest_uris == ["spotify:track:1", "spotify:track:2", "spotify:track:3"] From cad18405d528d885881a309d79576315b6bdcc1c Mon Sep 17 00:00:00 2001 From: z3ro-2 Date: Mon, 16 Feb 2026 11:28:46 -0600 Subject: [PATCH 05/45] =?UTF-8?q?feat(dev):=20add=20structured=20Codex=20p?= =?UTF-8?q?rompt=20blocks=20for=20Spotify=20world-class=20music=20pipeline?= =?UTF-8?q?=20=09=E2=80=A2=09Introduced=20modular,=20focused=20Codex=20pro?= =?UTF-8?q?mpt=20set=20for=20Spotify=20ingestion=20hardening=20=09?= =?UTF-8?q?=E2=80=A2=09Added=20prompt=20blocks=20for:=20=09=E2=80=A2=09Spo?= =?UTF-8?q?tify=20playlist=20fetch=20+=20normalization=20=09=E2=80=A2=09Sn?= =?UTF-8?q?apshot=20diff=20logic=20(duplicate=20+=20move=20aware)=20=09?= =?UTF-8?q?=E2=80=A2=09SQLite=20snapshot=20persistence=20+=20fast-path=20h?= =?UTF-8?q?andling=20=09=E2=80=A2=09Scheduler=20watch=20job=20integration?= =?UTF-8?q?=20=09=E2=80=A2=09Canonical=20MusicMetadata=20object=20=09?= =?UTF-8?q?=E2=80=A2=09Deterministic=20metadata=20precedence=20(Spotify=20?= =?UTF-8?q?=E2=86=92=20MusicBrainz=20=E2=86=92=20yt-dlp)=20=09=E2=80=A2=09?= =?UTF-8?q?Music=20filename=20builder=20(industry-standard=20format)=20=09?= =?UTF-8?q?=E2=80=A2=09ID3/Vorbis=20tagging=20routine=20with=20artwork=20+?= =?UTF-8?q?=20lyrics=20=09=E2=80=A2=09Deterministic=20Spotify=20search=20q?= =?UTF-8?q?uery=20builder=20=09=E2=80=A2=09Designed=20prompts=20for=20incr?= 
=?UTF-8?q?emental=20generation=20to=20keep=20Codex=20scoped=20and=20focus?= =?UTF-8?q?ed=20=09=E2=80=A2=09Enforces=20deterministic=20behavior,=20idem?= =?UTF-8?q?potency,=20and=20streaming-quality=20metadata=20goals=20=09?= =?UTF-8?q?=E2=80=A2=09Supports=20=E2=80=9CSpotify=20as=20authority?= =?UTF-8?q?=E2=80=9D=20model=20(metadata=20only,=20not=20media=20source)?= =?UTF-8?q?=20=09=E2=80=A2=09Lays=20foundation=20for=20production-grade=20?= =?UTF-8?q?Music=20Mode?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No runtime behavior changed. Development workflow enhancement only. --- db/migrations.py | 39 +-- db/playlist_snapshots.py | 278 +++++++++++------- metadata/__init__.py | 6 +- metadata/merge.py | 141 +++++++++ metadata/music_files.py | 137 +++++++++ metadata/music_metadata.py | 217 ++++++++++++++ metadata/tagging.py | 66 +++++ metadata/types.py | 97 ++++++ scheduler/jobs/spotify_playlist_watch.py | 158 ++++++---- spotify/client.py | 180 ++++++++++-- spotify/diff.py | 83 +++--- spotify/search_queries.py | 39 +++ tests/test_metadata_tagging.py | 137 +++++++++ tests/test_music_file_naming_and_tagging.py | 146 +++++++++ tests/test_music_metadata_merge.py | 94 ++++++ .../test_retreivr_playlist_snapshot_store.py | 63 ++++ tests/test_spotify_diff.py | 92 ++++++ tests/test_spotify_playlist_client.py | 128 ++++++++ tests/test_spotify_playlist_watch_job.py | 108 +++++++ 19 files changed, 1962 insertions(+), 247 deletions(-) create mode 100644 metadata/merge.py create mode 100644 metadata/music_files.py create mode 100644 metadata/music_metadata.py create mode 100644 metadata/tagging.py create mode 100644 metadata/types.py create mode 100644 spotify/search_queries.py create mode 100644 tests/test_metadata_tagging.py create mode 100644 tests/test_music_file_naming_and_tagging.py create mode 100644 tests/test_music_metadata_merge.py create mode 100644 tests/test_retreivr_playlist_snapshot_store.py create mode 100644 
tests/test_spotify_diff.py create mode 100644 tests/test_spotify_playlist_client.py create mode 100644 tests/test_spotify_playlist_watch_job.py diff --git a/db/migrations.py b/db/migrations.py index 81244f7..6388e14 100644 --- a/db/migrations.py +++ b/db/migrations.py @@ -1,4 +1,4 @@ -"""SQLite migrations for playlist snapshot storage.""" +"""SQLite migrations for Spotify playlist snapshot persistence.""" from __future__ import annotations @@ -6,54 +6,41 @@ def ensure_playlist_snapshot_tables(conn: sqlite3.Connection) -> None: - """Ensure playlist snapshot tables and indexes exist.""" + """Create snapshot tables and indexes when they do not already exist.""" cur = conn.cursor() cur.execute( """ CREATE TABLE IF NOT EXISTS playlist_snapshots ( id INTEGER PRIMARY KEY AUTOINCREMENT, - source TEXT NOT NULL, playlist_id TEXT NOT NULL, snapshot_id TEXT NOT NULL, - fetched_at TEXT NOT NULL, - track_count INTEGER NOT NULL, - raw_json TEXT, - UNIQUE (source, playlist_id, snapshot_id) + timestamp TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP ) """ ) cur.execute( - "CREATE INDEX IF NOT EXISTS idx_playlist_snapshots_lookup " - "ON playlist_snapshots (source, playlist_id, id DESC)" + "CREATE UNIQUE INDEX IF NOT EXISTS uq_playlist_snapshots_playlist_snapshot " + "ON playlist_snapshots (playlist_id, snapshot_id)" ) cur.execute( - "CREATE INDEX IF NOT EXISTS idx_playlist_snapshots_fetched_at " - "ON playlist_snapshots (fetched_at)" + "CREATE INDEX IF NOT EXISTS idx_playlist_snapshots_snapshot_lookup " + "ON playlist_snapshots (playlist_id, snapshot_id)" ) + cur.execute( """ CREATE TABLE IF NOT EXISTS playlist_snapshot_items ( id INTEGER PRIMARY KEY AUTOINCREMENT, - snapshot_db_id INTEGER NOT NULL, + snapshot_id INTEGER NOT NULL, + spotify_track_id TEXT NOT NULL, position INTEGER NOT NULL, - track_uri TEXT, - track_id TEXT, added_at TEXT, - added_by TEXT, - is_local INTEGER NOT NULL DEFAULT 0, - name TEXT, - FOREIGN KEY (snapshot_db_id) REFERENCES playlist_snapshots(id) ON DELETE CASCADE, 
- UNIQUE (snapshot_db_id, position) + FOREIGN KEY (snapshot_id) REFERENCES playlist_snapshots(id) ON DELETE CASCADE ) """ ) cur.execute( - "CREATE INDEX IF NOT EXISTS idx_playlist_snapshot_items_snapshot_position " - "ON playlist_snapshot_items (snapshot_db_id, position)" - ) - cur.execute( - "CREATE INDEX IF NOT EXISTS idx_playlist_snapshot_items_track_uri " - "ON playlist_snapshot_items (track_uri)" + "CREATE UNIQUE INDEX IF NOT EXISTS uq_playlist_snapshot_items_unique_position " + "ON playlist_snapshot_items (snapshot_id, spotify_track_id, position)" ) conn.commit() - diff --git a/db/playlist_snapshots.py b/db/playlist_snapshots.py index 0a0c640..32bea88 100644 --- a/db/playlist_snapshots.py +++ b/db/playlist_snapshots.py @@ -1,26 +1,140 @@ -"""Persistence for playlist snapshots and normalized snapshot items.""" +"""Persistence helpers for Spotify playlist snapshots.""" from __future__ import annotations +import os import sqlite3 from dataclasses import dataclass -from datetime import datetime, timezone from typing import Any from db.migrations import ensure_playlist_snapshot_tables +_DEFAULT_DB_ENV_KEY = "RETREIVR_DB_PATH" + + +def _resolve_db_path() -> str: + return os.environ.get(_DEFAULT_DB_ENV_KEY, os.path.join(os.getcwd(), "retreivr.sqlite3")) + + +def _connect(db_path: str | None = None) -> sqlite3.Connection: + conn = sqlite3.connect(db_path or _resolve_db_path(), check_same_thread=False, timeout=30) + conn.row_factory = sqlite3.Row + conn.execute("PRAGMA foreign_keys = ON") + ensure_playlist_snapshot_tables(conn) + return conn + @dataclass(frozen=True) class SnapshotWriteResult: - """Result payload for snapshot writes.""" + """Result payload for class-based snapshot writes.""" inserted: bool snapshot_db_id: int | None reason: str | None = None +def get_latest_snapshot(playlist_id: str) -> tuple[str | None, list[dict[str, Any]]]: + """Return latest `(snapshot_id, items)` for a playlist, or `(None, [])` when missing.""" + pid = (playlist_id or "").strip() 
+ if not pid: + return None, [] + + conn = _connect() + try: + cur = conn.cursor() + cur.execute( + """ + SELECT id, snapshot_id + FROM playlist_snapshots + WHERE playlist_id=? + ORDER BY id DESC + LIMIT 1 + """, + (pid,), + ) + row = cur.fetchone() + if not row: + return None, [] + + snapshot_row_id = int(row["id"]) + snapshot_id = str(row["snapshot_id"]) + cur.execute( + """ + SELECT spotify_track_id, position, added_at + FROM playlist_snapshot_items + WHERE snapshot_id=? + ORDER BY position ASC, id ASC + """, + (snapshot_row_id,), + ) + items = [dict(item) for item in cur.fetchall()] + return snapshot_id, items + finally: + conn.close() + + +def store_snapshot(playlist_id: str, snapshot_id: str, items: list[dict[str, Any]]) -> None: + """Store a snapshot and items only when `snapshot_id` differs from the latest snapshot.""" + pid = (playlist_id or "").strip() + sid = (snapshot_id or "").strip() + if not pid: + raise ValueError("playlist_id is required") + if not sid: + raise ValueError("snapshot_id is required") + + conn = _connect() + try: + cur = conn.cursor() + cur.execute("BEGIN IMMEDIATE") + cur.execute( + """ + SELECT id, snapshot_id + FROM playlist_snapshots + WHERE playlist_id=? + ORDER BY id DESC + LIMIT 1 + """, + (pid,), + ) + latest = cur.fetchone() + if latest and str(latest["snapshot_id"]) == sid: + conn.commit() + return + + cur.execute( + """ + INSERT INTO playlist_snapshots (playlist_id, snapshot_id) + VALUES (?, ?) + """, + (pid, sid), + ) + snapshot_row_id = int(cur.lastrowid) + + rows: list[tuple[Any, ...]] = [] + for idx, item in enumerate(items): + track_id = item.get("spotify_track_id") + if not track_id: + continue + position = int(item.get("position", idx)) + rows.append((snapshot_row_id, str(track_id), position, item.get("added_at"))) + + if rows: + cur.executemany( + """ + INSERT INTO playlist_snapshot_items ( + snapshot_id, spotify_track_id, position, added_at + ) VALUES (?, ?, ?, ?) 
+ """, + rows, + ) + + conn.commit() + finally: + conn.close() + + class PlaylistSnapshotStore: - """SQLite-backed playlist snapshot store.""" + """Compatibility wrapper around module-level snapshot helpers.""" def __init__(self, db_path: str) -> None: self.db_path = db_path @@ -29,150 +143,116 @@ def _connect(self) -> sqlite3.Connection: conn = sqlite3.connect(self.db_path, check_same_thread=False, timeout=30) conn.row_factory = sqlite3.Row conn.execute("PRAGMA foreign_keys = ON") + ensure_playlist_snapshot_tables(conn) return conn def ensure_schema(self) -> None: - """Create required snapshot tables if missing.""" + """Ensure snapshot schema exists.""" conn = self._connect() - try: - ensure_playlist_snapshot_tables(conn) - finally: - conn.close() + conn.close() - def get_latest_snapshot(self, source: str, playlist_id: str) -> dict[str, Any] | None: - """Return latest snapshot and items for a playlist.""" + def get_latest_snapshot(self, playlist_id: str) -> dict[str, Any] | None: + """Return latest snapshot metadata and ordered items for `playlist_id`.""" + pid = (playlist_id or "").strip() + if not pid: + return None conn = self._connect() try: - ensure_playlist_snapshot_tables(conn) cur = conn.cursor() cur.execute( """ - SELECT id, source, playlist_id, snapshot_id, fetched_at, track_count, raw_json + SELECT id, playlist_id, snapshot_id, timestamp FROM playlist_snapshots - WHERE source=? AND playlist_id=? + WHERE playlist_id=? ORDER BY id DESC LIMIT 1 """, - (source, playlist_id), + (pid,), ) row = cur.fetchone() if not row: return None snapshot = dict(row) - snapshot["items"] = self._get_snapshot_items(cur, int(row["id"])) + cur.execute( + """ + SELECT spotify_track_id, position, added_at + FROM playlist_snapshot_items + WHERE snapshot_id=? 
+ ORDER BY position ASC, id ASC + """, + (int(row["id"]),), + ) + items = [dict(item) for item in cur.fetchall()] + snapshot["items"] = items + snapshot["track_count"] = len(items) + snapshot["fetched_at"] = snapshot.get("timestamp") + snapshot["raw_json"] = None return snapshot finally: conn.close() - def get_latest_track_uris(self, source: str, playlist_id: str) -> list[str]: - """Return ordered track URIs for the latest snapshot.""" - snapshot = self.get_latest_snapshot(source, playlist_id) - if not snapshot: - return [] - return [item["track_uri"] for item in snapshot["items"] if item.get("track_uri")] - - def insert_snapshot( + def store_snapshot( self, - *, - source: str, playlist_id: str, snapshot_id: str, items: list[dict[str, Any]], - fetched_at: str | None = None, - raw_json: str | None = None, ) -> SnapshotWriteResult: - """Insert a new snapshot and its normalized item rows.""" - now = fetched_at or datetime.now(timezone.utc).replace(microsecond=0).isoformat() + """Store snapshot with fast-path skip when unchanged.""" + pid = (playlist_id or "").strip() + sid = (snapshot_id or "").strip() + if not pid: + raise ValueError("playlist_id is required") + if not sid: + raise ValueError("snapshot_id is required") + conn = self._connect() try: - ensure_playlist_snapshot_tables(conn) cur = conn.cursor() cur.execute("BEGIN IMMEDIATE") cur.execute( """ SELECT id, snapshot_id FROM playlist_snapshots - WHERE source=? AND playlist_id=? + WHERE playlist_id=? 
ORDER BY id DESC LIMIT 1 """, - (source, playlist_id), + (pid,), ) - previous = cur.fetchone() - if previous and previous["snapshot_id"] == snapshot_id: + latest = cur.fetchone() + if latest and str(latest["snapshot_id"]) == sid: conn.commit() return SnapshotWriteResult( inserted=False, - snapshot_db_id=int(previous["id"]), + snapshot_db_id=int(latest["id"]), reason="snapshot_unchanged", ) - try: - cur.execute( - """ - INSERT INTO playlist_snapshots ( - source, playlist_id, snapshot_id, fetched_at, track_count, raw_json - ) VALUES (?, ?, ?, ?, ?, ?) - """, - (source, playlist_id, snapshot_id, now, len(items), raw_json), - ) - except sqlite3.IntegrityError: - cur.execute( - """ - SELECT id - FROM playlist_snapshots - WHERE source=? AND playlist_id=? AND snapshot_id=? - ORDER BY id DESC - LIMIT 1 - """, - (source, playlist_id, snapshot_id), - ) - existing = cur.fetchone() - conn.commit() - return SnapshotWriteResult( - inserted=False, - snapshot_db_id=int(existing["id"]) if existing else None, - reason="snapshot_exists", - ) - - snapshot_db_id = int(cur.lastrowid) - cur.executemany( + cur.execute( """ - INSERT INTO playlist_snapshot_items ( - snapshot_db_id, position, track_uri, track_id, added_at, added_by, is_local, name - ) VALUES (?, ?, ?, ?, ?, ?, ?, ?) + INSERT INTO playlist_snapshots (playlist_id, snapshot_id) + VALUES (?, ?) 
""", - [ - ( - snapshot_db_id, - int(item.get("position", index)), - item.get("uri"), - item.get("track_id"), - item.get("added_at"), - item.get("added_by"), - 1 if bool(item.get("is_local")) else 0, - item.get("name"), - ) - for index, item in enumerate(items) - ], + (pid, sid), ) + snapshot_row_id = int(cur.lastrowid) + rows: list[tuple[Any, ...]] = [] + for idx, item in enumerate(items): + track_id = item.get("spotify_track_id") + if not track_id: + continue + position = int(item.get("position", idx)) + rows.append((snapshot_row_id, str(track_id), position, item.get("added_at"))) + if rows: + cur.executemany( + """ + INSERT INTO playlist_snapshot_items ( + snapshot_id, spotify_track_id, position, added_at + ) VALUES (?, ?, ?, ?) + """, + rows, + ) conn.commit() - return SnapshotWriteResult(inserted=True, snapshot_db_id=snapshot_db_id) + return SnapshotWriteResult(inserted=True, snapshot_db_id=snapshot_row_id) finally: conn.close() - - def _get_snapshot_items(self, cur: sqlite3.Cursor, snapshot_db_id: int) -> list[dict[str, Any]]: - cur.execute( - """ - SELECT - id, snapshot_db_id, position, track_uri, track_id, added_at, added_by, is_local, name - FROM playlist_snapshot_items - WHERE snapshot_db_id=? 
- ORDER BY position ASC - """, - (snapshot_db_id,), - ) - items = [dict(row) for row in cur.fetchall()] - for item in items: - item["is_local"] = bool(item.get("is_local")) - return items diff --git a/metadata/__init__.py b/metadata/__init__.py index 1fe6989..2138d87 100644 --- a/metadata/__init__.py +++ b/metadata/__init__.py @@ -1,3 +1,7 @@ -from .queue import enqueue_metadata +try: + from .queue import enqueue_metadata +except ModuleNotFoundError: # pragma: no cover - optional deps may be absent in test env + def enqueue_metadata(*_args, **_kwargs): + raise RuntimeError("metadata queue dependencies are unavailable") __all__ = ["enqueue_metadata"] diff --git a/metadata/merge.py b/metadata/merge.py new file mode 100644 index 0000000..0b70669 --- /dev/null +++ b/metadata/merge.py @@ -0,0 +1,141 @@ +"""Metadata merge logic for Spotify, MusicBrainz, and yt-dlp sources.""" + +from __future__ import annotations + +import logging +import re +from typing import Any + +from metadata.types import MusicMetadata + +_LOG = logging.getLogger(__name__) +_WS_RE = re.compile(r"\s+") +_TITLE_SPLIT_RE = re.compile(r"([\s\-\(\)\[\]/:&])") +_LOWER_WORDS = {"a", "an", "and", "as", "at", "by", "for", "in", "of", "on", "or", "the", "to", "vs"} + + +def merge_metadata(spotify: dict, mb: dict, ytdlp: dict) -> MusicMetadata: + """Merge metadata with precedence Spotify -> MusicBrainz -> yt-dlp and normalized outputs.""" + sp = spotify or {} + mbd = mb or {} + ytd = ytdlp or {} + + def pick(field: str, extractor) -> tuple[Any, str]: + for source_name, source in (("spotify", sp), ("musicbrainz", mbd), ("ytdlp", ytd)): + value = extractor(source) + if _has_value(value): + _LOG.info("metadata_field_source field=%s source=%s", field, source_name) + return value, source_name + _LOG.info("metadata_field_source field=%s source=missing", field) + return None, "missing" + + title, _ = pick("title", lambda s: s.get("title") or s.get("track")) + artist, _ = pick("artist", lambda s: s.get("artist")) + 
album, _ = pick("album", lambda s: s.get("album")) + album_artist, _ = pick("album_artist", lambda s: s.get("album_artist")) + track_num, _ = pick("track_num", lambda s: s.get("track_num") or s.get("track_number")) + disc_num, _ = pick("disc_num", lambda s: s.get("disc_num") or s.get("disc_number")) + date, _ = pick("date", lambda s: s.get("date") or s.get("release_date") or s.get("year")) + genre, _ = pick("genre", lambda s: s.get("genre")) + isrc, _ = pick("isrc", lambda s: s.get("isrc")) + mbid, _ = pick( + "mbid", + lambda s: s.get("mbid") or s.get("recording_id") or s.get("musicbrainz_recording_id"), + ) + artwork, _ = pick("artwork", lambda s: s.get("artwork")) + lyrics, _ = pick("lyrics", lambda s: s.get("lyrics")) + + return MusicMetadata( + title=_normalize_title(title) or "Unknown Title", + artist=_normalize_string(artist) or "Unknown Artist", + album=_normalize_title(album) or "Unknown Album", + album_artist=_normalize_string(album_artist) or _normalize_string(artist) or "Unknown Artist", + track_num=_parse_positive_int(track_num, default=1), + disc_num=_parse_positive_int(disc_num, default=1), + date=_normalize_string(date) or "Unknown", + genre=_normalize_title(_genre_to_string(genre)) or "Unknown", + isrc=_normalize_string(isrc), + mbid=_normalize_string(mbid), + artwork=_coerce_artwork_bytes(artwork), + lyrics=_normalize_string(lyrics), + ) + + +def _has_value(value: Any) -> bool: + if value is None: + return False + if isinstance(value, str): + return bool(value.strip()) + if isinstance(value, (bytes, bytearray)): + return len(value) > 0 + if isinstance(value, list): + return len(value) > 0 + return True + + +def _normalize_string(value: Any) -> str | None: + if value is None: + return None + text = _WS_RE.sub(" ", str(value)).strip() + return text or None + + +def _normalize_title(value: Any) -> str | None: + base = _normalize_string(value) + if not base: + return None + parts = _TITLE_SPLIT_RE.split(base) + out: list[str] = [] + major_seen = False 
+ for token in parts: + if not token: + continue + if _TITLE_SPLIT_RE.fullmatch(token): + out.append(token) + continue + lower = token.lower() + if major_seen and lower in _LOWER_WORDS: + out.append(lower) + elif token.isupper() and len(token) > 1: + out.append(token) + else: + out.append(token[:1].upper() + token[1:].lower()) + major_seen = True + return "".join(out) + + +def _parse_positive_int(value: Any, *, default: int) -> int: + if value is None: + return default + text = str(value).strip() + if not text: + return default + if "/" in text: + text = text.split("/", 1)[0].strip() + try: + parsed = int(text) + except ValueError: + return default + return parsed if parsed > 0 else default + + +def _genre_to_string(value: Any) -> str | None: + if value is None: + return None + if isinstance(value, list): + parts = [_normalize_string(v) for v in value] + cleaned = [p for p in parts if p] + return ", ".join(cleaned) if cleaned else None + return _normalize_string(value) + + +def _coerce_artwork_bytes(value: Any) -> bytes | None: + if value is None: + return None + if isinstance(value, bytes): + return value or None + if isinstance(value, bytearray): + data = bytes(value) + return data or None + return None + diff --git a/metadata/music_files.py b/metadata/music_files.py new file mode 100644 index 0000000..ca48660 --- /dev/null +++ b/metadata/music_files.py @@ -0,0 +1,137 @@ +"""Music filename and tagging helpers.""" + +from __future__ import annotations + +import mimetypes +import os +import re +from typing import Any + +try: + from mutagen.flac import FLAC, Picture + from mutagen.id3 import APIC, ID3, TALB, TCON, TDRC, TIT2, TPE1, TPE2, TPOS, TRCK, TSRC, TXXX, USLT +except ImportError: # pragma: no cover - exercised via unit-test monkeypatching + FLAC = None + Picture = None + APIC = ID3 = TALB = TCON = TDRC = TIT2 = TPE1 = TPE2 = TPOS = TRCK = TSRC = TXXX = USLT = None + +from metadata.music_metadata import MusicMetadata + +_FS_FORBIDDEN_CHARS_RE = 
re.compile(r'[<>:"/\\|?*]') +_WHITESPACE_RE = re.compile(r"\s+") + + +def build_music_filename(metadata: MusicMetadata) -> str: + """Build a strict music filename in the form `01 - Track Title.mp3`.""" + track_num = metadata.track_num if isinstance(metadata.track_num, int) and metadata.track_num > 0 else 0 + track_label = f"{track_num:02d}" + title = _sanitize_filename_component(metadata.title or "Unknown Title") + return f"{track_label} - {title}.mp3" + + +def tag_music_file(path: str, metadata: MusicMetadata) -> None: + """Write music tags and embedded artwork for MP3/FLAC files.""" + ext = os.path.splitext(path)[1].lower() + if ext == ".mp3": + _tag_mp3(path, metadata) + return + if ext == ".flac": + _tag_flac(path, metadata) + return + raise ValueError(f"Unsupported music format: {ext or '(none)'}") + + +def _sanitize_text(value: str | None) -> str | None: + if value is None: + return None + text = str(value) + text = "".join(ch if ord(ch) >= 32 else " " for ch in text) + text = _WHITESPACE_RE.sub(" ", text).strip() + return text or None + + +def _sanitize_filename_component(value: str) -> str: + text = _sanitize_text(value) or "Unknown" + text = _FS_FORBIDDEN_CHARS_RE.sub("-", text) + text = text.strip(" .") + return text or "Unknown" + + +def _read_artwork_bytes(artwork: str | None) -> tuple[bytes, str] | None: + if not artwork: + return None + if not os.path.exists(artwork): + return None + with open(artwork, "rb") as handle: + data = handle.read() + if not data: + return None + mime = mimetypes.guess_type(artwork)[0] or "image/jpeg" + return data, mime + + +def _set_id3_text(audio: ID3, frame_cls: Any, value: str | int | None) -> None: + normalized = _sanitize_text(str(value)) if value is not None else None + if not normalized: + return + audio.add(frame_cls(encoding=3, text=[normalized])) + + +def _tag_mp3(path: str, metadata: MusicMetadata) -> None: + if ID3 is None: + raise RuntimeError("mutagen is required for MP3 tagging") + audio = ID3() + 
_set_id3_text(audio, TIT2, metadata.title) + _set_id3_text(audio, TPE1, metadata.artist) + _set_id3_text(audio, TALB, metadata.album) + _set_id3_text(audio, TPE2, metadata.album_artist) + _set_id3_text(audio, TRCK, metadata.track_num) + _set_id3_text(audio, TPOS, metadata.disc_num) + _set_id3_text(audio, TDRC, metadata.date) + _set_id3_text(audio, TCON, metadata.genre) + _set_id3_text(audio, TSRC, metadata.isrc) + if metadata.mbid: + audio.add(TXXX(encoding=3, desc="MBID", text=[_sanitize_text(metadata.mbid)])) + if metadata.lyrics: + audio.add(USLT(encoding=3, lang="eng", desc="Lyrics", text=_sanitize_text(metadata.lyrics))) + + artwork_blob = _read_artwork_bytes(metadata.artwork) + if artwork_blob: + data, mime = artwork_blob + audio.add(APIC(encoding=3, mime=mime, type=3, desc="cover", data=data)) + audio.save(path, v2_version=4) + + +def _tag_flac(path: str, metadata: MusicMetadata) -> None: + if FLAC is None or Picture is None: + raise RuntimeError("mutagen is required for FLAC tagging") + audio = FLAC(path) + fields: dict[str, str | int | None] = { + "title": metadata.title, + "artist": metadata.artist, + "album": metadata.album, + "albumartist": metadata.album_artist, + "tracknumber": metadata.track_num, + "discnumber": metadata.disc_num, + "date": metadata.date, + "genre": metadata.genre, + "isrc": metadata.isrc, + "musicbrainz_trackid": metadata.mbid, + "lyrics": metadata.lyrics, + } + for key, value in fields.items(): + normalized = _sanitize_text(str(value)) if value is not None else None + if normalized: + audio[key] = [normalized] + + artwork_blob = _read_artwork_bytes(metadata.artwork) + if artwork_blob: + data, mime = artwork_blob + picture = Picture() + picture.data = data + picture.type = 3 + picture.mime = mime + picture.desc = "cover" + audio.clear_pictures() + audio.add_picture(picture) + audio.save() diff --git a/metadata/music_metadata.py b/metadata/music_metadata.py new file mode 100644 index 0000000..f5a4f33 --- /dev/null +++ 
b/metadata/music_metadata.py @@ -0,0 +1,217 @@ +"""Deterministic music metadata model and merge helpers.""" + +from __future__ import annotations + +import logging +import re +import unicodedata +from dataclasses import dataclass +from typing import Any + +_LOG = logging.getLogger(__name__) +_FS_FORBIDDEN_CHARS_RE = re.compile(r'[<>:"/\\|?*]') +_WHITESPACE_RE = re.compile(r"\s+") +_FEAT_RE = re.compile(r"\s+(?:feat\.?|featuring|ft\.?)\s+", re.IGNORECASE) + + +@dataclass(frozen=True) +class MusicMetadata: + """Normalized metadata ready for tags and filesystem-safe naming.""" + + title: str | None + artist: str | None + album: str | None + album_artist: str | None + track_num: int | None + disc_num: int | None + date: str | None + genre: str | None + isrc: str | None + mbid: str | None + artwork: str | None + lyrics: str | None + + +def merge_metadata( + spotify_data: dict[str, Any] | None, + mb_data: dict[str, Any] | None, + ytdlp_data: dict[str, Any] | None, +) -> MusicMetadata: + """Merge Spotify, MusicBrainz, and yt-dlp metadata using deterministic precedence.""" + spotify = spotify_data or {} + musicbrainz = mb_data or {} + ytdlp = ytdlp_data or {} + + def pick(field_name: str, resolver) -> Any: + for source_name, source_data in ( + ("spotify", spotify), + ("musicbrainz", musicbrainz), + ("ytdlp", ytdlp), + ): + value = resolver(source_data) + if _has_value(value): + _LOG.info("metadata_field_source field=%s source=%s", field_name, source_name) + return value + _LOG.info("metadata_field_source field=%s source=missing", field_name) + return None + + metadata = MusicMetadata( + title=pick("title", _resolve_title), + artist=pick("artist", _resolve_artist), + album=pick("album", _resolve_album), + album_artist=pick("album_artist", _resolve_album_artist), + track_num=pick("track_num", _resolve_track_num), + disc_num=pick("disc_num", _resolve_disc_num), + date=pick("date", _resolve_date), + genre=pick("genre", _resolve_genre), + isrc=pick("isrc", _resolve_isrc), + 
mbid=pick("mbid", _resolve_mbid), + artwork=pick("artwork", _resolve_artwork), + lyrics=pick("lyrics", _resolve_lyrics), + ) + return metadata + + +def _has_value(value: Any) -> bool: + if value is None: + return False + if isinstance(value, str): + return bool(value.strip()) + return True + + +def _normalize_string(value: Any) -> str | None: + if value is None: + return None + text = unicodedata.normalize("NFKC", str(value)) + text = "".join(ch for ch in text if ord(ch) >= 32) + text = _FS_FORBIDDEN_CHARS_RE.sub("-", text) + text = _WHITESPACE_RE.sub(" ", text).strip(" .") + return text or None + + +def _parse_artist_text(value: Any) -> str | None: + if value is None: + return None + if isinstance(value, list): + names = [_normalize_string(v.get("name") if isinstance(v, dict) else v) for v in value] + names = [name for name in names if name] + if not names: + return None + if len(names) == 1: + return names[0] + return f"{names[0]} feat. {', '.join(names[1:])}" + + text = _normalize_string(value) + if not text: + return None + parts = _FEAT_RE.split(text, maxsplit=1) + main = _normalize_string(parts[0]) + if len(parts) == 1: + return main + featured_raw = parts[1] + featured_split = re.split(r"\s*(?:,|&| and | x )\s*", featured_raw, flags=re.IGNORECASE) + featured = [_normalize_string(name) for name in featured_split] + featured = [name for name in featured if name] + if not featured: + return main + return f"{main} feat. 
{', '.join(featured)}" + + +def _parse_int(value: Any) -> int | None: + if value is None: + return None + if isinstance(value, int): + return value + text = str(value).strip() + if not text: + return None + match = re.match(r"^(\d+)", text) + if not match: + return None + try: + return int(match.group(1)) + except ValueError: + return None + + +def _resolve_title(data: dict[str, Any]) -> str | None: + return _normalize_string(data.get("title") or data.get("track")) + + +def _resolve_artist(data: dict[str, Any]) -> str | None: + artists = data.get("artists") + if artists is not None: + parsed = _parse_artist_text(artists) + if parsed: + return parsed + return _parse_artist_text(data.get("artist")) + + +def _resolve_album(data: dict[str, Any]) -> str | None: + return _normalize_string(data.get("album")) + + +def _resolve_album_artist(data: dict[str, Any]) -> str | None: + explicit = _parse_artist_text(data.get("album_artist")) + if explicit: + return explicit + return _resolve_artist(data) + + +def _resolve_track_num(data: dict[str, Any]) -> int | None: + return _parse_int(data.get("track_num") or data.get("track_number")) + + +def _resolve_disc_num(data: dict[str, Any]) -> int | None: + return _parse_int(data.get("disc_num") or data.get("disc_number")) + + +def _resolve_date(data: dict[str, Any]) -> str | None: + return _normalize_string(data.get("date") or data.get("release_date") or data.get("year")) + + +def _resolve_genre(data: dict[str, Any]) -> str | None: + genre = data.get("genre") + if isinstance(genre, list): + normalized = [_normalize_string(entry) for entry in genre] + values = [entry for entry in normalized if entry] + if not values: + return None + return "; ".join(values) + return _normalize_string(genre) + + +def _resolve_isrc(data: dict[str, Any]) -> str | None: + value = data.get("isrc") + return _normalize_string(value).upper() if value else None + + +def _resolve_mbid(data: dict[str, Any]) -> str | None: + return _normalize_string( + 
data.get("mbid") + or data.get("musicbrainz_recording_id") + or data.get("recording_id") + or data.get("musicbrainz_release_id") + ) + + +def _resolve_artwork(data: dict[str, Any]) -> str | None: + artwork = data.get("artwork") or data.get("artwork_url") or data.get("thumbnail") + if isinstance(artwork, dict): + return _normalize_string(artwork.get("url")) + if artwork: + return _normalize_string(artwork) + thumbs = data.get("thumbnails") + if isinstance(thumbs, list): + for thumb in thumbs: + if isinstance(thumb, dict): + url = _normalize_string(thumb.get("url")) + if url: + return url + return None + + +def _resolve_lyrics(data: dict[str, Any]) -> str | None: + return _normalize_string(data.get("lyrics")) + diff --git a/metadata/tagging.py b/metadata/tagging.py new file mode 100644 index 0000000..4dd05de --- /dev/null +++ b/metadata/tagging.py @@ -0,0 +1,66 @@ +"""Audio tagging helpers for music files.""" + +from __future__ import annotations + +import logging +import os +from typing import Any + +try: + from mutagen.id3 import APIC, ID3, TALB, TCON, TDRC, TIT2, TPE1, TPE2, TPOS, TRCK, TSRC, TXXX, USLT +except ImportError: # pragma: no cover - handled in tests by monkeypatching + APIC = ID3 = TALB = TCON = TDRC = TIT2 = TPE1 = TPE2 = TPOS = TRCK = TSRC = TXXX = USLT = None + +from metadata.types import MusicMetadata + +_LOG = logging.getLogger(__name__) + + +def tag_file(path: str, metadata: MusicMetadata) -> None: + """Apply metadata tags to a music file using ID3v2.4 for MP3 files.""" + ext = os.path.splitext(path)[1].lower() + if ext != ".mp3": + raise ValueError(f"Unsupported file format for tagging: {ext or '(none)'}") + _tag_mp3(path, metadata) + + +def _add_text_frame(audio: Any, frame_cls: Any, value: str | int | None) -> None: + if value is None: + return + text = str(value).strip() + if not text: + return + audio.add(frame_cls(encoding=3, text=[text])) + + +def _tag_mp3(path: str, metadata: MusicMetadata) -> None: + if ID3 is None: + raise 
RuntimeError("mutagen is required for MP3 tagging") + + audio = ID3() + _add_text_frame(audio, TIT2, metadata.title) + _add_text_frame(audio, TPE1, metadata.artist) + _add_text_frame(audio, TALB, metadata.album) + _add_text_frame(audio, TPE2, metadata.album_artist) + _add_text_frame(audio, TRCK, metadata.track_num) + _add_text_frame(audio, TPOS, metadata.disc_num) + _add_text_frame(audio, TDRC, metadata.date) + _add_text_frame(audio, TCON, metadata.genre) + _add_text_frame(audio, TSRC, metadata.isrc) + if metadata.mbid: + audio.add(TXXX(encoding=3, desc="MBID", text=[metadata.mbid])) + + if metadata.lyrics: + try: + audio.add(USLT(encoding=3, lang="eng", desc="Lyrics", text=metadata.lyrics)) + except Exception: # pragma: no cover - non-fatal branch + _LOG.warning("Failed to write lyrics tag for %s", path, exc_info=True) + + if metadata.artwork: + try: + audio.add(APIC(encoding=3, mime="image/jpeg", type=3, desc="cover", data=metadata.artwork)) + except Exception: # pragma: no cover - non-fatal branch + _LOG.warning("Failed to embed artwork for %s", path, exc_info=True) + + audio.save(path, v2_version=4) + diff --git a/metadata/types.py b/metadata/types.py new file mode 100644 index 0000000..ff7b95b --- /dev/null +++ b/metadata/types.py @@ -0,0 +1,97 @@ +"""Structured metadata types for music processing.""" + +from __future__ import annotations + + +class MusicMetadata: + """Validated, structured music metadata container.""" + + title: str + artist: str + album: str + album_artist: str + track_num: int + disc_num: int + date: str + genre: str + isrc: str | None + mbid: str | None + artwork: bytes | None + lyrics: str | None + + def __init__( + self, + *, + title: str, + artist: str, + album: str, + album_artist: str, + track_num: int, + disc_num: int, + date: str, + genre: str, + isrc: str | None = None, + mbid: str | None = None, + artwork: bytes | None = None, + lyrics: str | None = None, + ) -> None: + """Initialize and validate metadata values.""" + self.title = 
self._require_non_empty_str("title", title) + self.artist = self._require_non_empty_str("artist", artist) + self.album = self._require_non_empty_str("album", album) + self.album_artist = self._require_non_empty_str("album_artist", album_artist) + self.track_num = self._require_positive_int("track_num", track_num) + self.disc_num = self._require_positive_int("disc_num", disc_num) + self.date = self._require_non_empty_str("date", date) + self.genre = self._require_non_empty_str("genre", genre) + self.isrc = self._optional_str("isrc", isrc) + self.mbid = self._optional_str("mbid", mbid) + self.artwork = self._optional_bytes("artwork", artwork) + self.lyrics = self._optional_str("lyrics", lyrics) + + @staticmethod + def _require_non_empty_str(field: str, value: str) -> str: + if not isinstance(value, str): + raise TypeError(f"{field} must be a string") + cleaned = value.strip() + if not cleaned: + raise ValueError(f"{field} must be a non-empty string") + return cleaned + + @staticmethod + def _require_positive_int(field: str, value: int) -> int: + if not isinstance(value, int): + raise TypeError(f"{field} must be an integer") + if value <= 0: + raise ValueError(f"{field} must be > 0") + return value + + @staticmethod + def _optional_str(field: str, value: str | None) -> str | None: + if value is None: + return None + if not isinstance(value, str): + raise TypeError(f"{field} must be a string or None") + cleaned = value.strip() + return cleaned or None + + @staticmethod + def _optional_bytes(field: str, value: bytes | None) -> bytes | None: + if value is None: + return None + if not isinstance(value, (bytes, bytearray)): + raise TypeError(f"{field} must be bytes or None") + return bytes(value) + + def __repr__(self) -> str: + """Return a concise debug representation of this metadata.""" + return ( + "MusicMetadata(" + f"title={self.title!r}, artist={self.artist!r}, album={self.album!r}, " + f"album_artist={self.album_artist!r}, track_num={self.track_num!r}, " + 
f"disc_num={self.disc_num!r}, date={self.date!r}, genre={self.genre!r}, " + f"isrc={self.isrc!r}, mbid={self.mbid!r}, " + f"artwork={'' if self.artwork is not None else None}, " + f"lyrics={self.lyrics!r})" + ) + diff --git a/scheduler/jobs/spotify_playlist_watch.py b/scheduler/jobs/spotify_playlist_watch.py index e309136..10c82de 100644 --- a/scheduler/jobs/spotify_playlist_watch.py +++ b/scheduler/jobs/spotify_playlist_watch.py @@ -1,67 +1,123 @@ -"""Scheduler job for Spotify playlist change detection via snapshots.""" +"""Scheduler job for Spotify playlist snapshot monitoring.""" from __future__ import annotations -from collections import Counter -from datetime import datetime, timezone +import asyncio +import logging from typing import Any, Callable -from db.playlist_snapshots import PlaylistSnapshotStore -from spotify.client import SpotifyPlaylistClient +from spotify.client import SpotifyPlaylistClient, get_playlist_items from spotify.diff import diff_playlist -def run_spotify_playlist_watch_job( - *, - playlist_id: str, - spotify_client: SpotifyPlaylistClient, - snapshot_store: PlaylistSnapshotStore, - enqueue_track: Callable[[dict[str, Any]], None], - source: str = "spotify", -) -> dict[str, Any]: - """Run one playlist watch cycle and enqueue newly added tracks only.""" - previous_uris = snapshot_store.get_latest_track_uris(source, playlist_id) - - snapshot_id, items = spotify_client.get_playlist_items(playlist_id) - fetched_at = datetime.now(timezone.utc).replace(microsecond=0).isoformat() - write_result = snapshot_store.insert_snapshot( - source=source, - playlist_id=playlist_id, - snapshot_id=snapshot_id, - items=items, - fetched_at=fetched_at, - ) - if not write_result.inserted: - return { - "status": "unchanged", - "playlist_id": playlist_id, - "snapshot_id": snapshot_id, - "snapshot_db_id": write_result.snapshot_db_id, - "enqueued": 0, - "diff": {"added": [], "removed": [], "moved": []}, - } +def _load_previous_snapshot(db: Any, playlist_id: str) -> 
tuple[str | None, list[dict[str, Any]]]: + if not hasattr(db, "get_latest_snapshot"): + return None, [] + latest = db.get_latest_snapshot(playlist_id) + if latest is None: + return None, [] + if isinstance(latest, tuple) and len(latest) == 2: + snapshot_id, items = latest + return snapshot_id, list(items or []) + if isinstance(latest, dict): + return latest.get("snapshot_id"), list(latest.get("items") or []) + return None, [] + + +def _run_async(coro): + try: + asyncio.get_running_loop() + except RuntimeError: + return asyncio.run(coro) + return None + + +def _enqueue_added_track(queue: Any, item: dict[str, Any]) -> None: + if callable(queue): + queue(item) + return + for method_name in ("enqueue", "put", "add", "enqueue_track"): + method = getattr(queue, method_name, None) + if callable(method): + method(item) + return + raise TypeError("queue does not expose a supported enqueue method") + + +def playlist_watch_job(spotify_client, db, queue, playlist_id: str) -> dict[str, Any]: + """Fetch playlist snapshot, diff with DB state, enqueue added tracks, and persist new snapshot.""" + pid = (playlist_id or "").strip() + if not pid: + return {"status": "error", "playlist_id": playlist_id, "error": "playlist_id is required"} - current_uris = [item.get("uri") for item in items if item.get("uri")] - changes = diff_playlist(previous_uris, current_uris) + try: + if hasattr(spotify_client, "get_playlist_items") and callable(spotify_client.get_playlist_items): + current_snapshot_id, current_items = spotify_client.get_playlist_items(pid) + else: + result = _run_async(get_playlist_items(spotify_client, pid)) + if result is None: + raise RuntimeError("Cannot run async Spotify fetch inside active event loop") + current_snapshot_id, current_items = result + except Exception as exc: + logging.exception("Spotify fetch failed for playlist %s", pid) + return {"status": "error", "playlist_id": pid, "error": f"spotify_fetch_failed: {exc}"} - prev_counts = Counter(previous_uris) - 
observed_counts = Counter() + try: + previous_snapshot_id, previous_items = _load_previous_snapshot(db, pid) + except Exception as exc: + logging.exception("Snapshot load failed for playlist %s", pid) + return {"status": "error", "playlist_id": pid, "error": f"snapshot_read_failed: {exc}"} + + if previous_snapshot_id == current_snapshot_id: + return {"status": "unchanged", "playlist_id": pid, "snapshot_id": current_snapshot_id, "enqueued": 0} + + diff = diff_playlist(previous_items, current_items) + added_items = list(diff["added"]) enqueued = 0 - for item in items: - uri = item.get("uri") - if not uri: - continue - observed_counts[uri] += 1 - if observed_counts[uri] <= prev_counts.get(uri, 0): - continue - enqueue_track(item) - enqueued += 1 + enqueue_errors: list[str] = [] + for item in added_items: + try: + _enqueue_added_track(queue, item) + enqueued += 1 + except Exception as exc: + track_id = item.get("spotify_track_id") + enqueue_errors.append(f"{track_id}: {exc}") + logging.exception("Enqueue failed for added Spotify track %s", track_id) + + try: + db.store_snapshot(pid, str(current_snapshot_id), current_items) + except Exception as exc: + logging.exception("Snapshot store failed for playlist %s", pid) + return { + "status": "error", + "playlist_id": pid, + "snapshot_id": current_snapshot_id, + "error": f"snapshot_store_failed: {exc}", + "enqueued": enqueued, + "added_count": len(added_items), + "removed_count": len(diff["removed"]), + "moved_count": len(diff["moved"]), + "enqueue_errors": enqueue_errors, + } return { "status": "updated", - "playlist_id": playlist_id, - "snapshot_id": snapshot_id, - "snapshot_db_id": write_result.snapshot_db_id, + "playlist_id": pid, + "snapshot_id": current_snapshot_id, "enqueued": enqueued, - "diff": changes, + "added_count": len(added_items), + "removed_count": len(diff["removed"]), + "moved_count": len(diff["moved"]), + "enqueue_errors": enqueue_errors, } + + +def run_spotify_playlist_watch_job( + *, + playlist_id: str, 
+ spotify_client: SpotifyPlaylistClient, + snapshot_store: Any, + enqueue_track: Callable[[dict[str, Any]], None], +) -> dict[str, Any]: + """Compatibility wrapper around `playlist_watch_job` for existing call sites.""" + return playlist_watch_job(spotify_client, snapshot_store, enqueue_track, playlist_id) diff --git a/spotify/client.py b/spotify/client.py index 474d273..0c31a95 100644 --- a/spotify/client.py +++ b/spotify/client.py @@ -1,22 +1,35 @@ -"""Spotify API client for playlist snapshot reads.""" +"""Spotify API client for playlist snapshots and normalized playlist items.""" from __future__ import annotations +import asyncio import base64 import os import time import urllib.parse -from typing import Any +from typing import Any, TypedDict import requests +class NormalizedItem(TypedDict): + """Normalized Spotify playlist item record.""" + + spotify_track_id: str | None + position: int + added_at: str | None + artist: str | None + title: str | None + album: str | None + duration_ms: int | None + isrc: str | None + + class SpotifyPlaylistClient: """Client for reading playlist snapshots and playlist items from Spotify.""" _TOKEN_URL = "https://accounts.spotify.com/api/token" _PLAYLIST_URL = "https://api.spotify.com/v1/playlists/{playlist_id}" - _PLAYLIST_ITEMS_URL = "https://api.spotify.com/v1/playlists/{playlist_id}/tracks" def __init__( self, @@ -73,50 +86,161 @@ def _request_json(self, url: str, params: dict[str, Any] | None = None) -> dict[ raise RuntimeError(f"Spotify request failed ({response.status_code})") return response.json() - def get_playlist_items(self, playlist_id: str) -> tuple[str, list[dict[str, Any]]]: - """Fetch a playlist snapshot id and ordered normalized item records.""" + def get_playlist_items(self, playlist_id: str) -> tuple[str, list[NormalizedItem]]: + """Fetch playlist `snapshot_id` and normalized items in original playlist order.""" playlist_id = (playlist_id or "").strip() if not playlist_id: raise ValueError("playlist_id is 
def _normalize_raw_item(raw: dict[str, Any], position: int) -> dict[str, Any] | None:
    """Normalize one raw playlist entry; None for unavailable/removed tracks.

    Shared by the sync and async fetch paths so both produce identical
    item records (previously the normalization logic was duplicated).
    """
    track = raw.get("track")
    if track is None:
        return None
    artists = track.get("artists") or []
    first_artist = artists[0].get("name") if artists and isinstance(artists[0], dict) else None
    album = track.get("album") or {}
    external_ids = track.get("external_ids") or {}
    return {
        "spotify_track_id": track.get("id"),
        "position": position,
        "added_at": raw.get("added_at"),
        "artist": first_artist,
        "title": track.get("name"),
        "album": album.get("name"),
        "duration_ms": track.get("duration_ms"),
        "isrc": external_ids.get("isrc"),
    }


async def _request_json_with_retry(
    spotify_client: SpotifyPlaylistClient,
    url: str,
    params: dict[str, Any] | None = None,
    *,
    max_rate_limit_retries: int = 3,
) -> dict[str, Any]:
    """Perform a Spotify GET request and retry on HTTP 429 responses.

    A single 401 triggers one token refresh; it no longer consumes the
    rate-limit retry budget (the previous combined attempt counter charged
    the refresh against the 429 retries).
    """
    unauthorized_retry_used = False
    rate_limit_retries = 0
    while True:
        token = await asyncio.to_thread(spotify_client._get_access_token)
        headers = {"Authorization": f"Bearer {token}"}
        response = await asyncio.to_thread(
            requests.get,
            url,
            params=params,
            headers=headers,
            timeout=spotify_client.timeout_sec,
        )

        if response.status_code == 401 and not unauthorized_retry_used:
            # Token may have expired mid-flight: drop the cached token once
            # and retry with a freshly issued one.
            unauthorized_retry_used = True
            spotify_client._access_token = None
            continue

        if response.status_code == 429:
            rate_limit_retries += 1
            if rate_limit_retries > max_rate_limit_retries:
                raise RuntimeError("Spotify request failed (429: rate limit exceeded retries)")
            retry_after = response.headers.get("Retry-After", "1")
            try:
                sleep_sec = float(retry_after)
            except (TypeError, ValueError):
                sleep_sec = 1.0
            await asyncio.sleep(max(0.0, sleep_sec))
            continue

        if response.status_code != 200:
            raise RuntimeError(f"Spotify request failed ({response.status_code})")
        return response.json()


async def get_playlist_items(
    spotify_client: SpotifyPlaylistClient,
    playlist_id: str,
) -> tuple[str, list[dict[str, Any]]]:
    """Fetch all Spotify playlist tracks with pagination and return `(snapshot_id, ordered_items)`.

    Positions are absolute playlist indices, so unavailable (None-track)
    entries still advance the counter even though they are skipped.
    """
    cleaned_playlist_id = (playlist_id or "").strip()
    if not cleaned_playlist_id:
        raise ValueError("playlist_id is required")

    encoded_id = urllib.parse.quote(cleaned_playlist_id, safe="")
    # Server-side field filter keeps the payload small; `next` must be
    # requested explicitly or pagination breaks.
    fields = (
        "snapshot_id,"
        "tracks(items(added_at,track(id,name,duration_ms,external_ids(isrc),album(name),artists(name))),next)"
    )
    payload = await _request_json_with_retry(
        spotify_client,
        spotify_client._PLAYLIST_URL.format(playlist_id=encoded_id),
        params={"fields": fields, "limit": 100},
    )

    snapshot_id = payload.get("snapshot_id")
    if not snapshot_id:
        raise RuntimeError("Spotify playlist response missing snapshot_id")

    ordered_items: list[dict[str, Any]] = []
    absolute_position = 0
    tracks_page = payload.get("tracks") or {}
    while True:
        for raw in tracks_page.get("items") or []:
            normalized = _normalize_raw_item(raw, absolute_position)
            if normalized is not None:
                ordered_items.append(normalized)
            absolute_position += 1

        next_url = tracks_page.get("next")
        if not next_url:
            break
        tracks_page = await _request_json_with_retry(spotify_client, str(next_url))

    return str(snapshot_id), ordered_items
in curr_list: - running_curr[uri] += 1 - if running_curr[uri] > prev_counts.get(uri, 0): - added.append(uri) - - removed: list[str] = [] - running_prev = Counter() - for uri in prev_list: - running_prev[uri] += 1 - if running_prev[uri] > curr_counts.get(uri, 0): - removed.append(uri) - - prev_positions: dict[str, deque[int]] = defaultdict(deque) - curr_positions: dict[str, deque[int]] = defaultdict(deque) - for idx, uri in enumerate(prev_list): - prev_positions[uri].append(idx) - for idx, uri in enumerate(curr_list): - curr_positions[uri].append(idx) - - moved: list[dict[str, int | str]] = [] - for uri in sorted(set(prev_positions).intersection(curr_positions)): - retained = min(len(prev_positions[uri]), len(curr_positions[uri])) - for _ in range(retained): - old_pos = prev_positions[uri].popleft() - new_pos = curr_positions[uri].popleft() - if old_pos != new_pos: - moved.append({"uri": uri, "from": old_pos, "to": new_pos}) +from collections import defaultdict, deque + +def diff_playlist(prev: list[dict], curr: list[dict]) -> dict[str, list[dict]]: + """Return duplicate-aware `added`, `removed`, and `moved` playlist items.""" + prev_occurrences: dict[str | None, deque[int]] = defaultdict(deque) + for idx, item in enumerate(prev): + prev_occurrences[item.get("spotify_track_id")].append(idx) + + matched_curr_to_prev_index: dict[int, int] = {} + added: list[dict] = [] + for curr_idx, curr_item in enumerate(curr): + item_id = curr_item.get("spotify_track_id") + remaining = prev_occurrences.get(item_id) + if remaining: + matched_curr_to_prev_index[curr_idx] = remaining.popleft() + else: + added.append(curr_item) + + matched_prev_indices = set(matched_curr_to_prev_index.values()) + removed: list[dict] = [ + prev[prev_idx] for prev_idx in range(len(prev)) if prev_idx not in matched_prev_indices + ] + + moved: list[dict] = [] + for curr_idx, curr_item in enumerate(curr): + prev_idx = matched_curr_to_prev_index.get(curr_idx) + if prev_idx is None: + continue + prev_item = 
prev[prev_idx] + prev_pos = int(prev_item.get("position", prev_idx)) + curr_pos = int(curr_item.get("position", curr_idx)) + if prev_pos != curr_pos: + moved.append( + { + "spotify_track_id": curr_item.get("spotify_track_id"), + "from_position": prev_pos, + "to_position": curr_pos, + "item": curr_item, + } + ) return {"added": added, "removed": removed, "moved": moved} - diff --git a/spotify/search_queries.py b/spotify/search_queries.py new file mode 100644 index 0000000..ebc7dd4 --- /dev/null +++ b/spotify/search_queries.py @@ -0,0 +1,39 @@ +"""Deterministic search-query builders for Spotify track lookups.""" + +from __future__ import annotations + + +def build_search_query(spotify_track: dict) -> str: + """Build a deterministic query in the format `Artist - Title official audio`.""" + track = spotify_track or {} + artist = _extract_artist(track) or "Unknown Artist" + title = _extract_title(track) or "Unknown Title" + return f"{artist} - {title} official audio" + + +def _extract_artist(track: dict) -> str | None: + artists = track.get("artists") + if isinstance(artists, list): + names = [] + for entry in artists: + if isinstance(entry, dict): + name = entry.get("name") + else: + name = entry + if isinstance(name, str) and name.strip(): + names.append(name.strip()) + if names: + return ", ".join(names) + artist = track.get("artist") + if isinstance(artist, str) and artist.strip(): + return artist.strip() + return None + + +def _extract_title(track: dict) -> str | None: + for key in ("title", "name", "track"): + value = track.get(key) + if isinstance(value, str) and value.strip(): + return value.strip() + return None + diff --git a/tests/test_metadata_tagging.py b/tests/test_metadata_tagging.py new file mode 100644 index 0000000..3cce346 --- /dev/null +++ b/tests/test_metadata_tagging.py @@ -0,0 +1,137 @@ +from __future__ import annotations + +from pathlib import Path + +from metadata.tagging import tag_file +from metadata.types import MusicMetadata + + +def 
_metadata(*, artwork: bytes | None = b"img", lyrics: str | None = "line one") -> MusicMetadata: + return MusicMetadata( + title="Test Title", + artist="Test Artist", + album="Test Album", + album_artist="Test Artist", + track_num=1, + disc_num=1, + date="2026-02-16", + genre="Rock", + isrc="USABC1234567", + mbid="mbid-123", + artwork=artwork, + lyrics=lyrics, + ) + + +def test_tag_file_writes_expected_id3_frames(monkeypatch, tmp_path: Path) -> None: + path = tmp_path / "track.mp3" + path.write_bytes(b"") + + import metadata.tagging as tagging + + class FakeAudio: + def __init__(self) -> None: + self.frames = [] + self.saved = None + + def add(self, frame) -> None: + self.frames.append(frame) + + def save(self, save_path: str, v2_version: int) -> None: + self.saved = (save_path, v2_version) + + class FakeFrame: + def __init__(self, name: str, **kwargs) -> None: + self.name = name + for key, value in kwargs.items(): + setattr(self, key, value) + + def _factory(name: str): + def _ctor(**kwargs): + return FakeFrame(name, **kwargs) + + return _ctor + + audio = FakeAudio() + + monkeypatch.setattr(tagging, "ID3", lambda: audio) + monkeypatch.setattr(tagging, "TIT2", _factory("TIT2")) + monkeypatch.setattr(tagging, "TPE1", _factory("TPE1")) + monkeypatch.setattr(tagging, "TALB", _factory("TALB")) + monkeypatch.setattr(tagging, "TPE2", _factory("TPE2")) + monkeypatch.setattr(tagging, "TRCK", _factory("TRCK")) + monkeypatch.setattr(tagging, "TPOS", _factory("TPOS")) + monkeypatch.setattr(tagging, "TDRC", _factory("TDRC")) + monkeypatch.setattr(tagging, "TCON", _factory("TCON")) + monkeypatch.setattr(tagging, "TSRC", _factory("TSRC")) + monkeypatch.setattr(tagging, "TXXX", _factory("TXXX")) + monkeypatch.setattr(tagging, "USLT", _factory("USLT")) + monkeypatch.setattr(tagging, "APIC", _factory("APIC")) + + tag_file(str(path), _metadata()) + + by_name = {frame.name: frame for frame in audio.frames} + assert by_name["TIT2"].text[0] == "Test Title" + assert 
by_name["TPE1"].text[0] == "Test Artist" + assert by_name["TALB"].text[0] == "Test Album" + assert by_name["TRCK"].text[0] == "1" + assert by_name["TSRC"].text[0] == "USABC1234567" + assert by_name["TXXX"].desc == "MBID" + assert by_name["USLT"].text == "line one" + assert by_name["APIC"].data == b"img" + assert audio.saved == (str(path), 4) + + +def test_tag_file_lyrics_and_artwork_fail_non_fatally(monkeypatch, tmp_path: Path) -> None: + path = tmp_path / "track.mp3" + path.write_bytes(b"") + + import metadata.tagging as tagging + + class FakeAudio: + def __init__(self) -> None: + self.frames = [] + self.saved = False + + def add(self, frame) -> None: + self.frames.append(frame) + + def save(self, save_path: str, v2_version: int) -> None: + self.saved = True + + class FakeFrame: + def __init__(self, name: str, **kwargs) -> None: + self.name = name + for key, value in kwargs.items(): + setattr(self, key, value) + + def _factory(name: str): + def _ctor(**kwargs): + return FakeFrame(name, **kwargs) + + return _ctor + + def _raise(*_args, **_kwargs): + raise RuntimeError("frame failure") + + audio = FakeAudio() + monkeypatch.setattr(tagging, "ID3", lambda: audio) + monkeypatch.setattr(tagging, "TIT2", _factory("TIT2")) + monkeypatch.setattr(tagging, "TPE1", _factory("TPE1")) + monkeypatch.setattr(tagging, "TALB", _factory("TALB")) + monkeypatch.setattr(tagging, "TPE2", _factory("TPE2")) + monkeypatch.setattr(tagging, "TRCK", _factory("TRCK")) + monkeypatch.setattr(tagging, "TPOS", _factory("TPOS")) + monkeypatch.setattr(tagging, "TDRC", _factory("TDRC")) + monkeypatch.setattr(tagging, "TCON", _factory("TCON")) + monkeypatch.setattr(tagging, "TSRC", _factory("TSRC")) + monkeypatch.setattr(tagging, "TXXX", _factory("TXXX")) + monkeypatch.setattr(tagging, "USLT", _raise) + monkeypatch.setattr(tagging, "APIC", _raise) + + # Should not raise even when lyrics/artwork frame construction fails. 
+ tag_file(str(path), _metadata()) + + assert audio.saved is True + assert any(frame.name == "TIT2" for frame in audio.frames) + diff --git a/tests/test_music_file_naming_and_tagging.py b/tests/test_music_file_naming_and_tagging.py new file mode 100644 index 0000000..1103f42 --- /dev/null +++ b/tests/test_music_file_naming_and_tagging.py @@ -0,0 +1,146 @@ +from __future__ import annotations + +from pathlib import Path + +from metadata.music_files import build_music_filename, tag_music_file +from metadata.music_metadata import MusicMetadata + + +def _sample_metadata(*, artwork: str | None = None, title: str = 'Song:/Name*?') -> MusicMetadata: + return MusicMetadata( + title=title, + artist="Artist One", + album="Album One", + album_artist="Artist One", + track_num=1, + disc_num=1, + date="2026-02-16", + genre="Rock", + isrc="USABC1234567", + mbid="mbid-123", + artwork=artwork, + lyrics="line one\nline two", + ) + + +def test_build_music_filename_sanitizes_and_zero_pads() -> None: + filename = build_music_filename(_sample_metadata()) + assert filename == "01 - Song--Name--.mp3" + + +def test_tag_music_file_mp3_writes_id3v24_and_artwork(tmp_path: Path) -> None: + mp3_path = tmp_path / "track.mp3" + mp3_path.write_bytes(b"") + art_path = tmp_path / "cover.jpg" + art_path.write_bytes(b"\xff\xd8\xff\xe0" + b"jpeg-bytes") + + import metadata.music_files as music_files + + created_audio = {"obj": None} + + class FakeAudio: + def __init__(self) -> None: + self.frames = [] + self.saved = None + + def add(self, frame) -> None: + self.frames.append(frame) + + def save(self, path: str, v2_version: int) -> None: + self.saved = (path, v2_version) + + class FakeFrame: + def __init__(self, frame_name: str, **kwargs) -> None: + self.frame_name = frame_name + for key, value in kwargs.items(): + setattr(self, key, value) + + def _factory(frame_name: str): + def _ctor(**kwargs): + return FakeFrame(frame_name, **kwargs) + + return _ctor + + def fake_id3_ctor(): + audio = FakeAudio() + 
created_audio["obj"] = audio + return audio + + # Patch mutagen constructors with fakes so tests do not require local mutagen install. + music_files.ID3 = fake_id3_ctor + music_files.TIT2 = _factory("TIT2") + music_files.TPE1 = _factory("TPE1") + music_files.TALB = _factory("TALB") + music_files.TPE2 = _factory("TPE2") + music_files.TRCK = _factory("TRCK") + music_files.TPOS = _factory("TPOS") + music_files.TDRC = _factory("TDRC") + music_files.TCON = _factory("TCON") + music_files.TSRC = _factory("TSRC") + music_files.TXXX = _factory("TXXX") + music_files.USLT = _factory("USLT") + music_files.APIC = _factory("APIC") + + metadata = _sample_metadata(artwork=str(art_path), title="Test Title") + tag_music_file(str(mp3_path), metadata) + + audio = created_audio["obj"] + assert audio is not None + assert audio.saved == (str(mp3_path), 4) + + frames_by_name = {frame.frame_name: frame for frame in audio.frames} + assert frames_by_name["TIT2"].text[0] == "Test Title" + assert frames_by_name["TPE1"].text[0] == "Artist One" + assert frames_by_name["TRCK"].text[0] == "1" + assert frames_by_name["TSRC"].text[0] == "USABC1234567" + assert frames_by_name["TXXX"].desc == "MBID" + assert frames_by_name["TXXX"].text[0] == "mbid-123" + assert frames_by_name["USLT"].text == "line one line two" + assert frames_by_name["APIC"].mime == "image/jpeg" + + +def test_tag_music_file_flac_writes_vorbis_and_artwork(monkeypatch, tmp_path: Path) -> None: + flac_path = tmp_path / "track.flac" + flac_path.write_bytes(b"fake") + art_path = tmp_path / "cover.png" + art_path.write_bytes(b"\x89PNG\r\n\x1a\npng-bytes") + + saved = {"called": False} + + class FakeFlac(dict): + def clear_pictures(self) -> None: + self["cleared"] = ["yes"] + + def add_picture(self, picture) -> None: + self["picture_mime"] = [picture.mime] + self["picture_type"] = [str(picture.type)] + + def save(self) -> None: + saved["called"] = True + + fake_flac = FakeFlac() + + def fake_flac_ctor(path: str): + assert path == 
str(flac_path) + return fake_flac + + class FakePicture: + def __init__(self) -> None: + self.data = b"" + self.type = 0 + self.mime = "" + self.desc = "" + + monkeypatch.setattr("metadata.music_files.FLAC", fake_flac_ctor) + monkeypatch.setattr("metadata.music_files.Picture", FakePicture) + + metadata = _sample_metadata(artwork=str(art_path), title="FLAC Title") + tag_music_file(str(flac_path), metadata) + + assert fake_flac["title"] == ["FLAC Title"] + assert fake_flac["artist"] == ["Artist One"] + assert fake_flac["tracknumber"] == ["1"] + assert fake_flac["musicbrainz_trackid"] == ["mbid-123"] + assert fake_flac["picture_mime"] == ["image/png"] + assert fake_flac["picture_type"] == ["3"] + assert saved["called"] is True diff --git a/tests/test_music_metadata_merge.py b/tests/test_music_metadata_merge.py new file mode 100644 index 0000000..17649a3 --- /dev/null +++ b/tests/test_music_metadata_merge.py @@ -0,0 +1,94 @@ +from __future__ import annotations + +from metadata.music_metadata import merge_metadata + + +def test_merge_metadata_precedence_and_normalization(caplog) -> None: + spotify = { + "title": ' Song:/Name* ', + "artists": [{"name": "Main Artist"}, {"name": "Guest One"}], + "album": " Album ", + "album_artist": " Main Artist ", + "track_number": "03/10", + "disc_number": "1/2", + "release_date": "2025-01-01", + "genre": [" Pop ", "Dance "], + "isrc": "usabc1234567", + "artwork_url": "https://img.example/cover.jpg", + } + mb = { + "title": "MB Title", + "artist": "MB Artist", + "mbid": "mbid-1", + "lyrics": "MB lyrics", + } + ytdlp = { + "title": "YT Title", + "artist": "YT Artist", + "lyrics": "YT lyrics", + } + + with caplog.at_level("INFO"): + merged = merge_metadata(spotify, mb, ytdlp) + + assert merged.title == "Song--Name-" + assert merged.artist == "Main Artist feat. 
Guest One" + assert merged.album == "Album" + assert merged.album_artist == "Main Artist" + assert merged.track_num == 3 + assert merged.disc_num == 1 + assert merged.date == "2025-01-01" + assert merged.genre == "Pop; Dance" + assert merged.isrc == "USABC1234567" + assert merged.mbid == "mbid-1" + assert merged.artwork == "https---img.example-cover.jpg" + assert merged.lyrics == "MB lyrics" + + # Verify source logging happens per merged field. + field_logs = [r.message for r in caplog.records if "metadata_field_source field=" in r.message] + assert len(field_logs) == 12 + assert any("field=title source=spotify" in msg for msg in field_logs) + assert any("field=mbid source=musicbrainz" in msg for msg in field_logs) + assert any("field=lyrics source=musicbrainz" in msg for msg in field_logs) + + +def test_merge_metadata_fallback_and_featured_artist_parsing() -> None: + spotify = { + "title": "", + "artists": [], + "album": None, + "album_artist": None, + } + mb = { + "title": None, + "artist": "", + "album": "", + "album_artist": "", + "genre": "", + } + ytdlp = { + "title": " Live Track ", + "artist": "Lead Artist ft. Guest A & Guest B", + "album": "YT Album", + "album_artist": "Lead Artist", + "date": "2024", + "genre": "Alt / Rock", + "isrc": "gbxyz7654321", + "recording_id": "mb-recording-xyz", + "thumbnail": "https://cdn.example/a:b.jpg", + "lyrics": " line1 \n line2 ", + } + + merged = merge_metadata(spotify, mb, ytdlp) + + assert merged.title == "Live Track" + assert merged.artist == "Lead Artist feat. 
Guest A, Guest B" + assert merged.album == "YT Album" + assert merged.album_artist == "Lead Artist" + assert merged.date == "2024" + assert merged.genre == "Alt - Rock" + assert merged.isrc == "GBXYZ7654321" + assert merged.mbid == "mb-recording-xyz" + assert merged.artwork == "https---cdn.example-a-b.jpg" + assert merged.lyrics == "line1 line2" + diff --git a/tests/test_retreivr_playlist_snapshot_store.py b/tests/test_retreivr_playlist_snapshot_store.py new file mode 100644 index 0000000..9e17292 --- /dev/null +++ b/tests/test_retreivr_playlist_snapshot_store.py @@ -0,0 +1,63 @@ +import sqlite3 + +from db.playlist_snapshots import PlaylistSnapshotStore + + +def _items() -> list[dict[str, object]]: + return [ + { + "spotify_track_id": "track-1", + "position": 0, + "added_at": "2026-02-16T00:00:00Z", + "artist": "Artist 1", + "title": "Title 1", + "album": "Album 1", + "duration_ms": 1000, + "isrc": "ISRC1", + }, + { + "spotify_track_id": "track-2", + "position": 1, + "added_at": "2026-02-16T00:01:00Z", + "artist": "Artist 2", + "title": "Title 2", + "album": "Album 2", + "duration_ms": 2000, + "isrc": "ISRC2", + }, + ] + + +def test_store_snapshot_inserts_rows_and_preserves_positions(tmp_path) -> None: + db_path = tmp_path / "snapshots.sqlite" + store = PlaylistSnapshotStore(str(db_path)) + + result = store.store_snapshot("playlist-a", "snapshot-1", _items()) + + assert result.inserted is True + latest = store.get_latest_snapshot("playlist-a") + assert latest is not None + assert latest["snapshot_id"] == "snapshot-1" + assert latest["track_count"] == 2 + assert [item["spotify_track_id"] for item in latest["items"]] == ["track-1", "track-2"] + assert [item["position"] for item in latest["items"]] == [0, 1] + + +def test_store_snapshot_fast_path_when_snapshot_unchanged(tmp_path) -> None: + db_path = tmp_path / "snapshots.sqlite" + store = PlaylistSnapshotStore(str(db_path)) + + first = store.store_snapshot("playlist-a", "snapshot-1", _items()) + second = 
store.store_snapshot("playlist-a", "snapshot-1", _items()) + + assert first.inserted is True + assert second.inserted is False + assert second.reason == "snapshot_unchanged" + assert first.snapshot_db_id == second.snapshot_db_id + + with sqlite3.connect(db_path) as conn: + snapshot_count = conn.execute("SELECT COUNT(*) FROM playlist_snapshots").fetchone()[0] + item_count = conn.execute("SELECT COUNT(*) FROM playlist_snapshot_items").fetchone()[0] + assert snapshot_count == 1 + assert item_count == 2 + diff --git a/tests/test_spotify_diff.py b/tests/test_spotify_diff.py new file mode 100644 index 0000000..8df53a9 --- /dev/null +++ b/tests/test_spotify_diff.py @@ -0,0 +1,92 @@ +from __future__ import annotations + +from spotify.diff import diff_playlist + + +def _item(track_id: str, position: int, *, added_at: str = "2026-02-16T00:00:00Z") -> dict: + return { + "spotify_track_id": track_id, + "position": position, + "added_at": added_at, + "artist": f"artist-{track_id}", + "title": f"title-{track_id}", + "album": f"album-{track_id}", + "duration_ms": 1000, + "isrc": f"isrc-{track_id}", + } + + +def test_diff_playlist_no_change() -> None: + prev = [_item("a", 0), _item("b", 1)] + curr = [_item("a", 0), _item("b", 1)] + + diff = diff_playlist(prev, curr) + + assert diff["added"] == [] + assert diff["removed"] == [] + assert diff["moved"] == [] + + +def test_diff_playlist_simple_add() -> None: + prev = [_item("a", 0)] + curr = [_item("a", 0), _item("b", 1)] + + diff = diff_playlist(prev, curr) + + assert [item["spotify_track_id"] for item in diff["added"]] == ["b"] + assert diff["removed"] == [] + assert diff["moved"] == [] + + +def test_diff_playlist_simple_remove() -> None: + prev = [_item("a", 0), _item("b", 1)] + curr = [_item("a", 0)] + + diff = diff_playlist(prev, curr) + + assert diff["added"] == [] + assert [item["spotify_track_id"] for item in diff["removed"]] == ["b"] + assert diff["moved"] == [] + + +def test_diff_playlist_moved_only() -> None: + prev = 
[_item("a", 0), _item("b", 1), _item("c", 2)] + curr = [_item("b", 0), _item("a", 1), _item("c", 2)] + + diff = diff_playlist(prev, curr) + + assert diff["added"] == [] + assert diff["removed"] == [] + moved = diff["moved"] + assert [entry["spotify_track_id"] for entry in moved] == ["b", "a"] + assert moved[0]["from_position"] == 1 + assert moved[0]["to_position"] == 0 + assert moved[1]["from_position"] == 0 + assert moved[1]["to_position"] == 1 + + +def test_diff_playlist_combination_add_remove_move_with_duplicates() -> None: + prev = [ + _item("a", 0, added_at="2026-02-16T00:00:00Z"), + _item("x", 1, added_at="2026-02-16T00:01:00Z"), + _item("a", 2, added_at="2026-02-16T00:02:00Z"), + _item("b", 3, added_at="2026-02-16T00:03:00Z"), + ] + curr = [ + _item("a", 0, added_at="2026-02-16T00:10:00Z"), + _item("a", 1, added_at="2026-02-16T00:11:00Z"), + _item("c", 2, added_at="2026-02-16T00:12:00Z"), + _item("x", 3, added_at="2026-02-16T00:13:00Z"), + ] + + diff = diff_playlist(prev, curr) + + assert [item["spotify_track_id"] for item in diff["added"]] == ["c"] + assert [item["spotify_track_id"] for item in diff["removed"]] == ["b"] + moved = diff["moved"] + assert [entry["spotify_track_id"] for entry in moved] == ["a", "x"] + assert moved[0]["from_position"] == 2 + assert moved[0]["to_position"] == 1 + assert moved[1]["from_position"] == 1 + assert moved[1]["to_position"] == 3 + diff --git a/tests/test_spotify_playlist_client.py b/tests/test_spotify_playlist_client.py new file mode 100644 index 0000000..d9dd855 --- /dev/null +++ b/tests/test_spotify_playlist_client.py @@ -0,0 +1,128 @@ +from __future__ import annotations + +from typing import Any + +from spotify.client import SpotifyPlaylistClient + + +def test_get_playlist_items_empty_playlist(monkeypatch) -> None: + client = SpotifyPlaylistClient(client_id="id", client_secret="secret") + + def fake_request_json(url: str, params: dict[str, Any] | None = None) -> dict[str, Any]: + return {"snapshot_id": "snap-empty", 
"tracks": {"items": [], "next": None}} + + monkeypatch.setattr(client, "_request_json", fake_request_json) + + snapshot_id, items = client.get_playlist_items("playlist-empty") + + assert snapshot_id == "snap-empty" + assert items == [] + + +def test_get_playlist_items_preserves_duplicates_and_order(monkeypatch) -> None: + client = SpotifyPlaylistClient(client_id="id", client_secret="secret") + + page_one = { + "snapshot_id": "snap-dup", + "tracks": { + "items": [ + { + "added_at": "2026-02-01T00:00:00Z", + "track": { + "id": "track-1", + "name": "Song A", + "duration_ms": 1000, + "external_ids": {"isrc": "ISRC_A"}, + "album": {"name": "Album A"}, + "artists": [{"name": "Artist A"}], + }, + }, + { + "added_at": "2026-02-01T00:01:00Z", + "track": { + "id": "track-1", + "name": "Song A", + "duration_ms": 1000, + "external_ids": {"isrc": "ISRC_A"}, + "album": {"name": "Album A"}, + "artists": [{"name": "Artist A"}], + }, + }, + ], + "next": "https://api.spotify.com/v1/playlists/p/tracks?offset=2&limit=2", + }, + } + page_two = { + "items": [ + { + "added_at": "2026-02-01T00:02:00Z", + "track": { + "id": "track-2", + "name": "Song B", + "duration_ms": 2000, + "external_ids": {"isrc": "ISRC_B"}, + "album": {"name": "Album B"}, + "artists": [{"name": "Artist B"}], + }, + } + ], + "next": None, + } + + calls: list[str] = [] + + def fake_request_json(url: str, params: dict[str, Any] | None = None) -> dict[str, Any]: + calls.append(url) + if "playlists" in url and "offset=2" not in url: + return page_one + return page_two + + monkeypatch.setattr(client, "_request_json", fake_request_json) + + snapshot_id, items = client.get_playlist_items("playlist-dup") + + assert snapshot_id == "snap-dup" + assert [item["spotify_track_id"] for item in items] == ["track-1", "track-1", "track-2"] + assert [item["position"] for item in items] == [0, 1, 2] + assert items[0]["artist"] == "Artist A" + assert items[2]["title"] == "Song B" + assert items[2]["album"] == "Album B" + assert 
items[2]["duration_ms"] == 2000 + assert items[2]["isrc"] == "ISRC_B" + assert len(calls) == 2 + + +def test_get_playlist_items_drops_null_track_entries(monkeypatch) -> None: + client = SpotifyPlaylistClient(client_id="id", client_secret="secret") + + def fake_request_json(url: str, params: dict[str, Any] | None = None) -> dict[str, Any]: + return { + "snapshot_id": "snap-null", + "tracks": { + "items": [ + {"added_at": "2026-02-01T00:00:00Z", "track": None}, + { + "added_at": "2026-02-01T00:01:00Z", + "track": { + "id": "track-3", + "name": "Song C", + "duration_ms": 3000, + "external_ids": {"isrc": "ISRC_C"}, + "album": {"name": "Album C"}, + "artists": [{"name": "Artist C"}], + }, + }, + ], + "next": None, + }, + } + + monkeypatch.setattr(client, "_request_json", fake_request_json) + + snapshot_id, items = client.get_playlist_items("playlist-null") + + assert snapshot_id == "snap-null" + assert len(items) == 1 + assert items[0]["spotify_track_id"] == "track-3" + assert items[0]["position"] == 1 + diff --git a/tests/test_spotify_playlist_watch_job.py b/tests/test_spotify_playlist_watch_job.py new file mode 100644 index 0000000..4b5727f --- /dev/null +++ b/tests/test_spotify_playlist_watch_job.py @@ -0,0 +1,108 @@ +from __future__ import annotations + +from typing import Any + +from scheduler.jobs.spotify_playlist_watch import run_spotify_playlist_watch_job + + +def _item(track_id: str, position: int) -> dict[str, Any]: + return { + "spotify_track_id": track_id, + "position": position, + "added_at": f"2026-02-16T00:0{position}:00Z", + "artist": f"artist-{track_id}", + "title": f"title-{track_id}", + "album": f"album-{track_id}", + "duration_ms": 1000 + position, + "isrc": f"isrc-{track_id}", + } + + +class _MockSpotifyClient: + def __init__(self, snapshot_id: str, items: list[dict[str, Any]]) -> None: + self.snapshot_id = snapshot_id + self.items = items + self.calls: list[str] = [] + + def get_playlist_items(self, playlist_id: str) -> tuple[str, list[dict[str, 
Any]]]: + self.calls.append(playlist_id) + return self.snapshot_id, list(self.items) + + +class _MockSnapshotStore: + def __init__(self, latest_snapshot: dict[str, Any] | None) -> None: + self.latest_snapshot = latest_snapshot + self.get_calls: list[str] = [] + self.store_calls: list[tuple[str, str, list[dict[str, Any]]]] = [] + + def get_latest_snapshot(self, playlist_id: str) -> dict[str, Any] | None: + self.get_calls.append(playlist_id) + return self.latest_snapshot + + def store_snapshot(self, playlist_id: str, snapshot_id: str, items: list[dict[str, Any]]) -> Any: + self.store_calls.append((playlist_id, snapshot_id, list(items))) + return type("WriteResult", (), {"snapshot_db_id": 42})() + + +def test_watch_job_unchanged_snapshot_exits_without_enqueue() -> None: + prev_items = [_item("a", 0)] + store = _MockSnapshotStore({"snapshot_id": "snap-1", "items": prev_items}) + client = _MockSpotifyClient("snap-1", [_item("a", 0), _item("b", 1)]) + enqueued: list[str] = [] + + result = run_spotify_playlist_watch_job( + playlist_id="playlist-1", + spotify_client=client, + snapshot_store=store, + enqueue_track=lambda item: enqueued.append(str(item["spotify_track_id"])), + ) + + assert result["status"] == "unchanged" + assert result["enqueued"] == 0 + assert enqueued == [] + assert store.store_calls == [] + + +def test_watch_job_enqueues_only_added_items_in_order() -> None: + prev_items = [_item("a", 0), _item("b", 1)] + curr_items = [_item("a", 0), _item("b", 1), _item("c", 2), _item("d", 3)] + store = _MockSnapshotStore({"snapshot_id": "snap-1", "items": prev_items}) + client = _MockSpotifyClient("snap-2", curr_items) + enqueued: list[str] = [] + + result = run_spotify_playlist_watch_job( + playlist_id="playlist-1", + spotify_client=client, + snapshot_store=store, + enqueue_track=lambda item: enqueued.append(str(item["spotify_track_id"])), + ) + + assert result["status"] == "updated" + assert result["added_count"] == 2 + assert result["enqueued"] == 2 + assert enqueued 
== ["c", "d"] + assert len(store.store_calls) == 1 + assert store.store_calls[0][1] == "snap-2" + + +def test_watch_job_moved_items_do_not_enqueue() -> None: + prev_items = [_item("a", 0), _item("b", 1), _item("c", 2)] + curr_items = [_item("b", 0), _item("a", 1), _item("c", 2)] + store = _MockSnapshotStore({"snapshot_id": "snap-1", "items": prev_items}) + client = _MockSpotifyClient("snap-2", curr_items) + enqueued: list[str] = [] + + result = run_spotify_playlist_watch_job( + playlist_id="playlist-1", + spotify_client=client, + snapshot_store=store, + enqueue_track=lambda item: enqueued.append(str(item["spotify_track_id"])), + ) + + assert result["status"] == "updated" + assert result["added_count"] == 0 + assert result["moved_count"] == 2 + assert result["enqueued"] == 0 + assert enqueued == [] + assert len(store.store_calls) == 1 + From 94b3f67258b145ff8c0a1892be453732fc4441d1 Mon Sep 17 00:00:00 2001 From: z3ro-2 Date: Mon, 16 Feb 2026 11:40:35 -0600 Subject: [PATCH 06/45] =?UTF-8?q?feat(dev):=20add=20structured=20Codex=20p?= =?UTF-8?q?rompt=20sequence=20for=20Spotify=20resolution=20pipeline=20=09?= =?UTF-8?q?=E2=80=A2=09Introduced=2010=20incremental=20Codex=20prompt=20bl?= =?UTF-8?q?ocks=20to=20implement=20Spotify=20track=20resolution=20subsyste?= =?UTF-8?q?m=20=09=E2=80=A2=09Covers:=20=09=E2=80=A2=09Resolver=20interfac?= =?UTF-8?q?e=20definition=20=09=E2=80=A2=09Deterministic=20candidate=20sco?= =?UTF-8?q?ring=20heuristics=20=09=E2=80=A2=09Async=20search=20execution?= =?UTF-8?q?=20wrapper=20=09=E2=80=A2=09Full=20resolver=20implementation=20?= =?UTF-8?q?=09=E2=80=A2=09Unit=20tests=20for=20scoring=20+=20resolver=20be?= =?UTF-8?q?havior=20=09=E2=80=A2=09Enqueue=20integration=20with=20resolved?= =?UTF-8?q?=20media=20+=20attached=20metadata=20=09=E2=80=A2=09Download=20?= =?UTF-8?q?worker=20update=20to=20prefer=20attached=20Spotify=20metadata?= =?UTF-8?q?=20=09=E2=80=A2=09End-to-end=20integration=20test=20scaffold=20?= 
=?UTF-8?q?=09=E2=80=A2=09Improved=20Spotify=20search=20query=20builder=20?= =?UTF-8?q?=09=E2=80=A2=09Structured=20resolution=20logging=20=09=E2=80=A2?= =?UTF-8?q?=09Prompts=20are=20scoped=20one-task-per-block=20to=20keep=20Co?= =?UTF-8?q?dex=20focused=20and=20avoid=20breaking=20existing=20modules=20?= =?UTF-8?q?=09=E2=80=A2=09Designed=20to=20preserve=20current=20architectur?= =?UTF-8?q?e=20and=20incrementally=20integrate=20Spotify-driven=20ingestio?= =?UTF-8?q?n=20=09=E2=80=A2=09No=20runtime=20behavior=20changes=20in=20thi?= =?UTF-8?q?s=20commit=20(development=20workflow=20enhancement=20only)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- download/worker.py | 42 +++++ scheduler/jobs/spotify_playlist_watch.py | 15 ++ spotify/resolve.py | 196 +++++++++++++++++++++ spotify/search_queries.py | 20 ++- tests/test_resolver.py | 109 ++++++++++++ tests/test_resolver_scoring.py | 36 ++++ tests/test_spotify_playlist_integration.py | 136 ++++++++++++++ 7 files changed, 550 insertions(+), 4 deletions(-) create mode 100644 download/worker.py create mode 100644 spotify/resolve.py create mode 100644 tests/test_resolver.py create mode 100644 tests/test_resolver_scoring.py create mode 100644 tests/test_spotify_playlist_integration.py diff --git a/download/worker.py b/download/worker.py new file mode 100644 index 0000000..c8247c6 --- /dev/null +++ b/download/worker.py @@ -0,0 +1,42 @@ +"""Download worker behavior for resolved Spotify media jobs.""" + +from __future__ import annotations + +from typing import Any, Protocol + +from metadata.tagging import tag_file + + +class _Downloader(Protocol): + def download(self, media_url: str) -> str: + """Download a media URL and return the local file path.""" + + +class DownloadWorker: + """Worker that downloads media and applies optional music metadata tagging.""" + + def __init__(self, downloader: _Downloader) -> None: + self._downloader = downloader + + def process_job(self, job: Any) -> str: 
+ """Process one job with music-metadata-aware flow and safe fallback behavior.""" + payload = getattr(job, "payload", None) or {} + + if payload.get("music_metadata"): + # Music metadata payloads are expected to include a resolved media URL. + resolved_media = payload.get("resolved_media") or {} + media_url = resolved_media.get("media_url") + metadata = payload.get("music_metadata") + if media_url: + # Download from the resolved media URL, then tag with attached metadata. + file_path = self._downloader.download(media_url) + tag_file(file_path, metadata) + return file_path + + # Non-music or incomplete payloads use the existing default worker behavior. + return self.default_download_and_tag(job) + + def default_download_and_tag(self, job: Any) -> str: + """Fallback behavior implemented by existing worker flows.""" + raise NotImplementedError + diff --git a/scheduler/jobs/spotify_playlist_watch.py b/scheduler/jobs/spotify_playlist_watch.py index 10c82de..abe3b5e 100644 --- a/scheduler/jobs/spotify_playlist_watch.py +++ b/scheduler/jobs/spotify_playlist_watch.py @@ -6,8 +6,10 @@ import logging from typing import Any, Callable +from metadata.merge import merge_metadata from spotify.client import SpotifyPlaylistClient, get_playlist_items from spotify.diff import diff_playlist +from spotify.resolve import resolve_spotify_track def _load_previous_snapshot(db: Any, playlist_id: str) -> tuple[str | None, list[dict[str, Any]]]: @@ -44,6 +46,19 @@ def _enqueue_added_track(queue: Any, item: dict[str, Any]) -> None: raise TypeError("queue does not expose a supported enqueue method") +async def enqueue_spotify_track(queue, spotify_track: dict, search_service, playlist_id: str): + """Resolve a Spotify track, merge metadata, build payload, and enqueue it.""" + resolved_media = await resolve_spotify_track(spotify_track, search_service) + merged_metadata = merge_metadata(spotify_track or {}, {}, resolved_media.get("extra") or {}) + payload = { + "playlist_id": playlist_id, + 
"spotify_track_id": (spotify_track or {}).get("spotify_track_id"), + "resolved_media": resolved_media, + "music_metadata": merged_metadata, + } + queue.enqueue(payload) + + def playlist_watch_job(spotify_client, db, queue, playlist_id: str) -> dict[str, Any]: """Fetch playlist snapshot, diff with DB state, enqueue added tracks, and persist new snapshot.""" pid = (playlist_id or "").strip() diff --git a/spotify/resolve.py b/spotify/resolve.py new file mode 100644 index 0000000..b1e2000 --- /dev/null +++ b/spotify/resolve.py @@ -0,0 +1,196 @@ +"""Spotify resolution stubs.""" + +from __future__ import annotations + +import logging +from typing import Any + +_LOG = logging.getLogger(__name__) + +_SOURCE_PRIORITY = ["youtube_music", "youtube", "soundcloud", "bandcamp"] + + +def log_resolution(spotify_id: str, best_candidate: dict, score: float, reason: str) -> None: + """Log a structured Spotify resolver decision. + + Example logging configuration: + ```python + import logging + + logging.basicConfig( + level=logging.INFO, + format="%(asctime)s %(levelname)s %(name)s %(message)s", + ) + ``` + """ + media_url = (best_candidate or {}).get("media_url") + _LOG.info( + "resolver track_id=%s best_match=%s score=%s reason=%s", + spotify_id, + media_url, + score, + reason, + ) + + +def score_search_candidates(candidates: list[dict], spotify_track: dict) -> dict: + """Return the best candidate using deterministic title/artist/duration scoring. + + Scoring behavior: + - Title match: candidates whose `title` matches the Spotify track title are + preferred. Match is case-insensitive and whitespace-normalized. + - Artist match: candidates whose `artist` (or `artist_detected`) matches the + Spotify track artist are preferred with the same normalization rules. + - Duration proximity: candidates with duration closest to the Spotify track + are preferred. Duration tolerance is +/- 3 seconds (higher preference), + then increasing absolute difference. 
+ + Tie-breaking strategy: + - If multiple candidates have the same score tuple, source order is used. + Lower index in `_SOURCE_PRIORITY` wins. + - If source priority is also equal, original list order is preserved. + + Expected candidate fields: + - `title` + - `artist` or `artist_detected` + - `duration` (seconds) or `duration_sec` or `duration_ms` + - `source` + + The returned value is the selected candidate dictionary. If `candidates` is + empty, an empty dictionary is returned. + """ + if not candidates: + return {} + + expected_title = _normalize_text(spotify_track.get("title") or spotify_track.get("name")) + expected_artist = _normalize_text(spotify_track.get("artist")) + expected_duration_sec = _to_seconds(spotify_track) + + scored: list[tuple[tuple[int, int, int, int], int, dict]] = [] + for idx, candidate in enumerate(candidates): + candidate_title = _normalize_text(candidate.get("title")) + candidate_artist = _normalize_text(candidate.get("artist") or candidate.get("artist_detected")) + candidate_duration_sec = _to_seconds(candidate) + + title_exact = int(bool(expected_title and candidate_title == expected_title)) + artist_exact = int(bool(expected_artist and candidate_artist == expected_artist)) + + if expected_duration_sec is None or candidate_duration_sec is None: + duration_delta = 10**9 + else: + duration_delta = abs(candidate_duration_sec - expected_duration_sec) + within_tolerance = int(duration_delta <= 3) + + source_rank = _source_rank(candidate.get("source")) + score_tuple = (title_exact, artist_exact, within_tolerance, -duration_delta) + scored.append((score_tuple, source_rank, candidate)) + + # Stable sort ensures original order for identical score + source rank. + scored.sort(key=lambda item: (item[0], item[1]), reverse=True) + return scored[0][2] + + +async def execute_search(search_service, query: str) -> list[dict]: + """Run an async search and return normalized result dictionaries. 
+ + This helper calls `search_service.search(query)`, catches/logs search + failures, and returns a normalized `list[dict]` where every item contains: + `media_url`, `title`, `duration`, `source_id`, and `extra`. + """ + try: + raw_results = await search_service.search(query) + except Exception: + _LOG.exception("Search execution failed for query=%r", query) + return [] + + if not isinstance(raw_results, list): + return [] + + normalized: list[dict] = [] + for item in raw_results: + if not isinstance(item, dict): + continue + normalized.append( + { + "media_url": item.get("media_url"), + "title": item.get("title"), + "duration": item.get("duration"), + "source_id": item.get("source_id"), + "extra": item.get("extra"), + } + ) + return normalized + + +async def resolve_spotify_track(spotify_track: dict, search_service) -> dict: + """Resolve a Spotify track dictionary into the best available media candidate. + + This function builds a deterministic query from Spotify artist/title, runs + async search execution, scores candidates, and returns the best candidate. + If no candidates are returned, it returns an empty dictionary. + """ + artist = str(spotify_track.get("artist") or "").strip() + title = str(spotify_track.get("title") or spotify_track.get("name") or "").strip() + query = f"{artist} - {title} official audio".strip() + _LOG.info("Resolving Spotify track using query=%r", query) + + results = await execute_search(search_service, query) + if not results: + _LOG.info("No search results for query=%r", query) + return {} + + # `score_search_candidates` expects `source`, while execute_search output + # uses `source_id`; map for deterministic tie-breaking compatibility. 
+ scoring_results = [ + {**candidate, "source": candidate.get("source_id")} for candidate in results + ] + best = score_search_candidates(scoring_results, spotify_track) + if not best: + _LOG.info("No candidate selected for query=%r", query) + return {} + + # Preserve the execute_search output key shape. + best_out = { + "media_url": best.get("media_url"), + "title": best.get("title"), + "duration": best.get("duration"), + "source_id": best.get("source_id"), + "extra": best.get("extra"), + } + _LOG.info( + "Resolved Spotify track query=%r source_id=%r media_url=%r", + query, + best_out.get("source_id"), + best_out.get("media_url"), + ) + return best_out + + +def _normalize_text(value: Any) -> str: + if value is None: + return "" + return " ".join(str(value).casefold().strip().split()) + + +def _to_seconds(data: dict) -> int | None: + if "duration_ms" in data and data.get("duration_ms") is not None: + try: + return int(round(float(data["duration_ms"]) / 1000.0)) + except (TypeError, ValueError): + return None + for key in ("duration", "duration_sec"): + if data.get(key) is None: + continue + try: + return int(round(float(data[key]))) + except (TypeError, ValueError): + return None + return None + + +def _source_rank(source: Any) -> int: + src = _normalize_text(source) + try: + return len(_SOURCE_PRIORITY) - _SOURCE_PRIORITY.index(src) + except ValueError: + return 0 diff --git a/spotify/search_queries.py b/spotify/search_queries.py index ebc7dd4..8f2563c 100644 --- a/spotify/search_queries.py +++ b/spotify/search_queries.py @@ -3,12 +3,25 @@ from __future__ import annotations -def build_search_query(spotify_track: dict) -> str: - """Build a deterministic query in the format `Artist - Title official audio`.""" +def build_search_query(spotify_track: dict, prefer_official: bool = True) -> str: + """Build a deterministic search query in the form `Artist - Title {keywords}`. + + Behavior: + - Always starts with `Artist - Title`. 
+ - Appends `official audio` when `prefer_official` is `True`. + - Appends `official music video` when `prefer_official` is `False`. + + Examples: + - `build_search_query({"artist": "Daft Punk", "title": "One More Time"})` + -> `"Daft Punk - One More Time official audio"` + - `build_search_query({"artist": "Daft Punk", "title": "One More Time"}, prefer_official=False)` + -> `"Daft Punk - One More Time official music video"` + """ track = spotify_track or {} artist = _extract_artist(track) or "Unknown Artist" title = _extract_title(track) or "Unknown Title" - return f"{artist} - {title} official audio" + keywords = "official audio" if prefer_official else "official music video" + return f"{artist} - {title} {keywords}" def _extract_artist(track: dict) -> str | None: @@ -36,4 +49,3 @@ def _extract_title(track: dict) -> str | None: if isinstance(value, str) and value.strip(): return value.strip() return None - diff --git a/tests/test_resolver.py b/tests/test_resolver.py new file mode 100644 index 0000000..aac9bce --- /dev/null +++ b/tests/test_resolver.py @@ -0,0 +1,109 @@ +import asyncio + +from spotify.resolve import resolve_spotify_track, score_search_candidates + + +def test_score_search_candidates_exact_match() -> None: + spotify_track = {"artist": "Artist A", "title": "Track A", "duration_ms": 200000} + candidates = [ + {"title": "Track A", "artist": "Artist A", "duration": 200, "source": "youtube"}, + {"title": "Track A live", "artist": "Artist A", "duration": 200, "source": "youtube_music"}, + ] + + best = score_search_candidates(candidates, spotify_track) + + assert best["title"] == "Track A" + assert best["artist"] == "Artist A" + + +def test_score_search_candidates_duration_mismatch() -> None: + spotify_track = {"artist": "Artist B", "title": "Track B", "duration_ms": 180000} + candidates = [ + {"title": "Track B", "artist": "Artist B", "duration": 181, "source": "youtube"}, + {"title": "Track B", "artist": "Artist B", "duration": 240, "source": 
"youtube_music"}, + ] + + best = score_search_candidates(candidates, spotify_track) + + assert best["duration"] == 181 + + +def test_score_search_candidates_tie_behavior_source_priority() -> None: + spotify_track = {"artist": "Artist C", "title": "Track C", "duration_ms": 210000} + candidates = [ + {"title": "Track C", "artist": "Artist C", "duration": 210, "source": "soundcloud"}, + {"title": "Track C", "artist": "Artist C", "duration": 210, "source": "youtube_music"}, + ] + + best = score_search_candidates(candidates, spotify_track) + + assert best["source"] == "youtube_music" + + +class _MockSearchService: + def __init__(self, results): + self._results = results + self.calls = [] + + async def search(self, query: str): + self.calls.append(query) + return self._results + + +def test_resolve_spotify_track_no_results() -> None: + search_service = _MockSearchService([]) + spotify_track = {"artist": "Artist D", "title": "Track D", "duration_ms": 180000} + + resolved = asyncio.run(resolve_spotify_track(spotify_track, search_service)) + + assert resolved == {} + assert search_service.calls == ["Artist D - Track D official audio"] + + +def test_resolve_spotify_track_single_result() -> None: + results = [ + { + "media_url": "https://example.com/one", + "title": "Track E", + "duration": 200, + "source_id": "youtube", + "extra": {"id": "1"}, + } + ] + search_service = _MockSearchService(results) + spotify_track = {"artist": "Artist E", "title": "Track E", "duration_ms": 200000} + + resolved = asyncio.run(resolve_spotify_track(spotify_track, search_service)) + + assert resolved["media_url"] == "https://example.com/one" + assert resolved["title"] == "Track E" + assert resolved["source_id"] == "youtube" + + +def test_resolve_spotify_track_multiple_results_best_match_chosen() -> None: + results = [ + { + "media_url": "https://example.com/bad", + "title": "Track F (live)", + "duration": 260, + "source_id": "youtube_music", + "extra": {"id": "bad"}, + "artist": "Artist F", + }, 
+ { + "media_url": "https://example.com/best", + "title": "Track F", + "duration": 210, + "source_id": "youtube", + "extra": {"id": "best"}, + "artist": "Artist F", + }, + ] + search_service = _MockSearchService(results) + spotify_track = {"artist": "Artist F", "title": "Track F", "duration_ms": 210000} + + resolved = asyncio.run(resolve_spotify_track(spotify_track, search_service)) + + assert resolved["media_url"] == "https://example.com/best" + assert resolved["title"] == "Track F" + assert resolved["source_id"] == "youtube" diff --git a/tests/test_resolver_scoring.py b/tests/test_resolver_scoring.py new file mode 100644 index 0000000..07cf503 --- /dev/null +++ b/tests/test_resolver_scoring.py @@ -0,0 +1,36 @@ +from spotify.resolve import score_search_candidates + + +def test_exact_match_favored() -> None: + spotify_track = {"title": "Track One", "artist": "Artist A", "duration_ms": 200000} + candidates = [ + {"title": "Track One", "artist": "Artist A", "duration": 200, "source": "youtube"}, + {"title": "Track One (live)", "artist": "Artist A", "duration": 200, "source": "youtube_music"}, + ] + + best = score_search_candidates(candidates, spotify_track) + assert best["title"] == "Track One" + assert best["artist"] == "Artist A" + + +def test_duration_mismatch_deprioritized() -> None: + spotify_track = {"title": "Track Two", "artist": "Artist B", "duration_ms": 180000} + candidates = [ + {"title": "Track Two", "artist": "Artist B", "duration": 181, "source": "youtube"}, + {"title": "Track Two", "artist": "Artist B", "duration": 220, "source": "youtube_music"}, + ] + + best = score_search_candidates(candidates, spotify_track) + assert best["duration"] == 181 + + +def test_tie_broken_in_source_order() -> None: + spotify_track = {"title": "Track Three", "artist": "Artist C", "duration_ms": 210000} + candidates = [ + {"title": "Track Three", "artist": "Artist C", "duration": 210, "source": "soundcloud"}, + {"title": "Track Three", "artist": "Artist C", "duration": 
210, "source": "youtube_music"}, + ] + + best = score_search_candidates(candidates, spotify_track) + assert best["source"] == "youtube_music" + diff --git a/tests/test_spotify_playlist_integration.py b/tests/test_spotify_playlist_integration.py new file mode 100644 index 0000000..0f05088 --- /dev/null +++ b/tests/test_spotify_playlist_integration.py @@ -0,0 +1,136 @@ +from __future__ import annotations + +import asyncio +from typing import Any + +from metadata.types import MusicMetadata +from scheduler.jobs.spotify_playlist_watch import enqueue_spotify_track + + +class _MockSpotifyClient: + def __init__(self, snapshot_id: str, items: list[dict[str, Any]]) -> None: + self.snapshot_id = snapshot_id + self.items = items + + def get_playlist_items(self, _playlist_id: str) -> tuple[str, list[dict[str, Any]]]: + return self.snapshot_id, list(self.items) + + +class _MockSearchService: + def __init__(self, results: list[dict[str, Any]]) -> None: + self._results = results + self.queries: list[str] = [] + + async def search(self, query: str) -> list[dict[str, Any]]: + self.queries.append(query) + return list(self._results) + + +class _MockQueue: + def __init__(self) -> None: + self.items: list[dict[str, Any]] = [] + + def enqueue(self, payload: dict[str, Any]) -> None: + self.items.append(payload) + + +def test_enqueue_spotify_track_integration_single_result() -> None: + spotify_client = _MockSpotifyClient( + "snap-1", + [ + { + "spotify_track_id": "sp-track-1", + "artist": "Artist One", + "title": "Track One", + "duration_ms": 200000, + } + ], + ) + _snapshot_id, tracks = spotify_client.get_playlist_items("playlist-1") + spotify_track = tracks[0] + + search_service = _MockSearchService( + [ + { + "media_url": "https://example.com/media-1", + "title": "Track One", + "duration": 200, + "source_id": "youtube_music", + "extra": {"lyrics": "la la"}, + } + ] + ) + queue = _MockQueue() + + asyncio.run( + enqueue_spotify_track( + queue=queue, + spotify_track=spotify_track, + 
search_service=search_service, + playlist_id="playlist-1", + ) + ) + + assert len(queue.items) == 1 + payload = queue.items[0] + assert payload["playlist_id"] == "playlist-1" + assert payload["spotify_track_id"] == "sp-track-1" + assert payload["resolved_media"]["media_url"] == "https://example.com/media-1" + assert isinstance(payload["music_metadata"], MusicMetadata) + assert payload["music_metadata"].title == "Track One" + assert payload["music_metadata"].artist == "Artist One" + assert search_service.queries == ["Artist One - Track One official audio"] + + +def test_enqueue_spotify_track_integration_best_result_selected() -> None: + spotify_client = _MockSpotifyClient( + "snap-2", + [ + { + "spotify_track_id": "sp-track-2", + "artist": "Artist Two", + "title": "Track Two", + "duration_ms": 210000, + } + ], + ) + _snapshot_id, tracks = spotify_client.get_playlist_items("playlist-2") + spotify_track = tracks[0] + + search_service = _MockSearchService( + [ + { + "media_url": "https://example.com/worse", + "title": "Track Two (live)", + "duration": 260, + "source_id": "youtube_music", + "extra": {}, + }, + { + "media_url": "https://example.com/best", + "title": "Track Two", + "duration": 210, + "source_id": "youtube", + "extra": {"genre": "Rock"}, + }, + ] + ) + queue = _MockQueue() + + asyncio.run( + enqueue_spotify_track( + queue=queue, + spotify_track=spotify_track, + search_service=search_service, + playlist_id="playlist-2", + ) + ) + + assert len(queue.items) == 1 + payload = queue.items[0] + assert payload["resolved_media"]["media_url"] == "https://example.com/best" + assert payload["resolved_media"]["source_id"] == "youtube" + assert isinstance(payload["music_metadata"], MusicMetadata) + assert payload["music_metadata"].title == "Track Two" + assert payload["music_metadata"].artist == "Artist Two" + From 6fbb363db80ca1d8d6c455bda7ab38c866cbf46a Mon Sep 17 00:00:00 2001 From: z3ro-2 Date: Mon, 16 Feb 2026 11:52:56 -0600 Subject: [PATCH 07/45] 
=?UTF-8?q?feat(spotify):=20add=20ISRC-based=20idemp?= =?UTF-8?q?otency=20guard=20and=20persistent=20download=20registry=20=09?= =?UTF-8?q?=E2=80=A2=09Added=20downloaded=5Fmusic=5Ftracks=20table=20with?= =?UTF-8?q?=20unique=20constraint=20on=20(playlist=5Fid,=20isrc)=20=09?= =?UTF-8?q?=E2=80=A2=09Introduced=20DB=20helpers:=20=09=E2=80=A2=09has=5Fd?= =?UTF-8?q?ownloaded=5Fisrc=20=09=E2=80=A2=09record=5Fdownloaded=5Ftrack?= =?UTF-8?q?=20=09=E2=80=A2=09Updated=20Spotify=20enqueue=20flow=20to:=20?= =?UTF-8?q?=09=E2=80=A2=09Skip=20tracks=20already=20downloaded=20for=20pla?= =?UTF-8?q?ylist=20via=20ISRC=20=09=E2=80=A2=09Log=20duplicate=20skip=20ev?= =?UTF-8?q?ents=20=09=E2=80=A2=09Allow=20fallback=20when=20ISRC=20missing?= =?UTF-8?q?=20=09=E2=80=A2=09Updated=20download=20worker=20to:=20=09?= =?UTF-8?q?=E2=80=A2=09Record=20successful=20downloads=20into=20registry?= =?UTF-8?q?=20=09=E2=80=A2=09Only=20record=20after=20successful=20download?= =?UTF-8?q?=20+=20tagging=20=09=E2=80=A2=09Added=20rollback=20support=20to?= =?UTF-8?q?=20migration=20=09=E2=80=A2=09Added=20unit=20tests:=20=09?= =?UTF-8?q?=E2=80=A2=09ISRC=20guard=20behavior=20=09=E2=80=A2=09Duplicate?= =?UTF-8?q?=20skip=20validation=20=09=E2=80=A2=09Registry=20helper=20corre?= =?UTF-8?q?ctness=20=09=E2=80=A2=09Full=20idempotency=20integration=20scen?= =?UTF-8?q?ario?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change enforces persistent playlist-level idempotency and prevents duplicate Spotify track downloads across scheduler runs. 
--- db/downloaded_tracks.py | 74 ++++++++++++++++ db/migrations.py | 29 ++++++ download/worker.py | 9 +- scheduler/jobs/spotify_playlist_watch.py | 13 ++- tests/test_downloaded_tracks.py | 29 ++++++ tests/test_idempotency_full_pipeline.py | 107 +++++++++++++++++++++++ tests/test_idempotency_skip.py | 105 ++++++++++++++++++++++ 7 files changed, 364 insertions(+), 2 deletions(-) create mode 100644 db/downloaded_tracks.py create mode 100644 tests/test_downloaded_tracks.py create mode 100644 tests/test_idempotency_full_pipeline.py create mode 100644 tests/test_idempotency_skip.py diff --git a/db/downloaded_tracks.py b/db/downloaded_tracks.py new file mode 100644 index 0000000..a573390 --- /dev/null +++ b/db/downloaded_tracks.py @@ -0,0 +1,74 @@ +"""Persistence helpers for downloaded Spotify tracks by ISRC.""" + +from __future__ import annotations + +import os +import sqlite3 + +from db.migrations import ensure_downloaded_music_tracks_table + +_DEFAULT_DB_ENV_KEY = "RETREIVR_DB_PATH" + + +def _resolve_db_path() -> str: + return os.environ.get(_DEFAULT_DB_ENV_KEY, os.path.join(os.getcwd(), "retreivr.sqlite3")) + + +def _connect(db_path: str | None = None) -> sqlite3.Connection: + conn = sqlite3.connect(db_path or _resolve_db_path(), check_same_thread=False, timeout=30) + conn.row_factory = sqlite3.Row + conn.execute("PRAGMA foreign_keys = ON") + ensure_downloaded_music_tracks_table(conn) + return conn + + +def has_downloaded_isrc(playlist_id: str, isrc: str) -> bool: + """Return True when an ISRC is already recorded for a playlist.""" + pid = (playlist_id or "").strip() + track_isrc = (isrc or "").strip() + if not pid or not track_isrc: + return False + + conn = _connect() + try: + cur = conn.cursor() + cur.execute( + """ + SELECT 1 + FROM downloaded_music_tracks + WHERE playlist_id=? AND isrc=? 
+ LIMIT 1 + """, + (pid, track_isrc), + ) + return cur.fetchone() is not None + finally: + conn.close() + + +def record_downloaded_track(playlist_id: str, isrc: str, file_path: str) -> None: + """Insert a downloaded track record for playlist/idempotency tracking.""" + pid = (playlist_id or "").strip() + track_isrc = (isrc or "").strip() + path = (file_path or "").strip() + if not pid: + raise ValueError("playlist_id is required") + if not track_isrc: + raise ValueError("isrc is required") + if not path: + raise ValueError("file_path is required") + + conn = _connect() + try: + cur = conn.cursor() + cur.execute( + """ + INSERT OR IGNORE INTO downloaded_music_tracks (playlist_id, isrc, file_path) + VALUES (?, ?, ?) + """, + (pid, track_isrc, path), + ) + conn.commit() + finally: + conn.close() + diff --git a/db/migrations.py b/db/migrations.py index 6388e14..a6ed48f 100644 --- a/db/migrations.py +++ b/db/migrations.py @@ -44,3 +44,32 @@ def ensure_playlist_snapshot_tables(conn: sqlite3.Connection) -> None: "ON playlist_snapshot_items (snapshot_id, spotify_track_id, position)" ) conn.commit() + + +def ensure_downloaded_music_tracks_table(conn: sqlite3.Connection) -> None: + """Create downloaded Spotify tracks table and idempotency index.""" + cur = conn.cursor() + cur.execute( + """ + CREATE TABLE IF NOT EXISTS downloaded_music_tracks ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + playlist_id TEXT NOT NULL, + isrc TEXT NOT NULL, + file_path TEXT NOT NULL, + downloaded_at DATETIME DEFAULT CURRENT_TIMESTAMP + ) + """ + ) + cur.execute( + "CREATE UNIQUE INDEX IF NOT EXISTS uq_downloaded_music_tracks_playlist_isrc " + "ON downloaded_music_tracks (playlist_id, isrc)" + ) + conn.commit() + + +def rollback_downloaded_music_tracks_table(conn: sqlite3.Connection) -> None: + """Rollback downloaded Spotify tracks table migration.""" + cur = conn.cursor() + cur.execute("DROP INDEX IF EXISTS uq_downloaded_music_tracks_playlist_isrc") + cur.execute("DROP TABLE IF EXISTS 
downloaded_music_tracks") + conn.commit() diff --git a/download/worker.py b/download/worker.py index c8247c6..b8454fb 100644 --- a/download/worker.py +++ b/download/worker.py @@ -4,6 +4,7 @@ from typing import Any, Protocol +from db.downloaded_tracks import record_downloaded_track from metadata.tagging import tag_file @@ -31,6 +32,13 @@ def process_job(self, job: Any) -> str: # Download from the resolved media URL, then tag with attached metadata. file_path = self._downloader.download(media_url) tag_file(file_path, metadata) + # Record idempotency state only after download and tagging both succeed. + playlist_id = payload.get("playlist_id") + isrc = getattr(metadata, "isrc", None) + if not isrc and isinstance(metadata, dict): + isrc = metadata.get("isrc") + if playlist_id and isrc: + record_downloaded_track(str(playlist_id), str(isrc), file_path) return file_path # Non-music or incomplete payloads use the existing default worker behavior. @@ -39,4 +47,3 @@ def process_job(self, job: Any) -> str: def default_download_and_tag(self, job: Any) -> str: """Fallback behavior implemented by existing worker flows.""" raise NotImplementedError - diff --git a/scheduler/jobs/spotify_playlist_watch.py b/scheduler/jobs/spotify_playlist_watch.py index abe3b5e..214cfa4 100644 --- a/scheduler/jobs/spotify_playlist_watch.py +++ b/scheduler/jobs/spotify_playlist_watch.py @@ -6,6 +6,7 @@ import logging from typing import Any, Callable +from db.downloaded_tracks import has_downloaded_isrc from metadata.merge import merge_metadata from spotify.client import SpotifyPlaylistClient, get_playlist_items from spotify.diff import diff_playlist @@ -47,7 +48,17 @@ def _enqueue_added_track(queue: Any, item: dict[str, Any]) -> None: async def enqueue_spotify_track(queue, spotify_track: dict, search_service, playlist_id: str): - """Resolve a Spotify track, merge metadata, build payload, and enqueue it.""" + """Resolve a Spotify track, merge metadata, build payload, and enqueue it. 
+ + Idempotency skip is applied only when a non-empty ISRC exists and that + `(playlist_id, isrc)` has already been recorded as downloaded. Tracks with + missing/empty ISRC are always treated as normal enqueue candidates. + """ + track_isrc = str((spotify_track or {}).get("isrc") or "").strip() + if track_isrc and has_downloaded_isrc(playlist_id, track_isrc): + logging.info("skip duplicate isrc=%s playlist=%s", track_isrc, playlist_id) + return + resolved_media = await resolve_spotify_track(spotify_track, search_service) merged_metadata = merge_metadata(spotify_track or {}, {}, resolved_media.get("extra") or {}) payload = { diff --git a/tests/test_downloaded_tracks.py b/tests/test_downloaded_tracks.py new file mode 100644 index 0000000..4c6b6f8 --- /dev/null +++ b/tests/test_downloaded_tracks.py @@ -0,0 +1,29 @@ +from db.downloaded_tracks import has_downloaded_isrc, record_downloaded_track + + +def test_record_downloaded_track_and_lookup(tmp_path, monkeypatch) -> None: + db_path = tmp_path / "downloaded_tracks.sqlite" + monkeypatch.setenv("RETREIVR_DB_PATH", str(db_path)) + + record_downloaded_track( + playlist_id="playlist-a", + isrc="USABC1234567", + file_path="/music/playlist-a/01 - Track.mp3", + ) + + assert has_downloaded_isrc("playlist-a", "USABC1234567") is True + + +def test_has_downloaded_isrc_false_for_other_playlist_or_isrc(tmp_path, monkeypatch) -> None: + db_path = tmp_path / "downloaded_tracks.sqlite" + monkeypatch.setenv("RETREIVR_DB_PATH", str(db_path)) + + record_downloaded_track( + playlist_id="playlist-a", + isrc="USABC1234567", + file_path="/music/playlist-a/01 - Track.mp3", + ) + + assert has_downloaded_isrc("playlist-b", "USABC1234567") is False + assert has_downloaded_isrc("playlist-a", "USZZZ9999999") is False + diff --git a/tests/test_idempotency_full_pipeline.py b/tests/test_idempotency_full_pipeline.py new file mode 100644 index 0000000..f0ee720 --- /dev/null +++ b/tests/test_idempotency_full_pipeline.py @@ -0,0 +1,107 @@ +from __future__ 
import annotations + +import asyncio +from types import SimpleNamespace +from typing import Any + +from db.downloaded_tracks import has_downloaded_isrc +from download.worker import DownloadWorker +from scheduler.jobs.spotify_playlist_watch import enqueue_spotify_track + + +class _MockQueue: + def __init__(self) -> None: + self.items: list[dict[str, Any]] = [] + + def enqueue(self, payload: dict[str, Any]) -> None: + self.items.append(payload) + + +class _MockSearchService: + def __init__(self, results_by_query: dict[str, list[dict[str, Any]]]) -> None: + self._results_by_query = results_by_query + self.calls: list[str] = [] + + async def search(self, query: str) -> list[dict[str, Any]]: + self.calls.append(query) + return list(self._results_by_query.get(query, [])) + + +class _MockDownloader: + def __init__(self) -> None: + self.calls: list[str] = [] + + def download(self, media_url: str) -> str: + self.calls.append(media_url) + tail = media_url.rsplit("/", 1)[-1] or "track" + return f"/tmp/{tail}.mp3" + + +def test_idempotency_full_pipeline_two_tracks(tmp_path, monkeypatch) -> None: + db_path = tmp_path / "idempotency.sqlite" + monkeypatch.setenv("RETREIVR_DB_PATH", str(db_path)) + monkeypatch.setattr("download.worker.tag_file", lambda _file_path, _metadata: None) + + playlist_id = "playlist-42" + tracks = [ + { + "spotify_track_id": "sp-track-1", + "artist": "Artist One", + "title": "Track One", + "isrc": "USAAA1111111", + "duration_ms": 200000, + }, + { + "spotify_track_id": "sp-track-2", + "artist": "Artist Two", + "title": "Track Two", + "isrc": "USBBB2222222", + "duration_ms": 210000, + }, + ] + search_service = _MockSearchService( + { + "Artist One - Track One official audio": [ + { + "media_url": "https://example.test/one", + "title": "Track One", + "duration": 200, + "source_id": "youtube_music", + "extra": {}, + } + ], + "Artist Two - Track Two official audio": [ + { + "media_url": "https://example.test/two", + "title": "Track Two", + "duration": 210, + 
"source_id": "youtube_music", + "extra": {}, + } + ], + } + ) + queue = _MockQueue() + + # First pass: enqueue and process both tracks, recording downloaded ISRCs. + for track in tracks: + asyncio.run(enqueue_spotify_track(queue, track, search_service, playlist_id)) + assert len(queue.items) == 2 + + downloader = _MockDownloader() + worker = DownloadWorker(downloader) + for payload in list(queue.items): + worker.process_job(SimpleNamespace(payload=payload)) + + assert has_downloaded_isrc(playlist_id, "USAAA1111111") is True + assert has_downloaded_isrc(playlist_id, "USBBB2222222") is True + first_pass_queries = list(search_service.calls) + assert len(first_pass_queries) == 2 + + # Second pass: same playlist + ISRC should be skipped before resolve/enqueue. + for track in tracks: + asyncio.run(enqueue_spotify_track(queue, track, search_service, playlist_id)) + + assert len(queue.items) == 2 + assert search_service.calls == first_pass_queries + diff --git a/tests/test_idempotency_skip.py b/tests/test_idempotency_skip.py new file mode 100644 index 0000000..507f28b --- /dev/null +++ b/tests/test_idempotency_skip.py @@ -0,0 +1,105 @@ +from __future__ import annotations + +import asyncio +from typing import Any + +from scheduler.jobs.spotify_playlist_watch import enqueue_spotify_track + + +class _MockQueue: + def __init__(self) -> None: + self.enqueued: list[dict[str, Any]] = [] + + def enqueue(self, payload: dict[str, Any]) -> None: + self.enqueued.append(payload) + + +class _MockSearchService: + def __init__(self, results: list[dict[str, Any]]) -> None: + self._results = results + self.calls: list[str] = [] + + async def search(self, query: str) -> list[dict[str, Any]]: + self.calls.append(query) + return list(self._results) + + +def test_enqueue_spotify_track_skips_when_isrc_already_downloaded(monkeypatch) -> None: + monkeypatch.setattr( + "scheduler.jobs.spotify_playlist_watch.has_downloaded_isrc", + lambda playlist_id, isrc: True, + ) + queue = _MockQueue() + 
search_service = _MockSearchService( + [ + { + "media_url": "https://example.com/track", + "title": "Track One", + "duration": 210, + "source_id": "youtube_music", + "extra": {"lyrics": "la la"}, + } + ] + ) + spotify_track = { + "spotify_track_id": "sp-track-1", + "artist": "Artist One", + "title": "Track One", + "isrc": "USABC1234567", + "duration_ms": 210000, + } + + asyncio.run( + enqueue_spotify_track( + queue=queue, + spotify_track=spotify_track, + search_service=search_service, + playlist_id="playlist-a", + ) + ) + + assert queue.enqueued == [] + assert search_service.calls == [] + + +def test_enqueue_spotify_track_enqueues_when_isrc_not_downloaded(monkeypatch) -> None: + monkeypatch.setattr( + "scheduler.jobs.spotify_playlist_watch.has_downloaded_isrc", + lambda playlist_id, isrc: False, + ) + queue = _MockQueue() + search_service = _MockSearchService( + [ + { + "media_url": "https://example.com/track", + "title": "Track Two", + "duration": 205, + "source_id": "youtube_music", + "extra": {"genre": "Pop"}, + } + ] + ) + spotify_track = { + "spotify_track_id": "sp-track-2", + "artist": "Artist Two", + "title": "Track Two", + "isrc": "USZZZ9999999", + "duration_ms": 205000, + } + + asyncio.run( + enqueue_spotify_track( + queue=queue, + spotify_track=spotify_track, + search_service=search_service, + playlist_id="playlist-b", + ) + ) + + assert len(queue.enqueued) == 1 + payload = queue.enqueued[0] + assert payload["playlist_id"] == "playlist-b" + assert payload["spotify_track_id"] == "sp-track-2" + assert payload["resolved_media"]["media_url"] == "https://example.com/track" + assert search_service.calls == ["Artist Two - Track Two official audio"] + From a86acbc2d8c0d39f3608be09b73477cb30529f86 Mon Sep 17 00:00:00 2001 From: z3ro-2 Date: Mon, 16 Feb 2026 13:01:19 -0600 Subject: [PATCH 08/45] =?UTF-8?q?feat(spotify):=20add=20media=20duration?= =?UTF-8?q?=20validation=20layer=20with=20configurable=20enforcement=20=09?= 
=?UTF-8?q?=E2=80=A2=09Added=20media/ffprobe.py=20wrapper=20to=20extract?= =?UTF-8?q?=20media=20duration=20via=20ffprobe=20=09=E2=80=A2=09Added=20me?= =?UTF-8?q?dia/validation.py=20with=20validate=5Fduration=20helper=20enfor?= =?UTF-8?q?cing=20duration=20tolerance=20=09=E2=80=A2=09Introduced=20confi?= =?UTF-8?q?gurable=20settings:=20=09=E2=80=A2=09ENABLE=5FDURATION=5FVALIDA?= =?UTF-8?q?TION=20=09=E2=80=A2=09SPOTIFY=5FDURATION=5FTOLERANCE=5FSECONDS?= =?UTF-8?q?=20=09=E2=80=A2=09Integrated=20duration=20validation=20into=20d?= =?UTF-8?q?ownload=20worker:=20=09=E2=80=A2=09Validates=20Spotify=20music?= =?UTF-8?q?=20jobs=20before=20tagging=20and=20recording=20=09=E2=80=A2=09S?= =?UTF-8?q?kips=20tagging=20and=20ISRC=20recording=20on=20mismatch=20=09?= =?UTF-8?q?=E2=80=A2=09Introduces=20new=20job=20status:=20validation=5Ffai?= =?UTF-8?q?led=20=09=E2=80=A2=09Added=20structured=20WARNING=20logs=20for?= =?UTF-8?q?=20validation=20failures=20(actual=20vs=20expected=20vs=20toler?= =?UTF-8?q?ance)=20=09=E2=80=A2=09Ensured=20failed=20validation=20does=20n?= =?UTF-8?q?ot=20persist=20idempotency=20records=20=09=E2=80=A2=09Added=20u?= =?UTF-8?q?nit=20tests:=20=09=E2=80=A2=09Duration=20validation=20behavior?= =?UTF-8?q?=20=09=E2=80=A2=09Worker=20validation=20enforcement=20=09?= =?UTF-8?q?=E2=80=A2=09Configurable=20tolerance=20effects=20=09=E2=80=A2?= =?UTF-8?q?=09Full=20pipeline=20validation=20guard=20(no=20ISRC=20record?= =?UTF-8?q?=20on=20failure)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change introduces post-download media verification to prevent incorrect track versions from being permanently recorded, strengthening Spotify ingestion reliability and data integrity. 
--- config/settings.py | 9 +++ download/worker.py | 56 +++++++++++++++ media/ffprobe.py | 57 +++++++++++++++ media/validation.py | 41 +++++++++++ tests/test_full_validation_pipeline.py | 96 ++++++++++++++++++++++++++ tests/test_media_validation.py | 47 +++++++++++++ tests/test_validation_config.py | 71 +++++++++++++++++++ tests/test_worker_validation.py | 56 +++++++++++++++ 8 files changed, 433 insertions(+) create mode 100644 config/settings.py create mode 100644 media/ffprobe.py create mode 100644 media/validation.py create mode 100644 tests/test_full_validation_pipeline.py create mode 100644 tests/test_media_validation.py create mode 100644 tests/test_validation_config.py create mode 100644 tests/test_worker_validation.py diff --git a/config/settings.py b/config/settings.py new file mode 100644 index 0000000..9e06cd2 --- /dev/null +++ b/config/settings.py @@ -0,0 +1,9 @@ +"""Application settings constants.""" + +from __future__ import annotations + +# Toggle for enabling or disabling media-duration checks. +ENABLE_DURATION_VALIDATION = True + +# Allowed absolute difference between expected and actual media duration. 
+SPOTIFY_DURATION_TOLERANCE_SECONDS = 5.0 diff --git a/download/worker.py b/download/worker.py index b8454fb..f7879b8 100644 --- a/download/worker.py +++ b/download/worker.py @@ -2,11 +2,28 @@ from __future__ import annotations +import logging from typing import Any, Protocol +from config.settings import ENABLE_DURATION_VALIDATION, SPOTIFY_DURATION_TOLERANCE_SECONDS from db.downloaded_tracks import record_downloaded_track +from media.ffprobe import get_media_duration +from media.validation import validate_duration from metadata.tagging import tag_file +logger = logging.getLogger(__name__) + +JOB_STATUS_COMPLETED = "completed" +JOB_STATUS_FAILED = "failed" +JOB_STATUS_CANCELLED = "cancelled" +JOB_STATUS_VALIDATION_FAILED = "validation_failed" +JOB_ALLOWED_STATUSES = { + JOB_STATUS_COMPLETED, + JOB_STATUS_FAILED, + JOB_STATUS_CANCELLED, + JOB_STATUS_VALIDATION_FAILED, +} + class _Downloader(Protocol): def download(self, media_url: str) -> str: @@ -31,6 +48,35 @@ def process_job(self, job: Any) -> str: if media_url: # Download from the resolved media URL, then tag with attached metadata. file_path = self._downloader.download(media_url) + # Optionally enforce duration validation before any file tagging/write side effects. 
+ if ENABLE_DURATION_VALIDATION: + expected_ms = None + if isinstance(metadata, dict): + expected_ms = metadata.get("expected_ms") + else: + expected_ms = getattr(metadata, "expected_ms", None) + + if expected_ms is not None: + if not validate_duration( + file_path, + int(expected_ms), + SPOTIFY_DURATION_TOLERANCE_SECONDS, + ): + expected_seconds = int(expected_ms) / 1000.0 + actual_seconds = float("nan") + try: + actual_seconds = get_media_duration(file_path) + except Exception: + logger.exception("failed to retrieve actual duration for validation log") + logger.warning( + "validation_failed actual=%.2fs expected=%.2fs tolerance=%.2f", + actual_seconds, + expected_seconds, + SPOTIFY_DURATION_TOLERANCE_SECONDS, + ) + self._set_job_status(job, payload, JOB_STATUS_VALIDATION_FAILED) + return file_path + tag_file(file_path, metadata) # Record idempotency state only after download and tagging both succeed. playlist_id = payload.get("playlist_id") @@ -39,6 +85,7 @@ def process_job(self, job: Any) -> str: isrc = metadata.get("isrc") if playlist_id and isrc: record_downloaded_track(str(playlist_id), str(isrc), file_path) + self._set_job_status(job, payload, JOB_STATUS_COMPLETED) return file_path # Non-music or incomplete payloads use the existing default worker behavior. 
@@ -47,3 +94,12 @@ def process_job(self, job: Any) -> str: def default_download_and_tag(self, job: Any) -> str: """Fallback behavior implemented by existing worker flows.""" raise NotImplementedError + + @staticmethod + def _set_job_status(job: Any, payload: Any, status: str) -> None: + """Set worker job status using the supported terminal status values.""" + if status not in JOB_ALLOWED_STATUSES: + raise ValueError(f"unsupported job status: {status}") + setattr(job, "status", status) + if isinstance(payload, dict): + payload["status"] = status diff --git a/media/ffprobe.py b/media/ffprobe.py new file mode 100644 index 0000000..caf9913 --- /dev/null +++ b/media/ffprobe.py @@ -0,0 +1,57 @@ +"""Wrapper utilities for retrieving media information using ffprobe.""" + +from __future__ import annotations + +import json +import subprocess + + +def get_media_duration(file_path: str) -> float: + """Return media duration in seconds using ``ffprobe`` JSON output. + + The function executes ``ffprobe`` for the provided file, parses the JSON + payload, and returns ``format.duration`` as a float. + + Raises: + RuntimeError: If ``ffprobe`` execution fails or the command is missing. + ValueError: If duration data is missing or not parseable as a float. 
+ """ + command = [ + "ffprobe", + "-v", + "error", + "-print_format", + "json", + "-show_format", + file_path, + ] + + try: + completed = subprocess.run( + command, + capture_output=True, + text=True, + check=True, + timeout=15, + ) + except FileNotFoundError as exc: + raise RuntimeError("ffprobe is not installed or not available in PATH") from exc + except subprocess.TimeoutExpired as exc: + raise RuntimeError(f"ffprobe timed out while probing: {file_path}") from exc + except subprocess.CalledProcessError as exc: + stderr_text = (exc.stderr or "").strip() + raise RuntimeError(f"ffprobe failed for {file_path}: {stderr_text or exc}") from exc + + try: + payload = json.loads(completed.stdout or "{}") + except json.JSONDecodeError as exc: + raise ValueError(f"ffprobe returned invalid JSON for {file_path}") from exc + + duration_value = (payload.get("format") or {}).get("duration") + if duration_value in (None, ""): + raise ValueError(f"ffprobe did not return a duration for {file_path}") + + try: + return float(duration_value) + except (TypeError, ValueError) as exc: + raise ValueError(f"ffprobe returned a non-numeric duration for {file_path}") from exc diff --git a/media/validation.py b/media/validation.py new file mode 100644 index 0000000..797a05c --- /dev/null +++ b/media/validation.py @@ -0,0 +1,41 @@ +"""Media validation helpers.""" + +from __future__ import annotations + +import logging + +from media.ffprobe import get_media_duration + +logger = logging.getLogger(__name__) + + +def validate_duration(file_path: str, expected_ms: int, tolerance_seconds: float = 5.0) -> bool: + """Validate that a media file duration is within tolerance of an expected value. + + The function resolves the actual duration in seconds by calling + :func:`media.ffprobe.get_media_duration`, converts ``expected_ms`` from + milliseconds to seconds, and compares the absolute delta. + + Returns: + ``True`` when ``abs(actual_seconds - expected_seconds) <= tolerance_seconds``. 
+ ``False`` when the duration falls outside tolerance or probing fails. + + Constraints: + - ``expected_ms`` and ``tolerance_seconds`` must be non-negative. + - Any ffprobe/probe parsing error is handled non-fatally and returns ``False``. + """ + if expected_ms < 0: + logger.warning("Duration validation failed: expected_ms must be non-negative") + return False + if tolerance_seconds < 0: + logger.warning("Duration validation failed: tolerance_seconds must be non-negative") + return False + + try: + actual_duration_seconds = get_media_duration(file_path) + except Exception: + logger.exception("Failed to probe media duration for path=%s", file_path) + return False + + expected_seconds = expected_ms / 1000.0 + return abs(actual_duration_seconds - expected_seconds) <= tolerance_seconds diff --git a/tests/test_full_validation_pipeline.py b/tests/test_full_validation_pipeline.py new file mode 100644 index 0000000..81024b7 --- /dev/null +++ b/tests/test_full_validation_pipeline.py @@ -0,0 +1,96 @@ +from __future__ import annotations + +import asyncio +from types import SimpleNamespace +from typing import Any + +from db.downloaded_tracks import has_downloaded_isrc +from download.worker import DownloadWorker, JOB_STATUS_VALIDATION_FAILED +from scheduler.jobs.spotify_playlist_watch import enqueue_spotify_track + + +class _MockQueue: + def __init__(self) -> None: + self.items: list[dict[str, Any]] = [] + + def enqueue(self, payload: dict[str, Any]) -> None: + self.items.append(payload) + + +class _MockSearchService: + def __init__(self, results: list[dict[str, Any]]) -> None: + self._results = results + self.calls: list[str] = [] + + async def search(self, query: str) -> list[dict[str, Any]]: + self.calls.append(query) + return list(self._results) + + +class _MockDownloader: + def download(self, media_url: str) -> str: + return "/tmp/resolved-track.mp3" + + +def test_full_pipeline_validation_failure_does_not_enable_idempotent_skip(tmp_path, monkeypatch) -> None: + db_path = 
tmp_path / "validation_pipeline.sqlite" + monkeypatch.setenv("RETREIVR_DB_PATH", str(db_path)) + + # Force worker validation to fail before tagging/recording. + monkeypatch.setattr("download.worker.validate_duration", lambda *_args, **_kwargs: False) + monkeypatch.setattr("download.worker.get_media_duration", lambda _path: 1.0) + monkeypatch.setattr("download.worker.tag_file", lambda _path, _metadata: None) + + # Ensure queued payload metadata includes expected_ms for validation gating. + monkeypatch.setattr( + "scheduler.jobs.spotify_playlist_watch.merge_metadata", + lambda spotify_data, _mb, _ytdlp: { + "title": spotify_data.get("title", "Unknown"), + "artist": spotify_data.get("artist", "Unknown"), + "album": "Unknown", + "album_artist": spotify_data.get("artist", "Unknown"), + "track_num": 1, + "disc_num": 1, + "date": "Unknown", + "genre": "Unknown", + "isrc": spotify_data.get("isrc"), + "expected_ms": spotify_data.get("duration_ms"), + }, + ) + + playlist_id = "playlist-validation" + spotify_track = { + "spotify_track_id": "sp-track-1", + "artist": "Artist One", + "title": "Track One", + "isrc": "USVAL1234567", + "duration_ms": 200_000, + } + search_service = _MockSearchService( + [ + { + "media_url": "https://example.test/track-one", + "title": "Track One", + "duration": 200, + "source_id": "youtube_music", + "extra": {}, + } + ] + ) + queue = _MockQueue() + + # First pass: enqueue + worker processing with forced validation failure. + asyncio.run(enqueue_spotify_track(queue, spotify_track, search_service, playlist_id)) + assert len(queue.items) == 1 + + worker = DownloadWorker(_MockDownloader()) + job = SimpleNamespace(payload=queue.items[0]) + worker.process_job(job) + assert job.status == JOB_STATUS_VALIDATION_FAILED + assert has_downloaded_isrc(playlist_id, spotify_track["isrc"]) is False + first_pass_calls = list(search_service.calls) + + # Second pass: should not be skipped because ISRC was never recorded. 
+ asyncio.run(enqueue_spotify_track(queue, spotify_track, search_service, playlist_id)) + assert len(queue.items) == 2 + assert len(search_service.calls) == len(first_pass_calls) + 1 diff --git a/tests/test_media_validation.py b/tests/test_media_validation.py new file mode 100644 index 0000000..f9aca71 --- /dev/null +++ b/tests/test_media_validation.py @@ -0,0 +1,47 @@ +from __future__ import annotations + +import shutil +import wave +from pathlib import Path + +import pytest + +from media.validation import validate_duration + +_FFPROBE_AVAILABLE = shutil.which("ffprobe") is not None + + +def _write_silent_wav(path: Path, duration_seconds: float, sample_rate: int = 44_100) -> None: + nframes = int(duration_seconds * sample_rate) + with wave.open(str(path), "wb") as wav_file: + wav_file.setnchannels(1) + wav_file.setsampwidth(2) + wav_file.setframerate(sample_rate) + wav_file.writeframes(b"\x00\x00" * nframes) + + +@pytest.mark.skipif(not _FFPROBE_AVAILABLE, reason="ffprobe is required for duration probe tests") +def test_validate_duration_returns_true_within_tolerance(tmp_path: Path) -> None: + audio_path = tmp_path / "short.wav" + _write_silent_wav(audio_path, duration_seconds=1.0) + + assert validate_duration(str(audio_path), expected_ms=1_000, tolerance_seconds=0.5) is True + + +@pytest.mark.skipif(not _FFPROBE_AVAILABLE, reason="ffprobe is required for duration probe tests") +def test_validate_duration_returns_false_when_duration_differs_significantly(tmp_path: Path) -> None: + audio_path = tmp_path / "short.wav" + _write_silent_wav(audio_path, duration_seconds=1.0) + + assert validate_duration(str(audio_path), expected_ms=10_000, tolerance_seconds=1.0) is False + + +def test_validate_duration_returns_false_when_probe_fails(monkeypatch, tmp_path: Path) -> None: + audio_path = tmp_path / "missing-or-invalid.wav" + + def _raise_probe_error(_file_path: str) -> float: + raise RuntimeError("ffprobe failed") + + 
monkeypatch.setattr("media.validation.get_media_duration", _raise_probe_error) + + assert validate_duration(str(audio_path), expected_ms=1_000, tolerance_seconds=0.5) is False diff --git a/tests/test_validation_config.py b/tests/test_validation_config.py new file mode 100644 index 0000000..14927fb --- /dev/null +++ b/tests/test_validation_config.py @@ -0,0 +1,71 @@ +from __future__ import annotations + +from types import SimpleNamespace + +from download.worker import ( + DownloadWorker, + JOB_STATUS_COMPLETED, + JOB_STATUS_VALIDATION_FAILED, +) + + +class _MockDownloader: + def download(self, media_url: str) -> str: + return "/tmp/mock-audio.mp3" + + +def _job() -> SimpleNamespace: + return SimpleNamespace( + payload={ + "playlist_id": "playlist-1", + "spotify_track_id": "track-1", + "resolved_media": {"media_url": "https://example.test/audio"}, + "music_metadata": { + "title": "Track", + "artist": "Artist", + "isrc": "USABC1234567", + "expected_ms": 1_000, + }, + } + ) + + +def test_duration_tolerance_config_changes_validation_outcome(monkeypatch) -> None: + recorded: list[tuple[str, str, str]] = [] + + monkeypatch.setattr("download.worker.ENABLE_DURATION_VALIDATION", True) + monkeypatch.setattr( + "download.worker.record_downloaded_track", + lambda playlist_id, isrc, file_path: recorded.append((playlist_id, isrc, file_path)), + ) + monkeypatch.setattr("download.worker.tag_file", lambda _path, _metadata: None) + + # Deterministic validator model: actual=1.20s, expected=1.00s (delta=0.20s). + monkeypatch.setattr( + "download.worker.validate_duration", + lambda _file_path, expected_ms, tolerance_seconds: abs(1.2 - (expected_ms / 1000.0)) + <= tolerance_seconds, + ) + + worker = DownloadWorker(_MockDownloader()) + + # Baseline tolerance: passes. 
+ monkeypatch.setattr("download.worker.SPOTIFY_DURATION_TOLERANCE_SECONDS", 0.30) + first_job = _job() + worker.process_job(first_job) + assert first_job.status == JOB_STATUS_COMPLETED + assert len(recorded) == 1 + + # Very small tolerance: same track now fails validation. + monkeypatch.setattr("download.worker.SPOTIFY_DURATION_TOLERANCE_SECONDS", 0.05) + second_job = _job() + worker.process_job(second_job) + assert second_job.status == JOB_STATUS_VALIDATION_FAILED + assert len(recorded) == 1 + + # Increased tolerance again: track passes. + monkeypatch.setattr("download.worker.SPOTIFY_DURATION_TOLERANCE_SECONDS", 0.30) + third_job = _job() + worker.process_job(third_job) + assert third_job.status == JOB_STATUS_COMPLETED + assert len(recorded) == 2 diff --git a/tests/test_worker_validation.py b/tests/test_worker_validation.py new file mode 100644 index 0000000..5c134ab --- /dev/null +++ b/tests/test_worker_validation.py @@ -0,0 +1,56 @@ +from __future__ import annotations + +import wave +from pathlib import Path +from types import SimpleNamespace + +from download.worker import DownloadWorker, JOB_STATUS_VALIDATION_FAILED + + +class _MockDownloader: + def __init__(self, output_path: Path) -> None: + self.output_path = output_path + + def download(self, media_url: str) -> str: + with wave.open(str(self.output_path), "wb") as wav_file: + wav_file.setnchannels(1) + wav_file.setsampwidth(2) + wav_file.setframerate(44_100) + wav_file.writeframes(b"\x00\x00" * 44_100) # 1 second of silence + return str(self.output_path) + + +def test_worker_sets_validation_failed_and_skips_record(monkeypatch, tmp_path: Path) -> None: + recorded_calls: list[tuple[str, str, str]] = [] + + monkeypatch.setattr( + "download.worker.record_downloaded_track", + lambda playlist_id, isrc, file_path: recorded_calls.append((playlist_id, isrc, file_path)), + ) + monkeypatch.setattr( + "download.worker.validate_duration", + lambda file_path, expected_ms, tolerance_seconds: False, + ) + 
monkeypatch.setattr("download.worker.get_media_duration", lambda file_path: 1.0) + monkeypatch.setattr("download.worker.tag_file", lambda _path, _metadata: None) + + file_path = tmp_path / "short.wav" + worker = DownloadWorker(_MockDownloader(file_path)) + job = SimpleNamespace( + payload={ + "playlist_id": "playlist-1", + "spotify_track_id": "track-1", + "resolved_media": {"media_url": "https://example.test/audio"}, + "music_metadata": { + "title": "Track", + "artist": "Artist", + "isrc": "USABC1234567", + "expected_ms": 180_000, # far from 1-second file + }, + } + ) + + worker.process_job(job) + + assert recorded_calls == [] + assert job.status == JOB_STATUS_VALIDATION_FAILED From 300a3a68c69ab5029a3edb656dd635f9b21614c5 Mon Sep 17 00:00:00 2001 From: z3ro-2 Date: Mon, 16 Feb 2026 13:18:09 -0600 Subject: [PATCH 09/45] =?UTF-8?q?feat(metadata):=20add=20canonical=20norma?= =?UTF-8?q?lization=20layer=20for=20album-grade=20Spotify=20ingestion=20?= =?UTF-8?q?=09=E2=80=A2=09Introduced=20metadata/normalize.py=20with=20norm?= =?UTF-8?q?alize=5Fmusic=5Fmetadata=20as=20canonical=20metadata=20sanitati?= =?UTF-8?q?on=20layer=20=09=E2=80=A2=09Added=20deterministic=20title=20cle?= =?UTF-8?q?anup=20rules=20(removal=20of=20resolver=20artifacts=20like=20?= =?UTF-8?q?=E2=80=9COfficial=20Audio=E2=80=9D,=20=E2=80=9C[HD]=E2=80=9D,?= =?UTF-8?q?=20etc.)=20=09=E2=80=A2=09Implemented=20featured=20artist=20nor?= =?UTF-8?q?malization=20policy=20(move=20feat./ft.=20to=20title=20when=20a?= =?UTF-8?q?ppropriate)=20=09=E2=80=A2=09Enforced=20album=5Fartist=20consis?= =?UTF-8?q?tency=20for=20proper=20album=20grouping=20across=20media=20play?= =?UTF-8?q?ers=20=09=E2=80=A2=09Added=20date=20normalization=20logic=20(YY?= =?UTF-8?q?YY=20/=20YYYY-MM-DD=20handling=20with=20graceful=20fallback)=20?= =?UTF-8?q?=09=E2=80=A2=09Implemented=20genre=20deduplication=20and=20norm?= =?UTF-8?q?alization=20=09=E2=80=A2=09Applied=20Unicode=20NFC=20normalizat?= 
=?UTF-8?q?ion=20to=20prevent=20duplicate=20album=20grouping=20issues=20?= =?UTF-8?q?=09=E2=80=A2=09Integrated=20normalization=20into=20worker=20pip?= =?UTF-8?q?eline=20(download=20=E2=86=92=20validate=20=E2=86=92=20normaliz?= =?UTF-8?q?e=20=E2=86=92=20tag=20=E2=86=92=20record)=20=09=E2=80=A2=09Adde?= =?UTF-8?q?d=20comprehensive=20unit=20tests=20for=20normalization=20behavi?= =?UTF-8?q?or=20=09=E2=80=A2=09Added=20album-level=20integration=20test=20?= =?UTF-8?q?to=20verify=20consistent=20grouping=20across=20multi-track=20do?= =?UTF-8?q?wnloads?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change hardens metadata integrity and ensures album downloads remain clean, consistent, and correctly grouped in Apple Music, Jellyfin, Plex, and other media players. --- download/worker.py | 28 +++- metadata/normalize.py | 220 +++++++++++++++++++++++++++ tests/test_album_consistency.py | 52 +++++++ tests/test_metadata_normalization.py | 97 ++++++++++++ 4 files changed, 396 insertions(+), 1 deletion(-) create mode 100644 metadata/normalize.py create mode 100644 tests/test_album_consistency.py create mode 100644 tests/test_metadata_normalization.py diff --git a/download/worker.py b/download/worker.py index f7879b8..51e0d07 100644 --- a/download/worker.py +++ b/download/worker.py @@ -9,7 +9,9 @@ from db.downloaded_tracks import record_downloaded_track from media.ffprobe import get_media_duration from media.validation import validate_duration +from metadata.normalize import normalize_music_metadata from metadata.tagging import tag_file +from metadata.types import MusicMetadata logger = logging.getLogger(__name__) @@ -77,7 +79,9 @@ def process_job(self, job: Any) -> str: self._set_job_status(job, payload, JOB_STATUS_VALIDATION_FAILED) return file_path - tag_file(file_path, metadata) + metadata_obj = self._coerce_music_metadata(metadata) + normalized_metadata = normalize_music_metadata(metadata_obj) + tag_file(file_path, 
normalized_metadata) # Record idempotency state only after download and tagging both succeed. playlist_id = payload.get("playlist_id") isrc = getattr(metadata, "isrc", None) @@ -103,3 +107,25 @@ def _set_job_status(job: Any, payload: Any, status: str) -> None: setattr(job, "status", status) if isinstance(payload, dict): payload["status"] = status + + @staticmethod + def _coerce_music_metadata(metadata: Any) -> MusicMetadata: + """Coerce payload metadata into ``MusicMetadata`` for normalization/tagging.""" + if isinstance(metadata, MusicMetadata): + return metadata + + payload = metadata if isinstance(metadata, dict) else {} + return MusicMetadata( + title=str(payload.get("title") or "Unknown Title"), + artist=str(payload.get("artist") or "Unknown Artist"), + album=str(payload.get("album") or "Unknown Album"), + album_artist=str(payload.get("album_artist") or payload.get("artist") or "Unknown Artist"), + track_num=int(payload.get("track_num") or 1), + disc_num=int(payload.get("disc_num") or 1), + date=str(payload.get("date") or "Unknown"), + genre=str(payload.get("genre") or "Unknown"), + isrc=(str(payload.get("isrc")).strip() if payload.get("isrc") else None), + mbid=(str(payload.get("mbid")).strip() if payload.get("mbid") else None), + artwork=payload.get("artwork"), + lyrics=(str(payload.get("lyrics")).strip() if payload.get("lyrics") else None), + ) diff --git a/metadata/normalize.py b/metadata/normalize.py new file mode 100644 index 0000000..5488637 --- /dev/null +++ b/metadata/normalize.py @@ -0,0 +1,220 @@ +"""Normalization helpers for structured music metadata.""" + +from __future__ import annotations + +import logging +import re +import unicodedata +from datetime import date +from typing import Any + +from metadata.types import MusicMetadata + +logger = logging.getLogger(__name__) + +_WHITESPACE_RE = re.compile(r"\s+") +_YEAR_RE = re.compile(r"^(\d{4})") +_DATE_RE = re.compile(r"^(\d{4})[-/](\d{1,2})[-/](\d{1,2})$") +_YEAR_MONTH_RE = 
re.compile(r"^(\d{4})[-/](\d{1,2})$") +_TITLE_SUFFIX_RE = re.compile( + r"\s*(?:\((?:official audio|official video|audio)\)|\[(?:hd)\])\s*$", + re.IGNORECASE, +) +_TOPIC_SUFFIX_RE = re.compile(r"\s*-\s*topic\s*$", re.IGNORECASE) +_TRAILING_HYPHENS_RE = re.compile(r"(?:\s*-\s*)+$") +_FEAT_SPLIT_RE = re.compile(r"^(?P
.+?)\s+(?:feat\.|ft\.)\s+(?P.+)$", re.IGNORECASE) +_TITLE_FEAT_RE = re.compile(r"\(\s*feat\.\s*([^)]+)\)", re.IGNORECASE) + + +def normalize_music_metadata(metadata: MusicMetadata) -> MusicMetadata: + """Return a normalized copy of ``MusicMetadata`` without mutating the input. + + Responsibilities: + - Normalize all string fields to Unicode NFC. + This matters for media-library grouping because visually identical Unicode + strings can have different binary forms; NFC avoids duplicate album/artist + buckets caused by mixed normalization forms. + - Strip leading/trailing whitespace. + - Collapse repeated internal whitespace to single spaces. + - Normalize ``track_num`` and ``disc_num`` to integers. + - Normalize ``date`` to ``YYYY`` or ``YYYY-MM-DD`` when parseable. + - Ensure ``album_artist`` is non-empty by falling back to ``artist``. + + The returned value is always a newly constructed ``MusicMetadata`` instance. + """ + # NFC normalization is applied via _normalize_text for stable player grouping. + title = clean_title(_normalize_text(metadata.title)) or "Unknown Title" + artist = _normalize_text(metadata.artist) or "Unknown Artist" + artist, title = normalize_featured_artists(artist, title) + album = _normalize_text(metadata.album) or "Unknown Album" + # Media players group albums by album_artist; blank/variant values fragment one album. + album_artist_raw = _normalize_optional_text(metadata.album_artist) + if not album_artist_raw: + # Fallback to track artist so all tracks in the same release can group together. + album_artist = artist + else: + album_artist = album_artist_raw + # When artist fields include comma-separated collaborators, keep primary artist for grouping. 
+ album_artist = _primary_artist(album_artist) + genre = _normalize_genre(metadata.genre) or "Unknown" + normalized_date = _normalize_release_date(metadata.date) or "Unknown" + + isrc = _normalize_optional_text(metadata.isrc) + mbid = _normalize_optional_text(metadata.mbid) + lyrics = _normalize_optional_text(metadata.lyrics) + artwork = bytes(metadata.artwork) if metadata.artwork is not None else None + + track_num = _normalize_positive_int(metadata.track_num, default=1) + disc_num = _normalize_positive_int(metadata.disc_num, default=1) + + return MusicMetadata( + title=title, + artist=artist, + album=album, + album_artist=album_artist, + track_num=track_num, + disc_num=disc_num, + date=normalized_date, + genre=genre, + isrc=isrc, + mbid=mbid, + artwork=artwork, + lyrics=lyrics, + ) + + +def clean_title(title: str) -> str: + """Return a deterministically cleaned track title. + + Cleanup rules: + - Remove trailing ``(Official Audio)``, ``(Official Video)``, ``(Audio)``, and ``[HD]``. + - Remove trailing ``- Topic``. + - Remove trailing hyphen artifacts. + - Preserve other parenthetical context such as ``(Live)``. + """ + cleaned = _normalize_text(title) + while True: + updated = _TITLE_SUFFIX_RE.sub("", cleaned) + updated = _TOPIC_SUFFIX_RE.sub("", updated) + updated = _TRAILING_HYPHENS_RE.sub("", updated) + updated = _normalize_text(updated) if updated else "" + if updated == cleaned: + break + cleaned = updated + return cleaned + + +def normalize_featured_artists(artist: str, title: str) -> tuple[str, str]: + """Normalize featured artist credits between artist and title fields. + + If ``artist`` includes ``feat.``/``ft.`` credits, move the featured segment + into ``title`` as ``(feat. X)`` and keep only the main artist name in + ``artist``. Existing title feat credits are preserved and not duplicated. + Matching is case-insensitive. 
+ """ + normalized_artist = _normalize_text(artist) + normalized_title = _normalize_text(title) + + match = _FEAT_SPLIT_RE.match(normalized_artist) + if not match: + return normalized_artist, normalized_title + + main_artist = _normalize_text(match.group("main")) + featured_segment = _normalize_text(match.group("feat")) + if not featured_segment: + return main_artist, normalized_title + + existing = {_normalize_text(item).lower() for item in _TITLE_FEAT_RE.findall(normalized_title)} + if featured_segment.lower() in existing: + return main_artist, normalized_title + + return main_artist, f"{normalized_title} (feat. {featured_segment})" + + +def _normalize_text(value: str) -> str: + return _WHITESPACE_RE.sub(" ", unicodedata.normalize("NFC", value).strip()) + + +def _normalize_optional_text(value: str | None) -> str | None: + if value is None: + return None + normalized = _normalize_text(value) + return normalized or None + + +def _primary_artist(value: str) -> str: + primary = value.split(",", 1)[0] + normalized = _normalize_text(primary) + return normalized or value + + +def _normalize_genre(value: Any) -> str | None: + if value is None: + return None + + raw_parts: list[str] + if isinstance(value, list): + raw_parts = [str(part) for part in value] + else: + raw_parts = re.split(r"[;,]", str(value)) + + seen: set[str] = set() + ordered: list[str] = [] + for part in raw_parts: + normalized = _normalize_text(part) + if not normalized: + continue + key = normalized.casefold() + if key in seen: + continue + seen.add(key) + ordered.append(normalized) + + if not ordered: + return None + return ", ".join(ordered) + + +def _normalize_positive_int(value: int, *, default: int) -> int: + try: + parsed = int(value) + except (TypeError, ValueError): + return default + return parsed if parsed > 0 else default + + +def _normalize_release_date(value: str) -> str | None: + normalized = _normalize_text(value) + if not normalized: + return None + + # YYYY + if normalized.isdigit() 
and len(normalized) == 4: + return normalized + + # YYYY-MM -> YYYY + year_month_match = _YEAR_MONTH_RE.match(normalized) + if year_month_match: + year_s, month_s = year_month_match.groups() + month = int(month_s) + if 1 <= month <= 12: + return year_s + + # YYYY-MM-DD (or slash-separated equivalent) -> YYYY-MM-DD + match = _DATE_RE.match(normalized) + if match: + year_s, month_s, day_s = match.groups() + try: + parsed = date(int(year_s), int(month_s), int(day_s)) + except ValueError: + return _YEAR_RE.match(normalized).group(1) if _YEAR_RE.match(normalized) else None + return parsed.isoformat() + + # Invalid formats: strip to first 4 digits when present. + year_match = _YEAR_RE.match(normalized) + if year_match: + return year_match.group(1) + + # No usable year; keep the source value but surface inconsistency. + logger.warning("unparseable release date; preserving original value=%s", normalized) + return normalized diff --git a/tests/test_album_consistency.py b/tests/test_album_consistency.py new file mode 100644 index 0000000..626c020 --- /dev/null +++ b/tests/test_album_consistency.py @@ -0,0 +1,52 @@ +from __future__ import annotations + +from metadata.normalize import normalize_music_metadata +from metadata.types import MusicMetadata + + +def test_album_download_metadata_normalization_consistency() -> None: + track_three = MusicMetadata( + title="Song Three - Topic", + artist="Main Artist", + album="Album Name", + album_artist="Main Artist", + track_num=3, + disc_num=1, + date="2024/07/11", + genre="Pop", + isrc="USAAA1111113", + ) + track_three.album_artist = "" + + tracks = [ + MusicMetadata( + title="Song One (Official Audio)", + artist="Main Artist", + album="Album Name", + album_artist="Main Artist", + track_num=1, + disc_num=1, + date="2024-07", + genre="Pop", + isrc="USAAA1111111", + ), + MusicMetadata( + title="Song Two [HD]", + artist="Main Artist", + album="Album Name", + album_artist="Main Artist, Guest Artist", + track_num=2, + disc_num=1, + 
date="2024", + genre="Pop", + isrc="USAAA1111112", + ), + track_three, + ] + + normalized = [normalize_music_metadata(track) for track in tracks] + + assert {track.album_artist for track in normalized} == {"Main Artist"} + assert [track.title for track in normalized] == ["Song One", "Song Two", "Song Three"] + assert [track.date for track in normalized] == ["2024", "2024", "2024-07-11"] + assert [track.track_num for track in normalized] == [1, 2, 3] diff --git a/tests/test_metadata_normalization.py b/tests/test_metadata_normalization.py new file mode 100644 index 0000000..03cd102 --- /dev/null +++ b/tests/test_metadata_normalization.py @@ -0,0 +1,97 @@ +from __future__ import annotations + +import unicodedata + +from metadata.normalize import normalize_music_metadata +from metadata.types import MusicMetadata + + +def _metadata(**overrides) -> MusicMetadata: + base = { + "title": "Song", + "artist": "Artist", + "album": "Album", + "album_artist": "Album Artist", + "track_num": 1, + "disc_num": 1, + "date": "2024", + "genre": "Pop", + "isrc": "USABC1234567", + "mbid": "mbid-1", + "artwork": None, + "lyrics": None, + } + base.update(overrides) + return MusicMetadata(**base) + + +def test_title_cleanup_rules() -> None: + metadata = _metadata(title=" Song Name (Official Audio) - Topic - ") + + normalized = normalize_music_metadata(metadata) + + assert normalized.title == "Song Name" + + +def test_featured_artist_moves_into_title() -> None: + metadata = _metadata(artist="Main Artist ft. Guest Artist", title="My Track") + + normalized = normalize_music_metadata(metadata) + + assert normalized.artist == "Main Artist" + assert normalized.title == "My Track (feat. 
Guest Artist)" + + +def test_album_artist_fallback_and_primary_artist_grouping() -> None: + missing_album_artist = _metadata(artist="Lead Artist") + missing_album_artist.album_artist = "" + + normalized_missing = normalize_music_metadata(missing_album_artist) + assert normalized_missing.album_artist == "Lead Artist" + + multi_album_artist = _metadata(album_artist="Lead Artist, Guest One, Guest Two") + normalized_multi = normalize_music_metadata(multi_album_artist) + assert normalized_multi.album_artist == "Lead Artist" + + +def test_date_normalization_cases() -> None: + year_only = normalize_music_metadata(_metadata(date="2024")) + year_month = normalize_music_metadata(_metadata(date="2024-07")) + full_date = normalize_music_metadata(_metadata(date="2024-07-09")) + invalid_with_year = normalize_music_metadata(_metadata(date="2024-99-99")) + invalid_no_year = normalize_music_metadata(_metadata(date="Unknown date string")) + + assert year_only.date == "2024" + assert year_month.date == "2024" + assert full_date.date == "2024-07-09" + assert invalid_with_year.date == "2024" + assert invalid_no_year.date == "Unknown date string" + + +def test_genre_deduplication_and_casing_from_first_occurrence() -> None: + metadata = _metadata(genre=" Pop ; pop, ROCK, Rock , Jazz ") + + normalized = normalize_music_metadata(metadata) + + assert normalized.genre == "Pop, ROCK, Jazz" + + +def test_unicode_normalization_nfc_applies_to_core_grouping_fields() -> None: + decomposed = "Cafe\u0301" + metadata = _metadata( + title=f"{decomposed} Song", + artist=decomposed, + album=decomposed, + album_artist=decomposed, + genre=decomposed, + ) + + normalized = normalize_music_metadata(metadata) + + expected = unicodedata.normalize("NFC", decomposed) + assert expected == "Café" + assert normalized.title == "Café Song" + assert normalized.artist == "Café" + assert normalized.album == "Café" + assert normalized.album_artist == "Café" + assert normalized.genre == "Café" From 
b978a5975c4a79895af42d47b15855c3125cad75 Mon Sep 17 00:00:00 2001 From: z3ro-2 Date: Mon, 16 Feb 2026 14:12:38 -0600 Subject: [PATCH 10/45] fixes to the download worker --- download/worker.py | 133 +++++++++++++++++---------- tests/test_worker_return_contract.py | 51 ++++++++++ 2 files changed, 136 insertions(+), 48 deletions(-) create mode 100644 tests/test_worker_return_contract.py diff --git a/download/worker.py b/download/worker.py index 51e0d07..7aa75ef 100644 --- a/download/worker.py +++ b/download/worker.py @@ -3,7 +3,8 @@ from __future__ import annotations import logging -from typing import Any, Protocol +import re +from typing import Any, Optional, Protocol from config.settings import ENABLE_DURATION_VALIDATION, SPOTIFY_DURATION_TOLERANCE_SECONDS from db.downloaded_tracks import record_downloaded_track @@ -38,8 +39,14 @@ class DownloadWorker: def __init__(self, downloader: _Downloader) -> None: self._downloader = downloader - def process_job(self, job: Any) -> str: - """Process one job with music-metadata-aware flow and safe fallback behavior.""" + def process_job(self, job: Any) -> dict[str, str | None]: + """Process one job and return a structured status/file-path result. + + Returns: + A dict with keys: + - ``status``: one of ``completed``, ``failed``, ``validation_failed``. + - ``file_path``: output path when completed, otherwise ``None``. + """ payload = getattr(job, "payload", None) or {} if payload.get("music_metadata"): @@ -48,52 +55,58 @@ def process_job(self, job: Any) -> str: media_url = resolved_media.get("media_url") metadata = payload.get("music_metadata") if media_url: - # Download from the resolved media URL, then tag with attached metadata. - file_path = self._downloader.download(media_url) - # Optionally enforce duration validation before any file tagging/write side effects. 
- if ENABLE_DURATION_VALIDATION: - expected_ms = None - if isinstance(metadata, dict): - expected_ms = metadata.get("expected_ms") - else: - expected_ms = getattr(metadata, "expected_ms", None) - - if expected_ms is not None: - if not validate_duration( - file_path, - int(expected_ms), - SPOTIFY_DURATION_TOLERANCE_SECONDS, - ): - expected_seconds = int(expected_ms) / 1000.0 - actual_seconds = float("nan") - try: - actual_seconds = get_media_duration(file_path) - except Exception: - logger.exception("failed to retrieve actual duration for validation log") - logger.warning( - "validation_failed actual=%.2fs expected=%.2fs tolerance=%.2f", - actual_seconds, - expected_seconds, + try: + # Download from the resolved media URL, then tag with attached metadata. + file_path = self._downloader.download(media_url) + # Optionally enforce duration validation before any file tagging/write side effects. + if ENABLE_DURATION_VALIDATION: + expected_ms = None + if isinstance(metadata, dict): + expected_ms = metadata.get("expected_ms") + else: + expected_ms = getattr(metadata, "expected_ms", None) + + if expected_ms is not None: + if not validate_duration( + file_path, + int(expected_ms), SPOTIFY_DURATION_TOLERANCE_SECONDS, - ) - self._set_job_status(job, payload, JOB_STATUS_VALIDATION_FAILED) - return file_path - - metadata_obj = self._coerce_music_metadata(metadata) - normalized_metadata = normalize_music_metadata(metadata_obj) - tag_file(file_path, normalized_metadata) - # Record idempotency state only after download and tagging both succeed. 
- playlist_id = payload.get("playlist_id") - isrc = getattr(metadata, "isrc", None) - if not isrc and isinstance(metadata, dict): - isrc = metadata.get("isrc") - if playlist_id and isrc: - record_downloaded_track(str(playlist_id), str(isrc), file_path) - self._set_job_status(job, payload, JOB_STATUS_COMPLETED) - return file_path + ): + expected_seconds = int(expected_ms) / 1000.0 + actual_seconds = float("nan") + try: + actual_seconds = get_media_duration(file_path) + except Exception: + logger.exception("failed to retrieve actual duration for validation log") + logger.warning( + "validation_failed actual=%.2fs expected=%.2fs tolerance=%.2f", + actual_seconds, + expected_seconds, + SPOTIFY_DURATION_TOLERANCE_SECONDS, + ) + self._set_job_status(job, payload, JOB_STATUS_VALIDATION_FAILED) + return {"status": JOB_STATUS_VALIDATION_FAILED, "file_path": None} + + metadata_obj = self._coerce_music_metadata(metadata) + normalized_metadata = normalize_music_metadata(metadata_obj) + tag_file(file_path, normalized_metadata) + # Record idempotency state only after download and tagging both succeed. + playlist_id = payload.get("playlist_id") + isrc = getattr(metadata, "isrc", None) + if not isrc and isinstance(metadata, dict): + isrc = metadata.get("isrc") + if playlist_id and isrc: + record_downloaded_track(str(playlist_id), str(isrc), file_path) + self._set_job_status(job, payload, JOB_STATUS_COMPLETED) + return {"status": JOB_STATUS_COMPLETED, "file_path": file_path} + except Exception: + logger.exception("music job processing failed") + self._set_job_status(job, payload, JOB_STATUS_FAILED) + return {"status": JOB_STATUS_FAILED, "file_path": None} # Non-music or incomplete payloads use the existing default worker behavior. 
- return self.default_download_and_tag(job) + file_path = self.default_download_and_tag(job) + return {"status": JOB_STATUS_COMPLETED, "file_path": file_path} def default_download_and_tag(self, job: Any) -> str: """Fallback behavior implemented by existing worker flows.""" @@ -115,13 +128,15 @@ def _coerce_music_metadata(metadata: Any) -> MusicMetadata: return metadata payload = metadata if isinstance(metadata, dict) else {} + track_num = safe_int(payload.get("track_num")) + disc_num = safe_int(payload.get("disc_num")) return MusicMetadata( title=str(payload.get("title") or "Unknown Title"), artist=str(payload.get("artist") or "Unknown Artist"), album=str(payload.get("album") or "Unknown Album"), album_artist=str(payload.get("album_artist") or payload.get("artist") or "Unknown Artist"), - track_num=int(payload.get("track_num") or 1), - disc_num=int(payload.get("disc_num") or 1), + track_num=track_num if track_num is not None and track_num > 0 else 1, + disc_num=disc_num if disc_num is not None and disc_num > 0 else 1, date=str(payload.get("date") or "Unknown"), genre=str(payload.get("genre") or "Unknown"), isrc=(str(payload.get("isrc")).strip() if payload.get("isrc") else None), @@ -129,3 +144,25 @@ def _coerce_music_metadata(metadata: Any) -> MusicMetadata: artwork=payload.get("artwork"), lyrics=(str(payload.get("lyrics")).strip() if payload.get("lyrics") else None), ) + + +def safe_int(value: Any) -> Optional[int]: + """Parse an integer from mixed input, returning ``None`` when unavailable. + + The parser extracts the first numeric portion from string inputs, e.g. + ``"01/12" -> 1`` and ``"Disc 1" -> 1``. ``None`` and non-numeric values + return ``None``. 
+ """ + if value is None: + return None + if isinstance(value, bool): + return int(value) + if isinstance(value, int): + return value + match = re.search(r"\d+", str(value)) + if not match: + return None + try: + return int(match.group(0)) + except (TypeError, ValueError): + return None diff --git a/tests/test_worker_return_contract.py b/tests/test_worker_return_contract.py new file mode 100644 index 0000000..d81a0be --- /dev/null +++ b/tests/test_worker_return_contract.py @@ -0,0 +1,51 @@ +from __future__ import annotations + +from types import SimpleNamespace + +from download.worker import JOB_STATUS_VALIDATION_FAILED, DownloadWorker, safe_int + + +class _MockDownloader: + def download(self, media_url: str) -> str: + return "/tmp/mock-track.mp3" + + +def test_process_job_returns_validation_failed_without_file_path(monkeypatch) -> None: + recorded: list[tuple[str, str, str]] = [] + + monkeypatch.setattr("download.worker.validate_duration", lambda *_args, **_kwargs: False) + monkeypatch.setattr("download.worker.get_media_duration", lambda _path: 1.0) + monkeypatch.setattr("download.worker.tag_file", lambda _path, _metadata: None) + monkeypatch.setattr( + "download.worker.record_downloaded_track", + lambda playlist_id, isrc, file_path: recorded.append((playlist_id, isrc, file_path)), + ) + + worker = DownloadWorker(_MockDownloader()) + job = SimpleNamespace( + payload={ + "playlist_id": "playlist-1", + "spotify_track_id": "track-1", + "resolved_media": {"media_url": "https://example.test/audio"}, + "music_metadata": { + "title": "Track One", + "artist": "Artist One", + "isrc": "USABC1234567", + "expected_ms": 180_000, + }, + } + ) + + result = worker.process_job(job) + + assert result == {"status": JOB_STATUS_VALIDATION_FAILED, "file_path": None} + assert job.status == JOB_STATUS_VALIDATION_FAILED + assert recorded == [] + + +def test_safe_int_parses_or_returns_none_for_malformed_values() -> None: + assert safe_int("01/12") == 1 + assert safe_int("") is None + 
assert safe_int(None) is None + assert safe_int("Disc 1") == 1 + assert safe_int("no number") is None From 95eaf8280cb1da1ad2bd0de2fd6f27b16d65b5cc Mon Sep 17 00:00:00 2001 From: z3ro-2 Date: Mon, 16 Feb 2026 14:21:22 -0600 Subject: [PATCH 11/45] feat(worker): enforce canonical music path flow before tagging Imported canonical path helpers in worker.py: build_music_path, ensure_parent_dir Marked the music-processing section with: # === Canonical Path Enforcement Starts Here === Updated music job flow to: derive extension from temp download path build canonical path from normalized metadata ensure parent directories exist move temp file to canonical path tag file at canonical path return canonical path in process_job result Added explicit failure handling: move failure -> log + {"status": "failed", "file_path": None} tagging failure -> log + {"status": "failed", "file_path": None} Ensured idempotency persistence uses final canonical path: record_downloaded_track(..., file_path=str(canonical_path)) only after successful move + tagging never on validation_failed or move/tag failure Added regression coverage: test_worker_canonical_path.py verifies returned canonical path, file moved to canonical location, temp file removed --- .../Unknown Album/Disc 1/01 - Track One.mp3 | 1 + .../Unknown Album/Disc 1/01 - Track Two.mp3 | 1 + .../Unknown Album/Disc 1/01 - Track.mp3 | 1 + download/worker.py | 44 +++++++++- media/path_builder.py | 58 +++++++++++++ tests/test_idempotency_full_pipeline.py | 6 +- tests/test_path_builder.py | 84 +++++++++++++++++++ tests/test_validation_config.py | 5 +- tests/test_worker_canonical_path.py | 47 +++++++++++ 9 files changed, 241 insertions(+), 6 deletions(-) create mode 100644 Music/Artist One/Unknown Album/Disc 1/01 - Track One.mp3 create mode 100644 Music/Artist Two/Unknown Album/Disc 1/01 - Track Two.mp3 create mode 100644 Music/Artist/Unknown Album/Disc 1/01 - Track.mp3 create mode 100644 media/path_builder.py create mode 100644 
tests/test_path_builder.py create mode 100644 tests/test_worker_canonical_path.py diff --git a/Music/Artist One/Unknown Album/Disc 1/01 - Track One.mp3 b/Music/Artist One/Unknown Album/Disc 1/01 - Track One.mp3 new file mode 100644 index 0000000..950ea57 --- /dev/null +++ b/Music/Artist One/Unknown Album/Disc 1/01 - Track One.mp3 @@ -0,0 +1 @@ +mock-audio \ No newline at end of file diff --git a/Music/Artist Two/Unknown Album/Disc 1/01 - Track Two.mp3 b/Music/Artist Two/Unknown Album/Disc 1/01 - Track Two.mp3 new file mode 100644 index 0000000..950ea57 --- /dev/null +++ b/Music/Artist Two/Unknown Album/Disc 1/01 - Track Two.mp3 @@ -0,0 +1 @@ +mock-audio \ No newline at end of file diff --git a/Music/Artist/Unknown Album/Disc 1/01 - Track.mp3 b/Music/Artist/Unknown Album/Disc 1/01 - Track.mp3 new file mode 100644 index 0000000..950ea57 --- /dev/null +++ b/Music/Artist/Unknown Album/Disc 1/01 - Track.mp3 @@ -0,0 +1 @@ +mock-audio \ No newline at end of file diff --git a/download/worker.py b/download/worker.py index 7aa75ef..a1b245a 100644 --- a/download/worker.py +++ b/download/worker.py @@ -4,11 +4,14 @@ import logging import re +import shutil +from pathlib import Path from typing import Any, Optional, Protocol from config.settings import ENABLE_DURATION_VALIDATION, SPOTIFY_DURATION_TOLERANCE_SECONDS from db.downloaded_tracks import record_downloaded_track from media.ffprobe import get_media_duration +from media.path_builder import build_music_path, ensure_parent_dir from media.validation import validate_duration from metadata.normalize import normalize_music_metadata from metadata.tagging import tag_file @@ -89,16 +92,33 @@ def process_job(self, job: Any) -> dict[str, str | None]: metadata_obj = self._coerce_music_metadata(metadata) normalized_metadata = normalize_music_metadata(metadata_obj) - tag_file(file_path, normalized_metadata) + # === Canonical Path Enforcement Starts Here === + temp_path = Path(file_path) + ext = temp_path.suffix.lstrip(".") + root_path = 
self._resolve_music_root(payload) + canonical_path = build_music_path(root_path, normalized_metadata, ext) + ensure_parent_dir(canonical_path) + try: + shutil.move(str(temp_path), str(canonical_path)) + except Exception: + logger.exception("failed to move file to canonical path path=%s", canonical_path) + self._set_job_status(job, payload, JOB_STATUS_FAILED) + return {"status": JOB_STATUS_FAILED, "file_path": None} + try: + tag_file(str(canonical_path), normalized_metadata) + except Exception: + logger.exception("failed to tag canonical file path=%s", canonical_path) + self._set_job_status(job, payload, JOB_STATUS_FAILED) + return {"status": JOB_STATUS_FAILED, "file_path": None} # Record idempotency state only after download and tagging both succeed. playlist_id = payload.get("playlist_id") isrc = getattr(metadata, "isrc", None) if not isrc and isinstance(metadata, dict): isrc = metadata.get("isrc") if playlist_id and isrc: - record_downloaded_track(str(playlist_id), str(isrc), file_path) + record_downloaded_track(str(playlist_id), str(isrc), str(canonical_path)) self._set_job_status(job, payload, JOB_STATUS_COMPLETED) - return {"status": JOB_STATUS_COMPLETED, "file_path": file_path} + return {"status": JOB_STATUS_COMPLETED, "file_path": str(canonical_path)} except Exception: logger.exception("music job processing failed") self._set_job_status(job, payload, JOB_STATUS_FAILED) @@ -145,6 +165,24 @@ def _coerce_music_metadata(metadata: Any) -> MusicMetadata: lyrics=(str(payload.get("lyrics")).strip() if payload.get("lyrics") else None), ) + @staticmethod + def _resolve_music_root(payload: dict[str, Any]) -> Path: + """Resolve music root path from existing payload/config fields.""" + config = payload.get("config") if isinstance(payload, dict) else None + root_value = ( + payload.get("music_root") + or payload.get("destination") + or payload.get("destination_dir") + or payload.get("output_dir") + or (config.get("music_download_folder") if isinstance(config, dict) else 
None) + or "." + ) + root = Path(str(root_value)) + # build_music_path already inserts the "Music/" segment. + if root.name.lower() == "music": + return root.parent if str(root.parent) != "" else Path(".") + return root + def safe_int(value: Any) -> Optional[int]: """Parse an integer from mixed input, returning ``None`` when unavailable. diff --git a/media/path_builder.py b/media/path_builder.py new file mode 100644 index 0000000..9fdfd10 --- /dev/null +++ b/media/path_builder.py @@ -0,0 +1,58 @@ +"""Canonical music path construction utilities.""" + +from __future__ import annotations + +import re +from pathlib import Path + +from metadata.types import MusicMetadata + +_INVALID_FS_CHARS_RE = re.compile(r'[<>:"/\\|?*]') +_MULTISPACE_RE = re.compile(r"\s+") + + +def sanitize_for_filesystem(value: str) -> str: + """Return a filesystem-safe string with invalid characters removed.""" + sanitized = _INVALID_FS_CHARS_RE.sub("", str(value)) + sanitized = _MULTISPACE_RE.sub(" ", sanitized).strip() + sanitized = sanitized.rstrip(" .") + return sanitized or "Unknown" + + +def build_music_path(root: Path, metadata: MusicMetadata, ext: str) -> Path: + """Build and return a canonical music path without creating directories. 
+
+    Layout:
+        Music/
+            {album_artist}/
+                {album} ({year})/
+                    Disc {disc_num}/
+                        {track_num:02d} - {title}.{ext}
+    """
+    album_artist = sanitize_for_filesystem(metadata.album_artist or metadata.artist or "Unknown Artist")
+    album = sanitize_for_filesystem(metadata.album or "Unknown Album")
+    title = sanitize_for_filesystem(metadata.title or "Unknown Title")
+
+    date_value = (metadata.date or "").strip()
+    year = date_value[:4] if len(date_value) >= 4 and date_value[:4].isdigit() else ""
+    album_folder = f"{album} ({year})" if year else album
+
+    track_num_raw = getattr(metadata, "track_num", None)
+    track_num = int(track_num_raw) if isinstance(track_num_raw, int) else 0
+    if track_num < 0:
+        track_num = 0
+
+    disc_num_raw = getattr(metadata, "disc_num", None)
+    disc_num = int(disc_num_raw) if isinstance(disc_num_raw, int) and disc_num_raw > 0 else 1
+
+    extension = str(ext or "").lstrip(".")
+    filename = f"{track_num:02d} - {title}"
+    if extension:
+        filename = f"{filename}.{extension}"
+
+    return root / "Music" / album_artist / album_folder / f"Disc {disc_num}" / filename
+
+
+def ensure_parent_dir(path: Path) -> None:
+    """Ensure the parent directory for a file path exists."""
+    path.parent.mkdir(parents=True, exist_ok=True)
diff --git a/tests/test_idempotency_full_pipeline.py b/tests/test_idempotency_full_pipeline.py
index f0ee720..0389489 100644
--- a/tests/test_idempotency_full_pipeline.py
+++ b/tests/test_idempotency_full_pipeline.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import asyncio
+from pathlib import Path
 from types import SimpleNamespace
 from typing import Any
 
@@ -34,7 +35,9 @@ def __init__(self) -> None:
     def download(self, media_url: str) -> str:
         self.calls.append(media_url)
         tail = media_url.rsplit("/", 1)[-1] or "track"
-        return f"/tmp/{tail}.mp3"
+        path = Path(f"/tmp/{tail}.mp3")
+        path.write_bytes(b"mock-audio")
+        return str(path)
 
 
 def test_idempotency_full_pipeline_two_tracks(tmp_path, monkeypatch) -> None:
@@ -104,4 +107,3 @@ def 
test_idempotency_full_pipeline_two_tracks(tmp_path, monkeypatch) -> None: assert len(queue.items) == 2 assert search_service.calls == first_pass_queries - diff --git a/tests/test_path_builder.py b/tests/test_path_builder.py new file mode 100644 index 0000000..c1d0000 --- /dev/null +++ b/tests/test_path_builder.py @@ -0,0 +1,84 @@ +from __future__ import annotations + +from pathlib import Path + +from media.path_builder import build_music_path +from metadata.types import MusicMetadata + + +def _metadata(**overrides) -> MusicMetadata: + base = { + "title": "Track Title", + "artist": "Artist Name", + "album": "Album Name", + "album_artist": "Artist Name", + "track_num": 1, + "disc_num": 1, + "date": "2024-01-10", + "genre": "Pop", + } + base.update(overrides) + return MusicMetadata(**base) + + +def test_single_disc_album_with_year() -> None: + path = build_music_path(Path("/library"), _metadata(), "mp3") + + assert path == Path("/library/Music/Artist Name/Album Name (2024)/Disc 1/01 - Track Title.mp3") + + +def test_multi_disc_album() -> None: + path = build_music_path(Path("/library"), _metadata(disc_num=2, track_num=7), "flac") + + assert path == Path("/library/Music/Artist Name/Album Name (2024)/Disc 2/07 - Track Title.flac") + + +def test_missing_year_omits_parentheses() -> None: + metadata = _metadata() + metadata.date = "" + + path = build_music_path(Path("/library"), metadata, "m4a") + + assert path == Path("/library/Music/Artist Name/Album Name/Disc 1/01 - Track Title.m4a") + + +def test_missing_disc_num_defaults_to_disc_1() -> None: + metadata = _metadata() + metadata.disc_num = None # type: ignore[assignment] + + path = build_music_path(Path("/library"), metadata, "mp3") + + assert path == Path("/library/Music/Artist Name/Album Name (2024)/Disc 1/01 - Track Title.mp3") + + +def test_missing_track_num_defaults_to_00() -> None: + metadata = _metadata() + metadata.track_num = None # type: ignore[assignment] + + path = build_music_path(Path("/library"), 
metadata, "mp3") + + assert path == Path("/library/Music/Artist Name/Album Name (2024)/Disc 1/00 - Track Title.mp3") + + +def test_unicode_characters_are_preserved() -> None: + metadata = _metadata( + album_artist="Beyoncé", + title="Café del Mar", + album="Été", + ) + + path = build_music_path(Path("/library"), metadata, "mp3") + + assert path == Path("/library/Music/Beyoncé/Été (2024)/Disc 1/01 - Café del Mar.mp3") + + +def test_invalid_filesystem_characters_are_removed() -> None: + metadata = _metadata( + album_artist='A<>:"/\\|?*rtist', + album='Alb<>:"/\\|?*um', + title='Ti<>:"/\\|?*tle', + ) + + path = build_music_path(Path("/library"), metadata, "mp3") + + assert path == Path("/library/Music/Artist/Album (2024)/Disc 1/01 - Title.mp3") diff --git a/tests/test_validation_config.py b/tests/test_validation_config.py index 14927fb..a3066df 100644 --- a/tests/test_validation_config.py +++ b/tests/test_validation_config.py @@ -1,5 +1,6 @@ from __future__ import annotations +from pathlib import Path from types import SimpleNamespace from download.worker import ( @@ -11,7 +12,9 @@ class _MockDownloader: def download(self, media_url: str) -> str: - return "/tmp/mock-audio.mp3" + path = Path("/tmp/mock-audio.mp3") + path.write_bytes(b"mock-audio") + return str(path) def _job() -> SimpleNamespace: diff --git a/tests/test_worker_canonical_path.py b/tests/test_worker_canonical_path.py new file mode 100644 index 0000000..af9f730 --- /dev/null +++ b/tests/test_worker_canonical_path.py @@ -0,0 +1,47 @@ +from __future__ import annotations + +from pathlib import Path +from types import SimpleNamespace + +from download.worker import DownloadWorker, JOB_STATUS_COMPLETED + + +class _MockDownloader: + def __init__(self, temp_path: Path) -> None: + self.temp_path = temp_path + + def download(self, media_url: str) -> str: + self.temp_path.write_bytes(b"mock-audio") + return str(self.temp_path) + + +def test_worker_moves_to_canonical_path_and_returns_it(tmp_path, monkeypatch) -> 
None: + root = tmp_path / "Music" + temp_file = tmp_path / "download-temp.mp3" + + monkeypatch.setattr("download.worker.tag_file", lambda _path, _metadata: None) + + worker = DownloadWorker(_MockDownloader(temp_file)) + job = SimpleNamespace( + payload={ + "music_root": str(root), + "resolved_media": {"media_url": "https://example.test/audio"}, + "music_metadata": { + "album_artist": "Artist", + "artist": "Artist", + "album": "Album", + "date": "2020", + "disc_num": 2, + "track_num": 3, + "title": "Song", + "genre": "Pop", + }, + } + ) + + result = worker.process_job(job) + + expected = root / "Artist" / "Album (2020)" / "Disc 2" / "03 - Song.mp3" + assert result == {"status": JOB_STATUS_COMPLETED, "file_path": str(expected)} + assert expected.exists() is True + assert temp_file.exists() is False From d509162a6958aef49f967c09036ffec1f61fa99a Mon Sep 17 00:00:00 2001 From: z3ro-2 Date: Mon, 16 Feb 2026 14:34:41 -0600 Subject: [PATCH 12/45] feat(playlists): add deterministic M3U export with safe naming and overwrite semantics Added export.py with write_m3u(playlist_root, playlist_name, track_paths): Creates/overwrites {playlist_name}.m3u under playlist root Ensures playlist root exists Writes UTF-8 output Writes paths relative to configured music root via .relative_to() Skips non-existing tracks and tracks outside music root Performs atomic overwrite (temp file + replace) Added explicit playlist name sanitizer: sanitize_playlist_name(name: str) -> str Removes invalid filesystem characters <>:"/\\|?* Collapses whitespace Strips trailing spaces/dots Wired into write_m3u Added test_playlist_export.py: Verifies file creation Verifies relative-path entries Verifies skipping missing files Verifies clean overwrite behavior after second write --- playlist/export.py | 62 +++++++++++++++++++++++++++++++++++ tests/test_playlist_export.py | 44 +++++++++++++++++++++++++ 2 files changed, 106 insertions(+) create mode 100644 playlist/export.py create mode 100644 
tests/test_playlist_export.py diff --git a/playlist/export.py b/playlist/export.py new file mode 100644 index 0000000..f2938bf --- /dev/null +++ b/playlist/export.py @@ -0,0 +1,62 @@ +"""Playlist export helpers.""" + +from __future__ import annotations + +import os +import re +from pathlib import Path +from typing import Iterable + +_INVALID_FS_CHARS_RE = re.compile(r'[<>:"/\\|?*]') +_MULTISPACE_RE = re.compile(r"\s+") +_DEFAULT_MUSIC_ROOT = Path("Music") + + +def write_m3u(playlist_root: Path, playlist_name: str, track_paths: Iterable[Path]) -> Path: + """Create or overwrite an M3U playlist file. + + Rules: + - Playlist files live under ``playlist_root``. + - Filename format is ``{playlist_name}.m3u``. + - Paths are written relative to configured music root. + - Missing tracks are skipped. + - Writes are atomic (temp file then replace). + """ + root = Path(playlist_root) + root.mkdir(parents=True, exist_ok=True) + + safe_name = sanitize_playlist_name(playlist_name) or "playlist" + target_path = root / f"{safe_name}.m3u" + temp_path = root / f".{safe_name}.m3u.tmp" + + music_root = _configured_music_root().resolve() + lines: list[str] = ["#EXTM3U"] + for track_path in track_paths: + candidate = Path(track_path) + if not candidate.exists(): + continue + try: + rel_path = candidate.resolve().relative_to(music_root) + except ValueError: + continue + lines.append(rel_path.as_posix()) + + content = "\n".join(lines) + "\n" + temp_path.write_text(content, encoding="utf-8") + temp_path.replace(target_path) + return target_path + + +def _configured_music_root() -> Path: + value = (os.environ.get("RETREIVR_MUSIC_ROOT") or "").strip() + if value: + return Path(value) + return _DEFAULT_MUSIC_ROOT + + +def sanitize_playlist_name(name: str) -> str: + """Return a filesystem-safe playlist name.""" + value = name + text = _INVALID_FS_CHARS_RE.sub("", str(value)) + text = _MULTISPACE_RE.sub(" ", text).strip() + return text.rstrip(" .") diff --git a/tests/test_playlist_export.py 
b/tests/test_playlist_export.py new file mode 100644 index 0000000..d3c6188 --- /dev/null +++ b/tests/test_playlist_export.py @@ -0,0 +1,44 @@ +from __future__ import annotations + +from pathlib import Path + +from playlist.export import write_m3u + + +def test_write_m3u_writes_relative_paths_skips_missing_and_overwrites(tmp_path, monkeypatch) -> None: + music_root = tmp_path / "Music" + monkeypatch.setenv("RETREIVR_MUSIC_ROOT", str(music_root)) + + track_one = music_root / "Artist A" / "Album A (2020)" / "Disc 1" / "01 - Song One.mp3" + track_two = music_root / "Artist A" / "Album A (2020)" / "Disc 1" / "02 - Song Two.mp3" + missing = music_root / "Artist A" / "Album A (2020)" / "Disc 1" / "03 - Missing.mp3" + track_one.parent.mkdir(parents=True, exist_ok=True) + track_one.write_bytes(b"a") + track_two.write_bytes(b"b") + + playlist_root = tmp_path / "playlists" + + first_path = write_m3u( + playlist_root=playlist_root, + playlist_name="My: Playlist", + track_paths=[track_one, missing, track_two], + ) + + assert first_path.exists() is True + assert first_path.name == "My Playlist.m3u" + first_content = first_path.read_text(encoding="utf-8") + assert "#EXTM3U" in first_content + assert "Artist A/Album A (2020)/Disc 1/01 - Song One.mp3" in first_content + assert "Artist A/Album A (2020)/Disc 1/02 - Song Two.mp3" in first_content + assert "03 - Missing.mp3" not in first_content + + second_path = write_m3u( + playlist_root=playlist_root, + playlist_name="My: Playlist", + track_paths=[track_two], + ) + + assert second_path == first_path + second_content = second_path.read_text(encoding="utf-8") + assert "01 - Song One.mp3" not in second_content + assert "02 - Song Two.mp3" in second_content From 7cd79f545be6959bfbc1de6108bac580daa64a1e Mon Sep 17 00:00:00 2001 From: z3ro-2 Date: Mon, 16 Feb 2026 14:38:21 -0600 Subject: [PATCH 13/45] feat(playlists): add M3U rebuild pipeline and watcher integration after Spotify sync Added rebuild.py with reusable helper: 
rebuild_playlist_from_tracks(playlist_name, playlist_root, music_root, track_file_paths) Rebuilds playlist M3U from canonical absolute DB file paths via write_m3u Added deterministic rebuild test: test_playlist_rebuild.py Verifies existing-only inclusion and relative path output Integrated post-sync playlist export into watcher: Updated spotify_playlist_watch.py After successful snapshot store, loads downloaded canonical file paths from downloaded_music_tracks Rebuilds M3U using configured playlist/music directories Logs summary: Playlist M3U updated: {playlist_name} ({count} tracks) Wrapped rebuild in best-effort error handling so scheduler/watcher flow never crashes on M3U failures Added watcher integration test: test_playlist_watcher_m3u.py Confirms rebuild is called after successful sync and generated M3U contains expected paths --- Music/Playlists/playlist-1.m3u | 1 + playlist/rebuild.py | 45 ++++++++++ retreivr.sqlite3 | 0 scheduler/jobs/spotify_playlist_watch.py | 74 +++++++++++++++- tests/test_playlist_rebuild.py | 31 +++++++ tests/test_playlist_watcher_m3u.py | 108 +++++++++++++++++++++++ 6 files changed, 258 insertions(+), 1 deletion(-) create mode 100644 Music/Playlists/playlist-1.m3u create mode 100644 playlist/rebuild.py create mode 100644 retreivr.sqlite3 create mode 100644 tests/test_playlist_rebuild.py create mode 100644 tests/test_playlist_watcher_m3u.py diff --git a/Music/Playlists/playlist-1.m3u b/Music/Playlists/playlist-1.m3u new file mode 100644 index 0000000..fcd7187 --- /dev/null +++ b/Music/Playlists/playlist-1.m3u @@ -0,0 +1 @@ +#EXTM3U diff --git a/playlist/rebuild.py b/playlist/rebuild.py new file mode 100644 index 0000000..b8d898a --- /dev/null +++ b/playlist/rebuild.py @@ -0,0 +1,45 @@ +"""Playlist rebuild helpers.""" + +from __future__ import annotations + +import os +from contextlib import contextmanager +from pathlib import Path +from typing import Iterable, Iterator + +from playlist.export import write_m3u + + +def 
rebuild_playlist_from_tracks( + playlist_name: str, + playlist_root: Path, + music_root: Path, + track_file_paths: Iterable[str], +) -> Path: + """Rebuild a playlist M3U file from canonical track file paths. + + Args: + playlist_name: Playlist display name used to derive M3U filename. + playlist_root: Directory where the resulting M3U file is stored. + music_root: Root directory used for relative path entries. + track_file_paths: Absolute canonical file paths loaded from storage. + + Returns: + Final path to the rebuilt M3U file. + """ + track_paths = [Path(path) for path in track_file_paths if str(path).strip()] + with _music_root_env(music_root): + return write_m3u(playlist_root=playlist_root, playlist_name=playlist_name, track_paths=track_paths) + + +@contextmanager +def _music_root_env(music_root: Path) -> Iterator[None]: + previous = os.environ.get("RETREIVR_MUSIC_ROOT") + os.environ["RETREIVR_MUSIC_ROOT"] = str(music_root) + try: + yield + finally: + if previous is None: + os.environ.pop("RETREIVR_MUSIC_ROOT", None) + else: + os.environ["RETREIVR_MUSIC_ROOT"] = previous diff --git a/retreivr.sqlite3 b/retreivr.sqlite3 new file mode 100644 index 0000000..e69de29 diff --git a/scheduler/jobs/spotify_playlist_watch.py b/scheduler/jobs/spotify_playlist_watch.py index 214cfa4..8ffec29 100644 --- a/scheduler/jobs/spotify_playlist_watch.py +++ b/scheduler/jobs/spotify_playlist_watch.py @@ -4,10 +4,14 @@ import asyncio import logging +import os +import sqlite3 +from pathlib import Path from typing import Any, Callable from db.downloaded_tracks import has_downloaded_isrc from metadata.merge import merge_metadata +from playlist.rebuild import rebuild_playlist_from_tracks from spotify.client import SpotifyPlaylistClient, get_playlist_items from spotify.diff import diff_playlist from spotify.resolve import resolve_spotify_track @@ -35,6 +39,51 @@ def _run_async(coro): return None +def _resolve_db_path() -> str: + return os.environ.get("RETREIVR_DB_PATH", 
os.path.join(os.getcwd(), "retreivr.sqlite3")) + + +def _load_downloaded_track_paths(playlist_id: str) -> list[str]: + conn: sqlite3.Connection | None = None + try: + conn = sqlite3.connect(_resolve_db_path(), check_same_thread=False, timeout=30) + conn.row_factory = sqlite3.Row + cur = conn.cursor() + cur.execute( + """ + SELECT file_path + FROM downloaded_music_tracks + WHERE playlist_id=? + ORDER BY downloaded_at ASC, id ASC + """, + (playlist_id,), + ) + rows = cur.fetchall() + return [str(row["file_path"]) for row in rows if row["file_path"]] + except sqlite3.Error: + logging.exception("Failed to load downloaded tracks for playlist %s", playlist_id) + return [] + finally: + try: + if conn is not None: + conn.close() + except Exception: + pass + + +def _resolve_playlist_dirs(config: dict[str, Any] | None) -> tuple[Path, Path]: + cfg = config or {} + music_root = Path(str(cfg.get("music_download_folder") or "Music")) + playlist_root = Path( + str( + cfg.get("playlists_folder") + or cfg.get("playlist_export_folder") + or (music_root / "Playlists") + ) + ) + return playlist_root, music_root + + def _enqueue_added_track(queue: Any, item: dict[str, Any]) -> None: if callable(queue): queue(item) @@ -70,7 +119,15 @@ async def enqueue_spotify_track(queue, spotify_track: dict, search_service, play queue.enqueue(payload) -def playlist_watch_job(spotify_client, db, queue, playlist_id: str) -> dict[str, Any]: +def playlist_watch_job( + spotify_client, + db, + queue, + playlist_id: str, + *, + playlist_name: str | None = None, + config: dict[str, Any] | None = None, +) -> dict[str, Any]: """Fetch playlist snapshot, diff with DB state, enqueue added tracks, and persist new snapshot.""" pid = (playlist_id or "").strip() if not pid: @@ -126,6 +183,21 @@ def playlist_watch_job(spotify_client, db, queue, playlist_id: str) -> dict[str, "enqueue_errors": enqueue_errors, } + # Best effort: refresh the playlist M3U from canonical downloaded file paths. 
+ try: + track_paths = _load_downloaded_track_paths(pid) + resolved_playlist_name = (playlist_name or pid).strip() or pid + playlist_root, music_root = _resolve_playlist_dirs(config) + rebuild_playlist_from_tracks( + playlist_name=resolved_playlist_name, + playlist_root=playlist_root, + music_root=music_root, + track_file_paths=track_paths, + ) + logging.info("Playlist M3U updated: %s (%d tracks)", resolved_playlist_name, len(track_paths)) + except Exception: + logging.exception("Playlist M3U rebuild failed for playlist %s", pid) + return { "status": "updated", "playlist_id": pid, diff --git a/tests/test_playlist_rebuild.py b/tests/test_playlist_rebuild.py new file mode 100644 index 0000000..0e8983e --- /dev/null +++ b/tests/test_playlist_rebuild.py @@ -0,0 +1,31 @@ +from __future__ import annotations + +from pathlib import Path + +from playlist.rebuild import rebuild_playlist_from_tracks + + +def test_rebuild_playlist_from_tracks_writes_existing_relative_entries(tmp_path) -> None: + music_root = tmp_path / "Music" + playlist_root = tmp_path / "Playlists" + + track_one = music_root / "Artist" / "Album (2020)" / "Disc 1" / "01 - Song One.mp3" + track_two = music_root / "Artist" / "Album (2020)" / "Disc 1" / "02 - Song Two.mp3" + missing = music_root / "Artist" / "Album (2020)" / "Disc 1" / "03 - Missing.mp3" + track_one.parent.mkdir(parents=True, exist_ok=True) + track_one.write_bytes(b"a") + track_two.write_bytes(b"b") + + result_path = rebuild_playlist_from_tracks( + playlist_name="My Playlist", + playlist_root=playlist_root, + music_root=music_root, + track_file_paths=[str(track_one), str(missing), str(track_two)], + ) + + assert result_path.exists() is True + content = result_path.read_text(encoding="utf-8") + assert "#EXTM3U" in content + assert "Artist/Album (2020)/Disc 1/01 - Song One.mp3" in content + assert "Artist/Album (2020)/Disc 1/02 - Song Two.mp3" in content + assert "03 - Missing.mp3" not in content diff --git a/tests/test_playlist_watcher_m3u.py 
b/tests/test_playlist_watcher_m3u.py new file mode 100644 index 0000000..9e6100d --- /dev/null +++ b/tests/test_playlist_watcher_m3u.py @@ -0,0 +1,108 @@ +from __future__ import annotations + +from pathlib import Path +from typing import Any + +from playlist.rebuild import rebuild_playlist_from_tracks as _real_rebuild +from scheduler.jobs.spotify_playlist_watch import playlist_watch_job + + +def _item(track_id: str, position: int) -> dict[str, Any]: + return { + "spotify_track_id": track_id, + "position": position, + "added_at": f"2026-02-16T00:0{position}:00Z", + "artist": f"artist-{track_id}", + "title": f"title-{track_id}", + "album": f"album-{track_id}", + "duration_ms": 1000 + position, + "isrc": f"isrc-{track_id}", + } + + +class _MockSpotifyClient: + def __init__(self, snapshot_id: str, items: list[dict[str, Any]]) -> None: + self.snapshot_id = snapshot_id + self.items = items + + def get_playlist_items(self, playlist_id: str) -> tuple[str, list[dict[str, Any]]]: + return self.snapshot_id, list(self.items) + + +class _MockSnapshotStore: + def __init__(self, latest_snapshot: dict[str, Any] | None) -> None: + self.latest_snapshot = latest_snapshot + self.store_calls: list[tuple[str, str, list[dict[str, Any]]]] = [] + + def get_latest_snapshot(self, playlist_id: str) -> dict[str, Any] | None: + return self.latest_snapshot + + def store_snapshot(self, playlist_id: str, snapshot_id: str, items: list[dict[str, Any]]) -> Any: + self.store_calls.append((playlist_id, snapshot_id, list(items))) + return type("WriteResult", (), {"snapshot_db_id": 42})() + + +def test_playlist_watch_job_rebuilds_m3u_after_successful_sync(tmp_path, monkeypatch) -> None: + music_root = tmp_path / "Music" + playlist_root = tmp_path / "Playlists" + track_paths: list[str] = [] + for n in (1, 2, 3): + track = music_root / "Artist A" / "Album A (2020)" / "Disc 1" / f"{n:02d} - Song {n}.mp3" + track.parent.mkdir(parents=True, exist_ok=True) + track.write_bytes(b"x") + 
track_paths.append(str(track)) + + monkeypatch.setattr( + "scheduler.jobs.spotify_playlist_watch._load_downloaded_track_paths", + lambda playlist_id: list(track_paths), + ) + monkeypatch.setattr( + "scheduler.jobs.spotify_playlist_watch._resolve_playlist_dirs", + lambda config: (playlist_root, music_root), + ) + + calls: list[dict[str, Any]] = [] + + def _spy_rebuild(playlist_name, playlist_root, music_root, track_file_paths): + calls.append( + { + "playlist_name": playlist_name, + "playlist_root": Path(playlist_root), + "music_root": Path(music_root), + "track_file_paths": list(track_file_paths), + } + ) + return _real_rebuild( + playlist_name=playlist_name, + playlist_root=Path(playlist_root), + music_root=Path(music_root), + track_file_paths=track_file_paths, + ) + + monkeypatch.setattr("scheduler.jobs.spotify_playlist_watch.rebuild_playlist_from_tracks", _spy_rebuild) + + prev_items = [_item("a", 0)] + curr_items = [_item("a", 0), _item("b", 1)] + store = _MockSnapshotStore({"snapshot_id": "snap-1", "items": prev_items}) + client = _MockSpotifyClient("snap-2", curr_items) + enqueued: list[str] = [] + + result = playlist_watch_job( + client, + store, + lambda item: enqueued.append(str(item["spotify_track_id"])), + "playlist-1", + playlist_name="Country Bangers", + ) + + assert result["status"] == "updated" + assert len(calls) == 1 + assert calls[0]["playlist_name"] == "Country Bangers" + assert calls[0]["track_file_paths"] == track_paths + + m3u_path = playlist_root / "Country Bangers.m3u" + assert m3u_path.exists() is True + content = m3u_path.read_text(encoding="utf-8") + assert "Artist A/Album A (2020)/Disc 1/01 - Song 1.mp3" in content + assert "Artist A/Album A (2020)/Disc 1/02 - Song 2.mp3" in content + assert "Artist A/Album A (2020)/Disc 1/03 - Song 3.mp3" in content From 6ddab0b88fe313ebd5dddff9f2a09e243ffba76e Mon Sep 17 00:00:00 2001 From: z3ro-2 Date: Mon, 16 Feb 2026 14:43:48 -0600 Subject: [PATCH 14/45] feat(spotify-liked-songs): add virtual 
playlist scaffolding and M3U compatibility Added Spotify Liked Songs virtual playlist scaffolding in spotify_playlist_watch.py: SPOTIFY_LIKED_SONGS_PLAYLIST_ID = "__spotify_liked_songs__" get_liked_songs_playlist_name() -> "Spotify - Liked Songs" Added placeholder sync entrypoint: run_liked_songs_sync() Includes future OAuth flow docstring and logs: "Liked Songs sync not enabled (OAuth required)" Not wired into scheduler yet Hardened rebuild.py for virtual playlist usage: Normalizes/cleans playlist_name before calling write_m3u No playlist-ID assumptions or special-case logic Added regression test for virtual playlist M3U flow: test_liked_songs_virtual_playlist.py Verifies liked-songs name and canonical-path M3U generation work end-to-end --- playlist/rebuild.py | 7 ++++- scheduler/jobs/spotify_playlist_watch.py | 20 +++++++++++++++ tests/test_liked_songs_virtual_playlist.py | 30 ++++++++++++++++++++++ 3 files changed, 56 insertions(+), 1 deletion(-) create mode 100644 tests/test_liked_songs_virtual_playlist.py diff --git a/playlist/rebuild.py b/playlist/rebuild.py index b8d898a..5e1a607 100644 --- a/playlist/rebuild.py +++ b/playlist/rebuild.py @@ -27,9 +27,14 @@ def rebuild_playlist_from_tracks( Returns: Final path to the rebuilt M3U file. 
""" + normalized_playlist_name = str(playlist_name or "").strip() or "playlist" track_paths = [Path(path) for path in track_file_paths if str(path).strip()] with _music_root_env(music_root): - return write_m3u(playlist_root=playlist_root, playlist_name=playlist_name, track_paths=track_paths) + return write_m3u( + playlist_root=playlist_root, + playlist_name=normalized_playlist_name, + track_paths=track_paths, + ) @contextmanager diff --git a/scheduler/jobs/spotify_playlist_watch.py b/scheduler/jobs/spotify_playlist_watch.py index 8ffec29..2fa6267 100644 --- a/scheduler/jobs/spotify_playlist_watch.py +++ b/scheduler/jobs/spotify_playlist_watch.py @@ -16,6 +16,8 @@ from spotify.diff import diff_playlist from spotify.resolve import resolve_spotify_track +SPOTIFY_LIKED_SONGS_PLAYLIST_ID = "__spotify_liked_songs__" + def _load_previous_snapshot(db: Any, playlist_id: str) -> tuple[str | None, list[dict[str, Any]]]: if not hasattr(db, "get_latest_snapshot"): @@ -31,6 +33,24 @@ def _load_previous_snapshot(db: Any, playlist_id: str) -> tuple[str | None, list return None, [] +def get_liked_songs_playlist_name() -> str: + """Return the virtual playlist display name for Spotify Liked Songs.""" + return "Spotify - Liked Songs" + + +def run_liked_songs_sync() -> None: + """Placeholder for future OAuth-based liked songs sync. + + Will: + - Fetch /me/tracks + - Diff snapshot + - Enqueue new tracks + - Rebuild M3U + Currently not implemented. 
+ """ + logging.info("Liked Songs sync not enabled (OAuth required)") + + def _run_async(coro): try: asyncio.get_running_loop() diff --git a/tests/test_liked_songs_virtual_playlist.py b/tests/test_liked_songs_virtual_playlist.py new file mode 100644 index 0000000..932ec6c --- /dev/null +++ b/tests/test_liked_songs_virtual_playlist.py @@ -0,0 +1,30 @@ +from __future__ import annotations + +from scheduler.jobs.spotify_playlist_watch import SPOTIFY_LIKED_SONGS_PLAYLIST_ID +from playlist.rebuild import rebuild_playlist_from_tracks + + +def test_liked_songs_virtual_playlist_rebuild_creates_m3u(tmp_path) -> None: + assert SPOTIFY_LIKED_SONGS_PLAYLIST_ID == "__spotify_liked_songs__" + + music_root = tmp_path / "Music" + playlist_root = tmp_path / "Playlists" + track_one = music_root / "Artist A" / "Album A (2020)" / "Disc 1" / "01 - Song One.mp3" + track_two = music_root / "Artist B" / "Album B (2021)" / "Disc 1" / "02 - Song Two.mp3" + track_one.parent.mkdir(parents=True, exist_ok=True) + track_two.parent.mkdir(parents=True, exist_ok=True) + track_one.write_bytes(b"a") + track_two.write_bytes(b"b") + + result_path = rebuild_playlist_from_tracks( + playlist_name="Spotify - Liked Songs", + playlist_root=playlist_root, + music_root=music_root, + track_file_paths=[str(track_one), str(track_two)], + ) + + assert result_path.exists() is True + assert result_path.name == "Spotify - Liked Songs.m3u" + content = result_path.read_text(encoding="utf-8") + assert "Artist A/Album A (2020)/Disc 1/01 - Song One.mp3" in content + assert "Artist B/Album B (2021)/Disc 1/02 - Song Two.mp3" in content From 7d097107259f7d96dddff5565231e525e7396aa3 Mon Sep 17 00:00:00 2001 From: z3ro-2 Date: Mon, 16 Feb 2026 14:47:04 -0600 Subject: [PATCH 15/45] feat(search): add intent-routing surface and virtual liked-songs compatibility scaffolding Added pure intent routing module intent_router.py: IntentType enum and Intent dataclass detect_intent(user_input) for deterministic detection of: Spotify 
album/playlist/track/artist URLs YouTube playlist URLs (list= query param) fallback SEARCH No network/UI/ingestion coupling Added intent router tests test_intent_router.py: Covers Spotify URL variants, YouTube playlist URL, plain search text, and malformed URL fallback Wired API search handler to intent router in main.py: POST /api/search/requests now runs detect_intent on raw query For non-SEARCH intents, returns structured detection response: {"detected_intent": "", "identifier": ""} For SEARCH, existing behavior remains unchanged Hardened virtual playlist rebuild compatibility: rebuild.py now normalizes/cleans playlist_name generically with no ID-format assumptions Added virtual liked-songs playlist regression test: test_liked_songs_virtual_playlist.py verifies canonical-path M3U generation for "Spotify - Liked Songs" --- api/main.py | 8 ++++ input/intent_router.py | 85 +++++++++++++++++++++++++++++++++++++ tests/test_intent_router.py | 45 ++++++++++++++++++++ 3 files changed, 138 insertions(+) create mode 100644 input/intent_router.py create mode 100644 tests/test_intent_router.py diff --git a/api/main.py b/api/main.py index cb06ec6..0bfe470 100644 --- a/api/main.py +++ b/api/main.py @@ -107,6 +107,7 @@ def _require_python_311(): resolve_dir, ) from engine.runtime import get_runtime_info +from input.intent_router import IntentType, detect_intent APP_NAME = "Retreivr API" STATUS_SCHEMA_VERSION = 1 @@ -3289,6 +3290,13 @@ async def create_search_request(request: dict = Body(...)): if normalized["delivery_mode"] == "client" and not normalized["search_only"]: raise HTTPException(status_code=400, detail="Search & Download is not available for client delivery") + intent = detect_intent(str(normalized.get("query") or "")) + if intent.type != IntentType.SEARCH: + return { + "detected_intent": intent.type.value, + "identifier": intent.identifier, + } + if "source_priority" not in raw_payload or not raw_payload.get("source_priority"): raw_payload["source_priority"] = 
normalized["sources"] if "auto_enqueue" not in raw_payload: diff --git a/input/intent_router.py b/input/intent_router.py new file mode 100644 index 0000000..0e35324 --- /dev/null +++ b/input/intent_router.py @@ -0,0 +1,85 @@ +"""Intent routing helpers for raw homepage input.""" + +from __future__ import annotations + +from dataclasses import dataclass +from enum import Enum +from typing import Optional +from urllib.parse import parse_qs, urlparse + + +class IntentType(Enum): + SPOTIFY_ALBUM = "spotify_album" + SPOTIFY_PLAYLIST = "spotify_playlist" + SPOTIFY_TRACK = "spotify_track" + SPOTIFY_ARTIST = "spotify_artist" + YOUTUBE_PLAYLIST = "youtube_playlist" + SEARCH = "search" + + +@dataclass +class Intent: + type: IntentType + identifier: str # ID extracted or original search string + + +def detect_intent(user_input: str) -> Intent: + """Detect intent from user input without network calls. + + Rules: + - Detect Spotify URLs for album/playlist/track/artist. + - Detect YouTube playlist URLs via ``list=`` query parameter. + - Otherwise treat input as plain ``SEARCH``. + - Extract clean IDs without query strings. 
+ """ + raw = (user_input or "").strip() + if not raw: + return Intent(type=IntentType.SEARCH, identifier="") + + spotify_album = _extract_spotify_id(raw, "album") + if spotify_album: + return Intent(type=IntentType.SPOTIFY_ALBUM, identifier=spotify_album) + + spotify_playlist = _extract_spotify_id(raw, "playlist") + if spotify_playlist: + return Intent(type=IntentType.SPOTIFY_PLAYLIST, identifier=spotify_playlist) + + spotify_track = _extract_spotify_id(raw, "track") + if spotify_track: + return Intent(type=IntentType.SPOTIFY_TRACK, identifier=spotify_track) + + spotify_artist = _extract_spotify_id(raw, "artist") + if spotify_artist: + return Intent(type=IntentType.SPOTIFY_ARTIST, identifier=spotify_artist) + + youtube_playlist = _extract_youtube_playlist_id(raw) + if youtube_playlist: + return Intent(type=IntentType.YOUTUBE_PLAYLIST, identifier=youtube_playlist) + + return Intent(type=IntentType.SEARCH, identifier=raw) + + +def _extract_spotify_id(raw: str, resource: str) -> Optional[str]: + parsed = urlparse(raw) + if parsed.scheme and "spotify.com" in (parsed.netloc or "").lower(): + parts = [segment for segment in (parsed.path or "").split("/") if segment] + if len(parts) >= 2 and parts[0].lower() == resource: + return _clean_identifier(parts[1]) + return None + + +def _extract_youtube_playlist_id(raw: str) -> Optional[str]: + parsed = urlparse(raw) + if not parsed.scheme: + return None + netloc = (parsed.netloc or "").lower() + if "youtube.com" not in netloc and "youtu.be" not in netloc: + return None + values = parse_qs(parsed.query).get("list") + if not values: + return None + return _clean_identifier(values[0]) + + +def _clean_identifier(value: str) -> str: + return (value or "").split("?", 1)[0].strip().strip("/") diff --git a/tests/test_intent_router.py b/tests/test_intent_router.py new file mode 100644 index 0000000..08bb0fb --- /dev/null +++ b/tests/test_intent_router.py @@ -0,0 +1,45 @@ +from __future__ import annotations + +from input.intent_router 
import IntentType, detect_intent + + +def test_detect_spotify_album_with_query_string() -> None: + intent = detect_intent("https://open.spotify.com/album/1A2B3C4D5E?si=abc123") + assert intent.type == IntentType.SPOTIFY_ALBUM + assert intent.identifier == "1A2B3C4D5E" + + +def test_detect_spotify_playlist_url() -> None: + intent = detect_intent("https://open.spotify.com/playlist/37i9dQZF1DX1lVhptIYRda") + assert intent.type == IntentType.SPOTIFY_PLAYLIST + assert intent.identifier == "37i9dQZF1DX1lVhptIYRda" + + +def test_detect_spotify_track_url() -> None: + intent = detect_intent("https://open.spotify.com/track/6rqhFgbbKwnb9MLmUQDhG6") + assert intent.type == IntentType.SPOTIFY_TRACK + assert intent.identifier == "6rqhFgbbKwnb9MLmUQDhG6" + + +def test_detect_spotify_artist_url() -> None: + intent = detect_intent("https://open.spotify.com/artist/1dfeR4HaWDbWqFHLkxsg1d") + assert intent.type == IntentType.SPOTIFY_ARTIST + assert intent.identifier == "1dfeR4HaWDbWqFHLkxsg1d" + + +def test_detect_youtube_playlist_url() -> None: + intent = detect_intent("https://www.youtube.com/watch?v=abc123&list=PL1234567890XYZ") + assert intent.type == IntentType.YOUTUBE_PLAYLIST + assert intent.identifier == "PL1234567890XYZ" + + +def test_detect_plain_text_search() -> None: + intent = detect_intent("best synthwave tracks") + assert intent.type == IntentType.SEARCH + assert intent.identifier == "best synthwave tracks" + + +def test_detect_malformed_url_falls_back_to_search() -> None: + intent = detect_intent("https://open.spotify.com/album") + assert intent.type == IntentType.SEARCH + assert intent.identifier == "https://open.spotify.com/album" From ca279585027afa418ff13781e8076d75ebc479ec Mon Sep 17 00:00:00 2001 From: z3ro-2 Date: Mon, 16 Feb 2026 14:53:06 -0600 Subject: [PATCH 16/45] feat(home-intents): add intent execution plumbing and Spotify preview-gated confirmation UI Added homepage intent-routing integration end-to-end: Search API now returns intent detection payload for 
non-search inputs Frontend handles detected intents with dedicated confirmation state Added backend execution plumbing: New endpoint POST /api/intent/execute Validates intent_type and identifier Returns deterministic acceptance payload (no enqueue/ingestion yet) Added deterministic endpoint tests scaffold: test_intent_execute_endpoint.py Valid intent returns accepted payload Invalid intent_type returns 400 Added Spotify intent preview endpoint: New POST /api/intent/preview Supports spotify_album and spotify_playlist Fetches preview metadata (title, artist, track_count) from Spotify API No ingestion side effects Upgraded homepage confirmation card flow: For Spotify album/playlist intents: Fetch metadata first Render Title / Artist / Track count Show Confirm Download only after successful preview Show error state on preview failure Cancel returns user to search state Confirm Download still calls /api/intent/execute only --- api/main.py | 82 +++++++++- tests/test_intent_execute_endpoint.py | 51 ++++++ webUI/app.js | 216 ++++++++++++++++++++++++++ 3 files changed, 348 insertions(+), 1 deletion(-) create mode 100644 tests/test_intent_execute_endpoint.py diff --git a/api/main.py b/api/main.py index 0bfe470..1642302 100644 --- a/api/main.py +++ b/api/main.py @@ -31,7 +31,7 @@ def _require_python_311(): from datetime import datetime, timedelta, timezone from zoneinfo import ZoneInfo from uuid import uuid4 -from urllib.parse import urlparse +from urllib.parse import quote, urlparse from typing import Optional import anyio @@ -108,6 +108,7 @@ def _require_python_311(): ) from engine.runtime import get_runtime_info from input.intent_router import IntentType, detect_intent +from spotify.client import SpotifyPlaylistClient APP_NAME = "Retreivr API" STATUS_SCHEMA_VERSION = 1 @@ -519,6 +520,11 @@ class SpotifyPlaylistImportPayload(BaseModel): playlist_url: str +class IntentExecutePayload(BaseModel): + intent_type: IntentType + identifier: str + + class 
SafeJSONResponse(JSONResponse): def render(self, content): return json.dumps( @@ -3346,6 +3352,80 @@ async def create_search_request(request: dict = Body(...)): return {"request_id": request_id} +@app.post("/api/intent/execute") +async def execute_intent(payload: dict = Body(...)): + """Accept intent execution requests (plumbing only; no ingestion side effects yet).""" + intent_raw = str((payload or {}).get("intent_type") or "").strip() + identifier = str((payload or {}).get("identifier") or "").strip() + if not intent_raw: + raise HTTPException(status_code=400, detail="intent_type is required") + if not identifier: + raise HTTPException(status_code=400, detail="identifier is required") + try: + intent_type = IntentType(intent_raw) + except ValueError as exc: + raise HTTPException(status_code=400, detail="invalid intent_type") from exc + return { + "status": "accepted", + "intent_type": intent_type.value, + "identifier": identifier, + } + + +@app.post("/api/intent/preview") +async def preview_intent(payload: dict = Body(...)): + """Fetch metadata preview for supported intents (plumbing only).""" + intent_raw = str((payload or {}).get("intent_type") or "").strip() + identifier = str((payload or {}).get("identifier") or "").strip() + if not intent_raw: + raise HTTPException(status_code=400, detail="intent_type is required") + if not identifier: + raise HTTPException(status_code=400, detail="identifier is required") + try: + intent_type = IntentType(intent_raw) + except ValueError as exc: + raise HTTPException(status_code=400, detail="invalid intent_type") from exc + + if intent_type not in {IntentType.SPOTIFY_ALBUM, IntentType.SPOTIFY_PLAYLIST}: + raise HTTPException(status_code=400, detail="intent preview not supported for this intent_type") + + client = SpotifyPlaylistClient() + encoded = quote(identifier, safe="") + try: + if intent_type == IntentType.SPOTIFY_ALBUM: + data = client._request_json( + f"https://api.spotify.com/v1/albums/{encoded}", + 
params={"fields": "name,artists(name),total_tracks"}, + ) + artists = data.get("artists") or [] + artist = artists[0].get("name") if artists and isinstance(artists[0], dict) else "" + track_count = int(data.get("total_tracks") or 0) + return { + "intent_type": intent_type.value, + "identifier": identifier, + "title": str(data.get("name") or ""), + "artist": str(artist or ""), + "track_count": track_count, + } + + data = client._request_json( + f"https://api.spotify.com/v1/playlists/{encoded}", + params={"fields": "name,owner(display_name),tracks(total)"}, + ) + owner = (data.get("owner") or {}).get("display_name") + track_count = int(((data.get("tracks") or {}).get("total")) or 0) + return { + "intent_type": intent_type.value, + "identifier": identifier, + "title": str(data.get("name") or ""), + "artist": str(owner or ""), + "track_count": track_count, + } + except Exception as exc: + logging.exception("Intent preview failed for intent=%s identifier=%s", intent_type.value, identifier) + raise HTTPException(status_code=502, detail=f"intent preview failed: {exc}") from exc + + @app.get("/api/search/requests") async def list_search_requests(status: str | None = None, limit: int | None = None): try: diff --git a/tests/test_intent_execute_endpoint.py b/tests/test_intent_execute_endpoint.py new file mode 100644 index 0000000..5a7a7da --- /dev/null +++ b/tests/test_intent_execute_endpoint.py @@ -0,0 +1,51 @@ +from __future__ import annotations + +import importlib +import sys + +import pytest + +fastapi = pytest.importorskip("fastapi") +from fastapi.testclient import TestClient + + +def _build_client(monkeypatch) -> TestClient: + monkeypatch.setattr(sys, "version_info", (3, 11, 0, "final", 0), raising=False) + monkeypatch.setattr(sys, "version", "3.11.9", raising=False) + sys.modules.pop("api.main", None) + module = importlib.import_module("api.main") + module.app.router.on_startup.clear() + module.app.router.on_shutdown.clear() + return TestClient(module.app) + + +def 
test_intent_execute_accepts_valid_spotify_album_intent(monkeypatch) -> None: + client = _build_client(monkeypatch) + + response = client.post( + "/api/intent/execute", + json={ + "intent_type": "spotify_album", + "identifier": "1A2B3C4D5E", + }, + ) + + assert response.status_code == 200 + payload = response.json() + assert payload["status"] == "accepted" + assert payload["intent_type"] == "spotify_album" + assert payload["identifier"] == "1A2B3C4D5E" + + +def test_intent_execute_rejects_invalid_intent_type(monkeypatch) -> None: + client = _build_client(monkeypatch) + + response = client.post( + "/api/intent/execute", + json={ + "intent_type": "not_real_intent", + "identifier": "abc", + }, + ) + + assert response.status_code == 400 diff --git a/webUI/app.js b/webUI/app.js index fa5c069..af0c78e 100644 --- a/webUI/app.js +++ b/webUI/app.js @@ -2369,6 +2369,138 @@ function renderHomeDirectUrlCard(preview, status) { return card; } +function formatDetectedIntentLabel(intentType) { + const mapping = { + spotify_album: "Album", + spotify_playlist: "Playlist", + spotify_track: "Track", + spotify_artist: "Artist", + youtube_playlist: "Playlist", + }; + return mapping[intentType] || intentType || "Unknown"; +} + +function isSpotifyPreviewIntent(intentType) { + return intentType === "spotify_album" || intentType === "spotify_playlist"; +} + +async function fetchIntentPreview(intentType, identifier) { + return fetchJson("/api/intent/preview", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + intent_type: intentType, + identifier, + }), + }); +} + +function renderHomeIntentCard(intentType, identifier, options = {}) { + const { + loading = false, + error = "", + preview = null, + canConfirm = false, + } = options; + const card = document.createElement("article"); + card.className = "home-result-card"; + card.dataset.intentType = intentType || ""; + card.dataset.intentIdentifier = identifier || ""; + + const header = 
document.createElement("div"); + header.className = "home-result-header"; + const title = document.createElement("div"); + const strong = document.createElement("strong"); + strong.textContent = `Detected: ${formatDetectedIntentLabel(intentType)}`; + title.appendChild(strong); + header.appendChild(title); + header.appendChild(renderHomeStatusBadge("candidate_found")); + card.appendChild(header); + + if (loading) { + const loadingEl = document.createElement("div"); + loadingEl.className = "home-candidate-title"; + loadingEl.textContent = "Fetching Spotify metadata…"; + card.appendChild(loadingEl); + } else if (error) { + const errorEl = document.createElement("div"); + errorEl.className = "home-candidate-title"; + errorEl.textContent = `Preview failed: ${error}`; + card.appendChild(errorEl); + } else if (preview) { + const titleEl = document.createElement("div"); + titleEl.className = "home-candidate-title"; + titleEl.textContent = `Title: ${preview.title || "-"}`; + card.appendChild(titleEl); + + const artistEl = document.createElement("div"); + artistEl.className = "home-candidate-meta"; + artistEl.textContent = `Artist: ${preview.artist || "-"}`; + card.appendChild(artistEl); + + const countEl = document.createElement("div"); + countEl.className = "home-candidate-meta"; + countEl.textContent = `Track count: ${Number.isFinite(preview.track_count) ? 
preview.track_count : "-"}`; + card.appendChild(countEl); + } else { + const detail = document.createElement("div"); + detail.className = "home-candidate-title"; + detail.textContent = `Identifier: ${identifier || "-"}`; + card.appendChild(detail); + } + + const actions = document.createElement("div"); + actions.className = "row"; + if (canConfirm) { + const confirmButton = document.createElement("button"); + confirmButton.className = "button"; + confirmButton.dataset.action = "home-intent-confirm"; + confirmButton.dataset.intentType = intentType || ""; + confirmButton.dataset.identifier = identifier || ""; + confirmButton.textContent = "Confirm Download"; + actions.appendChild(confirmButton); + } + + const cancelButton = document.createElement("button"); + cancelButton.className = "button ghost"; + cancelButton.dataset.action = "home-intent-cancel"; + cancelButton.textContent = "Cancel"; + actions.appendChild(cancelButton); + card.appendChild(actions); + return card; +} + +function resetHomeIntentConfirmation() { + state.homeSearchRequestId = null; + updateHomeViewAdvancedLink(); + stopHomeResultPolling(); + stopHomeJobPolling(); + setHomeSearchControlsEnabled(true); + setHomeSearchActive(false); + setHomeResultsState({ hasResults: false, terminal: false }); + showHomeResults(false); + const list = $("#home-results-list"); + if (list) { + list.textContent = ""; + } + setHomeResultsStatus("Ready to discover media"); + setHomeResultsDetail( + "Search Only is the default discovery action; use Search & Download to enqueue jobs.", + false + ); +} + +async function executeDetectedIntent(intentType, identifier) { + return fetchJson("/api/intent/execute", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + intent_type: intentType, + identifier, + }), + }); +} + function showHomeDirectUrlError(url, message, messageEl) { const text = message || DIRECT_URL_PLAYLIST_ERROR; if (messageEl) { @@ -2741,6 +2873,60 @@ async function 
submitHomeSearch(autoEnqueue) { headers: { "Content-Type": "application/json" }, body: JSON.stringify(payload), }); + if (data && data.detected_intent) { + state.homeSearchRequestId = null; + updateHomeViewAdvancedLink(); + stopHomeResultPolling(); + setHomeSearchActive(false); + setHomeSearchControlsEnabled(true); + showHomeResults(true); + setHomeResultsState({ hasResults: true, terminal: true }); + setHomeResultsStatus("Intent detected"); + setHomeResultsDetail("Preparing intent preview...", false); + const list = $("#home-results-list"); + if (list) { + list.textContent = ""; + const intentType = data.detected_intent; + const identifier = data.identifier || ""; + const needsPreview = isSpotifyPreviewIntent(intentType); + list.appendChild( + renderHomeIntentCard(intentType, identifier, { + loading: needsPreview, + canConfirm: !needsPreview, + }) + ); + if (needsPreview) { + try { + const preview = await fetchIntentPreview(intentType, identifier); + list.textContent = ""; + list.appendChild( + renderHomeIntentCard(intentType, identifier, { + preview, + canConfirm: true, + }) + ); + setHomeResultsStatus("Intent preview ready"); + setHomeResultsDetail("Review metadata and confirm to continue.", false); + setNotice(messageEl, "Intent metadata loaded.", false); + } catch (previewErr) { + list.textContent = ""; + list.appendChild( + renderHomeIntentCard(intentType, identifier, { + error: previewErr.message || "Failed to fetch metadata", + canConfirm: false, + }) + ); + setHomeResultsStatus("Intent preview failed"); + setHomeResultsDetail("Could not fetch Spotify metadata. Please retry.", true); + setNotice(messageEl, `Intent preview failed: ${previewErr.message}`, true); + } + } else { + setHomeResultsDetail("Confirm to proceed or cancel to return to search.", false); + setNotice(messageEl, "Intent detected. 
Confirm to continue.", false); + } + } + return; + } state.homeRequestContext = {}; state.homeBestScores = {}; state.homeCandidateCache = {}; @@ -4144,6 +4330,36 @@ function bindEvents() { const homeResultsList = $("#home-results-list"); if (homeResultsList) { homeResultsList.addEventListener("click", async (event) => { + const cancelIntentButton = event.target.closest('button[data-action="home-intent-cancel"]'); + if (cancelIntentButton) { + resetHomeIntentConfirmation(); + setNotice($("#home-search-message"), "Intent confirmation cancelled.", false); + return; + } + const confirmIntentButton = event.target.closest('button[data-action="home-intent-confirm"]'); + if (confirmIntentButton) { + const messageEl = $("#home-search-message"); + const intentType = confirmIntentButton.dataset.intentType || ""; + const identifier = confirmIntentButton.dataset.identifier || ""; + if (!intentType || !identifier) { + setNotice(messageEl, "Intent payload is incomplete.", true); + return; + } + confirmIntentButton.disabled = true; + setNotice(messageEl, "Submitting intent...", false); + try { + await executeDetectedIntent(intentType, identifier); + setNotice(messageEl, "Intent submitted.", false); + setHomeResultsStatus("Intent submitted"); + setHomeResultsDetail("Download flow will continue once backend execution is implemented.", false); + } catch (err) { + setNotice(messageEl, `Intent submit failed: ${err.message}`, true); + setHomeResultsDetail(`Intent submit failed: ${err.message}`, true); + } finally { + confirmIntentButton.disabled = false; + } + return; + } const directButton = event.target.closest('button[data-action="home-direct-download"]'); if (directButton) { if (directButton.disabled) return; From f12c042189c2d07a0323e4d812ec76b23003ac3c Mon Sep 17 00:00:00 2001 From: z3ro-2 Date: Mon, 16 Feb 2026 15:01:17 -0600 Subject: [PATCH 17/45] feat(intent-execution): wire dispatcher-based Spotify intent sync with album/playlist parity Added intent_dispatcher.py as thin 
execution router using existing ingestion patterns: spotify_playlist routes to existing playlist_watch_job sync flow spotify_album routes to new run_spotify_album_sync(...) orchestration spotify_track reuses enqueue_spotify_track(...) spotify_artist returns accepted response requiring user selection Implemented run_spotify_album_sync(...): Fetches ordered album tracks from Spotify public API Enqueues via existing enqueue/metadata pipeline Best-effort M3U rebuild for album: name format: Spotify - Album - {Artist} - {Album} uses canonical downloaded paths from DB + existing rebuild.py Returns deterministic summary with enqueued_count Updated POST /api/intent/execute in main.py: Delegates to dispatcher instead of static accepted response Passes dependencies from app state (config/db/queue/search_service/spotify client) Keeps /api/intent/preview unchanged Keeps /api/search/requests behavior unchanged Added deterministic dispatcher routing tests: test_intent_dispatcher.py verifies artist/playlist/album/track branches with monkeypatched spies (no network calls) Added API-level intent execute delegation tests: test_api_intent_execute.py verifies endpoint returns mocked dispatcher payload and invalid intent validation (400) --- api/intent_dispatcher.py | 352 +++++++++++++++++++++++++++++++ api/main.py | 35 ++- tests/test_api_intent_execute.py | 67 ++++++ tests/test_intent_dispatcher.py | 161 ++++++++++++++ 4 files changed, 610 insertions(+), 5 deletions(-) create mode 100644 api/intent_dispatcher.py create mode 100644 tests/test_api_intent_execute.py create mode 100644 tests/test_intent_dispatcher.py diff --git a/api/intent_dispatcher.py b/api/intent_dispatcher.py new file mode 100644 index 0000000..95178e3 --- /dev/null +++ b/api/intent_dispatcher.py @@ -0,0 +1,352 @@ +"""Intent execution dispatcher for API-layer intent plumbing.""" + +from __future__ import annotations + +import logging +import os +import sqlite3 +from pathlib import Path +from typing import Any, Dict 
+from urllib.parse import quote + +from input.intent_router import IntentType +from playlist.rebuild import rebuild_playlist_from_tracks +from scheduler.jobs.spotify_playlist_watch import enqueue_spotify_track, playlist_watch_job + + +async def execute_intent( + intent_type: str, + identifier: str, + config, + db, + queue, + spotify_client, +) -> Dict[str, Any]: + """Dispatch intent execution to existing Spotify ingestion behaviors. + + This function keeps intent execution thin by delegating to established + watcher/enqueue helpers where possible. + """ + raw_intent = str(intent_type or "").strip() + raw_identifier = str(identifier or "").strip() + if not raw_intent: + return _error_response(raw_intent, raw_identifier, "intent_type is required") + if not raw_identifier: + return _error_response(raw_intent, raw_identifier, "identifier is required") + + try: + intent = IntentType(raw_intent) + except ValueError: + return _error_response(raw_intent, raw_identifier, "unsupported intent_type") + + if intent == IntentType.SPOTIFY_PLAYLIST: + playlist_name = _resolve_playlist_name(raw_identifier, config) + result = playlist_watch_job( + spotify_client, + db, + queue, + raw_identifier, + playlist_name=playlist_name, + config=config if isinstance(config, dict) else None, + ) + status = "accepted" if result.get("status") in {"updated", "unchanged"} else "error" + return { + "status": status, + "intent_type": intent.value, + "identifier": raw_identifier, + "message": f"playlist sync {result.get('status', 'completed')}", + "enqueued_count": int(result.get("enqueued") or 0), + } + + if intent == IntentType.SPOTIFY_TRACK: + search_service = _resolve_search_service(config) + if search_service is None: + return _error_response( + intent.value, + raw_identifier, + "search_service is required for spotify_track execution", + ) + track = _fetch_spotify_track(spotify_client, raw_identifier) + if not track: + return _error_response(intent.value, raw_identifier, "track not found") + 
await enqueue_spotify_track( + queue=queue, + spotify_track=track, + search_service=search_service, + playlist_id=f"spotify_track_{raw_identifier}", + ) + return { + "status": "accepted", + "intent_type": intent.value, + "identifier": raw_identifier, + "message": "track enqueue attempted", + "enqueued_count": 1, + } + + if intent == IntentType.SPOTIFY_ALBUM: + result = await run_spotify_album_sync( + album_id=raw_identifier, + config=config, + db=db, + queue=queue, + spotify_client=spotify_client, + ) + result["intent_type"] = intent.value + result["identifier"] = raw_identifier + return result + + if intent == IntentType.SPOTIFY_ARTIST: + return { + "status": "accepted", + "intent_type": intent.value, + "identifier": raw_identifier, + "message": "artist intent requires selection before enqueue", + "enqueued_count": 0, + } + + return _error_response(intent.value, raw_identifier, "intent type not implemented") + + +def _error_response(intent_type: str, identifier: str, message: str) -> Dict[str, Any]: + return { + "status": "error", + "intent_type": intent_type, + "identifier": identifier, + "message": message, + "enqueued_count": 0, + } + + +def _resolve_search_service(config: Any) -> Any: + if isinstance(config, dict): + return config.get("search_service") + return getattr(config, "search_service", None) + + +def _resolve_playlist_name(playlist_id: str, config: Any) -> str: + if not isinstance(config, dict): + return playlist_id + entries = config.get("spotify_playlists") or [] + for entry in entries: + if not isinstance(entry, dict): + continue + candidate = str(entry.get("playlist_id") or "").strip() + if candidate and candidate == playlist_id: + name = str(entry.get("name") or "").strip() + if name: + return name + return playlist_id + + +def _fetch_spotify_track(spotify_client: Any, track_id: str) -> dict[str, Any] | None: + encoded = quote(track_id, safe="") + payload = spotify_client._request_json( + f"https://api.spotify.com/v1/tracks/{encoded}", + 
params={"market": "from_token"}, + ) + if not isinstance(payload, dict) or not payload.get("id"): + return None + return _normalize_track_payload(payload, album_name=(payload.get("album") or {}).get("name")) + + +def _fetch_spotify_album_tracks(spotify_client: Any, album_id: str) -> tuple[str, list[dict[str, Any]]]: + encoded = quote(album_id, safe="") + album = spotify_client._request_json( + f"https://api.spotify.com/v1/albums/{encoded}", + params={"fields": "name,tracks(items(id,name,duration_ms,artists(name),disc_number,track_number),next)"}, + ) + title = str(album.get("name") or "") + tracks_page = album.get("tracks") or {} + items: list[dict[str, Any]] = [] + while True: + for raw in tracks_page.get("items") or []: + if not raw or not raw.get("id"): + continue + items.append(_normalize_track_payload(raw, album_name=title)) + next_url = tracks_page.get("next") + if not next_url: + break + tracks_page = spotify_client._request_json(str(next_url)) + return title, items + + +async def run_spotify_album_sync( + album_id: str, + config, + db, + queue, + spotify_client, +) -> Dict[str, Any]: + """Run a one-shot Spotify album sync using existing enqueue and rebuild pipelines. + + Behavior mirrors playlist sync style orchestration: + - fetch album metadata + ordered tracks, + - enqueue each track via ``enqueue_spotify_track``, + - best-effort rebuild of an album-scoped M3U from downloaded canonical paths. 
+ """ + album_identifier = str(album_id or "").strip() + if not album_identifier: + return { + "status": "error", + "intent_type": IntentType.SPOTIFY_ALBUM.value, + "identifier": album_identifier, + "message": "album_id is required", + "enqueued_count": 0, + } + + search_service = _resolve_search_service(config) + if search_service is None: + return { + "status": "error", + "intent_type": IntentType.SPOTIFY_ALBUM.value, + "identifier": album_identifier, + "message": "search_service is required for spotify_album execution", + "enqueued_count": 0, + } + + try: + album_title, album_tracks, album_artist = _fetch_spotify_album_tracks_with_artist( + spotify_client, + album_identifier, + ) + except Exception as exc: + return { + "status": "error", + "intent_type": IntentType.SPOTIFY_ALBUM.value, + "identifier": album_identifier, + "message": f"album fetch failed: {exc}", + "enqueued_count": 0, + } + + if not album_tracks: + return { + "status": "error", + "intent_type": IntentType.SPOTIFY_ALBUM.value, + "identifier": album_identifier, + "message": "album contains no tracks", + "enqueued_count": 0, + } + + playlist_id = f"spotify_album_{album_identifier}" + enqueued = 0 + for track in album_tracks: + await enqueue_spotify_track( + queue=queue, + spotify_track=track, + search_service=search_service, + playlist_id=playlist_id, + ) + enqueued += 1 + + try: + downloaded_paths = _load_downloaded_paths_for_playlist(playlist_id) + playlist_root, music_root = _resolve_playlist_dirs(config) + artist_name = album_artist or "Unknown Artist" + album_name = album_title or album_identifier + playlist_name = f"Spotify - Album - {artist_name} - {album_name}" + rebuild_playlist_from_tracks( + playlist_name=playlist_name, + playlist_root=playlist_root, + music_root=music_root, + track_file_paths=downloaded_paths, + ) + except Exception: + logging.exception("Album M3U rebuild failed for album %s", album_identifier) + + return { + "status": "accepted", + "intent_type": 
IntentType.SPOTIFY_ALBUM.value, + "identifier": album_identifier, + "message": f"album sync completed: {album_title or album_identifier}", + "enqueued_count": enqueued, + } + + +def _fetch_spotify_album_tracks_with_artist( + spotify_client: Any, + album_id: str, +) -> tuple[str, list[dict[str, Any]], str]: + encoded = quote(album_id, safe="") + album = spotify_client._request_json( + f"https://api.spotify.com/v1/albums/{encoded}", + params={ + "fields": ( + "name,artists(name)," + "tracks(items(id,name,duration_ms,artists(name),disc_number,track_number,external_ids(isrc)),next)" + ) + }, + ) + title = str(album.get("name") or "") + album_artists = album.get("artists") or [] + album_artist = ( + album_artists[0].get("name") + if album_artists and isinstance(album_artists[0], dict) + else "" + ) + + tracks_page = album.get("tracks") or {} + items: list[dict[str, Any]] = [] + while True: + for raw in tracks_page.get("items") or []: + if not raw or not raw.get("id"): + continue + items.append(_normalize_track_payload(raw, album_name=title)) + next_url = tracks_page.get("next") + if not next_url: + break + tracks_page = spotify_client._request_json(str(next_url)) + return title, items, str(album_artist or "") + + +def _resolve_db_path() -> str: + return os.environ.get("RETREIVR_DB_PATH", os.path.join(os.getcwd(), "retreivr.sqlite3")) + + +def _load_downloaded_paths_for_playlist(playlist_id: str) -> list[str]: + conn: sqlite3.Connection | None = None + try: + conn = sqlite3.connect(_resolve_db_path(), check_same_thread=False, timeout=30) + conn.row_factory = sqlite3.Row + cur = conn.cursor() + cur.execute( + """ + SELECT file_path + FROM downloaded_music_tracks + WHERE playlist_id=? 
+ ORDER BY downloaded_at ASC, id ASC + """, + (playlist_id,), + ) + rows = cur.fetchall() + return [str(row["file_path"]) for row in rows if row["file_path"]] + except sqlite3.Error: + logging.exception("Failed to load downloaded tracks for playlist %s", playlist_id) + return [] + finally: + if conn is not None: + conn.close() + + +def _resolve_playlist_dirs(config: Any) -> tuple[Path, Path]: + cfg = config if isinstance(config, dict) else {} + music_root = Path(str(cfg.get("music_download_folder") or "Music")) + playlist_root = Path( + str(cfg.get("playlists_folder") or cfg.get("playlist_export_folder") or (music_root / "Playlists")) + ) + return playlist_root, music_root + + +def _normalize_track_payload(track: dict[str, Any], *, album_name: str | None = None) -> dict[str, Any]: + artists = track.get("artists") or [] + first_artist = artists[0].get("name") if artists and isinstance(artists[0], dict) else None + external_ids = track.get("external_ids") or {} + return { + "spotify_track_id": track.get("id"), + "artist": first_artist, + "title": track.get("name"), + "album": album_name or ((track.get("album") or {}).get("name")), + "duration_ms": track.get("duration_ms"), + "isrc": external_ids.get("isrc"), + "track_num": track.get("track_number"), + "disc_num": track.get("disc_number"), + } diff --git a/api/main.py b/api/main.py index 1642302..a041c8e 100644 --- a/api/main.py +++ b/api/main.py @@ -108,7 +108,9 @@ def _require_python_311(): ) from engine.runtime import get_runtime_info from input.intent_router import IntentType, detect_intent +from api.intent_dispatcher import execute_intent as dispatch_intent from spotify.client import SpotifyPlaylistClient +from db.playlist_snapshots import PlaylistSnapshotStore APP_NAME = "Retreivr API" STATUS_SCHEMA_VERSION = 1 @@ -525,6 +527,20 @@ class IntentExecutePayload(BaseModel): identifier: str +class _IntentQueueAdapter: + """Minimal queue adapter for intent-dispatched Spotify payloads.""" + + def enqueue(self, payload: 
dict) -> None: + if not hasattr(app.state, "intent_dispatch_queue"): + app.state.intent_dispatch_queue = [] + app.state.intent_dispatch_queue.append(payload) + logging.info( + "Intent payload queued playlist_id=%s spotify_track_id=%s", + payload.get("playlist_id"), + payload.get("spotify_track_id"), + ) + + class SafeJSONResponse(JSONResponse): def render(self, content): return json.dumps( @@ -3365,11 +3381,20 @@ async def execute_intent(payload: dict = Body(...)): intent_type = IntentType(intent_raw) except ValueError as exc: raise HTTPException(status_code=400, detail="invalid intent_type") from exc - return { - "status": "accepted", - "intent_type": intent_type.value, - "identifier": identifier, - } + config = _read_config_or_404() + dispatcher_config = dict(config) + dispatcher_config["search_service"] = app.state.search_service + db = PlaylistSnapshotStore(app.state.paths.db_path) + queue = _IntentQueueAdapter() + spotify_client = SpotifyPlaylistClient() + return await dispatch_intent( + intent_type=intent_type.value, + identifier=identifier, + config=dispatcher_config, + db=db, + queue=queue, + spotify_client=spotify_client, + ) @app.post("/api/intent/preview") diff --git a/tests/test_api_intent_execute.py b/tests/test_api_intent_execute.py new file mode 100644 index 0000000..b9c01c9 --- /dev/null +++ b/tests/test_api_intent_execute.py @@ -0,0 +1,67 @@ +from __future__ import annotations + +import importlib +import sys + +import pytest + +fastapi = pytest.importorskip("fastapi") +from fastapi.testclient import TestClient + + +def _build_client(monkeypatch) -> TestClient: + monkeypatch.setattr(sys, "version_info", (3, 11, 0, "final", 0), raising=False) + monkeypatch.setattr(sys, "version", "3.11.9", raising=False) + sys.modules.pop("api.main", None) + module = importlib.import_module("api.main") + module.app.router.on_startup.clear() + module.app.router.on_shutdown.clear() + return TestClient(module.app) + + +def 
test_api_intent_execute_delegates_to_dispatcher(monkeypatch) -> None: + client = _build_client(monkeypatch) + + expected = { + "status": "accepted", + "intent_type": "spotify_album", + "identifier": "album-123", + "message": "album sync completed", + "enqueued_count": 7, + } + + async def _fake_dispatch_intent(*, intent_type, identifier, config, db, queue, spotify_client): + assert intent_type == "spotify_album" + assert identifier == "album-123" + assert config is not None + assert db is not None + assert queue is not None + assert spotify_client is not None + return expected + + monkeypatch.setattr("api.main.dispatch_intent", _fake_dispatch_intent) + + response = client.post( + "/api/intent/execute", + json={ + "intent_type": "spotify_album", + "identifier": "album-123", + }, + ) + + assert response.status_code == 200 + assert response.json() == expected + + +def test_api_intent_execute_invalid_intent_type_returns_400(monkeypatch) -> None: + client = _build_client(monkeypatch) + + response = client.post( + "/api/intent/execute", + json={ + "intent_type": "invalid_intent", + "identifier": "abc", + }, + ) + + assert response.status_code == 400 diff --git a/tests/test_intent_dispatcher.py b/tests/test_intent_dispatcher.py new file mode 100644 index 0000000..bacff3c --- /dev/null +++ b/tests/test_intent_dispatcher.py @@ -0,0 +1,161 @@ +from __future__ import annotations + +import asyncio +from typing import Any + +from api.intent_dispatcher import execute_intent + + +def test_execute_intent_spotify_artist_requires_selection() -> None: + result = asyncio.run( + execute_intent( + intent_type="spotify_artist", + identifier="artist-123", + config={}, + db=object(), + queue=object(), + spotify_client=object(), + ) + ) + + assert result["status"] == "accepted" + assert result["intent_type"] == "spotify_artist" + assert result["identifier"] == "artist-123" + assert "selection" in result["message"].lower() + assert result["enqueued_count"] == 0 + + +def 
test_execute_intent_spotify_playlist_triggers_playlist_sync(monkeypatch) -> None: + calls: list[dict[str, Any]] = [] + + def _fake_playlist_watch_job(spotify_client, db, queue, playlist_id, *, playlist_name=None, config=None): + calls.append( + { + "spotify_client": spotify_client, + "db": db, + "queue": queue, + "playlist_id": playlist_id, + "playlist_name": playlist_name, + "config": config, + } + ) + return {"status": "updated", "enqueued": 2} + + monkeypatch.setattr("api.intent_dispatcher.playlist_watch_job", _fake_playlist_watch_job) + + db = object() + queue = object() + spotify_client = object() + result = asyncio.run( + execute_intent( + intent_type="spotify_playlist", + identifier="playlist-abc", + config={"spotify_playlists": []}, + db=db, + queue=queue, + spotify_client=spotify_client, + ) + ) + + assert len(calls) == 1 + assert calls[0]["playlist_id"] == "playlist-abc" + assert calls[0]["db"] is db + assert calls[0]["queue"] is queue + assert calls[0]["spotify_client"] is spotify_client + assert result["status"] == "accepted" + assert result["enqueued_count"] == 2 + + +def test_execute_intent_spotify_album_triggers_album_sync(monkeypatch) -> None: + calls: list[dict[str, Any]] = [] + + async def _fake_album_sync(album_id, config, db, queue, spotify_client): + calls.append( + { + "album_id": album_id, + "config": config, + "db": db, + "queue": queue, + "spotify_client": spotify_client, + } + ) + return { + "status": "accepted", + "intent_type": "spotify_album", + "identifier": album_id, + "message": "album sync completed", + "enqueued_count": 4, + } + + monkeypatch.setattr("api.intent_dispatcher.run_spotify_album_sync", _fake_album_sync) + + db = object() + queue = object() + spotify_client = object() + result = asyncio.run( + execute_intent( + intent_type="spotify_album", + identifier="album-xyz", + config={"search_service": object()}, + db=db, + queue=queue, + spotify_client=spotify_client, + ) + ) + + assert len(calls) == 1 + assert 
calls[0]["album_id"] == "album-xyz" + assert calls[0]["db"] is db + assert calls[0]["queue"] is queue + assert calls[0]["spotify_client"] is spotify_client + assert result["status"] == "accepted" + assert result["enqueued_count"] == 4 + + +def test_execute_intent_spotify_track_enqueues_once(monkeypatch) -> None: + calls: list[dict[str, Any]] = [] + + monkeypatch.setattr( + "api.intent_dispatcher._fetch_spotify_track", + lambda _spotify_client, track_id: { + "spotify_track_id": track_id, + "artist": "Artist", + "title": "Title", + "album": "Album", + "duration_ms": 123000, + "isrc": "USABC1234567", + }, + ) + + async def _fake_enqueue_spotify_track(*, queue, spotify_track, search_service, playlist_id): + calls.append( + { + "queue": queue, + "spotify_track": spotify_track, + "search_service": search_service, + "playlist_id": playlist_id, + } + ) + + monkeypatch.setattr("api.intent_dispatcher.enqueue_spotify_track", _fake_enqueue_spotify_track) + + queue = object() + search_service = object() + result = asyncio.run( + execute_intent( + intent_type="spotify_track", + identifier="track-777", + config={"search_service": search_service}, + db=object(), + queue=queue, + spotify_client=object(), + ) + ) + + assert len(calls) == 1 + assert calls[0]["queue"] is queue + assert calls[0]["spotify_track"]["spotify_track_id"] == "track-777" + assert calls[0]["search_service"] is search_service + assert calls[0]["playlist_id"] == "spotify_track_track-777" + assert result["status"] == "accepted" + assert result["enqueued_count"] == 1 From f599d4cf5069c743db6b1565fe4ad8b9de0a6f5d Mon Sep 17 00:00:00 2001 From: z3ro-2 Date: Mon, 16 Feb 2026 15:05:52 -0600 Subject: [PATCH 18/45] feat(spotify-oauth): add deterministic SQLite token store and lifecycle tests Added oauth_store.py: SpotifyOAuthToken dataclass (access_token, refresh_token, expires_at, scope) SpotifyOAuthStore with single-row persistence (id=1) table bootstrap via _ensure_table() save() upsert semantics load() returns token 
or None clear() deletes stored token Uses direct sqlite3; no encryption and no network/OAuth calls Added deterministic unit test coverage: test_spotify_oauth_store.py Verifies save/load equality, overwrite behavior, and clear -> None behavior using tmp_path DB --- spotify/oauth_store.py | 115 ++++++++++++++++++++++++++++++ tests/test_spotify_oauth_store.py | 41 +++++++++++ 2 files changed, 156 insertions(+) create mode 100644 spotify/oauth_store.py create mode 100644 tests/test_spotify_oauth_store.py diff --git a/spotify/oauth_store.py b/spotify/oauth_store.py new file mode 100644 index 0000000..fdcd329 --- /dev/null +++ b/spotify/oauth_store.py @@ -0,0 +1,115 @@ +"""SQLite persistence for optional Spotify OAuth tokens.""" + +from __future__ import annotations + +import sqlite3 +from dataclasses import dataclass +from datetime import datetime, timezone +from pathlib import Path +from typing import Optional + + +@dataclass +class SpotifyOAuthToken: + access_token: str + refresh_token: str + expires_at: int # epoch seconds + scope: str + + +class SpotifyOAuthStore: + """Single-row SQLite storage for Spotify OAuth tokens.""" + + def __init__(self, db_path: Path): + self.db_path = Path(db_path) + self._ensure_table() + + def _connect(self) -> sqlite3.Connection: + conn = sqlite3.connect(str(self.db_path), check_same_thread=False, timeout=30) + conn.row_factory = sqlite3.Row + return conn + + def _ensure_table(self): + """Create token table when it does not already exist.""" + conn = self._connect() + try: + cur = conn.cursor() + cur.execute( + """ + CREATE TABLE IF NOT EXISTS spotify_oauth_tokens ( + id INTEGER PRIMARY KEY, + access_token TEXT NOT NULL, + refresh_token TEXT NOT NULL, + expires_at INTEGER NOT NULL, + scope TEXT NOT NULL, + updated_at TEXT NOT NULL + ) + """ + ) + conn.commit() + finally: + conn.close() + + def save(self, token: SpotifyOAuthToken) -> None: + """Upsert a single token row using fixed key ``id=1``.""" + updated_at = 
datetime.now(timezone.utc).isoformat() + conn = self._connect() + try: + cur = conn.cursor() + cur.execute( + """ + INSERT INTO spotify_oauth_tokens (id, access_token, refresh_token, expires_at, scope, updated_at) + VALUES (1, ?, ?, ?, ?, ?) + ON CONFLICT(id) DO UPDATE SET + access_token=excluded.access_token, + refresh_token=excluded.refresh_token, + expires_at=excluded.expires_at, + scope=excluded.scope, + updated_at=excluded.updated_at + """, + ( + token.access_token, + token.refresh_token, + int(token.expires_at), + token.scope, + updated_at, + ), + ) + conn.commit() + finally: + conn.close() + + def load(self) -> Optional[SpotifyOAuthToken]: + """Load token from row ``id=1``; return ``None`` when absent.""" + conn = self._connect() + try: + cur = conn.cursor() + cur.execute( + """ + SELECT access_token, refresh_token, expires_at, scope + FROM spotify_oauth_tokens + WHERE id=1 + LIMIT 1 + """ + ) + row = cur.fetchone() + if not row: + return None + return SpotifyOAuthToken( + access_token=str(row["access_token"]), + refresh_token=str(row["refresh_token"]), + expires_at=int(row["expires_at"]), + scope=str(row["scope"]), + ) + finally: + conn.close() + + def clear(self) -> None: + """Delete stored token row.""" + conn = self._connect() + try: + cur = conn.cursor() + cur.execute("DELETE FROM spotify_oauth_tokens WHERE id=1") + conn.commit() + finally: + conn.close() diff --git a/tests/test_spotify_oauth_store.py b/tests/test_spotify_oauth_store.py new file mode 100644 index 0000000..8979dbe --- /dev/null +++ b/tests/test_spotify_oauth_store.py @@ -0,0 +1,41 @@ +from __future__ import annotations + +from spotify.oauth_store import SpotifyOAuthStore, SpotifyOAuthToken + + +def test_spotify_oauth_store_lifecycle(tmp_path) -> None: + db_path = tmp_path / "spotify_oauth.sqlite" + store = SpotifyOAuthStore(db_path) + + first = SpotifyOAuthToken( + access_token="access-1", + refresh_token="refresh-1", + expires_at=1_800_000_000, + scope="user-library-read", + ) + 
store.save(first) + + loaded_first = store.load() + assert loaded_first is not None + assert loaded_first.access_token == first.access_token + assert loaded_first.refresh_token == first.refresh_token + assert loaded_first.expires_at == first.expires_at + assert loaded_first.scope == first.scope + + second = SpotifyOAuthToken( + access_token="access-2", + refresh_token="refresh-2", + expires_at=1_900_000_000, + scope="user-library-read playlist-read-private", + ) + store.save(second) + + loaded_second = store.load() + assert loaded_second is not None + assert loaded_second.access_token == second.access_token + assert loaded_second.refresh_token == second.refresh_token + assert loaded_second.expires_at == second.expires_at + assert loaded_second.scope == second.scope + + store.clear() + assert store.load() is None From 66e7ea3d99c8a493c6c2a6ce8bbc606bda1b9201 Mon Sep 17 00:00:00 2001 From: z3ro-2 Date: Mon, 16 Feb 2026 15:10:54 -0600 Subject: [PATCH 19/45] feat(spotify-oauth): add refresh helper, valid-token resolver, and deterministic refresh tests Extended oauth_client.py: Added refresh_access_token(client_id, client_secret, refresh_token) -> dict Performs token refresh request to Spotify token endpoint Raises on non-200 responses Returns parsed JSON payload on success Extended oauth_store.py: Added get_valid_token(client_id, client_secret) Loads stored token, checks expiration, refreshes when expired, saves updated token Clears token and returns None if refresh fails Added deterministic test coverage in test_spotify_oauth_refresh.py: not-expired token returns unchanged token expired token refreshes and persists updated values refresh failure clears token and returns None all refresh behavior mocked via monkeypatch (no Spotify network calls) --- api/main.py | 124 ++++++++++++++++++++++++++ spotify/oauth_client.py | 53 +++++++++++ spotify/oauth_store.py | 42 +++++++++ tests/test_spotify_oauth_endpoints.py | 91 +++++++++++++++++++ tests/test_spotify_oauth_refresh.py 
| 79 ++++++++++++++++ 5 files changed, 389 insertions(+) create mode 100644 spotify/oauth_client.py create mode 100644 tests/test_spotify_oauth_endpoints.py create mode 100644 tests/test_spotify_oauth_refresh.py diff --git a/api/main.py b/api/main.py index a041c8e..5793020 100644 --- a/api/main.py +++ b/api/main.py @@ -28,6 +28,7 @@ def _require_python_311(): import tempfile import threading import time +from pathlib import Path from datetime import datetime, timedelta, timezone from zoneinfo import ZoneInfo from uuid import uuid4 @@ -109,7 +110,9 @@ def _require_python_311(): from engine.runtime import get_runtime_info from input.intent_router import IntentType, detect_intent from api.intent_dispatcher import execute_intent as dispatch_intent +from spotify.oauth_client import SPOTIFY_TOKEN_URL, build_auth_url from spotify.client import SpotifyPlaylistClient +from spotify.oauth_store import SpotifyOAuthStore, SpotifyOAuthToken from db.playlist_snapshots import PlaylistSnapshotStore APP_NAME = "Retreivr API" @@ -3552,6 +3555,127 @@ async def spotify_playlist_status(): return {"statuses": app.state.spotify_import_status} +@app.get("/api/spotify/oauth/connect") +async def spotify_oauth_connect(): + """Build Spotify OAuth connect URL and store anti-CSRF state in memory.""" + config = _read_config_or_404() + spotify_cfg = (config.get("spotify") or {}) if isinstance(config, dict) else {} + client_id = ( + str(spotify_cfg.get("client_id") or config.get("SPOTIFY_CLIENT_ID") or "").strip() + if isinstance(config, dict) + else "" + ) + redirect_uri = ( + str(spotify_cfg.get("redirect_uri") or config.get("SPOTIFY_REDIRECT_URI") or "").strip() + if isinstance(config, dict) + else "" + ) + if not client_id: + raise HTTPException(status_code=400, detail="SPOTIFY_CLIENT_ID is required in config") + if not redirect_uri: + raise HTTPException(status_code=400, detail="SPOTIFY_REDIRECT_URI is required in config") + + state = str(uuid4()) + app.state.spotify_oauth_state = state + 
scope = "user-library-read playlist-read-private playlist-read-collaborative" + auth_url = build_auth_url( + client_id=client_id, + redirect_uri=redirect_uri, + scope=scope, + state=state, + ) + return {"auth_url": auth_url} + + +@app.get("/api/spotify/oauth/callback") +async def spotify_oauth_callback(code: str | None = None, state: str | None = None, error: str | None = None): + """Handle Spotify OAuth callback and persist tokens.""" + if error: + raise HTTPException(status_code=400, detail=f"spotify_oauth_error: {error}") + if not code: + raise HTTPException(status_code=400, detail="missing code") + if not state: + raise HTTPException(status_code=400, detail="missing state") + + expected_state = str(getattr(app.state, "spotify_oauth_state", "") or "") + if not expected_state or state != expected_state: + raise HTTPException(status_code=400, detail="invalid oauth state") + + config = _read_config_or_404() + spotify_cfg = (config.get("spotify") or {}) if isinstance(config, dict) else {} + client_id = ( + str(spotify_cfg.get("client_id") or config.get("SPOTIFY_CLIENT_ID") or "").strip() + if isinstance(config, dict) + else "" + ) + client_secret = ( + str(spotify_cfg.get("client_secret") or config.get("SPOTIFY_CLIENT_SECRET") or "").strip() + if isinstance(config, dict) + else "" + ) + redirect_uri = ( + str(spotify_cfg.get("redirect_uri") or config.get("SPOTIFY_REDIRECT_URI") or "").strip() + if isinstance(config, dict) + else "" + ) + if not client_id: + raise HTTPException(status_code=400, detail="SPOTIFY_CLIENT_ID is required in config") + if not client_secret: + raise HTTPException(status_code=400, detail="SPOTIFY_CLIENT_SECRET is required in config") + if not redirect_uri: + raise HTTPException(status_code=400, detail="SPOTIFY_REDIRECT_URI is required in config") + + try: + token_response = requests.post( + SPOTIFY_TOKEN_URL, + data={ + "grant_type": "authorization_code", + "code": code, + "redirect_uri": redirect_uri, + "client_id": client_id, + 
"client_secret": client_secret, + }, + timeout=20, + ) + except Exception as exc: + raise HTTPException(status_code=400, detail=f"token exchange failed: {exc}") from exc + + if token_response.status_code != 200: + detail = (token_response.text or "").strip() or f"status={token_response.status_code}" + raise HTTPException(status_code=400, detail=f"token exchange failed: {detail}") + + payload = token_response.json() + access_token = str(payload.get("access_token") or "").strip() + refresh_token = str(payload.get("refresh_token") or "").strip() + expires_in = payload.get("expires_in") + scope = str(payload.get("scope") or "").strip() + if not access_token: + raise HTTPException(status_code=400, detail="token exchange failed: missing access_token") + if not refresh_token: + raise HTTPException(status_code=400, detail="token exchange failed: missing refresh_token") + if expires_in is None: + raise HTTPException(status_code=400, detail="token exchange failed: missing expires_in") + if not scope: + raise HTTPException(status_code=400, detail="token exchange failed: missing scope") + + try: + expires_at = int(time.time()) + int(expires_in) + except (TypeError, ValueError) as exc: + raise HTTPException(status_code=400, detail="token exchange failed: invalid expires_in") from exc + + store = SpotifyOAuthStore(Path(app.state.paths.db_path)) + store.save( + SpotifyOAuthToken( + access_token=access_token, + refresh_token=refresh_token, + expires_at=expires_at, + scope=scope, + ) + ) + app.state.spotify_oauth_state = None + return {"status": "connected"} + + @app.get("/api/search/items/{item_id}/candidates") async def get_search_candidates(item_id: str): service = app.state.search_service diff --git a/spotify/oauth_client.py b/spotify/oauth_client.py new file mode 100644 index 0000000..cd0c481 --- /dev/null +++ b/spotify/oauth_client.py @@ -0,0 +1,53 @@ +"""Spotify OAuth client helpers.""" + +from __future__ import annotations + +import os +import time +from urllib.parse import 
urlencode + +import requests + +SPOTIFY_AUTH_URL = "https://accounts.spotify.com/authorize" +SPOTIFY_TOKEN_URL = "https://accounts.spotify.com/api/token" + + +def build_auth_url(client_id: str, redirect_uri: str, scope: str, state: str) -> str: + """Build Spotify authorization URL.""" + params = { + "client_id": client_id, + "response_type": "code", + "redirect_uri": redirect_uri, + "scope": scope, + "state": state, + } + return f"{SPOTIFY_AUTH_URL}?{urlencode(params)}" + + +def refresh_access_token( + client_id: str, + client_secret: str, + refresh_token: str, +) -> dict: + """Exchange refresh token for a new Spotify access token payload. + + Returns: + Parsed JSON token response from Spotify. + + Raises: + Exception: When request fails or response code is non-200. + """ + response = requests.post( + SPOTIFY_TOKEN_URL, + data={ + "grant_type": "refresh_token", + "refresh_token": refresh_token, + "client_id": client_id, + "client_secret": client_secret, + }, + timeout=20, + ) + if response.status_code != 200: + detail = (response.text or "").strip() or f"status={response.status_code}" + raise Exception(f"spotify refresh failed: {detail}") + return response.json() diff --git a/spotify/oauth_store.py b/spotify/oauth_store.py index fdcd329..4be9a09 100644 --- a/spotify/oauth_store.py +++ b/spotify/oauth_store.py @@ -3,11 +3,14 @@ from __future__ import annotations import sqlite3 +import time from dataclasses import dataclass from datetime import datetime, timezone from pathlib import Path from typing import Optional +from spotify.oauth_client import refresh_access_token + @dataclass class SpotifyOAuthToken: @@ -113,3 +116,42 @@ def clear(self) -> None: conn.commit() finally: conn.close() + + def get_valid_token(self, client_id: str, client_secret: str) -> Optional[SpotifyOAuthToken]: + """Return a valid token, refreshing and persisting it when expired. + + Behavior: + - If no token is stored, return ``None``. + - If token is not expired, return as-is. 
+ - If expired, attempt refresh and persist updated token. + - If refresh fails, clear stored token and return ``None``. + """ + token = self.load() + if token is None: + return None + + now = int(time.time()) + if int(token.expires_at) > now: + return token + + try: + payload = refresh_access_token( + client_id=client_id, + client_secret=client_secret, + refresh_token=token.refresh_token, + ) + new_access_token = str(payload.get("access_token") or "").strip() + expires_in = payload.get("expires_in") + if not new_access_token or expires_in is None: + raise ValueError("refresh payload missing access_token or expires_in") + refreshed = SpotifyOAuthToken( + access_token=new_access_token, + refresh_token=str(payload.get("refresh_token") or token.refresh_token), + expires_at=now + int(expires_in), + scope=str(payload.get("scope") or token.scope), + ) + self.save(refreshed) + return refreshed + except Exception: + self.clear() + return None diff --git a/tests/test_spotify_oauth_endpoints.py b/tests/test_spotify_oauth_endpoints.py new file mode 100644 index 0000000..4ccfd05 --- /dev/null +++ b/tests/test_spotify_oauth_endpoints.py @@ -0,0 +1,91 @@ +from __future__ import annotations + +import importlib +import sys +from types import SimpleNamespace + +import pytest + +pytest.importorskip("fastapi") +from fastapi.testclient import TestClient + +from spotify.oauth_store import SpotifyOAuthStore + + +def _build_client(monkeypatch, tmp_path) -> tuple[TestClient, object]: + monkeypatch.setattr(sys, "version_info", (3, 11, 0, "final", 0), raising=False) + monkeypatch.setattr(sys, "version", "3.11.9", raising=False) + sys.modules.pop("api.main", None) + module = importlib.import_module("api.main") + module.app.router.on_startup.clear() + module.app.router.on_shutdown.clear() + + db_path = tmp_path / "oauth_endpoints.sqlite" + module.app.state.paths = SimpleNamespace(db_path=str(db_path)) + module.app.state.spotify_oauth_state = None + monkeypatch.setattr( + module, + 
"_read_config_or_404", + lambda: { + "spotify": { + "client_id": "test-client-id", + "client_secret": "test-client-secret", + "redirect_uri": "http://localhost/callback", + } + }, + ) + return TestClient(module.app), module + + +def test_oauth_connect_returns_auth_url_with_client_id(monkeypatch, tmp_path) -> None: + client, _module = _build_client(monkeypatch, tmp_path) + + response = client.get("/api/spotify/oauth/connect") + + assert response.status_code == 200 + payload = response.json() + auth_url = payload["auth_url"] + assert "accounts.spotify.com/authorize" in auth_url + assert "client_id=test-client-id" in auth_url + + +def test_oauth_callback_stores_token_and_returns_connected(monkeypatch, tmp_path) -> None: + client, module = _build_client(monkeypatch, tmp_path) + module.app.state.spotify_oauth_state = "state-123" + + class _FakeResponse: + status_code = 200 + text = "" + + @staticmethod + def json(): + return { + "access_token": "access-token", + "refresh_token": "refresh-token", + "expires_in": 3600, + "scope": "user-library-read", + } + + monkeypatch.setattr("api.main.requests.post", lambda *args, **kwargs: _FakeResponse()) + + response = client.get("/api/spotify/oauth/callback?code=abc&state=state-123") + + assert response.status_code == 200 + assert response.json() == {"status": "connected"} + + store = SpotifyOAuthStore(tmp_path / "oauth_endpoints.sqlite") + token = store.load() + assert token is not None + assert token.access_token == "access-token" + assert token.refresh_token == "refresh-token" + assert token.scope == "user-library-read" + assert token.expires_at > 0 + + +def test_oauth_callback_invalid_state_returns_400(monkeypatch, tmp_path) -> None: + client, module = _build_client(monkeypatch, tmp_path) + module.app.state.spotify_oauth_state = "expected-state" + + response = client.get("/api/spotify/oauth/callback?code=abc&state=wrong-state") + + assert response.status_code == 400 diff --git a/tests/test_spotify_oauth_refresh.py 
b/tests/test_spotify_oauth_refresh.py new file mode 100644 index 0000000..2a68d79 --- /dev/null +++ b/tests/test_spotify_oauth_refresh.py @@ -0,0 +1,79 @@ +from __future__ import annotations + +import time + +from spotify.oauth_store import SpotifyOAuthStore, SpotifyOAuthToken + + +def test_get_valid_token_returns_original_when_not_expired(tmp_path) -> None: + store = SpotifyOAuthStore(tmp_path / "oauth_refresh.sqlite") + token = SpotifyOAuthToken( + access_token="access-current", + refresh_token="refresh-current", + expires_at=int(time.time()) + 3600, + scope="user-library-read", + ) + store.save(token) + + result = store.get_valid_token(client_id="cid", client_secret="secret") + + assert result is not None + assert result.access_token == "access-current" + assert result.refresh_token == "refresh-current" + assert result.scope == "user-library-read" + + +def test_get_valid_token_refreshes_and_updates_db_when_expired(tmp_path, monkeypatch) -> None: + store = SpotifyOAuthStore(tmp_path / "oauth_refresh.sqlite") + old = SpotifyOAuthToken( + access_token="old-access", + refresh_token="old-refresh", + expires_at=int(time.time()) - 10, + scope="user-library-read", + ) + store.save(old) + + monkeypatch.setattr( + "spotify.oauth_store.refresh_access_token", + lambda client_id, client_secret, refresh_token: { + "access_token": "new-access", + "refresh_token": "new-refresh", + "expires_in": 7200, + "scope": "user-library-read playlist-read-private", + }, + ) + + result = store.get_valid_token(client_id="cid", client_secret="secret") + + assert result is not None + assert result.access_token == "new-access" + assert result.refresh_token == "new-refresh" + assert result.scope == "user-library-read playlist-read-private" + assert result.expires_at > int(time.time()) + + persisted = store.load() + assert persisted is not None + assert persisted.access_token == "new-access" + assert persisted.refresh_token == "new-refresh" + assert persisted.scope == "user-library-read 
playlist-read-private" + + +def test_get_valid_token_clears_token_when_refresh_fails(tmp_path, monkeypatch) -> None: + store = SpotifyOAuthStore(tmp_path / "oauth_refresh.sqlite") + token = SpotifyOAuthToken( + access_token="expired-access", + refresh_token="expired-refresh", + expires_at=int(time.time()) - 10, + scope="user-library-read", + ) + store.save(token) + + def _raise_refresh_error(client_id, client_secret, refresh_token): + raise RuntimeError("refresh failed") + + monkeypatch.setattr("spotify.oauth_store.refresh_access_token", _raise_refresh_error) + + result = store.get_valid_token(client_id="cid", client_secret="secret") + + assert result is None + assert store.load() is None From cfaba13a3245d9ce2dc90e43256bfbe9fea1649d Mon Sep 17 00:00:00 2001 From: z3ro-2 Date: Mon, 16 Feb 2026 15:19:27 -0600 Subject: [PATCH 20/45] feat(spotify-oauth): add token refresh + client injection with failure alerting Added refresh_access_token(...) in oauth_client.py for refresh-token exchange against Spotify token endpoint. Extended SpotifyOAuthStore.get_valid_token(...) in oauth_store.py to refresh expired tokens, persist updates, clear invalid tokens, and send best-effort Telegram notification on refresh failure. Updated Spotify client bootstrap path in main.py to prefer valid OAuth access tokens when available and gracefully fall back to public/client-credentials mode when unavailable. Added OAuth injection coverage in test_spotify_oauth_injection.py to verify client construction with and without access_token via monkeypatched token store behavior. 
--- api/main.py | 39 +++++++++++- spotify/client.py | 81 ++++++++++++++++++++++++ spotify/oauth_store.py | 27 +++++++- tests/test_spotify_oauth_injection.py | 90 +++++++++++++++++++++++++++ 4 files changed, 234 insertions(+), 3 deletions(-) create mode 100644 tests/test_spotify_oauth_injection.py diff --git a/api/main.py b/api/main.py index 5793020..fc0cd64 100644 --- a/api/main.py +++ b/api/main.py @@ -1710,6 +1710,40 @@ def _read_config_or_404(): return safe_json(_strip_deprecated_fields(config)) +def _spotify_client_credentials(config: dict | None) -> tuple[str, str]: + cfg = config or {} + spotify_cfg = (cfg.get("spotify") or {}) if isinstance(cfg, dict) else {} + client_id = str(spotify_cfg.get("client_id") or cfg.get("SPOTIFY_CLIENT_ID") or "").strip() + client_secret = str(spotify_cfg.get("client_secret") or cfg.get("SPOTIFY_CLIENT_SECRET") or "").strip() + return client_id, client_secret + + +def _build_spotify_client_with_optional_oauth(config: dict | None) -> SpotifyPlaylistClient: + """Build a Spotify client using OAuth access token when valid, else public mode.""" + client_id, client_secret = _spotify_client_credentials(config) + if not client_id or not client_secret: + return SpotifyPlaylistClient() + + store = SpotifyOAuthStore(Path(app.state.paths.db_path)) + existing = store.load() + try: + token = store.get_valid_token(client_id, client_secret, config=config if isinstance(config, dict) else None) + except Exception as exc: + logging.warning("Spotify OAuth token validation failed; using public mode: %s", exc) + token = None + + if token is not None: + return SpotifyPlaylistClient( + client_id=client_id, + client_secret=client_secret, + access_token=token.access_token, + ) + + if existing is not None: + logging.warning("Spotify OAuth token expired/invalid and was cleared; using public mode") + return SpotifyPlaylistClient(client_id=client_id, client_secret=client_secret) + + def _read_config_for_scheduler(): config_path = app.state.config_path if not 
os.path.exists(config_path): @@ -3389,7 +3423,7 @@ async def execute_intent(payload: dict = Body(...)): dispatcher_config["search_service"] = app.state.search_service db = PlaylistSnapshotStore(app.state.paths.db_path) queue = _IntentQueueAdapter() - spotify_client = SpotifyPlaylistClient() + spotify_client = _build_spotify_client_with_optional_oauth(config) return await dispatch_intent( intent_type=intent_type.value, identifier=identifier, @@ -3417,7 +3451,8 @@ async def preview_intent(payload: dict = Body(...)): if intent_type not in {IntentType.SPOTIFY_ALBUM, IntentType.SPOTIFY_PLAYLIST}: raise HTTPException(status_code=400, detail="intent preview not supported for this intent_type") - client = SpotifyPlaylistClient() + config = _read_config_or_404() + client = _build_spotify_client_with_optional_oauth(config) encoded = quote(identifier, safe="") try: if intent_type == IntentType.SPOTIFY_ALBUM: diff --git a/spotify/client.py b/spotify/client.py index 0c31a95..6599538 100644 --- a/spotify/client.py +++ b/spotify/client.py @@ -4,6 +4,7 @@ import asyncio import base64 +import hashlib import os import time import urllib.parse @@ -36,15 +37,20 @@ def __init__( *, client_id: str | None = None, client_secret: str | None = None, + access_token: str | None = None, timeout_sec: int = 20, ) -> None: self.client_id = client_id or os.environ.get("SPOTIFY_CLIENT_ID") self.client_secret = client_secret or os.environ.get("SPOTIFY_CLIENT_SECRET") self.timeout_sec = timeout_sec + self._provided_access_token = (access_token or "").strip() or None self._access_token: str | None = None self._access_token_expire_at: float = 0.0 def _get_access_token(self) -> str: + if self._provided_access_token: + return self._provided_access_token + if not self.client_id or not self.client_secret: raise RuntimeError("Spotify credentials are required") @@ -141,6 +147,81 @@ def get_playlist_items(self, playlist_id: str) -> tuple[str, list[NormalizedItem return str(snapshot_id), items + async def 
get_liked_songs(self) -> tuple[str, list[dict[str, Any]]]: + """Fetch the authenticated user's saved tracks from Spotify. + + Returns: + A tuple of ``(snapshot_id, items)`` where ``snapshot_id`` is a deterministic + SHA-256 hash of the ordered track-id sequence, and ``items`` is an ordered + list of normalized track dicts matching playlist ingestion structure. + """ + if not self._provided_access_token: + raise RuntimeError("Spotify OAuth access_token is required for liked songs") + + fields = ( + "items(added_at,track(id,name,duration_ms,external_ids(isrc)," + "artists(name),album(id,name,release_date))),next,total" + ) + offset = 0 + limit = 50 + position = 0 + items: list[dict[str, Any]] = [] + ordered_track_ids: list[str] = [] + + while True: + payload = await _request_json_with_retry( + self, + "https://api.spotify.com/v1/me/tracks", + params={"limit": limit, "offset": offset, "fields": fields}, + ) + raw_items = payload.get("items") or [] + + for raw in raw_items: + track = raw.get("track") + if not isinstance(track, dict): + continue + + track_id = track.get("id") + if not track_id: + continue + + artists = track.get("artists") or [] + artist_names = [ + str(artist.get("name")).strip() + for artist in artists + if isinstance(artist, dict) and artist.get("name") + ] + first_artist = artist_names[0] if artist_names else None + album = track.get("album") or {} + external_ids = track.get("external_ids") or {} + + items.append( + { + "spotify_track_id": track_id, + "position": position, + "added_at": raw.get("added_at"), + "artist": first_artist, + "title": track.get("name"), + "album": album.get("name"), + "duration_ms": track.get("duration_ms"), + "isrc": external_ids.get("isrc"), + "artists": artist_names, + "album_id": album.get("id"), + "album_release_date": album.get("release_date"), + } + ) + ordered_track_ids.append(str(track_id)) + position += 1 + + next_url = payload.get("next") + if not next_url: + break + offset += limit + + snapshot_source = 
"\n".join(ordered_track_ids).encode("utf-8") + snapshot_id = hashlib.sha256(snapshot_source).hexdigest() + return snapshot_id, items + async def _request_json_with_retry( spotify_client: SpotifyPlaylistClient, diff --git a/spotify/oauth_store.py b/spotify/oauth_store.py index 4be9a09..20fa7f6 100644 --- a/spotify/oauth_store.py +++ b/spotify/oauth_store.py @@ -117,7 +117,12 @@ def clear(self) -> None: finally: conn.close() - def get_valid_token(self, client_id: str, client_secret: str) -> Optional[SpotifyOAuthToken]: + def get_valid_token( + self, + client_id: str, + client_secret: str, + config: Optional[dict] = None, + ) -> Optional[SpotifyOAuthToken]: """Return a valid token, refreshing and persisting it when expired. Behavior: @@ -154,4 +159,24 @@ def get_valid_token(self, client_id: str, client_secret: str) -> Optional[Spotif return refreshed except Exception: self.clear() + telegram_cfg = (config or {}).get("telegram") if isinstance(config, dict) else None + if isinstance(telegram_cfg, dict) and bool(telegram_cfg.get("enabled")): + try: + send_telegram_message( + config, + "Spotify OAuth token expired and refresh failed. Reconnect required.", + ) + except Exception: + # Notification path is best-effort only. 
+ pass return None + + +def send_telegram_message(config: Optional[dict], message: str) -> bool: + """Best-effort Telegram notification hook for OAuth lifecycle events.""" + try: + from engine.core import telegram_notify + + return bool(telegram_notify(config or {}, message)) + except Exception: + return False diff --git a/tests/test_spotify_oauth_injection.py b/tests/test_spotify_oauth_injection.py new file mode 100644 index 0000000..6e27289 --- /dev/null +++ b/tests/test_spotify_oauth_injection.py @@ -0,0 +1,90 @@ +from __future__ import annotations + +import importlib +import sys +from types import SimpleNamespace + +import pytest + +pytest.importorskip("fastapi") + + +class _FakeSpotifyClient: + def __init__(self, **kwargs): + self.kwargs = kwargs + + +class _FakeStoreWithToken: + def __init__(self, _db_path): + self._token = SimpleNamespace(access_token="oauth-access-token") + + def load(self): + return self._token + + def get_valid_token(self, _client_id, _client_secret, config=None): + return self._token + + +class _FakeStoreNoToken: + def __init__(self, _db_path): + self._token = None + + def load(self): + return self._token + + def get_valid_token(self, _client_id, _client_secret, config=None): + return None + + +def _import_api_main(monkeypatch): + monkeypatch.setattr(sys, "version_info", (3, 11, 0, "final", 0), raising=False) + monkeypatch.setattr(sys, "version", "3.11.9", raising=False) + sys.modules.pop("api.main", None) + module = importlib.import_module("api.main") + module.app.router.on_startup.clear() + module.app.router.on_shutdown.clear() + return module + + +def test_build_spotify_client_uses_oauth_access_token_when_valid(monkeypatch, tmp_path) -> None: + module = _import_api_main(monkeypatch) + module.app.state.paths = SimpleNamespace(db_path=str(tmp_path / "oauth.sqlite")) + + monkeypatch.setattr(module, "SpotifyOAuthStore", _FakeStoreWithToken) + monkeypatch.setattr(module, "SpotifyPlaylistClient", _FakeSpotifyClient) + + client = 
module._build_spotify_client_with_optional_oauth( + { + "spotify": { + "client_id": "client-id", + "client_secret": "client-secret", + } + } + ) + + assert isinstance(client, _FakeSpotifyClient) + assert client.kwargs["client_id"] == "client-id" + assert client.kwargs["client_secret"] == "client-secret" + assert client.kwargs["access_token"] == "oauth-access-token" + + +def test_build_spotify_client_falls_back_to_public_mode_when_no_token(monkeypatch, tmp_path) -> None: + module = _import_api_main(monkeypatch) + module.app.state.paths = SimpleNamespace(db_path=str(tmp_path / "oauth.sqlite")) + + monkeypatch.setattr(module, "SpotifyOAuthStore", _FakeStoreNoToken) + monkeypatch.setattr(module, "SpotifyPlaylistClient", _FakeSpotifyClient) + + client = module._build_spotify_client_with_optional_oauth( + { + "spotify": { + "client_id": "client-id", + "client_secret": "client-secret", + } + } + ) + + assert isinstance(client, _FakeSpotifyClient) + assert client.kwargs["client_id"] == "client-id" + assert client.kwargs["client_secret"] == "client-secret" + assert "access_token" not in client.kwargs From 6d9f35967053777e184369129429699f6f3fc774 Mon Sep 17 00:00:00 2001 From: z3ro-2 Date: Mon, 16 Feb 2026 15:37:44 -0600 Subject: [PATCH 21/45] feat(spotify-saved-albums): add OAuth saved-albums sync, scheduler job, and deterministic tests Added SpotifyPlaylistClient.get_saved_albums() in client.py: OAuth-required /v1/me/albums fetch with pagination. Per-album track expansion via album endpoint + track pagination. Normalized album + ordered track payloads for album-sync compatibility. Deterministic snapshot hash from ordered album IDs. Added spotify_saved_albums_watch_job(...) in spotify_playlist_watch.py: Validates OAuth token before sync. Loads saved albums snapshot, diffs by album IDs, and triggers run_spotify_album_sync(...) only for newly added albums. Persists snapshot under __spotify_saved_albums__. Best-effort M3U rebuild as "Spotify - Saved Albums". 
Non-destructive behavior on removals (no local file deletions). Extended scheduler wiring in main.py: Added periodic job spotify_saved_albums_watch. Default interval 30 minutes (configurable). Silent skip when no valid OAuth token. Auto-activates immediately after successful OAuth callback. Preserves existing playlist and liked-songs scheduling behavior. Added network-free tests in test_spotify_saved_albums_sync.py: Verifies only new albums trigger album sync when token is valid. Verifies clean skip path when OAuth token is unavailable. --- api/main.py | 175 ++++++++++++ scheduler/jobs/spotify_playlist_watch.py | 343 ++++++++++++++++++++++- spotify/client.py | 132 +++++++++ tests/test_spotify_liked_songs_sync.py | 163 +++++++++++ tests/test_spotify_saved_albums_sync.py | 163 +++++++++++ 5 files changed, 962 insertions(+), 14 deletions(-) create mode 100644 tests/test_spotify_liked_songs_sync.py create mode 100644 tests/test_spotify_saved_albums_sync.py diff --git a/api/main.py b/api/main.py index fc0cd64..7c1fab2 100644 --- a/api/main.py +++ b/api/main.py @@ -114,6 +114,10 @@ def _require_python_311(): from spotify.client import SpotifyPlaylistClient from spotify.oauth_store import SpotifyOAuthStore, SpotifyOAuthToken from db.playlist_snapshots import PlaylistSnapshotStore +from scheduler.jobs.spotify_playlist_watch import ( + spotify_liked_songs_watch_job, + spotify_saved_albums_watch_job, +) APP_NAME = "Retreivr API" STATUS_SCHEMA_VERSION = 1 @@ -125,8 +129,12 @@ def _require_python_311(): _TRUST_PROXY = os.environ.get("YT_ARCHIVER_TRUST_PROXY", "").strip().lower() in {"1", "true", "yes", "on"} SCHEDULE_JOB_ID = "archive_schedule" WATCHER_JOB_ID = "playlist_watcher" +LIKED_SONGS_JOB_ID = "spotify_liked_songs_watch" +SAVED_ALBUMS_JOB_ID = "spotify_saved_albums_watch" DEFERRED_RUN_JOB_ID = "deferred_run" WATCHER_QUIET_WINDOW_SECONDS = 60 +DEFAULT_LIKED_SONGS_SYNC_INTERVAL_MINUTES = 15 +DEFAULT_SAVED_ALBUMS_SYNC_INTERVAL_MINUTES = 30 OAUTH_SCOPES = 
["https://www.googleapis.com/auth/youtube.readonly"] OAUTH_SESSION_TTL = timedelta(minutes=15) _OAUTH_SESSIONS = {} @@ -628,6 +636,8 @@ async def startup(): app.state.schedule_config = schedule_config app.state.scheduler.start() _apply_schedule_config(schedule_config) + _apply_liked_songs_schedule(config) + _apply_saved_albums_schedule(config) if schedule_config.get("enabled") and schedule_config.get("run_on_startup"): asyncio.create_task(_handle_scheduled_run()) if schedule_config.get("enabled"): @@ -2047,6 +2057,20 @@ def _schedule_tick(): asyncio.run_coroutine_threadsafe(_handle_scheduled_run(), loop) +def _liked_songs_schedule_tick(): + loop = app.state.loop + if not loop or loop.is_closed(): + return + asyncio.run_coroutine_threadsafe(_handle_liked_songs_scheduled_run(), loop) + + +def _saved_albums_schedule_tick(): + loop = app.state.loop + if not loop or loop.is_closed(): + return + asyncio.run_coroutine_threadsafe(_handle_saved_albums_scheduled_run(), loop) + + async def _handle_scheduled_run(): if app.state.running: logging.info("Scheduled run skipped; run already active") @@ -2067,6 +2091,151 @@ async def _handle_scheduled_run(): _set_schedule_state(next_run=_get_next_run_iso()) +def _resolve_liked_songs_interval_minutes(config: dict | None) -> int: + cfg = config or {} + spotify_cfg = (cfg.get("spotify") or {}) if isinstance(cfg, dict) else {} + raw_value = spotify_cfg.get("liked_songs_sync_interval_minutes") + if raw_value is None: + raw_value = cfg.get("liked_songs_sync_interval_minutes") + try: + interval = int(raw_value) + except (TypeError, ValueError): + interval = DEFAULT_LIKED_SONGS_SYNC_INTERVAL_MINUTES + return max(1, interval) + + +def _resolve_saved_albums_interval_minutes(config: dict | None) -> int: + cfg = config or {} + spotify_cfg = (cfg.get("spotify") or {}) if isinstance(cfg, dict) else {} + raw_value = spotify_cfg.get("saved_albums_sync_interval_minutes") + if raw_value is None: + raw_value = 
cfg.get("saved_albums_sync_interval_minutes") + try: + interval = int(raw_value) + except (TypeError, ValueError): + interval = DEFAULT_SAVED_ALBUMS_SYNC_INTERVAL_MINUTES + return max(1, interval) + + +def _has_connected_spotify_oauth_token(db_path: str) -> bool: + try: + return SpotifyOAuthStore(Path(db_path)).load() is not None + except Exception: + return False + + +async def _handle_liked_songs_scheduled_run() -> None: + config = _read_config_for_scheduler() + if not config: + return + + client_id, client_secret = _spotify_client_credentials(config) + if not client_id or not client_secret: + return + + store = SpotifyOAuthStore(Path(app.state.paths.db_path)) + token = store.get_valid_token(client_id, client_secret, config=config if isinstance(config, dict) else None) + if token is None: + return + + try: + await spotify_liked_songs_watch_job( + config=config, + db=PlaylistSnapshotStore(app.state.paths.db_path), + queue=_IntentQueueAdapter(), + spotify_client=SpotifyPlaylistClient( + client_id=client_id, + client_secret=client_secret, + access_token=token.access_token, + ), + search_service=app.state.search_service, + ) + except Exception: + logging.exception("Scheduled Spotify Liked Songs sync failed") + + +async def _handle_saved_albums_scheduled_run() -> None: + config = _read_config_for_scheduler() + if not config: + return + + client_id, client_secret = _spotify_client_credentials(config) + if not client_id or not client_secret: + return + + store = SpotifyOAuthStore(Path(app.state.paths.db_path)) + token = store.get_valid_token(client_id, client_secret, config=config if isinstance(config, dict) else None) + if token is None: + return + + try: + await spotify_saved_albums_watch_job( + config=config, + db=PlaylistSnapshotStore(app.state.paths.db_path), + queue=_IntentQueueAdapter(), + spotify_client=SpotifyPlaylistClient( + client_id=client_id, + client_secret=client_secret, + access_token=token.access_token, + ), + search_service=app.state.search_service, + 
) + except Exception: + logging.exception("Scheduled Spotify Saved Albums sync failed") + + +def _apply_liked_songs_schedule(config: dict | None) -> None: + scheduler = app.state.scheduler + if not scheduler: + return + + existing = scheduler.get_job(LIKED_SONGS_JOB_ID) + if existing: + scheduler.remove_job(LIKED_SONGS_JOB_ID) + + # Do not schedule liked songs sync until OAuth has been connected at least once. + if not _has_connected_spotify_oauth_token(app.state.paths.db_path): + return + + interval_min = _resolve_liked_songs_interval_minutes(config) + start_date = datetime.now(timezone.utc) + timedelta(minutes=interval_min) + scheduler.add_job( + _liked_songs_schedule_tick, + trigger=IntervalTrigger(minutes=interval_min, start_date=start_date), + id=LIKED_SONGS_JOB_ID, + replace_existing=True, + max_instances=1, + coalesce=True, + misfire_grace_time=30, + ) + + +def _apply_saved_albums_schedule(config: dict | None) -> None: + scheduler = app.state.scheduler + if not scheduler: + return + + existing = scheduler.get_job(SAVED_ALBUMS_JOB_ID) + if existing: + scheduler.remove_job(SAVED_ALBUMS_JOB_ID) + + # Do not schedule saved albums sync until OAuth has been connected at least once. 
+ if not _has_connected_spotify_oauth_token(app.state.paths.db_path): + return + + interval_min = _resolve_saved_albums_interval_minutes(config) + start_date = datetime.now(timezone.utc) + timedelta(minutes=interval_min) + scheduler.add_job( + _saved_albums_schedule_tick, + trigger=IntervalTrigger(minutes=interval_min, start_date=start_date), + id=SAVED_ALBUMS_JOB_ID, + replace_existing=True, + max_instances=1, + coalesce=True, + misfire_grace_time=30, + ) + + def _apply_schedule_config(schedule): scheduler = app.state.scheduler if not scheduler: @@ -3103,6 +3272,8 @@ async def api_update_schedule(payload: ScheduleRequest): app.state.schedule_config = current _apply_schedule_config(current) + _apply_liked_songs_schedule(config) + _apply_saved_albums_schedule(config) return _schedule_response() @@ -3707,6 +3878,8 @@ async def spotify_oauth_callback(code: str | None = None, state: str | None = No scope=scope, ) ) + _apply_liked_songs_schedule(config) + _apply_saved_albums_schedule(config) app.state.spotify_oauth_state = None return {"status": "connected"} @@ -4099,6 +4272,8 @@ async def api_put_config(payload: dict = Body(...)): schedule = _merge_schedule_config(payload.get("schedule")) app.state.schedule_config = schedule _apply_schedule_config(schedule) + _apply_liked_songs_schedule(payload) + _apply_saved_albums_schedule(payload) policy = normalize_watch_policy(payload) if getattr(normalize_watch_policy, "valid", True): app.state.watch_policy = policy diff --git a/scheduler/jobs/spotify_playlist_watch.py b/scheduler/jobs/spotify_playlist_watch.py index 2fa6267..898329e 100644 --- a/scheduler/jobs/spotify_playlist_watch.py +++ b/scheduler/jobs/spotify_playlist_watch.py @@ -14,9 +14,11 @@ from playlist.rebuild import rebuild_playlist_from_tracks from spotify.client import SpotifyPlaylistClient, get_playlist_items from spotify.diff import diff_playlist +from spotify.oauth_store import SpotifyOAuthStore from spotify.resolve import resolve_spotify_track 
SPOTIFY_LIKED_SONGS_PLAYLIST_ID = "__spotify_liked_songs__" +SPOTIFY_SAVED_ALBUMS_PLAYLIST_ID = "__spotify_saved_albums__" def _load_previous_snapshot(db: Any, playlist_id: str) -> tuple[str | None, list[dict[str, Any]]]: @@ -91,6 +93,39 @@ def _load_downloaded_track_paths(playlist_id: str) -> list[str]: pass +def _load_downloaded_track_paths_for_playlist_ids(playlist_ids: list[str]) -> list[str]: + cleaned = [str(pid).strip() for pid in playlist_ids if str(pid).strip()] + if not cleaned: + return [] + + conn: sqlite3.Connection | None = None + try: + conn = sqlite3.connect(_resolve_db_path(), check_same_thread=False, timeout=30) + conn.row_factory = sqlite3.Row + cur = conn.cursor() + placeholders = ", ".join(["?"] * len(cleaned)) + cur.execute( + f""" + SELECT file_path + FROM downloaded_music_tracks + WHERE playlist_id IN ({placeholders}) + ORDER BY downloaded_at ASC, id ASC + """, + tuple(cleaned), + ) + rows = cur.fetchall() + return [str(row["file_path"]) for row in rows if row["file_path"]] + except sqlite3.Error: + logging.exception("Failed to load downloaded tracks for playlist IDs: %s", cleaned) + return [] + finally: + try: + if conn is not None: + conn.close() + except Exception: + pass + + def _resolve_playlist_dirs(config: dict[str, Any] | None) -> tuple[Path, Path]: cfg = config or {} music_root = Path(str(cfg.get("music_download_folder") or "Music")) @@ -104,6 +139,43 @@ def _resolve_playlist_dirs(config: dict[str, Any] | None) -> tuple[Path, Path]: return playlist_root, music_root +def _spotify_client_credentials_from_config(config: dict[str, Any] | None) -> tuple[str, str]: + cfg = config or {} + spotify_cfg = (cfg.get("spotify") or {}) if isinstance(cfg, dict) else {} + client_id = str(spotify_cfg.get("client_id") or cfg.get("SPOTIFY_CLIENT_ID") or "").strip() + client_secret = str(spotify_cfg.get("client_secret") or cfg.get("SPOTIFY_CLIENT_SECRET") or "").strip() + return client_id, client_secret + + +def _resolve_db_path_from_runtime(db: Any) 
-> str: + if hasattr(db, "db_path"): + value = str(getattr(db, "db_path") or "").strip() + if value: + return value + return _resolve_db_path() + + +def _best_effort_rebuild_playlist_m3u( + *, + playlist_id: str, + playlist_name: str, + config: dict[str, Any] | None, +) -> None: + """Rebuild playlist M3U from downloaded canonical paths without raising errors.""" + try: + track_paths = _load_downloaded_track_paths(playlist_id) + playlist_root, music_root = _resolve_playlist_dirs(config) + rebuild_playlist_from_tracks( + playlist_name=(playlist_name or playlist_id).strip() or playlist_id, + playlist_root=playlist_root, + music_root=music_root, + track_file_paths=track_paths, + ) + logging.info("Playlist M3U updated: %s (%d tracks)", playlist_name, len(track_paths)) + except Exception: + logging.exception("Playlist M3U rebuild failed for playlist %s", playlist_id) + + def _enqueue_added_track(queue: Any, item: dict[str, Any]) -> None: if callable(queue): queue(item) @@ -139,6 +211,258 @@ async def enqueue_spotify_track(queue, spotify_track: dict, search_service, play queue.enqueue(payload) +async def spotify_liked_songs_watch_job(config, db, queue, spotify_client, search_service): + """Sync Spotify Liked Songs using OAuth-backed `/v1/me/tracks` snapshots.""" + client_id, client_secret = _spotify_client_credentials_from_config(config if isinstance(config, dict) else None) + if not client_id or not client_secret: + logging.info("Liked Songs sync skipped: Spotify credentials not configured") + return {"status": "skipped", "playlist_id": SPOTIFY_LIKED_SONGS_PLAYLIST_ID, "enqueued": 0} + + oauth_store = SpotifyOAuthStore(Path(_resolve_db_path_from_runtime(db))) + token = oauth_store.get_valid_token(client_id, client_secret, config=config if isinstance(config, dict) else None) + if token is None: + logging.info("Liked Songs sync skipped: no valid Spotify OAuth token") + return {"status": "skipped", "playlist_id": SPOTIFY_LIKED_SONGS_PLAYLIST_ID, "enqueued": 0} + + 
liked_client: Any = spotify_client + if isinstance(liked_client, SpotifyPlaylistClient): + liked_client._provided_access_token = token.access_token + elif not hasattr(liked_client, "get_liked_songs"): + liked_client = SpotifyPlaylistClient( + client_id=client_id, + client_secret=client_secret, + access_token=token.access_token, + ) + + try: + current_snapshot_id, current_items = await liked_client.get_liked_songs() + except Exception as exc: + logging.exception("Liked Songs fetch failed") + return { + "status": "error", + "playlist_id": SPOTIFY_LIKED_SONGS_PLAYLIST_ID, + "error": f"spotify_fetch_failed: {exc}", + } + + try: + previous_snapshot_id, previous_items = _load_previous_snapshot(db, SPOTIFY_LIKED_SONGS_PLAYLIST_ID) + except Exception as exc: + logging.exception("Liked Songs snapshot load failed") + return { + "status": "error", + "playlist_id": SPOTIFY_LIKED_SONGS_PLAYLIST_ID, + "error": f"snapshot_read_failed: {exc}", + } + + if previous_snapshot_id == current_snapshot_id: + return { + "status": "unchanged", + "playlist_id": SPOTIFY_LIKED_SONGS_PLAYLIST_ID, + "snapshot_id": current_snapshot_id, + "enqueued": 0, + } + + diff = diff_playlist(previous_items, current_items) + added_items = list(diff["added"]) + enqueued = 0 + enqueue_errors: list[str] = [] + for track in added_items: + try: + await enqueue_spotify_track( + queue, + track, + search_service, + playlist_id=SPOTIFY_LIKED_SONGS_PLAYLIST_ID, + ) + enqueued += 1 + except Exception as exc: + track_id = track.get("spotify_track_id") + enqueue_errors.append(f"{track_id}: {exc}") + logging.exception("Failed to enqueue Liked Songs track %s", track_id) + + try: + db.store_snapshot( + SPOTIFY_LIKED_SONGS_PLAYLIST_ID, + str(current_snapshot_id), + current_items, + ) + except Exception as exc: + logging.exception("Liked Songs snapshot store failed") + return { + "status": "error", + "playlist_id": SPOTIFY_LIKED_SONGS_PLAYLIST_ID, + "snapshot_id": current_snapshot_id, + "error": f"snapshot_store_failed: 
{exc}", + "enqueued": enqueued, + "added_count": len(added_items), + "removed_count": len(diff["removed"]), + "moved_count": len(diff["moved"]), + "enqueue_errors": enqueue_errors, + } + + _best_effort_rebuild_playlist_m3u( + playlist_id=SPOTIFY_LIKED_SONGS_PLAYLIST_ID, + playlist_name=get_liked_songs_playlist_name(), + config=config if isinstance(config, dict) else None, + ) + + return { + "status": "updated", + "playlist_id": SPOTIFY_LIKED_SONGS_PLAYLIST_ID, + "snapshot_id": current_snapshot_id, + "enqueued": enqueued, + "added_count": len(added_items), + "removed_count": len(diff["removed"]), + "moved_count": len(diff["moved"]), + "enqueue_errors": enqueue_errors, + } + + +async def spotify_saved_albums_watch_job(config, db, queue, spotify_client, search_service): + """Sync Spotify Saved Albums via OAuth and enqueue newly added albums.""" + client_id, client_secret = _spotify_client_credentials_from_config(config if isinstance(config, dict) else None) + if not client_id or not client_secret: + logging.info("Saved Albums sync skipped: Spotify credentials not configured") + return {"status": "skipped", "playlist_id": SPOTIFY_SAVED_ALBUMS_PLAYLIST_ID, "enqueued": 0} + + oauth_store = SpotifyOAuthStore(Path(_resolve_db_path_from_runtime(db))) + token = oauth_store.get_valid_token(client_id, client_secret, config=config if isinstance(config, dict) else None) + if token is None: + logging.info("Saved Albums sync skipped: no valid Spotify OAuth token") + return {"status": "skipped", "playlist_id": SPOTIFY_SAVED_ALBUMS_PLAYLIST_ID, "enqueued": 0} + + saved_albums_client: Any = spotify_client + if isinstance(saved_albums_client, SpotifyPlaylistClient): + saved_albums_client._provided_access_token = token.access_token + elif not hasattr(saved_albums_client, "get_saved_albums"): + saved_albums_client = SpotifyPlaylistClient( + client_id=client_id, + client_secret=client_secret, + access_token=token.access_token, + ) + + try: + current_snapshot_id, current_albums = await 
saved_albums_client.get_saved_albums() + except Exception as exc: + logging.exception("Saved Albums fetch failed") + return { + "status": "error", + "playlist_id": SPOTIFY_SAVED_ALBUMS_PLAYLIST_ID, + "error": f"spotify_fetch_failed: {exc}", + } + + current_snapshot_items: list[dict[str, Any]] = [] + album_map: dict[str, dict[str, Any]] = {} + for idx, album in enumerate(current_albums or []): + album_id = str((album or {}).get("album_id") or "").strip() + if not album_id: + continue + current_snapshot_items.append( + { + "spotify_track_id": album_id, + "position": idx, + "added_at": (album or {}).get("added_at"), + } + ) + album_map[album_id] = dict(album) + + try: + previous_snapshot_id, previous_items = _load_previous_snapshot(db, SPOTIFY_SAVED_ALBUMS_PLAYLIST_ID) + except Exception as exc: + logging.exception("Saved Albums snapshot load failed") + return { + "status": "error", + "playlist_id": SPOTIFY_SAVED_ALBUMS_PLAYLIST_ID, + "error": f"snapshot_read_failed: {exc}", + } + + if previous_snapshot_id == current_snapshot_id: + return { + "status": "unchanged", + "playlist_id": SPOTIFY_SAVED_ALBUMS_PLAYLIST_ID, + "snapshot_id": current_snapshot_id, + "enqueued": 0, + } + + diff = diff_playlist(previous_items, current_snapshot_items) + added_albums = list(diff["added"]) + enqueued = 0 + enqueue_errors: list[str] = [] + + dispatcher_config = dict(config) if isinstance(config, dict) else {} + dispatcher_config["search_service"] = search_service + + # Local import avoids a module import cycle with api.intent_dispatcher. 
+ from api.intent_dispatcher import run_spotify_album_sync + + for album_item in added_albums: + album_id = str((album_item or {}).get("spotify_track_id") or "").strip() + if not album_id: + continue + try: + await run_spotify_album_sync( + album_id=album_id, + config=dispatcher_config, + db=db, + queue=queue, + spotify_client=saved_albums_client, + ) + enqueued += 1 + except Exception as exc: + enqueue_errors.append(f"{album_id}: {exc}") + logging.exception("Saved Albums enqueue failed for album %s", album_id) + + try: + db.store_snapshot( + SPOTIFY_SAVED_ALBUMS_PLAYLIST_ID, + str(current_snapshot_id), + current_snapshot_items, + ) + except Exception as exc: + logging.exception("Saved Albums snapshot store failed") + return { + "status": "error", + "playlist_id": SPOTIFY_SAVED_ALBUMS_PLAYLIST_ID, + "snapshot_id": current_snapshot_id, + "error": f"snapshot_store_failed: {exc}", + "enqueued": enqueued, + "added_count": len(added_albums), + "removed_count": len(diff["removed"]), + "moved_count": len(diff["moved"]), + "enqueue_errors": enqueue_errors, + } + + # Best effort: rebuild a virtual "Saved Albums" M3U from album-scoped downloads. 
+ try: + album_playlist_ids = [ + f"spotify_album_{str((item or {}).get('spotify_track_id') or '').strip()}" + for item in current_snapshot_items + if str((item or {}).get("spotify_track_id") or "").strip() + ] + track_paths = _load_downloaded_track_paths_for_playlist_ids(album_playlist_ids) + playlist_root, music_root = _resolve_playlist_dirs(config if isinstance(config, dict) else None) + rebuild_playlist_from_tracks( + playlist_name="Spotify - Saved Albums", + playlist_root=playlist_root, + music_root=music_root, + track_file_paths=track_paths, + ) + logging.info("Playlist M3U updated: Spotify - Saved Albums (%d tracks)", len(track_paths)) + except Exception: + logging.exception("Saved Albums M3U rebuild failed") + + return { + "status": "updated", + "playlist_id": SPOTIFY_SAVED_ALBUMS_PLAYLIST_ID, + "snapshot_id": current_snapshot_id, + "enqueued": enqueued, + "added_count": len(added_albums), + "removed_count": len(diff["removed"]), + "moved_count": len(diff["moved"]), + "enqueue_errors": enqueue_errors, + } + + def playlist_watch_job( spotify_client, db, @@ -203,20 +527,11 @@ def playlist_watch_job( "enqueue_errors": enqueue_errors, } - # Best effort: refresh the playlist M3U from canonical downloaded file paths. 
- try: - track_paths = _load_downloaded_track_paths(pid) - resolved_playlist_name = (playlist_name or pid).strip() or pid - playlist_root, music_root = _resolve_playlist_dirs(config) - rebuild_playlist_from_tracks( - playlist_name=resolved_playlist_name, - playlist_root=playlist_root, - music_root=music_root, - track_file_paths=track_paths, - ) - logging.info("Playlist M3U updated: %s (%d tracks)", resolved_playlist_name, len(track_paths)) - except Exception: - logging.exception("Playlist M3U rebuild failed for playlist %s", pid) + _best_effort_rebuild_playlist_m3u( + playlist_id=pid, + playlist_name=(playlist_name or pid).strip() or pid, + config=config, + ) return { "status": "updated", diff --git a/spotify/client.py b/spotify/client.py index 6599538..126df08 100644 --- a/spotify/client.py +++ b/spotify/client.py @@ -222,6 +222,138 @@ async def get_liked_songs(self) -> tuple[str, list[dict[str, Any]]]: snapshot_id = hashlib.sha256(snapshot_source).hexdigest() return snapshot_id, items + async def get_saved_albums(self) -> tuple[str, list[dict[str, Any]]]: + """Fetch authenticated user's saved albums from Spotify. + + Returns: + A tuple ``(snapshot_id, items)`` where: + - ``snapshot_id`` is a deterministic SHA-256 hash of ordered album IDs. + - ``items`` is an ordered list of album dicts containing album metadata + and normalized ordered track lists suitable for album sync flows. 
+ """ + if not self._provided_access_token: + raise RuntimeError("Spotify OAuth access_token is required for saved albums") + + offset = 0 + limit = 50 + saved_albums: list[dict[str, Any]] = [] + ordered_album_ids: list[str] = [] + + while True: + payload = await _request_json_with_retry( + self, + "https://api.spotify.com/v1/me/albums", + params={ + "limit": limit, + "offset": offset, + "fields": "items(added_at,album(id,name,artists(name),release_date,total_tracks)),next,total", + }, + ) + + for entry in payload.get("items") or []: + album = entry.get("album") + if not isinstance(album, dict): + continue + album_id = str(album.get("id") or "").strip() + if not album_id: + continue + ordered_album_ids.append(album_id) + saved_albums.append( + { + "album_id": album_id, + "added_at": entry.get("added_at"), + "name": album.get("name"), + "artists": [ + str(artist.get("name")).strip() + for artist in (album.get("artists") or []) + if isinstance(artist, dict) and artist.get("name") + ], + "release_date": album.get("release_date"), + "total_tracks": album.get("total_tracks"), + } + ) + + next_url = payload.get("next") + if not next_url: + break + offset += limit + + album_items: list[dict[str, Any]] = [] + for position, album_entry in enumerate(saved_albums): + album_id = str(album_entry.get("album_id") or "").strip() + encoded_album_id = urllib.parse.quote(album_id, safe="") + album_payload = await _request_json_with_retry( + self, + f"https://api.spotify.com/v1/albums/{encoded_album_id}", + params={ + "fields": ( + "id,name,artists(name),release_date,total_tracks," + "tracks(items(id,name,duration_ms,track_number,disc_number,artists(name),external_ids(isrc)),next)" + ) + }, + ) + + album_name = album_payload.get("name") or album_entry.get("name") + album_artists = [ + str(artist.get("name")).strip() + for artist in (album_payload.get("artists") or []) + if isinstance(artist, dict) and artist.get("name") + ] + tracks_page = album_payload.get("tracks") or {} + tracks: 
list[dict[str, Any]] = [] + track_position = 0 + while True: + for raw_track in tracks_page.get("items") or []: + if not isinstance(raw_track, dict): + continue + track_id = raw_track.get("id") + if not track_id: + continue + artists = raw_track.get("artists") or [] + first_artist = ( + artists[0].get("name") + if artists and isinstance(artists[0], dict) + else (album_artists[0] if album_artists else None) + ) + external_ids = raw_track.get("external_ids") or {} + tracks.append( + { + "spotify_track_id": track_id, + "position": track_position, + "artist": first_artist, + "title": raw_track.get("name"), + "album": album_name, + "duration_ms": raw_track.get("duration_ms"), + "isrc": external_ids.get("isrc"), + "track_num": raw_track.get("track_number"), + "disc_num": raw_track.get("disc_number"), + } + ) + track_position += 1 + + next_tracks_url = tracks_page.get("next") + if not next_tracks_url: + break + tracks_page = await _request_json_with_retry(self, str(next_tracks_url)) + + album_items.append( + { + "album_id": album_id, + "position": position, + "added_at": album_entry.get("added_at"), + "name": album_name, + "artist": album_artists[0] if album_artists else None, + "artists": album_artists, + "release_date": album_payload.get("release_date") or album_entry.get("release_date"), + "total_tracks": album_payload.get("total_tracks") or album_entry.get("total_tracks"), + "tracks": tracks, + } + ) + + snapshot_source = "\n".join(ordered_album_ids).encode("utf-8") + snapshot_id = hashlib.sha256(snapshot_source).hexdigest() + return snapshot_id, album_items + async def _request_json_with_retry( spotify_client: SpotifyPlaylistClient, diff --git a/tests/test_spotify_liked_songs_sync.py b/tests/test_spotify_liked_songs_sync.py new file mode 100644 index 0000000..2e47c5a --- /dev/null +++ b/tests/test_spotify_liked_songs_sync.py @@ -0,0 +1,163 @@ +from __future__ import annotations + +import asyncio +from types import SimpleNamespace +from typing import Any + +from 
scheduler.jobs.spotify_playlist_watch import ( + SPOTIFY_LIKED_SONGS_PLAYLIST_ID, + get_liked_songs_playlist_name, + spotify_liked_songs_watch_job, +) + + +class _FakeSpotifyClient: + def __init__(self, snapshot_id: str, items: list[dict[str, Any]]) -> None: + self.snapshot_id = snapshot_id + self.items = items + + async def get_liked_songs(self) -> tuple[str, list[dict[str, Any]]]: + return self.snapshot_id, list(self.items) + + +class _FakeSnapshotStore: + def __init__(self, latest_snapshot: dict[str, Any] | None) -> None: + self.latest_snapshot = latest_snapshot + self.store_calls: list[tuple[str, str, list[dict[str, Any]]]] = [] + + def get_latest_snapshot(self, playlist_id: str) -> dict[str, Any] | None: + if self.latest_snapshot is None: + return None + return self.latest_snapshot + + def store_snapshot(self, playlist_id: str, snapshot_id: str, items: list[dict[str, Any]]) -> None: + self.store_calls.append((playlist_id, snapshot_id, list(items))) + + +def _item(track_id: str, position: int) -> dict[str, Any]: + return { + "spotify_track_id": track_id, + "position": position, + "added_at": f"2026-02-16T00:0{position}:00Z", + "artist": f"Artist {track_id}", + "title": f"Title {track_id}", + "album": "Album", + "duration_ms": 123000, + "isrc": f"ISRC{track_id}", + } + + +def test_liked_songs_sync_enqueues_added_tracks_and_rebuilds_m3u(monkeypatch) -> None: + config = {"spotify": {"client_id": "cid", "client_secret": "csec"}} + db = _FakeSnapshotStore(latest_snapshot=None) + queue = object() + search_service = object() + spotify_client = _FakeSpotifyClient("snap-liked-1", [_item("a", 0), _item("b", 1)]) + + class _FakeOAuthStore: + def __init__(self, _db_path): + pass + + def get_valid_token(self, _client_id, _client_secret, config=None): + return SimpleNamespace(access_token="oauth-token") + + enqueue_calls: list[tuple[str, dict[str, Any]]] = [] + + async def _spy_enqueue_spotify_track(queue, spotify_track: dict, search_service, playlist_id: str): + 
enqueue_calls.append((playlist_id, dict(spotify_track))) + + rebuild_calls: list[dict[str, Any]] = [] + + def _spy_rebuild_playlist_from_tracks(playlist_name, playlist_root, music_root, track_file_paths): + rebuild_calls.append( + { + "playlist_name": playlist_name, + "playlist_root": playlist_root, + "music_root": music_root, + "track_file_paths": list(track_file_paths), + } + ) + return playlist_root / f"{playlist_name}.m3u" + + monkeypatch.setattr("scheduler.jobs.spotify_playlist_watch.SpotifyOAuthStore", _FakeOAuthStore) + monkeypatch.setattr( + "scheduler.jobs.spotify_playlist_watch.enqueue_spotify_track", + _spy_enqueue_spotify_track, + ) + monkeypatch.setattr( + "scheduler.jobs.spotify_playlist_watch._load_downloaded_track_paths", + lambda _playlist_id: [], + ) + monkeypatch.setattr( + "scheduler.jobs.spotify_playlist_watch.rebuild_playlist_from_tracks", + _spy_rebuild_playlist_from_tracks, + ) + + result = asyncio.run( + spotify_liked_songs_watch_job( + config=config, + db=db, + queue=queue, + spotify_client=spotify_client, + search_service=search_service, + ) + ) + + assert result["status"] == "updated" + assert result["playlist_id"] == SPOTIFY_LIKED_SONGS_PLAYLIST_ID + assert result["enqueued"] == 2 + + assert [call[0] for call in enqueue_calls] == [ + SPOTIFY_LIKED_SONGS_PLAYLIST_ID, + SPOTIFY_LIKED_SONGS_PLAYLIST_ID, + ] + assert [call[1]["spotify_track_id"] for call in enqueue_calls] == ["a", "b"] + + assert len(db.store_calls) == 1 + assert db.store_calls[0][0] == SPOTIFY_LIKED_SONGS_PLAYLIST_ID + assert db.store_calls[0][1] == "snap-liked-1" + + assert len(rebuild_calls) == 1 + assert rebuild_calls[0]["playlist_name"] == get_liked_songs_playlist_name() + + +def test_liked_songs_sync_exits_cleanly_when_oauth_token_missing(monkeypatch) -> None: + config = {"spotify": {"client_id": "cid", "client_secret": "csec"}} + db = _FakeSnapshotStore(latest_snapshot=None) + queue = object() + search_service = object() + spotify_client = 
_FakeSpotifyClient("snap-liked-1", [_item("a", 0)]) + + class _FakeOAuthStore: + def __init__(self, _db_path): + pass + + def get_valid_token(self, _client_id, _client_secret, config=None): + return None + + enqueue_calls: list[dict[str, Any]] = [] + + async def _spy_enqueue_spotify_track(queue, spotify_track: dict, search_service, playlist_id: str): + enqueue_calls.append(dict(spotify_track)) + + monkeypatch.setattr("scheduler.jobs.spotify_playlist_watch.SpotifyOAuthStore", _FakeOAuthStore) + monkeypatch.setattr( + "scheduler.jobs.spotify_playlist_watch.enqueue_spotify_track", + _spy_enqueue_spotify_track, + ) + + result = asyncio.run( + spotify_liked_songs_watch_job( + config=config, + db=db, + queue=queue, + spotify_client=spotify_client, + search_service=search_service, + ) + ) + + assert result["status"] == "skipped" + assert result["playlist_id"] == SPOTIFY_LIKED_SONGS_PLAYLIST_ID + assert result["enqueued"] == 0 + assert enqueue_calls == [] + assert db.store_calls == [] diff --git a/tests/test_spotify_saved_albums_sync.py b/tests/test_spotify_saved_albums_sync.py new file mode 100644 index 0000000..740be4e --- /dev/null +++ b/tests/test_spotify_saved_albums_sync.py @@ -0,0 +1,163 @@ +from __future__ import annotations + +import asyncio +from types import SimpleNamespace +from typing import Any + +from scheduler.jobs.spotify_playlist_watch import ( + SPOTIFY_SAVED_ALBUMS_PLAYLIST_ID, + spotify_saved_albums_watch_job, +) + + +class _FakeSpotifyClient: + def __init__(self, snapshot_id: str, albums: list[dict[str, Any]]) -> None: + self.snapshot_id = snapshot_id + self.albums = albums + + async def get_saved_albums(self) -> tuple[str, list[dict[str, Any]]]: + return self.snapshot_id, list(self.albums) + + +class _FakeSnapshotStore: + def __init__(self, latest_snapshot: dict[str, Any] | None) -> None: + self.latest_snapshot = latest_snapshot + self.store_calls: list[tuple[str, str, list[dict[str, Any]]]] = [] + + def get_latest_snapshot(self, playlist_id: str) -> 
dict[str, Any] | None: + return self.latest_snapshot + + def store_snapshot(self, playlist_id: str, snapshot_id: str, items: list[dict[str, Any]]) -> None: + self.store_calls.append((playlist_id, snapshot_id, list(items))) + + +def _album(album_id: str, position: int) -> dict[str, Any]: + return { + "album_id": album_id, + "position": position, + "added_at": f"2026-02-16T00:0{position}:00Z", + "name": f"Album {album_id}", + "artist": f"Artist {album_id}", + "artists": [f"Artist {album_id}"], + "release_date": "2024-01-01", + "total_tracks": 10, + "tracks": [], + } + + +def test_saved_albums_sync_triggers_album_sync_only_for_new_albums(monkeypatch) -> None: + config = {"spotify": {"client_id": "cid", "client_secret": "csec"}} + previous_items = [ + {"spotify_track_id": "album-a", "position": 0, "added_at": "2026-02-16T00:00:00Z"}, + ] + db = _FakeSnapshotStore({"snapshot_id": "snap-prev", "items": previous_items}) + queue = object() + search_service = object() + spotify_client = _FakeSpotifyClient( + "snap-next", + [_album("album-a", 0), _album("album-b", 1), _album("album-c", 2)], + ) + + class _FakeOAuthStore: + def __init__(self, _db_path): + pass + + def get_valid_token(self, _client_id, _client_secret, config=None): + return SimpleNamespace(access_token="oauth-token") + + album_sync_calls: list[str] = [] + + async def _spy_run_spotify_album_sync(*, album_id, config, db, queue, spotify_client): + album_sync_calls.append(str(album_id)) + return { + "status": "accepted", + "intent_type": "spotify_album", + "identifier": str(album_id), + "message": "ok", + "enqueued_count": 1, + } + + rebuild_calls: list[dict[str, Any]] = [] + + def _spy_rebuild_playlist_from_tracks(playlist_name, playlist_root, music_root, track_file_paths): + rebuild_calls.append( + { + "playlist_name": playlist_name, + "playlist_root": playlist_root, + "music_root": music_root, + "track_file_paths": list(track_file_paths), + } + ) + return playlist_root / f"{playlist_name}.m3u" + + 
monkeypatch.setattr("scheduler.jobs.spotify_playlist_watch.SpotifyOAuthStore", _FakeOAuthStore) + monkeypatch.setattr("api.intent_dispatcher.run_spotify_album_sync", _spy_run_spotify_album_sync) + monkeypatch.setattr( + "scheduler.jobs.spotify_playlist_watch._load_downloaded_track_paths_for_playlist_ids", + lambda _playlist_ids: [], + ) + monkeypatch.setattr( + "scheduler.jobs.spotify_playlist_watch.rebuild_playlist_from_tracks", + _spy_rebuild_playlist_from_tracks, + ) + + result = asyncio.run( + spotify_saved_albums_watch_job( + config=config, + db=db, + queue=queue, + spotify_client=spotify_client, + search_service=search_service, + ) + ) + + assert result["status"] == "updated" + assert result["playlist_id"] == SPOTIFY_SAVED_ALBUMS_PLAYLIST_ID + assert album_sync_calls == ["album-b", "album-c"] + + assert len(db.store_calls) == 1 + assert db.store_calls[0][0] == SPOTIFY_SAVED_ALBUMS_PLAYLIST_ID + assert db.store_calls[0][1] == "snap-next" + + assert len(rebuild_calls) == 1 + assert rebuild_calls[0]["playlist_name"] == "Spotify - Saved Albums" + + +def test_saved_albums_sync_skips_when_oauth_token_missing(monkeypatch) -> None: + config = {"spotify": {"client_id": "cid", "client_secret": "csec"}} + db = _FakeSnapshotStore(latest_snapshot=None) + queue = object() + search_service = object() + spotify_client = _FakeSpotifyClient("snap-next", [_album("album-a", 0)]) + + class _FakeOAuthStore: + def __init__(self, _db_path): + pass + + def get_valid_token(self, _client_id, _client_secret, config=None): + return None + + album_sync_calls: list[str] = [] + + async def _spy_run_spotify_album_sync(*, album_id, config, db, queue, spotify_client): + album_sync_calls.append(str(album_id)) + return {"status": "accepted", "enqueued_count": 1} + + monkeypatch.setattr("scheduler.jobs.spotify_playlist_watch.SpotifyOAuthStore", _FakeOAuthStore) + monkeypatch.setattr("api.intent_dispatcher.run_spotify_album_sync", _spy_run_spotify_album_sync) + + result = asyncio.run( + 
spotify_saved_albums_watch_job( + config=config, + db=db, + queue=queue, + spotify_client=spotify_client, + search_service=search_service, + ) + ) + + assert result["status"] == "skipped" + assert result["playlist_id"] == SPOTIFY_SAVED_ALBUMS_PLAYLIST_ID + assert result["enqueued"] == 0 + assert album_sync_calls == [] + assert db.store_calls == [] From 72af74d253a60f83c12fad88bc41f68d8ddf209a Mon Sep 17 00:00:00 2001 From: z3ro-2 Date: Mon, 16 Feb 2026 15:46:27 -0600 Subject: [PATCH 22/45] feat(spotify-user-playlists): add OAuth playlist discovery sync + scheduler + tests Added SpotifyPlaylistClient.get_user_playlists() in client.py: OAuth-required fetch from /v1/me/playlists with pagination. Normalized output (id, name, track_count). Deterministic snapshot hash from ordered playlist IDs. Added spotify_user_playlists_watch_job(...) in spotify_playlist_watch.py: Validates OAuth token. Loads current user playlists and diffs against snapshot __spotify_user_playlists__. Triggers existing playlist_watch_job only for newly added playlist IDs. Stores updated snapshot. Non-destructive on removals (no local file deletion). Wired periodic scheduler support in main.py: New job id spotify_user_playlists_watch. Default interval 30 minutes (configurable). Silent skip on missing/invalid OAuth token. Auto-activation after successful OAuth callback. Reapplies on startup/config/schedule updates. Added network-free tests in test_spotify_user_playlists_sync.py: Valid token path: only new playlists trigger sync. Missing token path: clean skip with no sync calls. 
--- api/main.py | 83 +++++++++++++ scheduler/jobs/spotify_playlist_watch.py | 122 +++++++++++++++++++ spotify/client.py | 53 +++++++++ tests/test_spotify_user_playlists_sync.py | 138 ++++++++++++++++++++++ 4 files changed, 396 insertions(+) create mode 100644 tests/test_spotify_user_playlists_sync.py diff --git a/api/main.py b/api/main.py index 7c1fab2..35e9005 100644 --- a/api/main.py +++ b/api/main.py @@ -117,6 +117,7 @@ def _require_python_311(): from scheduler.jobs.spotify_playlist_watch import ( spotify_liked_songs_watch_job, spotify_saved_albums_watch_job, + spotify_user_playlists_watch_job, ) APP_NAME = "Retreivr API" @@ -131,10 +132,12 @@ def _require_python_311(): WATCHER_JOB_ID = "playlist_watcher" LIKED_SONGS_JOB_ID = "spotify_liked_songs_watch" SAVED_ALBUMS_JOB_ID = "spotify_saved_albums_watch" +USER_PLAYLISTS_JOB_ID = "spotify_user_playlists_watch" DEFERRED_RUN_JOB_ID = "deferred_run" WATCHER_QUIET_WINDOW_SECONDS = 60 DEFAULT_LIKED_SONGS_SYNC_INTERVAL_MINUTES = 15 DEFAULT_SAVED_ALBUMS_SYNC_INTERVAL_MINUTES = 30 +DEFAULT_USER_PLAYLISTS_SYNC_INTERVAL_MINUTES = 30 OAUTH_SCOPES = ["https://www.googleapis.com/auth/youtube.readonly"] OAUTH_SESSION_TTL = timedelta(minutes=15) _OAUTH_SESSIONS = {} @@ -638,6 +641,7 @@ async def startup(): _apply_schedule_config(schedule_config) _apply_liked_songs_schedule(config) _apply_saved_albums_schedule(config) + _apply_user_playlists_schedule(config) if schedule_config.get("enabled") and schedule_config.get("run_on_startup"): asyncio.create_task(_handle_scheduled_run()) if schedule_config.get("enabled"): @@ -2071,6 +2075,13 @@ def _saved_albums_schedule_tick(): asyncio.run_coroutine_threadsafe(_handle_saved_albums_scheduled_run(), loop) +def _user_playlists_schedule_tick(): + loop = app.state.loop + if not loop or loop.is_closed(): + return + asyncio.run_coroutine_threadsafe(_handle_user_playlists_scheduled_run(), loop) + + async def _handle_scheduled_run(): if app.state.running: logging.info("Scheduled run skipped; run 
already active") @@ -2117,6 +2128,19 @@ def _resolve_saved_albums_interval_minutes(config: dict | None) -> int: return max(1, interval) +def _resolve_user_playlists_interval_minutes(config: dict | None) -> int: + cfg = config or {} + spotify_cfg = (cfg.get("spotify") or {}) if isinstance(cfg, dict) else {} + raw_value = spotify_cfg.get("user_playlists_sync_interval_minutes") + if raw_value is None: + raw_value = cfg.get("user_playlists_sync_interval_minutes") + try: + interval = int(raw_value) + except (TypeError, ValueError): + interval = DEFAULT_USER_PLAYLISTS_SYNC_INTERVAL_MINUTES + return max(1, interval) + + def _has_connected_spotify_oauth_token(db_path: str) -> bool: try: return SpotifyOAuthStore(Path(db_path)).load() is not None @@ -2184,6 +2208,36 @@ async def _handle_saved_albums_scheduled_run() -> None: logging.exception("Scheduled Spotify Saved Albums sync failed") +async def _handle_user_playlists_scheduled_run() -> None: + config = _read_config_for_scheduler() + if not config: + return + + client_id, client_secret = _spotify_client_credentials(config) + if not client_id or not client_secret: + return + + store = SpotifyOAuthStore(Path(app.state.paths.db_path)) + token = store.get_valid_token(client_id, client_secret, config=config if isinstance(config, dict) else None) + if token is None: + return + + try: + await spotify_user_playlists_watch_job( + config=config, + db=PlaylistSnapshotStore(app.state.paths.db_path), + queue=_IntentQueueAdapter(), + spotify_client=SpotifyPlaylistClient( + client_id=client_id, + client_secret=client_secret, + access_token=token.access_token, + ), + search_service=app.state.search_service, + ) + except Exception: + logging.exception("Scheduled Spotify User Playlists sync failed") + + def _apply_liked_songs_schedule(config: dict | None) -> None: scheduler = app.state.scheduler if not scheduler: @@ -2236,6 +2290,32 @@ def _apply_saved_albums_schedule(config: dict | None) -> None: ) +def 
_apply_user_playlists_schedule(config: dict | None) -> None: + scheduler = app.state.scheduler + if not scheduler: + return + + existing = scheduler.get_job(USER_PLAYLISTS_JOB_ID) + if existing: + scheduler.remove_job(USER_PLAYLISTS_JOB_ID) + + # Do not schedule user playlists sync until OAuth has been connected at least once. + if not _has_connected_spotify_oauth_token(app.state.paths.db_path): + return + + interval_min = _resolve_user_playlists_interval_minutes(config) + start_date = datetime.now(timezone.utc) + timedelta(minutes=interval_min) + scheduler.add_job( + _user_playlists_schedule_tick, + trigger=IntervalTrigger(minutes=interval_min, start_date=start_date), + id=USER_PLAYLISTS_JOB_ID, + replace_existing=True, + max_instances=1, + coalesce=True, + misfire_grace_time=30, + ) + + def _apply_schedule_config(schedule): scheduler = app.state.scheduler if not scheduler: @@ -3274,6 +3354,7 @@ async def api_update_schedule(payload: ScheduleRequest): _apply_schedule_config(current) _apply_liked_songs_schedule(config) _apply_saved_albums_schedule(config) + _apply_user_playlists_schedule(config) return _schedule_response() @@ -3880,6 +3961,7 @@ async def spotify_oauth_callback(code: str | None = None, state: str | None = No ) _apply_liked_songs_schedule(config) _apply_saved_albums_schedule(config) + _apply_user_playlists_schedule(config) app.state.spotify_oauth_state = None return {"status": "connected"} @@ -4274,6 +4356,7 @@ async def api_put_config(payload: dict = Body(...)): _apply_schedule_config(schedule) _apply_liked_songs_schedule(payload) _apply_saved_albums_schedule(payload) + _apply_user_playlists_schedule(payload) policy = normalize_watch_policy(payload) if getattr(normalize_watch_policy, "valid", True): app.state.watch_policy = policy diff --git a/scheduler/jobs/spotify_playlist_watch.py b/scheduler/jobs/spotify_playlist_watch.py index 898329e..7d0f6b0 100644 --- a/scheduler/jobs/spotify_playlist_watch.py +++ b/scheduler/jobs/spotify_playlist_watch.py 
@@ -19,6 +19,7 @@ SPOTIFY_LIKED_SONGS_PLAYLIST_ID = "__spotify_liked_songs__" SPOTIFY_SAVED_ALBUMS_PLAYLIST_ID = "__spotify_saved_albums__" +SPOTIFY_USER_PLAYLISTS_PLAYLIST_ID = "__spotify_user_playlists__" def _load_previous_snapshot(db: Any, playlist_id: str) -> tuple[str | None, list[dict[str, Any]]]: @@ -463,6 +464,127 @@ async def spotify_saved_albums_watch_job(config, db, queue, spotify_client, sear } +async def spotify_user_playlists_watch_job(config, db, queue, spotify_client, search_service): + """Sync authenticated user's Spotify playlists and trigger sync for new playlists.""" + client_id, client_secret = _spotify_client_credentials_from_config(config if isinstance(config, dict) else None) + if not client_id or not client_secret: + logging.info("User Playlists sync skipped: Spotify credentials not configured") + return {"status": "skipped", "playlist_id": SPOTIFY_USER_PLAYLISTS_PLAYLIST_ID, "enqueued": 0} + + oauth_store = SpotifyOAuthStore(Path(_resolve_db_path_from_runtime(db))) + token = oauth_store.get_valid_token(client_id, client_secret, config=config if isinstance(config, dict) else None) + if token is None: + logging.info("User Playlists sync skipped: no valid Spotify OAuth token") + return {"status": "skipped", "playlist_id": SPOTIFY_USER_PLAYLISTS_PLAYLIST_ID, "enqueued": 0} + + playlists_client: Any = spotify_client + if isinstance(playlists_client, SpotifyPlaylistClient): + playlists_client._provided_access_token = token.access_token + elif not hasattr(playlists_client, "get_user_playlists"): + playlists_client = SpotifyPlaylistClient( + client_id=client_id, + client_secret=client_secret, + access_token=token.access_token, + ) + + try: + current_snapshot_id, current_playlists = await playlists_client.get_user_playlists() + except Exception as exc: + logging.exception("User Playlists fetch failed") + return { + "status": "error", + "playlist_id": SPOTIFY_USER_PLAYLISTS_PLAYLIST_ID, + "error": f"spotify_fetch_failed: {exc}", + } + + 
current_snapshot_items: list[dict[str, Any]] = [] + playlist_name_by_id: dict[str, str] = {} + for idx, playlist in enumerate(current_playlists or []): + playlist_id = str((playlist or {}).get("id") or "").strip() + if not playlist_id: + continue + playlist_name_by_id[playlist_id] = str((playlist or {}).get("name") or "").strip() or playlist_id + current_snapshot_items.append( + { + "spotify_track_id": playlist_id, + "position": idx, + "added_at": None, + } + ) + + try: + previous_snapshot_id, previous_items = _load_previous_snapshot(db, SPOTIFY_USER_PLAYLISTS_PLAYLIST_ID) + except Exception as exc: + logging.exception("User Playlists snapshot load failed") + return { + "status": "error", + "playlist_id": SPOTIFY_USER_PLAYLISTS_PLAYLIST_ID, + "error": f"snapshot_read_failed: {exc}", + } + + if previous_snapshot_id == current_snapshot_id: + return { + "status": "unchanged", + "playlist_id": SPOTIFY_USER_PLAYLISTS_PLAYLIST_ID, + "snapshot_id": current_snapshot_id, + "enqueued": 0, + } + + diff = diff_playlist(previous_items, current_snapshot_items) + added_playlists = list(diff["added"]) + synced = 0 + sync_errors: list[str] = [] + for playlist_item in added_playlists: + playlist_id = str((playlist_item or {}).get("spotify_track_id") or "").strip() + if not playlist_id: + continue + playlist_name = playlist_name_by_id.get(playlist_id, playlist_id) + try: + playlist_watch_job( + spotify_client=playlists_client, + db=db, + queue=queue, + playlist_id=playlist_id, + playlist_name=playlist_name, + config=config if isinstance(config, dict) else None, + ) + synced += 1 + except Exception as exc: + sync_errors.append(f"{playlist_id}: {exc}") + logging.exception("User Playlists sync failed for playlist %s", playlist_id) + + try: + db.store_snapshot( + SPOTIFY_USER_PLAYLISTS_PLAYLIST_ID, + str(current_snapshot_id), + current_snapshot_items, + ) + except Exception as exc: + logging.exception("User Playlists snapshot store failed") + return { + "status": "error", + 
"playlist_id": SPOTIFY_USER_PLAYLISTS_PLAYLIST_ID, + "snapshot_id": current_snapshot_id, + "error": f"snapshot_store_failed: {exc}", + "enqueued": synced, + "added_count": len(added_playlists), + "removed_count": len(diff["removed"]), + "moved_count": len(diff["moved"]), + "enqueue_errors": sync_errors, + } + + return { + "status": "updated", + "playlist_id": SPOTIFY_USER_PLAYLISTS_PLAYLIST_ID, + "snapshot_id": current_snapshot_id, + "enqueued": synced, + "added_count": len(added_playlists), + "removed_count": len(diff["removed"]), + "moved_count": len(diff["moved"]), + "enqueue_errors": sync_errors, + } + + def playlist_watch_job( spotify_client, db, diff --git a/spotify/client.py b/spotify/client.py index 126df08..9f813b1 100644 --- a/spotify/client.py +++ b/spotify/client.py @@ -354,6 +354,59 @@ async def get_saved_albums(self) -> tuple[str, list[dict[str, Any]]]: snapshot_id = hashlib.sha256(snapshot_source).hexdigest() return snapshot_id, album_items + async def get_user_playlists(self) -> tuple[str, list[dict[str, Any]]]: + """Fetch authenticated user's playlists from Spotify. + + Returns: + A tuple ``(snapshot_id, items)`` where: + - ``snapshot_id`` is a deterministic SHA-256 hash of ordered playlist IDs. + - ``items`` is an ordered list of normalized playlist dicts with + keys: ``id``, ``name``, and ``track_count``. 
+ """ + if not self._provided_access_token: + raise RuntimeError("Spotify OAuth access_token is required for user playlists") + + offset = 0 + limit = 50 + ordered_playlist_ids: list[str] = [] + items: list[dict[str, Any]] = [] + + while True: + payload = await _request_json_with_retry( + self, + "https://api.spotify.com/v1/me/playlists", + params={ + "limit": limit, + "offset": offset, + "fields": "items(id,name,tracks(total)),next,total", + }, + ) + raw_items = payload.get("items") or [] + for raw in raw_items: + if not isinstance(raw, dict): + continue + playlist_id = str(raw.get("id") or "").strip() + if not playlist_id: + continue + ordered_playlist_ids.append(playlist_id) + tracks = raw.get("tracks") or {} + items.append( + { + "id": playlist_id, + "name": raw.get("name"), + "track_count": int(tracks.get("total") or 0), + } + ) + + next_url = payload.get("next") + if not next_url: + break + offset += limit + + snapshot_source = "\n".join(ordered_playlist_ids).encode("utf-8") + snapshot_id = hashlib.sha256(snapshot_source).hexdigest() + return snapshot_id, items + async def _request_json_with_retry( spotify_client: SpotifyPlaylistClient, diff --git a/tests/test_spotify_user_playlists_sync.py b/tests/test_spotify_user_playlists_sync.py new file mode 100644 index 0000000..7d4ef40 --- /dev/null +++ b/tests/test_spotify_user_playlists_sync.py @@ -0,0 +1,138 @@ +from __future__ import annotations + +import asyncio +from types import SimpleNamespace +from typing import Any + +from scheduler.jobs.spotify_playlist_watch import ( + SPOTIFY_USER_PLAYLISTS_PLAYLIST_ID, + spotify_user_playlists_watch_job, +) + + +class _FakeSpotifyClient: + def __init__(self, snapshot_id: str, playlists: list[dict[str, Any]]) -> None: + self.snapshot_id = snapshot_id + self.playlists = playlists + + async def get_user_playlists(self) -> tuple[str, list[dict[str, Any]]]: + return self.snapshot_id, list(self.playlists) + + +class _FakeSnapshotStore: + def __init__(self, latest_snapshot: 
dict[str, Any] | None) -> None: + self.latest_snapshot = latest_snapshot + self.store_calls: list[tuple[str, str, list[dict[str, Any]]]] = [] + + def get_latest_snapshot(self, playlist_id: str) -> dict[str, Any] | None: + return self.latest_snapshot + + def store_snapshot(self, playlist_id: str, snapshot_id: str, items: list[dict[str, Any]]) -> None: + self.store_calls.append((playlist_id, snapshot_id, list(items))) + + +def _playlist(playlist_id: str, name: str, track_count: int) -> dict[str, Any]: + return { + "id": playlist_id, + "name": name, + "track_count": track_count, + } + + +def test_user_playlists_sync_triggers_existing_watch_job_for_new_playlists(monkeypatch) -> None: + config = {"spotify": {"client_id": "cid", "client_secret": "csec"}} + previous_items = [ + {"spotify_track_id": "pl-a", "position": 0, "added_at": None}, + ] + db = _FakeSnapshotStore({"snapshot_id": "snap-prev", "items": previous_items}) + queue = object() + search_service = object() + spotify_client = _FakeSpotifyClient( + "snap-next", + [_playlist("pl-a", "Existing", 10), _playlist("pl-b", "New One", 20), _playlist("pl-c", "New Two", 30)], + ) + + class _FakeOAuthStore: + def __init__(self, _db_path): + pass + + def get_valid_token(self, _client_id, _client_secret, config=None): + return SimpleNamespace(access_token="oauth-token") + + watch_calls: list[dict[str, Any]] = [] + + def _spy_playlist_watch_job(*, spotify_client, db, queue, playlist_id, playlist_name=None, config=None): + watch_calls.append( + { + "playlist_id": playlist_id, + "playlist_name": playlist_name, + "config": config, + } + ) + return { + "status": "updated", + "playlist_id": playlist_id, + "enqueued": 0, + } + + monkeypatch.setattr("scheduler.jobs.spotify_playlist_watch.SpotifyOAuthStore", _FakeOAuthStore) + monkeypatch.setattr("scheduler.jobs.spotify_playlist_watch.playlist_watch_job", _spy_playlist_watch_job) + + result = asyncio.run( + spotify_user_playlists_watch_job( + config=config, + db=db, + queue=queue, + 
spotify_client=spotify_client, + search_service=search_service, + ) + ) + + assert result["status"] == "updated" + assert result["playlist_id"] == SPOTIFY_USER_PLAYLISTS_PLAYLIST_ID + assert [call["playlist_id"] for call in watch_calls] == ["pl-b", "pl-c"] + assert [call["playlist_name"] for call in watch_calls] == ["New One", "New Two"] + + assert len(db.store_calls) == 1 + assert db.store_calls[0][0] == SPOTIFY_USER_PLAYLISTS_PLAYLIST_ID + assert db.store_calls[0][1] == "snap-next" + + +def test_user_playlists_sync_skips_cleanly_when_oauth_token_missing(monkeypatch) -> None: + config = {"spotify": {"client_id": "cid", "client_secret": "csec"}} + db = _FakeSnapshotStore(latest_snapshot=None) + queue = object() + search_service = object() + spotify_client = _FakeSpotifyClient("snap-next", [_playlist("pl-a", "Any", 1)]) + + class _FakeOAuthStore: + def __init__(self, _db_path): + pass + + def get_valid_token(self, _client_id, _client_secret, config=None): + return None + + watch_calls: list[str] = [] + + def _spy_playlist_watch_job(*, spotify_client, db, queue, playlist_id, playlist_name=None, config=None): + watch_calls.append(playlist_id) + return {"status": "updated", "playlist_id": playlist_id, "enqueued": 0} + + monkeypatch.setattr("scheduler.jobs.spotify_playlist_watch.SpotifyOAuthStore", _FakeOAuthStore) + monkeypatch.setattr("scheduler.jobs.spotify_playlist_watch.playlist_watch_job", _spy_playlist_watch_job) + + result = asyncio.run( + spotify_user_playlists_watch_job( + config=config, + db=db, + queue=queue, + spotify_client=spotify_client, + search_service=search_service, + ) + ) + + assert result["status"] == "skipped" + assert result["playlist_id"] == SPOTIFY_USER_PLAYLISTS_PLAYLIST_ID + assert result["enqueued"] == 0 + assert watch_calls == [] + assert db.store_calls == [] From d37948cb56ed6c707e91d2240bd10dbc8d082cee Mon Sep 17 00:00:00 2001 From: z3ro-2 Date: Mon, 16 Feb 2026 16:04:03 -0600 Subject: [PATCH 23/45] feat(ui): add Spotify integration 
controls and runtime sync status in Config/Status pages Added new Spotify Integration card to Config page with: OAuth connection status display Connect/Disconnect actions Sync toggles for Liked Songs, Saved Albums, and My Playlists Interval inputs for each Spotify sync stream Wired app.js config logic: Added refreshSpotifyConfig() to load config + OAuth status and update Spotify controls Hooked refresh into config page navigation flow Added connect/disconnect button handlers with user notices Extended config save payload to persist Spotify sync flags and interval settings Added new Spotify Sync Status card to Status page with fields for: OAuth state Last liked songs sync Last saved albums sync Last playlists sync Extended refreshStatus() to best-effort fetch /api/spotify/status and update Spotify status fields without breaking existing status polling when endpoint is unavailable. Updated homepage Music Mode label text to: Music Mode (Spotify metadata, album structure, validation) Updated intent confirmation button text to be contextual: Album/Playlist/Track-specific labels with fallback to Confirm Download. 
--- api/main.py | 39 ++++++++++++ webUI/app.js | 151 ++++++++++++++++++++++++++++++++++++++++++++++- webUI/index.html | 63 +++++++++++++++++++- 3 files changed, 250 insertions(+), 3 deletions(-) diff --git a/api/main.py b/api/main.py index 35e9005..57402b6 100644 --- a/api/main.py +++ b/api/main.py @@ -3959,6 +3959,37 @@ async def spotify_oauth_callback(code: str | None = None, state: str | None = No scope=scope, ) ) + try: + sync_db = PlaylistSnapshotStore(app.state.paths.db_path) + sync_queue = _IntentQueueAdapter() + sync_client = SpotifyPlaylistClient( + client_id=client_id, + client_secret=client_secret, + access_token=access_token, + ) + await spotify_liked_songs_watch_job( + config=config, + db=sync_db, + queue=sync_queue, + spotify_client=sync_client, + search_service=app.state.search_service, + ) + await spotify_saved_albums_watch_job( + config=config, + db=sync_db, + queue=sync_queue, + spotify_client=sync_client, + search_service=app.state.search_service, + ) + await spotify_user_playlists_watch_job( + config=config, + db=sync_db, + queue=sync_queue, + spotify_client=sync_client, + search_service=app.state.search_service, + ) + except Exception: + logging.exception("Post-OAuth immediate Spotify sync failed") _apply_liked_songs_schedule(config) _apply_saved_albums_schedule(config) _apply_user_playlists_schedule(config) @@ -3966,6 +3997,14 @@ async def spotify_oauth_callback(code: str | None = None, state: str | None = No return {"status": "connected"} +@app.post("/api/spotify/oauth/disconnect") +async def spotify_oauth_disconnect(): + """Clear stored Spotify OAuth token state.""" + store = SpotifyOAuthStore(Path(app.state.paths.db_path)) + store.clear() + return {"status": "disconnected"} + + @app.get("/api/search/items/{item_id}/candidates") async def get_search_candidates(item_id: str): service = app.state.search_service diff --git a/webUI/app.js b/webUI/app.js index af0c78e..bfca813 100644 --- a/webUI/app.js +++ b/webUI/app.js @@ -210,7 +210,7 @@ 
function setPage(page) { refreshLogs(); } else if (target === "config") { if (!state.config || !state.configDirty) { - loadConfig(); + loadConfig().then(() => refreshSpotifyConfig()); } refreshSchedule(); } else if (target === "advanced") { @@ -1111,6 +1111,29 @@ async function refreshStatus() { } }; } + + try { + const spotifyStatus = await fetchJson("/api/spotify/status"); + const oauthConnected = !!spotifyStatus.oauth_connected; + const oauthEl = $("#spotify-status-oauth"); + if (oauthEl) { + oauthEl.textContent = oauthConnected ? "Connected" : "Not connected"; + } + const likedEl = $("#spotify-status-liked"); + if (likedEl) { + likedEl.textContent = formatTimestamp(spotifyStatus.last_liked_sync) || "-"; + } + const savedEl = $("#spotify-status-saved"); + if (savedEl) { + savedEl.textContent = formatTimestamp(spotifyStatus.last_saved_sync) || "-"; + } + const playlistsEl = $("#spotify-status-playlists"); + if (playlistsEl) { + playlistsEl.textContent = formatTimestamp(spotifyStatus.last_playlists_sync) || "-"; + } + } catch (err) { + // Best-effort status enrichment; ignore when endpoint is unavailable. 
+ } } catch (err) { setNotice($("#run-message"), `Status error: ${err.message}`, true); } @@ -2457,7 +2480,15 @@ function renderHomeIntentCard(intentType, identifier, options = {}) { confirmButton.dataset.action = "home-intent-confirm"; confirmButton.dataset.intentType = intentType || ""; confirmButton.dataset.identifier = identifier || ""; - confirmButton.textContent = "Confirm Download"; + if (intentType === "spotify_album") { + confirmButton.textContent = "Download Album"; + } else if (intentType === "spotify_playlist") { + confirmButton.textContent = "Download Playlist"; + } else if (intentType === "spotify_track") { + confirmButton.textContent = "Download Track"; + } else { + confirmButton.textContent = "Confirm Download"; + } actions.appendChild(confirmButton); } @@ -3659,6 +3690,94 @@ async function refreshSpotifyPlaylistStatus() { } } +function applySpotifyConfigState(cfg, oauthStatus) { + const spotifyCfg = (cfg && cfg.spotify) || {}; + const connected = !!(oauthStatus && oauthStatus.connected); + + const statusEl = $("#spotify-connection-status"); + if (statusEl) { + statusEl.textContent = connected ? "Connected" : "Not connected"; + } + + const connectBtn = $("#spotify-connect-btn"); + if (connectBtn) { + connectBtn.style.display = connected ? "none" : ""; + } + const disconnectBtn = $("#spotify-disconnect-btn"); + if (disconnectBtn) { + disconnectBtn.style.display = connected ? "" : "none"; + } + + const syncLiked = $("#spotify-sync-liked"); + if (syncLiked) { + syncLiked.checked = !!spotifyCfg.sync_liked_songs; + } + const syncSaved = $("#spotify-sync-saved"); + if (syncSaved) { + syncSaved.checked = !!spotifyCfg.sync_saved_albums; + } + const syncPlaylists = $("#spotify-sync-playlists"); + if (syncPlaylists) { + syncPlaylists.checked = !!spotifyCfg.sync_user_playlists; + } + + const likedInterval = $("#spotify-liked-interval"); + if (likedInterval) { + likedInterval.value = spotifyCfg.liked_songs_sync_interval_minutes ?? 
15; + } + const savedInterval = $("#spotify-saved-interval"); + if (savedInterval) { + savedInterval.value = spotifyCfg.saved_albums_sync_interval_minutes ?? 30; + } + const playlistsInterval = $("#spotify-playlists-interval"); + if (playlistsInterval) { + playlistsInterval.value = spotifyCfg.user_playlists_sync_interval_minutes ?? 30; + } +} + +async function refreshSpotifyConfig() { + try { + const cfg = await fetchJson("/api/config"); + state.config = cfg; + let oauthStatus = { connected: false }; + try { + oauthStatus = await fetchJson("/api/spotify/oauth/status"); + } catch (err) { + oauthStatus = { connected: false }; + } + applySpotifyConfigState(cfg, oauthStatus); + } catch (err) { + setConfigNotice(`Spotify config refresh failed: ${err.message}`, true); + } +} + +async function connectSpotify() { + try { + const data = await fetchJson("/api/spotify/oauth/connect"); + if (data && data.auth_url) { + window.open(data.auth_url, "_blank", "noopener"); + } + setConfigNotice("Complete Spotify authorization in the opened window.", false); + await refreshSpotifyConfig(); + } catch (err) { + setConfigNotice(`Spotify connect failed: ${err.message}`, true); + } +} + +async function disconnectSpotify() { + try { + await fetchJson("/api/spotify/oauth/disconnect", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({}), + }); + await refreshSpotifyConfig(); + setConfigNotice("Spotify disconnected.", false); + } catch (err) { + setConfigNotice(`Spotify disconnect failed: ${err.message}`, true); + } +} + function updateSpotifyPlaylistStatusDisplay() { $$(".spotify-playlist-row").forEach((row) => { const url = row.dataset.playlistUrl; @@ -3921,6 +4040,26 @@ function buildConfigFromForm() { delete base.telegram; } + base.spotify = base.spotify || {}; + base.spotify.sync_liked_songs = !!$("#spotify-sync-liked")?.checked; + base.spotify.sync_saved_albums = !!$("#spotify-sync-saved")?.checked; + base.spotify.sync_user_playlists = 
!!$("#spotify-sync-playlists")?.checked; + + const likedInterval = parseInt($("#spotify-liked-interval")?.value, 10); + base.spotify.liked_songs_sync_interval_minutes = Number.isInteger(likedInterval) && likedInterval > 0 + ? likedInterval + : 15; + + const savedInterval = parseInt($("#spotify-saved-interval")?.value, 10); + base.spotify.saved_albums_sync_interval_minutes = Number.isInteger(savedInterval) && savedInterval > 0 + ? savedInterval + : 30; + + const playlistsInterval = parseInt($("#spotify-playlists-interval")?.value, 10); + base.spotify.user_playlists_sync_interval_minutes = Number.isInteger(playlistsInterval) && playlistsInterval > 0 + ? playlistsInterval + : 30; + const accounts = {}; $$(".account-row").forEach((row) => { const name = row.querySelector(".account-name").value.trim(); @@ -4536,6 +4675,14 @@ function bindEvents() { } }); $("#oauth-complete").addEventListener("click", completeOauth); + const spotifyConnectBtn = $("#spotify-connect-btn"); + if (spotifyConnectBtn) { + spotifyConnectBtn.addEventListener("click", connectSpotify); + } + const spotifyDisconnectBtn = $("#spotify-disconnect-btn"); + if (spotifyDisconnectBtn) { + spotifyDisconnectBtn.addEventListener("click", disconnectSpotify); + } $("#add-account").addEventListener("click", () => addAccountRow("", {})); $("#add-playlist").addEventListener("click", () => addPlaylistRow({})); diff --git a/webUI/index.html b/webUI/index.html index 5089f68..7543dcc 100644 --- a/webUI/index.html +++ b/webUI/index.html @@ -80,7 +80,7 @@ +
+
Spotify Sync Status
+
+
+ OAuth + - +
+
+ Last Liked Songs Sync + - +
+
+
+
+ Last Saved Albums Sync + - +
+
+ Last Playlists Sync + - +
+
+
Active playlist @@ -600,6 +623,44 @@
+
+
Spotify Integration
+
+ Connection Status + Not connected +
+
+ + +
+
+ + + + + + +
+
+
yt-dlp Options (JSON)