diff --git a/.env.example b/.env.example index 9ce5c48..ab72ce8 100644 --- a/.env.example +++ b/.env.example @@ -1,3 +1,28 @@ RETREIVR_DATA_DIR=/app/data # App data (SQLite/temp) root; change only if you use a different internal mount point. RETREIVR_HOST=0.0.0.0 # Bind address for the API; keep 0.0.0.0 in containers for external access. RETREIVR_PORT=8000 # Internal API port; change only if you modify container port mapping. + +# --- Storage --- +RETREIVR_DOWNLOADS_DIR=/app/downloads # Root directory for downloaded media inside container. + +# --- Video Container Policy (v0.9.3 default = mkv) --- +RETREIVR_DEFAULT_VIDEO_FORMAT=mkv # Options: mkv | mp4 | webm (mkv recommended for archival fidelity). + +# --- Scheduler --- +RETREIVR_SCHEDULER_INTERVAL_MINUTES=30 # Interval for playlist/watch polling. + +# --- Spotify (Optional – requires OAuth + Premium) --- +SPOTIFY_CLIENT_ID= +SPOTIFY_CLIENT_SECRET= +SPOTIFY_REDIRECT_URI=http://localhost:8090/api/spotify/callback + +# Spotify functionality is disabled unless OAuth is configured AND Premium validation succeeds. + +# --- Telegram Notifications (Optional) --- +TELEGRAM_BOT_TOKEN= +TELEGRAM_CHAT_ID= + +# Leave empty to disable Telegram notifications. + +# --- Logging --- +RETREIVR_LOG_LEVEL=INFO # Options: DEBUG | INFO | WARNING | ERROR diff --git a/CHANGELOG.md b/CHANGELOG.md index 93594a9..10e07bf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,73 @@ All notable changes to this project will be documented here. +## [v0.9.3] – Canonical Authority & Scheduler Hardening + +This release establishes Retreivr’s canonical authority model and locks in deterministic orchestration behavior. v0.9.3 is a stability milestone focused on correctness, idempotency, and clean archival output. + +### Highlights + +- MusicBrainz is now the canonical metadata authority. +- Spotify downgraded to optional intent ingestion (OAuth + Premium required). +- Spotify API usage (playlists, saved tracks, metadata hints) now strictly requires OAuth configuration and an active Premium subscription. +- Deterministic playlist snapshot hashing and diffing. +- Idempotent scheduler ticks (no duplicate enqueues). +- MKV set as the default video container. +- Integration tests added for full pipeline and snapshot behavior. + +--- + +### Added + +- Structured `PlaylistRunSummary` with: + - `added` + - `skipped` + - `completed` + - `failed` +- Stable playlist snapshot hashing using normalized item sets. +- Crash-safe restart behavior for scheduler runs. +- Active-job duplicate detection (queued / claimed / downloading / postprocessing states). +- Integration tests covering: + - Full music flow (search → resolve → download → embed → persist) + - Spotify intent conversion (MB-first enforcement) + - Playlist reorder behavior (no re-enqueue) + - Crash/restart idempotency +- MKV default container policy for video downloads. + +--- + +### Changed + +- Canonical metadata resolution is now MusicBrainz-first in all ingestion paths. +- Spotify metadata is treated as hints only and never overrides MusicBrainz canonical results. +- Legacy resolver paths removed. +- Duplicate MusicBrainz client stacks consolidated into a single service layer. +- Canonical naming enforced: + - No video IDs in filenames + - No upload dates in filenames + - Zero-padded music track numbers +- Video metadata embedding now occurs after final container merge, ensuring metadata survives remux. +- Scheduler diff logic hardened to ignore reorder-only changes. 
+- Snapshot persistence made deterministic to prevent unnecessary DB churn.
+
+---
+
+### Fixed
+
+- Prevented duplicate active-job enqueue on scheduler restart.
+- Eliminated reorder-triggered playlist re-downloads.
+- Fixed snapshot instability caused by unordered playlist items.
+- Prevented metadata failures from corrupting or blocking completed downloads.
+
+---
+
+### Notes
+
+- This release prioritizes stability over new feature expansion.
+- v0.9.3 marks the transition to a canonical, deterministic ingestion engine.
+- MKV is now the default video container to preserve codec fidelity and improve metadata support.
+- Spotify integration depends on the official Spotify Web API and requires valid OAuth credentials plus Premium account validation; without these, Spotify playlist sync and metadata ingestion remain disabled.
+
 ## [v0.9.2] – Search Engine Dialed In // Home Page UI Update

 Highlights
@@ -10,89 +77,57 @@ This release hardens the download pipeline (especially audio-only MP3), improves

 ⸻

-🚀 Improvements
-	• Reliable MP3 audio-only downloads
-	• Audio mode now uses a robust bestaudio[acodec!=none]/bestaudio/best selector.
-	• Prevents unnecessary video downloads when targeting MP3.
-	• Matches known-working yt-dlp CLI behavior.
-	• Works consistently for direct URLs and queued jobs.
-	• Safer yt-dlp option handling
-	• Avoids forced merge/remux unless explicitly required.
-	• Reduces ffmpeg post-processing failures.
-	• Audio and video paths are now clearly separated and predictable.
-	• yt-dlp CLI observability
-	• Job workers now log the exact yt-dlp CLI command executed (with secrets redacted).
-	• Makes debugging format, cookie, and extractor issues significantly easier.
+🚀 Improvements
+	• Reliable MP3 audio-only downloads
+	• Audio mode now uses a robust bestaudio[acodec!=none]/bestaudio/best selector (see the sketch below).
+	• Prevents unnecessary video downloads when targeting MP3.
+	• Matches known-working yt-dlp CLI behavior.
+	• Works consistently for direct URLs and queued jobs.
+	• Safer yt-dlp option handling
+	• Avoids forced merge/remux unless explicitly required.
+	• Reduces ffmpeg post-processing failures.
+	• Audio and video paths are now clearly separated and predictable.
+	• yt-dlp CLI observability
+	• Job workers now log the exact yt-dlp CLI command executed (with secrets redacted).
+	• Makes debugging format, cookie, and extractor issues significantly easier.

 ⸻

-🧠 Behavior Fixes
-	• Post-processing failures are now terminal
-	• ffmpeg / post-processing errors correctly mark jobs as FAILED.
-	• Prevents silent re-queue loops and misleading “Queued” states in the UI.
-	• Video pipeline preserved
-	• Default video behavior (bestvideo+bestaudio/best) remains unchanged.
-	• MP4 / MKV / WebM downloads continue to work as before.
+🧠 Behavior Fixes
+	• Post-processing failures are now terminal
+	• ffmpeg / post-processing errors correctly mark jobs as FAILED.
+	• Prevents silent re-queue loops and misleading “Queued” states in the UI.
+	• Video pipeline preserved
+	• Default video behavior (bestvideo+bestaudio/best) remains unchanged.
+	• MP4 / MKV / WebM downloads continue to work as before.

 ⸻

-🎧 Music & Metadata
-	• Music metadata enrichment remains optional
-	• Failed or low-confidence enrichment no longer blocks successful downloads.
-	• Clear logging when metadata is skipped due to confidence thresholds.
+🎧 Music & Metadata
+	• Music metadata enrichment remains optional
+	• Failed or low-confidence enrichment no longer blocks successful downloads.
+	• Clear logging when metadata is skipped due to confidence thresholds.
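+
+A minimal sketch of the audio selector referenced above, using yt-dlp's public Python API. The option keys are real yt-dlp options; the URL and surrounding wiring are illustrative only, not Retreivr's worker code:
+
+```python
+# Hedged sketch: audio-only download using the selector from the notes above.
+from yt_dlp import YoutubeDL
+
+AUDIO_OPTS = {
+    # Prefer any audio-bearing stream; avoid falling back to video-only formats.
+    "format": "bestaudio[acodec!=none]/bestaudio/best",
+    # Convert to MP3 after download (requires ffmpeg on PATH).
+    "postprocessors": [{"key": "FFmpegExtractAudio", "preferredcodec": "mp3"}],
+}
+
+with YoutubeDL(AUDIO_OPTS) as ydl:
+    ydl.download(["https://www.youtube.com/watch?v=EXAMPLE"])
+```
+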
 ⸻

-🖥 UI / UX
-	• Home page cleanup
-	• Reorganized source filters and advanced options into a single compact row.
-	• Reduced visual noise without removing functionality.
-	• Improved spacing and alignment for music mode, format, and destination controls.
-	• Advanced Search remains available
-	• Advanced functionality is still accessible via the dedicated Advanced Search page.
+🖥 UI / UX
+	• Home page cleanup
+	• Reorganized source filters and advanced options into a single compact row.
+	• Reduced visual noise without removing functionality.
+	• Improved spacing and alignment for music mode, format, and destination controls.
+	• Advanced Search remains available
+	• Advanced functionality is still accessible via the dedicated Advanced Search page.

 ⸻

-🧹 Internal / Maintenance
-	• Improved internal option auditing logs.
-	• Better separation between search, enqueue, and execution logic.
-	• No schema or config migrations required.
+🧹 Internal / Maintenance
+	• Improved internal option auditing logs.
+	• Better separation between search, enqueue, and execution logic.
+	• No schema or config migrations required.

 ⸻

-⚠️ Known Notes
-	• Client-side (“download to this device”) delivery is still being refined and may be disabled or hidden in some UI paths.
-
+⚠️ Known Notes
+	• Client-side (“download to this device”) delivery is still being refined and may be disabled or hidden in some UI paths.

 ## [v0.9.1] – Runtime Stability & Direct URL Fixes

-This release focuses on restoring and hardening runtime stability after refactors since yt-archiver v1.2.0.
-Primary goals were correctness, predictability, and eliminating regressions in downloads, scheduling, and search flows.
+This release focuses on restoring and hardening runtime stability after refactors since yt-archiver v1.2.0. Primary goals were correctness, predictability, and eliminating regressions in downloads, scheduling, and search flows.

 Fixed:
-- Restored reliable Direct URL downloads for video and audio (mp3/m4a/etc).
-- Corrected yt-dlp invocation for audio formats (uses extract-audio instead of merge-output-format).
-- Fixed Direct URL runs appearing permanently queued in the Home UI.
-- Prevented empty or zero-byte output files from being recorded as completed.
-- Fixed scheduler playlist downloads producing incorrect formats or audio-only output.
-- Ensured scheduler and direct downloads can run concurrently without interference.
-- Fixed missing database schema initialization for search-related tables.
-- Normalized all filesystem paths via paths.py and environment variables (Docker-safe).
-- Fixed Advanced Search “Failed to load requests” error caused by search DB store calling service-only logic.
-- Fixed Home screen results remaining stuck in “Queued” by restoring reliable search request status hydration.
-- Unified search job database usage to a single canonical path to prevent schema and state mismatches.
+- Restored reliable Direct URL downloads for video and audio (mp3/m4a/etc).
+- Corrected yt-dlp invocation for audio formats (uses extract-audio instead of merge-output-format).
+- Fixed Direct URL runs appearing permanently queued in the Home UI.
+- Prevented empty or zero-byte output files from being recorded as completed.
+- Fixed scheduler playlist downloads producing incorrect formats or audio-only output.
+- Ensured scheduler and direct downloads can run concurrently without interference.
+- Fixed missing database schema initialization for search-related tables.
+- Normalized all filesystem paths via paths.py and environment variables (Docker-safe).
+- Fixed Advanced Search “Failed to load requests” error caused by search DB store calling service-only logic.
+- Fixed Home screen results remaining stuck in “Queued” by restoring reliable search request status hydration.
+- Unified search job database usage to a single canonical path to prevent schema and state mismatches.

 Changed:
-- Direct URL playlist links are now explicitly rejected with a clear user-facing error message.
-- Direct URL runs bypass the job queue but still report progress and completion via run status.
-- Search-only results can now be downloaded individually via the Home results UI.
-- Default video downloads respect configured format preferences (e.g., webm/mp4).
-- Metadata enrichment failures no longer block or corrupt completed downloads.
+- Direct URL playlist links are now explicitly rejected with a clear user-facing error message.
+- Direct URL runs bypass the job queue but still report progress and completion via run status.
+- Search-only results can now be downloaded individually via the Home results UI.
+- Default video downloads respect configured format preferences (e.g., webm/mp4).
+- Metadata enrichment failures no longer block or corrupt completed downloads.

 Notes:
-- Playlist URLs must be added via Scheduler / Playlist configuration, not Direct URL mode.
-- Kill-download button is not guaranteed during active runs and remains experimental.
-- Watcher functionality is present but considered beta and may change in later releases.
+
+- Playlist URLs must be added via Scheduler / Playlist configuration, not Direct URL mode.
+- The kill-download button is not guaranteed to take effect during active runs and remains experimental.
+- Watcher functionality is present but considered beta and may change in later releases.

 ## [v0.9.0] – Retreivr Rebrand Release // Music Mode and Metadata
 - Project renamed to Retreivr
diff --git a/README.md b/README.md
index f90deb2..51ea7db 100644
--- a/README.md
+++ b/README.md
@@ -4,273 +4,228 @@

-

- Powerful, self-hosted media search, archival, and metadata embedding engine
-

- ## Overview -Retreivr is a self-hosted media search and archival engine focused on discovering, scoring, and archiving publicly available media. -It provides an advanced search pipeline, a unified FIFO download queue, and post-download metadata enrichment, with an initial focus on music. - -## History -Retreivr is the successor to the YouTube-Archiver project. -Version v0.9.x represents the first stable pre-1.0 release series under the Retreivr name. - -## Functionality -Retreivr runs as a local service backed by SQLite, exposing a Web UI and API for search, queue inspection, logs, and completed downloads. -All downloads are processed exactly once through a unified worker queue and written to disk. - -## Core Capabilities -- Keeping personal or shared YouTube playlists in sync -- Running scheduled archive jobs without cron or babysitting -- Downloading a single URL on demand -- Reviewing status, progress, logs, and history from a browser -- Downloading completed files directly from the server -- Running cleanly in Docker with explicit, safe volume mappings -- Mobile-friendly Web UI served by the API -- Built-in scheduler (no cron, no systemd) -- Docker-safe path handling and volume layout -- Background runs with live playlist + video progress -- SQLite history with search, filter, and sort -- Manual yt-dlp update button (restart required) -- Optional Basic auth and reverse-proxy support -- Download buttons for completed files -- Single-URL delivery mode (server library or one-time client download) -- Manual cleanup for temporary files -- Single-playlist runs on demand (without editing config) -- Current phase and last error in Status -- App version + update availability (GitHub release check) - -## Web UI Screenshots - -### Home Page -![Home Page](webUI/assets/screenshots/screenshot_1.png) - -## Quick Start (Docker - recommended) -Pull the prebuilt image from GHCR: +Retreivr is a self-hosted media ingestion and archival engine. + +It resolves user intent, downloads publicly available media, applies canonical metadata, and writes clean, deterministic files to disk. Retreivr does not stream, index, or play media — it focuses strictly on acquisition and archival correctness. + +MusicBrainz is the canonical metadata authority. Spotify is optional and only used when OAuth credentials are configured and premium validation succeeds. 
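+
+A minimal sketch of that precedence, with hypothetical function and field names (not Retreivr's actual internals), just to make the ordering concrete:
+
+```python
+# Hedged sketch: MusicBrainz-first resolution; Spotify supplies hints only.
+def resolve_canonical_metadata(query, musicbrainz, spotify=None):
+    canonical = musicbrainz.lookup(query)  # sole authority; assumed dict-like
+    if canonical is None:
+        return None  # no canonical match, so nothing is embedded
+    if spotify is not None and spotify.oauth_premium_valid():
+        hints = spotify.lookup(query) or {}
+        for key, value in hints.items():
+            canonical.setdefault(key, value)  # hints fill gaps, never override
+    return canonical
+```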
+ +--- + +## Core Principles + +- Deterministic execution (no duplicate downloads) +- Canonical metadata-first architecture (MusicBrainz authority) +- Clean filesystem structure (no source IDs in filenames) +- Idempotent scheduler behavior +- Single-worker design for correctness +- Local-first, Docker-first deployment + +--- + +## What Retreivr Does + +- Resolves search queries into concrete media candidates +- Downloads media using yt-dlp +- Applies canonical naming rules +- Embeds structured metadata into files +- Stores download history in SQLite +- Synchronizes playlists via deterministic snapshot + diff +- Provides a Web UI and REST API +- Sends optional Telegram run summaries + +--- + +## What Retreivr Does NOT Do + +- Stream media +- Replace Plex, Jellyfin, or music players +- Auto-delete owned files +- Circumvent DRM or protected platforms +- Run as a cloud service +- Collect telemetry + +--- + +## Architecture Summary + +### Intent Ingestion +- Direct URL (single item) +- Search queries +- Scheduled playlist sync + +### Resolution +- MusicBrainz-first canonical resolution +- Spotify fallback only when OAuth + Premium validated + +### Download +- Unified FIFO job queue +- yt-dlp execution +- Container finalized before metadata embedding +- Atomic move to final destination + +### Metadata +- Video: title, identifiers, channel_id, canonical URL embedded +- Music: enriched via MusicBrainz (track, album, ISRC, MBIDs, artwork) +- Files are never renamed after finalization + +### Scheduler +- Deterministic playlist snapshot hashing +- Reorder does not trigger re-download +- Active-job duplicate prevention +- Crash-safe idempotency +- Single structured run summary + +--- + +## Canonical Filesystem Behavior + +### Music + +``` +Music/ + Album Artist/ + Album (Year)/ + Disc 1/ + 01 - Track Title.ext +``` + +Rules: +- No video IDs in filenames +- No upload dates in filenames +- Zero-padded track numbers +- Unicode-safe normalization + +### Video + +- Filename = sanitized title only +- Collision resolution via " (2)", " (3)" +- Source identifiers stored in metadata + SQLite only + +--- + +## Default Container Policy + +As of v0.9.3: + +- Default video container: **MKV** +- No forced re-encoding +- Metadata embedded after container finalization + +MKV provides strong metadata support while preserving original codec fidelity. + +--- + +## Quick Start (Docker - Recommended) + +Pull the image: + ```bash docker pull ghcr.io/retreivr/retreivr:latest ``` -The image is published under GitHub Packages for this repo. -Copy the Docker and env templates, then start: +Copy templates and start: + ```bash cp docker/docker-compose.yml.example docker/docker-compose.yml cp .env.example .env docker compose -f docker/docker-compose.yml up -d ``` -Open the Web UI at `http://YOUR_HOST:8090`. -## Quick Start (Local/source) -```bash -git clone https://github.com/Retreivr/retreivr.git -cd retreivr -cp docker-compose.yml.example docker-compose.yml -docker compose up -d -``` +Open the Web UI at: -Docker deployment is the recommended path for most users. 
+```
+http://YOUR_HOST:8090
+```

-For Portainer deployment, see portainer.md and /docker/docker-compose.portainer.yml.example
+---

 ## Requirements
+
 Docker deployment:
-- Docker Engine or Docker Desktop
-- docker compose (v2)
+- Docker Engine or Docker Desktop
+- docker compose (v2)

-Local/source deployment (optional):
-- Python 3.11 only
-- ffmpeg on PATH
-- Node.js or Deno only if you use a JS runtime for extractor workarounds
+Optional local/source:
+- Python 3.11
+- ffmpeg on PATH

-## Configuration
-Most users only need to edit `config/config.json` and set download paths.
+---
+
+## Configuration Overview
+
+Primary config file:

-1) Copy the sample config:
-```bash
-cp config/config_sample.json config/config.json
+```
+config/config.json
 ```

-Config path usage:
-- Web UI / API runs use the server’s active config path (`/api/config/path`, default `config/config.json`).
-- CLI runs use the path passed to `scripts/archiver.py` (or its default if omitted).
+Key areas:
+- Playlist definitions
+- Default `final_format`
+- Music mode toggle
+- OAuth configuration (optional)
+- Scheduler interval
+- Telegram notifications (optional)

-2) (OPTIONAL) Create a Google Cloud OAuth client (Type: Desktop app) and place client secret JSONs in `tokens/`.
+Spotify integration requires OAuth credentials and Premium validation. Without them, Spotify functionality remains disabled.

-3) (OPTIONAL) Generate OAuth tokens:
-Web UI (recommended):
-- Config page → Accounts → fill Account, Client Secret, Token path
-- Click “Run OAuth”, open the URL, approve, then paste the code to save the token
+---

-CLI fallback:
-```bash
-python scripts/setup_oauth.py --account family_tv tokens/client_secret_family.json tokens/token_family.json
-```
+## Music Metadata Enrichment (Optional)

-4) Edit `config/config.json`:
-- `accounts` paths to client_secret and token JSONs (optional if you only use public playlists)
-- `playlists` with `playlist_id`, `folder`, optional `account`, optional `final_format`, optional `music_mode`, optional `mode` (full/subscribe)
-- `final_format` default (webm/mp4/mkv/mp3)
-- `music_filename_template` optional music-safe naming (artist/album/track)
-- `yt_dlp_cookies` optional Netscape cookies.txt for improved music metadata
-- `js_runtime` to avoid extractor issues (node:/path or deno:/path)
-- `single_download_folder` default for single-URL downloads
-- `telegram` optional bot_token/chat_id for summaries (see Telegram setup below)
-- `schedule` optional interval scheduler
-- `watch_policy` optional adaptive watcher with downtime window (local time)
+When enabled:

-## Music mode (optional)
-Music mode is opt-in per playlist and per single-URL run. It applies music-focused metadata and uses yt-dlp music metadata when available. When enabled, download URLs use `music.youtube.com`.
-
-Recommendations:
-- Provide a Netscape `cookies.txt` file via `yt_dlp_cookies` (stored under `tokens/`) for the best YouTube Music metadata.
-- Use a music filename template such as:
-  `%(artist)s/%(album)s/%(track_number)s - %(track)s.%(ext)s`
-
-Notes:
-- If cookies are missing, music metadata quality may be degraded.
-- Single-URL runs auto-enable music mode when the URL is `music.youtube.com`.
-- If `final_format` is a video format (webm/mp4/mkv), the download remains video even in music mode. Use an audio format (mp3/m4a/flac/opus) to force audio-only.
- -## Music metadata enrichment (optional) -When `music_mode` is enabled and `music_metadata.enabled` is true, the app enqueues the finalized file for background enrichment using Spotify as the canonical metadata source, with MusicBrainz as an optional fallback. This runs asynchronously and does not block downloads. Files are never renamed, and existing rich tags are not overwritten. - -Example config: -```json -"music_metadata": { - "enabled": true, - "confidence_threshold": 70, - "embed_artwork": true, - "allow_overwrite_tags": true, - "max_artwork_size_px": 1500, - "rate_limit_seconds": 1.5, - "dry_run": false -} -``` +When enabled: -Tagged files preserve YouTube traceability via custom tags (SOURCE, SOURCE_TITLE, MBID when matched). By default, enriched tags overwrite existing yt-dlp tags; set `allow_overwrite_tags` to false to keep original tags intact. - -## Single-URL delivery modes -Single-URL runs support an explicit delivery mode: -- `server` (default): save into the server library (`single_download_folder`). -- `client`: stage the finalized file for a one-time HTTP download to the browser, then delete it after transfer or timeout (~10 minutes). - -Delivery mode applies to single-URL runs only; playlists and watcher runs always save to the server library. Validation and conversion still occur before any delivery. - -## Direct URL limitations -Direct URL mode is intentionally limited to **single media items only**. - -- Playlist URLs are **not supported** in Direct URL mode. -- If a playlist URL is entered, the run will fail immediately with a clear error message. -- To archive playlists, use the **Scheduler** or **Playlist** configuration instead. - -This design keeps Direct URL runs fast, predictable, and isolated from long-running playlist jobs. - -## Web UI -The Web UI is served by the API and talks only to REST endpoints. It provides: -- Home page with run controls, status, schedule, and metrics -- Config page (including schedule controls and optional playlist names) -- OAuth helper to generate tokens directly from the Config page -- Downloads page with search and limit controls -- History page with search, filter, sort, and limit controls -- Logs page with manual refresh -- Live playlist progress + per-video download progress -- Current phase and last error in Status -- App version + update availability (GitHub release check) -- Download buttons for completed files -- Single-URL delivery mode (server library or one-time client download) -- Manual cleanup for temporary files -- Manual yt-dlp update button (restart container after update) -- Single-playlist runs on demand (without editing config) - -## API overview -Common endpoints: -- GET /api/status -- GET /api/metrics -- GET /api/schedule -- POST /api/run -- GET /api/history -- GET /api/logs - -OpenAPI docs are available at `/docs`. - -## Telegram notifications (optional) -You must create your own bot and provide both the bot token and chat ID. - -Quick setup: -1) Talk to @BotFather in Telegram and create a bot to get the token. -2) Start a chat with the new bot and send a message. -3) Get your chat ID by visiting: - `https://api.telegram.org/bot/getUpdates` - Look for `"chat":{"id":...}` in the response. -4) Set these in `config.json`: -``` -"telegram": { - "bot_token": "YOUR_BOT_TOKEN", - "chat_id": "YOUR_CHAT_ID" -} -``` +- MusicBrainz resolves canonical track + release +- MBIDs embedded +- Artwork optionally embedded +- Tags enriched without renaming files + +Spotify metadata is never authoritative. 
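+
+For context, the lookup behind this enrichment amounts to a MusicBrainz query. A minimal illustration against the public MusicBrainz web service; this is not Retreivr's internal service layer, and the User-Agent value is a placeholder:
+
+```python
+# Hedged sketch: fetch the top-scoring MusicBrainz recording for artist + title.
+import requests
+
+def search_recording(artist: str, title: str) -> dict | None:
+    resp = requests.get(
+        "https://musicbrainz.org/ws/2/recording",
+        params={
+            "query": f'artist:"{artist}" AND recording:"{title}"',
+            "fmt": "json",
+            "limit": 1,
+        },
+        # MusicBrainz requires an identifying User-Agent string.
+        headers={"User-Agent": "retreivr-example/0.1 (you@example.com)"},
+        timeout=10,
+    )
+    resp.raise_for_status()
+    recordings = resp.json().get("recordings") or []
+    return recordings[0] if recordings else None
+```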
-Notes: -- For group chats, add the bot to the group and send a message first. -- Group chat IDs are usually negative numbers. +--- -## Documentation +## API Overview -- [Path & Volume Layout](docs/paths.md) -- [Portainer Setup](docs/portainer.md) +Common endpoints: + +- `GET /api/status` +- `GET /api/metrics` +- `POST /api/run` +- `GET /api/history` +- `GET /api/music/albums/search` +- `POST /api/music/album/candidates` + +OpenAPI docs available at `/docs`. + +--- ## Updating -Containers are disposable; your real data lives in mounted volumes. A safe update flow is: + ```bash docker compose pull docker compose down docker compose up -d ``` -This preserves your config, database, logs, tokens, and downloads. -## Versioning (Docker builds) -The app reads its version from `RETREIVR_VERSION`. The Dockerfile exposes a build arg: -```bash -docker build -f docker/Dockerfile --build-arg RETRIEVR_VERSION=0.9.1 -t retreivr:latest . -``` -This avoids keeping the version in Compose or runtime envs. - -## Security -Retreivr is designed as a local-first application with no hosted or cloud mode. It supports optional Basic auth and is reverse-proxy friendly. OAuth tokens and sensitive data are stored locally and not exposed to frontend JavaScript. - -## Project Scope (v0.9.x) -- Music-focused search and downloads -- Public, non-DRM sources only -- Single-worker, deterministic execution -- UI and APIs are stable but evolving -- Direct URL mode is restricted to single-item downloads; playlists must use scheduled or playlist runs. - -## What this tool does not attempt to do -This project does not attempt to: -- Circumvent DRM -- Auto-update yt-dlp at runtime -- Act as a hosted or cloud service -- Collect telemetry or usage data -- Bypass platform terms of service -- Provide real-time detection (playlist checks are scheduled/polled) -- Run with multiple API workers (single-worker design is required for the watcher) -- Guarantee complete music metadata (fields may be missing depending on source and cookies) - -## Notes -- Downloads are staged in a temp directory and atomically copied to their final location -- “Clear temporary files” only removes working directories (temp downloads + yt-dlp temp) -- “Update yt-dlp” runs in-container and requires a container restart to take effect -- RETREIVR_* environment variables can override paths (see .env.example) - -## Release -See `CHANGELOG.md` for details of the current release and history. - -## Contributing -Contributions are welcome. Please read `CONTRIBUTING.md` before opening a PR. - -## Security -Security issues should be reported privately. See `SECURITY.md`. +Data persists in mounted volumes. + +--- + +## Project Scope (v0.9.3) + +- Stable ingestion engine +- MusicBrainz-first canonical resolution +- Deterministic playlist snapshot behavior +- Idempotent scheduler runs +- MKV default container +- Integration test coverage for core flows + +v0.9.3 is a stabilization milestone. + +--- ## License + MIT. See `LICENSE`. 
diff --git a/api/intent_dispatcher.py b/api/intent_dispatcher.py new file mode 100644 index 0000000..95178e3 --- /dev/null +++ b/api/intent_dispatcher.py @@ -0,0 +1,352 @@ +"""Intent execution dispatcher for API-layer intent plumbing.""" + +from __future__ import annotations + +import logging +import os +import sqlite3 +from pathlib import Path +from typing import Any, Dict +from urllib.parse import quote + +from input.intent_router import IntentType +from playlist.rebuild import rebuild_playlist_from_tracks +from scheduler.jobs.spotify_playlist_watch import enqueue_spotify_track, playlist_watch_job + + +async def execute_intent( + intent_type: str, + identifier: str, + config, + db, + queue, + spotify_client, +) -> Dict[str, Any]: + """Dispatch intent execution to existing Spotify ingestion behaviors. + + This function keeps intent execution thin by delegating to established + watcher/enqueue helpers where possible. + """ + raw_intent = str(intent_type or "").strip() + raw_identifier = str(identifier or "").strip() + if not raw_intent: + return _error_response(raw_intent, raw_identifier, "intent_type is required") + if not raw_identifier: + return _error_response(raw_intent, raw_identifier, "identifier is required") + + try: + intent = IntentType(raw_intent) + except ValueError: + return _error_response(raw_intent, raw_identifier, "unsupported intent_type") + + if intent == IntentType.SPOTIFY_PLAYLIST: + playlist_name = _resolve_playlist_name(raw_identifier, config) + result = playlist_watch_job( + spotify_client, + db, + queue, + raw_identifier, + playlist_name=playlist_name, + config=config if isinstance(config, dict) else None, + ) + status = "accepted" if result.get("status") in {"updated", "unchanged"} else "error" + return { + "status": status, + "intent_type": intent.value, + "identifier": raw_identifier, + "message": f"playlist sync {result.get('status', 'completed')}", + "enqueued_count": int(result.get("enqueued") or 0), + } + + if intent == IntentType.SPOTIFY_TRACK: + search_service = _resolve_search_service(config) + if search_service is None: + return _error_response( + intent.value, + raw_identifier, + "search_service is required for spotify_track execution", + ) + track = _fetch_spotify_track(spotify_client, raw_identifier) + if not track: + return _error_response(intent.value, raw_identifier, "track not found") + await enqueue_spotify_track( + queue=queue, + spotify_track=track, + search_service=search_service, + playlist_id=f"spotify_track_{raw_identifier}", + ) + return { + "status": "accepted", + "intent_type": intent.value, + "identifier": raw_identifier, + "message": "track enqueue attempted", + "enqueued_count": 1, + } + + if intent == IntentType.SPOTIFY_ALBUM: + result = await run_spotify_album_sync( + album_id=raw_identifier, + config=config, + db=db, + queue=queue, + spotify_client=spotify_client, + ) + result["intent_type"] = intent.value + result["identifier"] = raw_identifier + return result + + if intent == IntentType.SPOTIFY_ARTIST: + return { + "status": "accepted", + "intent_type": intent.value, + "identifier": raw_identifier, + "message": "artist intent requires selection before enqueue", + "enqueued_count": 0, + } + + return _error_response(intent.value, raw_identifier, "intent type not implemented") + + +def _error_response(intent_type: str, identifier: str, message: str) -> Dict[str, Any]: + return { + "status": "error", + "intent_type": intent_type, + "identifier": identifier, + "message": message, + "enqueued_count": 0, + } + + +def 
_resolve_search_service(config: Any) -> Any: + if isinstance(config, dict): + return config.get("search_service") + return getattr(config, "search_service", None) + + +def _resolve_playlist_name(playlist_id: str, config: Any) -> str: + if not isinstance(config, dict): + return playlist_id + entries = config.get("spotify_playlists") or [] + for entry in entries: + if not isinstance(entry, dict): + continue + candidate = str(entry.get("playlist_id") or "").strip() + if candidate and candidate == playlist_id: + name = str(entry.get("name") or "").strip() + if name: + return name + return playlist_id + + +def _fetch_spotify_track(spotify_client: Any, track_id: str) -> dict[str, Any] | None: + encoded = quote(track_id, safe="") + payload = spotify_client._request_json( + f"https://api.spotify.com/v1/tracks/{encoded}", + params={"market": "from_token"}, + ) + if not isinstance(payload, dict) or not payload.get("id"): + return None + return _normalize_track_payload(payload, album_name=(payload.get("album") or {}).get("name")) + + +def _fetch_spotify_album_tracks(spotify_client: Any, album_id: str) -> tuple[str, list[dict[str, Any]]]: + encoded = quote(album_id, safe="") + album = spotify_client._request_json( + f"https://api.spotify.com/v1/albums/{encoded}", + params={"fields": "name,tracks(items(id,name,duration_ms,artists(name),disc_number,track_number),next)"}, + ) + title = str(album.get("name") or "") + tracks_page = album.get("tracks") or {} + items: list[dict[str, Any]] = [] + while True: + for raw in tracks_page.get("items") or []: + if not raw or not raw.get("id"): + continue + items.append(_normalize_track_payload(raw, album_name=title)) + next_url = tracks_page.get("next") + if not next_url: + break + tracks_page = spotify_client._request_json(str(next_url)) + return title, items + + +async def run_spotify_album_sync( + album_id: str, + config, + db, + queue, + spotify_client, +) -> Dict[str, Any]: + """Run a one-shot Spotify album sync using existing enqueue and rebuild pipelines. + + Behavior mirrors playlist sync style orchestration: + - fetch album metadata + ordered tracks, + - enqueue each track via ``enqueue_spotify_track``, + - best-effort rebuild of an album-scoped M3U from downloaded canonical paths. 
+ """ + album_identifier = str(album_id or "").strip() + if not album_identifier: + return { + "status": "error", + "intent_type": IntentType.SPOTIFY_ALBUM.value, + "identifier": album_identifier, + "message": "album_id is required", + "enqueued_count": 0, + } + + search_service = _resolve_search_service(config) + if search_service is None: + return { + "status": "error", + "intent_type": IntentType.SPOTIFY_ALBUM.value, + "identifier": album_identifier, + "message": "search_service is required for spotify_album execution", + "enqueued_count": 0, + } + + try: + album_title, album_tracks, album_artist = _fetch_spotify_album_tracks_with_artist( + spotify_client, + album_identifier, + ) + except Exception as exc: + return { + "status": "error", + "intent_type": IntentType.SPOTIFY_ALBUM.value, + "identifier": album_identifier, + "message": f"album fetch failed: {exc}", + "enqueued_count": 0, + } + + if not album_tracks: + return { + "status": "error", + "intent_type": IntentType.SPOTIFY_ALBUM.value, + "identifier": album_identifier, + "message": "album contains no tracks", + "enqueued_count": 0, + } + + playlist_id = f"spotify_album_{album_identifier}" + enqueued = 0 + for track in album_tracks: + await enqueue_spotify_track( + queue=queue, + spotify_track=track, + search_service=search_service, + playlist_id=playlist_id, + ) + enqueued += 1 + + try: + downloaded_paths = _load_downloaded_paths_for_playlist(playlist_id) + playlist_root, music_root = _resolve_playlist_dirs(config) + artist_name = album_artist or "Unknown Artist" + album_name = album_title or album_identifier + playlist_name = f"Spotify - Album - {artist_name} - {album_name}" + rebuild_playlist_from_tracks( + playlist_name=playlist_name, + playlist_root=playlist_root, + music_root=music_root, + track_file_paths=downloaded_paths, + ) + except Exception: + logging.exception("Album M3U rebuild failed for album %s", album_identifier) + + return { + "status": "accepted", + "intent_type": IntentType.SPOTIFY_ALBUM.value, + "identifier": album_identifier, + "message": f"album sync completed: {album_title or album_identifier}", + "enqueued_count": enqueued, + } + + +def _fetch_spotify_album_tracks_with_artist( + spotify_client: Any, + album_id: str, +) -> tuple[str, list[dict[str, Any]], str]: + encoded = quote(album_id, safe="") + album = spotify_client._request_json( + f"https://api.spotify.com/v1/albums/{encoded}", + params={ + "fields": ( + "name,artists(name)," + "tracks(items(id,name,duration_ms,artists(name),disc_number,track_number,external_ids(isrc)),next)" + ) + }, + ) + title = str(album.get("name") or "") + album_artists = album.get("artists") or [] + album_artist = ( + album_artists[0].get("name") + if album_artists and isinstance(album_artists[0], dict) + else "" + ) + + tracks_page = album.get("tracks") or {} + items: list[dict[str, Any]] = [] + while True: + for raw in tracks_page.get("items") or []: + if not raw or not raw.get("id"): + continue + items.append(_normalize_track_payload(raw, album_name=title)) + next_url = tracks_page.get("next") + if not next_url: + break + tracks_page = spotify_client._request_json(str(next_url)) + return title, items, str(album_artist or "") + + +def _resolve_db_path() -> str: + return os.environ.get("RETREIVR_DB_PATH", os.path.join(os.getcwd(), "retreivr.sqlite3")) + + +def _load_downloaded_paths_for_playlist(playlist_id: str) -> list[str]: + conn: sqlite3.Connection | None = None + try: + conn = sqlite3.connect(_resolve_db_path(), check_same_thread=False, timeout=30) + conn.row_factory = 
sqlite3.Row + cur = conn.cursor() + cur.execute( + """ + SELECT file_path + FROM downloaded_music_tracks + WHERE playlist_id=? + ORDER BY downloaded_at ASC, id ASC + """, + (playlist_id,), + ) + rows = cur.fetchall() + return [str(row["file_path"]) for row in rows if row["file_path"]] + except sqlite3.Error: + logging.exception("Failed to load downloaded tracks for playlist %s", playlist_id) + return [] + finally: + if conn is not None: + conn.close() + + +def _resolve_playlist_dirs(config: Any) -> tuple[Path, Path]: + cfg = config if isinstance(config, dict) else {} + music_root = Path(str(cfg.get("music_download_folder") or "Music")) + playlist_root = Path( + str(cfg.get("playlists_folder") or cfg.get("playlist_export_folder") or (music_root / "Playlists")) + ) + return playlist_root, music_root + + +def _normalize_track_payload(track: dict[str, Any], *, album_name: str | None = None) -> dict[str, Any]: + artists = track.get("artists") or [] + first_artist = artists[0].get("name") if artists and isinstance(artists[0], dict) else None + external_ids = track.get("external_ids") or {} + return { + "spotify_track_id": track.get("id"), + "artist": first_artist, + "title": track.get("name"), + "album": album_name or ((track.get("album") or {}).get("name")), + "duration_ms": track.get("duration_ms"), + "isrc": external_ids.get("isrc"), + "track_num": track.get("track_number"), + "disc_num": track.get("disc_number"), + } diff --git a/api/main.py b/api/main.py index 610b205..38d7cfc 100644 --- a/api/main.py +++ b/api/main.py @@ -22,21 +22,24 @@ def _require_python_311(): import logging import mimetypes import os +import re import sqlite3 import subprocess import shutil import tempfile import threading import time +import requests +from pathlib import Path from datetime import datetime, timedelta, timezone from zoneinfo import ZoneInfo from uuid import uuid4 -from urllib.parse import urlparse +from urllib.parse import quote, urlparse from typing import Optional import anyio from fastapi import Body, FastAPI, HTTPException, Query, Request -from fastapi.responses import JSONResponse, PlainTextResponse, StreamingResponse +from fastapi.responses import JSONResponse, PlainTextResponse, RedirectResponse, StreamingResponse from fastapi.staticfiles import StaticFiles from google_auth_oauthlib.flow import InstalledAppFlow from googleapiclient.errors import HttpError @@ -71,6 +74,7 @@ def _require_python_311(): SpotifyPlaylistImportError, SpotifyPlaylistImporter, ) +from metadata.services.musicbrainz_service import get_musicbrainz_service from engine.core import ( EngineStatus, @@ -107,6 +111,20 @@ def _require_python_311(): resolve_dir, ) from engine.runtime import get_runtime_info +from input.intent_router import IntentType, detect_intent +from api.intent_dispatcher import execute_intent as dispatch_intent +from spotify.oauth_client import SPOTIFY_TOKEN_URL, build_auth_url +from spotify.client import SpotifyPlaylistClient +from spotify.oauth_store import SpotifyOAuthStore, SpotifyOAuthToken +from db.playlist_snapshots import PlaylistSnapshotStore +from scheduler.jobs.spotify_playlist_watch import ( + normalize_spotify_playlist_identifier, + playlist_watch_job, + spotify_liked_songs_watch_job, + spotify_playlists_watch_job, + spotify_saved_albums_watch_job, + spotify_user_playlists_watch_job, +) APP_NAME = "Retreivr API" STATUS_SCHEMA_VERSION = 1 @@ -118,8 +136,18 @@ def _require_python_311(): _TRUST_PROXY = os.environ.get("YT_ARCHIVER_TRUST_PROXY", "").strip().lower() in {"1", "true", "yes", "on"} 
SCHEDULE_JOB_ID = "archive_schedule" WATCHER_JOB_ID = "playlist_watcher" +LIKED_SONGS_JOB_ID = "spotify_liked_songs_watch" +SAVED_ALBUMS_JOB_ID = "spotify_saved_albums_watch" +USER_PLAYLISTS_JOB_ID = "spotify_user_playlists_watch" +SPOTIFY_PLAYLISTS_WATCH_JOB_ID = "spotify_playlists_watch" DEFERRED_RUN_JOB_ID = "deferred_run" WATCHER_QUIET_WINDOW_SECONDS = 60 +COVER_ART_CACHE_TTL_SECONDS = 3600 +DEFAULT_LIKED_SONGS_SYNC_INTERVAL_MINUTES = 15 +DEFAULT_SAVED_ALBUMS_SYNC_INTERVAL_MINUTES = 30 +DEFAULT_USER_PLAYLISTS_SYNC_INTERVAL_MINUTES = 30 +DEFAULT_SPOTIFY_PLAYLISTS_SYNC_INTERVAL_MINUTES = 15 +logger = logging.getLogger(__name__) OAUTH_SCOPES = ["https://www.googleapis.com/auth/youtube.readonly"] OAUTH_SESSION_TTL = timedelta(minutes=15) _OAUTH_SESSIONS = {} @@ -134,6 +162,17 @@ def _require_python_311(): WEBUI_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "webUI")) + +def _mb_service(): + return get_musicbrainz_service() + + +def _search_music_album_candidates(query: str, *, limit: int) -> list[dict]: + normalized_query = str(query or "").strip() + if not normalized_query: + return [] + return _mb_service().search_release_groups(normalized_query, limit=limit) + def _is_http_url(value: str | None) -> bool: if not value or not isinstance(value, str): return False @@ -156,6 +195,40 @@ def _sanitize_non_http_urls(obj): return [_sanitize_non_http_urls(v) for v in obj] return obj +def notify_run_summary(config, *, run_type: str, status, started_at, finished_at): + if run_type not in {"scheduled", "watcher"}: + return + + successes = int(getattr(status, "run_successes", 0) or 0) + failures = int(getattr(status, "run_failures", 0) or 0) + attempted = successes + failures + + if attempted <= 0: + return + + duration_label = "unknown" + if started_at and finished_at: + start_dt = _parse_iso(started_at) + finish_dt = _parse_iso(finished_at) + if start_dt is not None and finish_dt is not None: + duration_sec = int((finish_dt - start_dt).total_seconds()) + m, s = divmod(max(0, duration_sec), 60) + duration_label = f"{m}m {s}s" if m else f"{s}s" + + msg = ( + "Retreivr Run Summary\n" + f"Run type: {run_type}\n" + f"Attempted: {attempted}\n" + f"Succeeded: {successes}\n" + f"Failed: {failures}\n" + f"Duration: {duration_label}" + ) + + try: + telegram_notify(config, msg) + except Exception: + logging.exception("Telegram notify failed (run_type=%s)", run_type) + def normalize_search_payload(payload: dict | None, *, default_sources: list[str]) -> dict: if payload is None: @@ -462,6 +535,7 @@ class SearchRequestPayload(BaseModel): duration_hint_sec: int | None = None quality_min_bitrate_kbps: int | None = None lossless_only: bool = False + music_mode: bool = False auto_enqueue: bool = True source_priority: list[str] | str | None = None max_candidates_per_source: int = 5 @@ -484,6 +558,167 @@ class SpotifyPlaylistImportPayload(BaseModel): playlist_url: str +class IntentExecutePayload(BaseModel): + intent_type: IntentType + identifier: str + + +class _IntentQueueAdapter: + """Queue adapter that writes intent payloads into the unified download queue.""" + + def enqueue(self, payload: dict) -> None: + if not isinstance(payload, dict): + logging.warning("Intent enqueue skipped: payload is not a dict") + return + engine = getattr(app.state, "worker_engine", None) + store = getattr(engine, "store", None) if engine is not None else None + if store is None: + logging.warning("Intent enqueue skipped: worker engine store unavailable") + return + + media_intent = str(payload.get("media_intent") 
or "").strip() or "track" + origin = "spotify_playlist" if payload.get("playlist_id") else "intent" + origin_id = str(payload.get("playlist_id") or payload.get("spotify_track_id") or "manual") + destination = str(payload.get("destination") or "").strip() or None + final_format = str(payload.get("final_format") or "").strip() or None + + def _to_dict(value): + if isinstance(value, dict): + return dict(value) + if value is None: + return {} + out = {} + for key in ( + "title", + "artist", + "album", + "album_artist", + "track_num", + "disc_num", + "date", + "genre", + "isrc", + "mbid", + "lyrics", + ): + if hasattr(value, key): + out[key] = getattr(value, key) + return out + + def _enqueue_music_query_job(artist: str, track: str, album: str | None = None) -> None: + normalized_artist = str(artist or "").strip() + normalized_track = str(track or "").strip() + normalized_album = str(album or "").strip() or None + if not normalized_artist or not normalized_track: + logging.warning("Intent enqueue skipped: music query missing artist/track") + return + query = quote(f"{normalized_artist} {normalized_track}".strip()) + url = f"https://music.youtube.com/search?q={query}" + output_template = { + "audio_mode": True, + "artist": normalized_artist, + "track": normalized_track, + "album": normalized_album, + "track_number": payload.get("track_number"), + "disc_number": payload.get("disc_number"), + "release_date": payload.get("release_date"), + "duration_ms": payload.get("duration_ms"), + "artwork_url": payload.get("artwork_url"), + "canonical_metadata": { + "artist": normalized_artist, + "track": normalized_track, + "album": normalized_album, + "duration_ms": payload.get("duration_ms"), + }, + } + if destination: + output_template["output_dir"] = destination + if final_format: + output_template["final_format"] = final_format + canonical_id = ( + f"music_track:{normalized_artist.lower()}:{(normalized_album or '').lower()}:" + f"{str(payload.get('track_number') or '').strip()}:{normalized_track.lower()}" + ) + store.enqueue_job( + origin=origin, + origin_id=origin_id, + media_type="music", + media_intent="music_track", + source="youtube_music", + url=url, + input_url=url, + output_template=output_template, + resolved_destination=destination, + canonical_id=canonical_id, + ) + logging.info( + "Intent payload queued playlist_id=%s spotify_track_id=%s", + payload.get("playlist_id"), + payload.get("spotify_track_id"), + ) + + if media_intent == "music_track": + _enqueue_music_query_job( + str(payload.get("artist") or ""), + str(payload.get("track") or payload.get("title") or ""), + str(payload.get("album") or ""), + ) + return + + resolved_media = payload.get("resolved_media") if isinstance(payload.get("resolved_media"), dict) else {} + media_url = str(resolved_media.get("media_url") or payload.get("url") or "").strip() + if not media_url: + fallback_artist = str(payload.get("artist") or "").strip() + fallback_track = str(payload.get("track") or payload.get("title") or "").strip() + fallback_album = str(payload.get("album") or "").strip() or None + if fallback_artist and fallback_track: + _enqueue_music_query_job(fallback_artist, fallback_track, fallback_album) + return + logging.warning("Intent enqueue skipped: no media URL or searchable artist/title available") + return + source = str(resolved_media.get("source_id") or payload.get("source") or resolve_source(media_url)).strip() or "unknown" + music_metadata = _to_dict(payload.get("music_metadata")) + output_template = { + "audio_mode": True, + 
"canonical_metadata": music_metadata, + "artist": music_metadata.get("artist"), + "album": music_metadata.get("album"), + "track": music_metadata.get("title"), + "track_number": music_metadata.get("track_num"), + "disc_number": music_metadata.get("disc_num"), + "duration_ms": resolved_media.get("duration_ms"), + } + if destination: + output_template["output_dir"] = destination + if final_format: + output_template["final_format"] = final_format + external_ids = music_metadata.get("external_ids") if isinstance(music_metadata.get("external_ids"), dict) else {} + canonical_id = str( + music_metadata.get("isrc") + or music_metadata.get("mbid") + or external_ids.get("isrc") + or payload.get("spotify_track_id") + or "" + ).strip() or None + store.enqueue_job( + origin=origin, + origin_id=origin_id, + media_type="music", + media_intent=media_intent, + source=source, + url=media_url, + input_url=media_url, + output_template=output_template, + resolved_destination=destination, + canonical_id=canonical_id, + ) + logging.info( + "Intent payload queued playlist_id=%s spotify_track_id=%s", + payload.get("playlist_id"), + payload.get("spotify_track_id"), + ) + + class SafeJSONResponse(JSONResponse): def render(self, content): return json.dumps( @@ -568,6 +803,7 @@ async def startup(): app.state.schedule_config = schedule_config app.state.scheduler.start() _apply_schedule_config(schedule_config) + _apply_spotify_schedule(config or {}) if schedule_config.get("enabled") and schedule_config.get("run_on_startup"): asyncio.create_task(_handle_scheduled_run()) if schedule_config.get("enabled"): @@ -597,16 +833,18 @@ async def startup(): config or {}, paths=app.state.paths, url=diag_url, - final_format_override="webm", + final_format_override="mkv", ) app.state.spotify_playlist_importer = SpotifyPlaylistImporter() app.state.spotify_import_status = {} + app.state.music_cover_art_cache = {} app.state.worker_stop_event = threading.Event() app.state.worker_engine = DownloadWorkerEngine( app.state.paths.db_path, config or {}, app.state.paths, + search_service=app.state.search_service, ) def _worker_runner(): @@ -983,7 +1221,7 @@ def _run_immediate_download_to_client( if audio_mode: ext = final_format or "mp3" elif not ext: - ext = final_format or "webm" + ext = final_format or "mkv" template = audio_template if audio_mode else filename_template cleaned_name = build_output_filename(meta, video_id, ext, template, audio_mode) final_path = os.path.join(temp_dir, cleaned_name) @@ -1650,6 +1888,40 @@ def _read_config_or_404(): return safe_json(_strip_deprecated_fields(config)) +def _spotify_client_credentials(config: dict | None) -> tuple[str, str]: + cfg = config or {} + spotify_cfg = (cfg.get("spotify") or {}) if isinstance(cfg, dict) else {} + client_id = str(spotify_cfg.get("client_id") or cfg.get("SPOTIFY_CLIENT_ID") or "").strip() + client_secret = str(spotify_cfg.get("client_secret") or cfg.get("SPOTIFY_CLIENT_SECRET") or "").strip() + return client_id, client_secret + + +def _build_spotify_client_with_optional_oauth(config: dict | None) -> SpotifyPlaylistClient: + """Build a Spotify client using OAuth access token when valid, else public mode.""" + client_id, client_secret = _spotify_client_credentials(config) + if not client_id or not client_secret: + return SpotifyPlaylistClient() + + store = SpotifyOAuthStore(Path(app.state.paths.db_path)) + existing = store.load() + try: + token = store.get_valid_token(client_id, client_secret, config=config if isinstance(config, dict) else None) + except Exception as exc: + 
logging.warning("Spotify OAuth token validation failed; using public mode: %s", exc) + token = None + + if token is not None: + return SpotifyPlaylistClient( + client_id=client_id, + client_secret=client_secret, + access_token=token.access_token, + ) + + if existing is not None: + logging.warning("Spotify OAuth token expired/invalid and was cleared; using public mode") + return SpotifyPlaylistClient(client_id=client_id, client_secret=client_secret) + + def _read_config_for_scheduler(): config_path = app.state.config_path if not os.path.exists(config_path): @@ -1739,7 +2011,7 @@ async def _runner(): effective_final_format_override = ( config.get("default_video_format") or config.get("final_format") - or "webm" + or "mkv" ) try: logging.info( @@ -1816,6 +2088,27 @@ async def _runner(): delivery_mode=delivery_mode or "server", ) await anyio.to_thread.run_sync(run_callable) + if ( + run_source == "api" + and not single_url + and not playlist_id + and bool((config.get("spotify") or {}).get("sync_user_playlists")) + ): + logging.info("Manual run triggering Spotify playlist sync (override downtime)") + try: + await _spotify_playlists_schedule_tick( + config=config, + db=PlaylistSnapshotStore(app.state.paths.db_path), + queue=_IntentQueueAdapter(), + spotify_client=_build_spotify_client_with_optional_oauth(config), + search_service=app.state.search_service, + ignore_downtime=True, + ) + logging.info("Manual-run Spotify playlist sync completed") + except Exception: + logging.exception("Manual-run Spotify playlist sync failed") + if run_source == "api" and not single_url and not playlist_id: + logging.info("Manual run completed (archive + Spotify sync)") # Ensure UI state finalization for direct URL runs if single_url: try: @@ -1862,6 +2155,14 @@ async def _runner(): logging.info("State reset to idle") elif app.state.state in {"running", "completed"}: app.state.state = "idle" + + notify_run_summary( + config, + run_type=run_source, + status=status, + started_at=app.state.started_at, + finished_at=app.state.finished_at, + ) app.state.run_task = asyncio.create_task(_runner()) @@ -1945,6 +2246,59 @@ def _schedule_tick(): asyncio.run_coroutine_threadsafe(_handle_scheduled_run(), loop) +def _liked_songs_schedule_tick(): + loop = app.state.loop + if not loop or loop.is_closed(): + return + asyncio.run_coroutine_threadsafe(_handle_liked_songs_scheduled_run(), loop) + + +def _saved_albums_schedule_tick(): + loop = app.state.loop + if not loop or loop.is_closed(): + return + asyncio.run_coroutine_threadsafe(_handle_saved_albums_scheduled_run(), loop) + + +def _user_playlists_schedule_tick(): + loop = app.state.loop + if not loop or loop.is_closed(): + return + asyncio.run_coroutine_threadsafe(_handle_user_playlists_scheduled_run(), loop) + + +def _spotify_playlists_schedule_tick( + config=None, + db=None, + queue=None, + spotify_client=None, + search_service=None, + ignore_downtime: bool = False, +): + if config is not None: + return spotify_playlists_watch_job( + config=config, + db=db, + queue=queue, + spotify_client=spotify_client, + search_service=search_service, + ignore_downtime=ignore_downtime, + ) + + loop = app.state.loop + if not loop or loop.is_closed(): + return + config = _read_config_for_scheduler() + downtime_active = False + if config: + downtime_active, _ = _check_downtime(config) + if downtime_active: + logging.info("Interval Spotify sync tick skipped due to downtime") + return + logging.info("Interval Spotify sync tick starting") + 
asyncio.run_coroutine_threadsafe(_handle_spotify_playlists_scheduled_run(), loop) + + async def _handle_scheduled_run(): if app.state.running: logging.info("Scheduled run skipped; run already active") @@ -1965,6 +2319,275 @@ async def _handle_scheduled_run(): _set_schedule_state(next_run=_get_next_run_iso()) +def _resolve_liked_songs_interval_minutes(config: dict | None) -> int: + cfg = config or {} + spotify_cfg = (cfg.get("spotify") or {}) if isinstance(cfg, dict) else {} + raw_value = spotify_cfg.get("liked_songs_sync_interval_minutes") + if raw_value is None: + raw_value = cfg.get("liked_songs_sync_interval_minutes") + try: + interval = int(raw_value) + except (TypeError, ValueError): + interval = DEFAULT_LIKED_SONGS_SYNC_INTERVAL_MINUTES + return max(1, interval) + + +def _resolve_saved_albums_interval_minutes(config: dict | None) -> int: + cfg = config or {} + spotify_cfg = (cfg.get("spotify") or {}) if isinstance(cfg, dict) else {} + raw_value = spotify_cfg.get("saved_albums_sync_interval_minutes") + if raw_value is None: + raw_value = cfg.get("saved_albums_sync_interval_minutes") + try: + interval = int(raw_value) + except (TypeError, ValueError): + interval = DEFAULT_SAVED_ALBUMS_SYNC_INTERVAL_MINUTES + return max(1, interval) + + +def _resolve_user_playlists_interval_minutes(config: dict | None) -> int: + cfg = config or {} + spotify_cfg = (cfg.get("spotify") or {}) if isinstance(cfg, dict) else {} + raw_value = spotify_cfg.get("user_playlists_sync_interval_minutes") + if raw_value is None: + raw_value = cfg.get("user_playlists_sync_interval_minutes") + try: + interval = int(raw_value) + except (TypeError, ValueError): + interval = DEFAULT_USER_PLAYLISTS_SYNC_INTERVAL_MINUTES + return max(1, interval) + + +def _resolve_spotify_playlists_interval_minutes(config: dict | None) -> int: + cfg = config or {} + spotify_cfg = (cfg.get("spotify") or {}) if isinstance(cfg, dict) else {} + raw_value = spotify_cfg.get("watch_playlists_interval_minutes") + if raw_value is None: + raw_value = cfg.get("watch_playlists_interval_minutes") + try: + interval = int(raw_value) + except (TypeError, ValueError): + interval = DEFAULT_SPOTIFY_PLAYLISTS_SYNC_INTERVAL_MINUTES + return max(1, interval) + + +def _normalized_watch_playlists(config: dict | None) -> list[str]: + cfg = config or {} + spotify_cfg = (cfg.get("spotify") or {}) if isinstance(cfg, dict) else {} + raw_values = spotify_cfg.get("watch_playlists") + if raw_values is None: + raw_values = cfg.get("watch_playlists", []) if isinstance(cfg, dict) else [] + if not isinstance(raw_values, list): + return [] + playlist_ids: list[str] = [] + seen: set[str] = set() + for raw_value in raw_values: + playlist_id = normalize_spotify_playlist_identifier(str(raw_value or "")) + if not playlist_id or not re.match(r"^[A-Za-z0-9]+$", playlist_id): + logging.warning("Skipping invalid Spotify playlist identifier: %s", raw_value) + continue + if playlist_id in seen: + continue + seen.add(playlist_id) + playlist_ids.append(playlist_id) + return playlist_ids + + +def _has_connected_spotify_oauth_token(db_path: str) -> bool: + try: + return SpotifyOAuthStore(Path(db_path)).load() is not None + except Exception: + return False + + +async def _handle_liked_songs_scheduled_run() -> None: + config = _read_config_for_scheduler() + if not config: + return + + client_id, client_secret = _spotify_client_credentials(config) + if not client_id or not client_secret: + return + + store = SpotifyOAuthStore(Path(app.state.paths.db_path)) + token = 
store.get_valid_token(client_id, client_secret, config=config if isinstance(config, dict) else None) + if token is None: + return + + try: + await spotify_liked_songs_watch_job( + config=config, + db=PlaylistSnapshotStore(app.state.paths.db_path), + queue=_IntentQueueAdapter(), + spotify_client=SpotifyPlaylistClient( + client_id=client_id, + client_secret=client_secret, + access_token=token.access_token, + ), + search_service=app.state.search_service, + ) + except Exception: + logging.exception("Scheduled Spotify Liked Songs sync failed") + + +async def _handle_saved_albums_scheduled_run() -> None: + config = _read_config_for_scheduler() + if not config: + return + + client_id, client_secret = _spotify_client_credentials(config) + if not client_id or not client_secret: + return + + store = SpotifyOAuthStore(Path(app.state.paths.db_path)) + token = store.get_valid_token(client_id, client_secret, config=config if isinstance(config, dict) else None) + if token is None: + return + + try: + await spotify_saved_albums_watch_job( + config=config, + db=PlaylistSnapshotStore(app.state.paths.db_path), + queue=_IntentQueueAdapter(), + spotify_client=SpotifyPlaylistClient( + client_id=client_id, + client_secret=client_secret, + access_token=token.access_token, + ), + search_service=app.state.search_service, + ) + except Exception: + logging.exception("Scheduled Spotify Saved Albums sync failed") + + +async def _handle_user_playlists_scheduled_run() -> None: + config = _read_config_for_scheduler() + if not config: + return + + client_id, client_secret = _spotify_client_credentials(config) + if not client_id or not client_secret: + return + + store = SpotifyOAuthStore(Path(app.state.paths.db_path)) + token = store.get_valid_token(client_id, client_secret, config=config if isinstance(config, dict) else None) + if token is None: + return + + try: + await spotify_user_playlists_watch_job( + config=config, + db=PlaylistSnapshotStore(app.state.paths.db_path), + queue=_IntentQueueAdapter(), + spotify_client=SpotifyPlaylistClient( + client_id=client_id, + client_secret=client_secret, + access_token=token.access_token, + ), + search_service=app.state.search_service, + ) + except Exception: + logging.exception("Scheduled Spotify User Playlists sync failed") + + +async def _handle_spotify_playlists_scheduled_run() -> None: + config = _read_config_for_scheduler() + if not config: + return + + spotify_client = _build_spotify_client_with_optional_oauth(config) + snapshot_store = PlaylistSnapshotStore(app.state.paths.db_path) + queue = _IntentQueueAdapter() + try: + await spotify_playlists_watch_job( + config=config, + db=snapshot_store, + queue=queue, + spotify_client=spotify_client, + search_service=app.state.search_service, + ) + except Exception: + logging.exception("Scheduled Spotify playlists sync failed") + + +def _apply_spotify_schedule(config: dict): + logger.info("Applying Spotify scheduler configuration") + scheduler = app.state.scheduler + if not scheduler: + return + + # Remove existing Spotify jobs + for job_id in [ + "spotify_liked_songs_watch", + "spotify_saved_albums_watch", + "spotify_user_playlists_watch", + "spotify_playlists_watch", + ]: + try: + scheduler.remove_job(job_id) + logger.info(f"Removed job {job_id}") + except Exception: + pass + + spotify_cfg = config.get("spotify", {}) + + # Liked Songs + if spotify_cfg.get("sync_liked_songs"): + interval = int(spotify_cfg.get("liked_songs_sync_interval_minutes", 15)) + scheduler.add_job( + _liked_songs_schedule_tick, + "interval", + 
minutes=interval, + id="spotify_liked_songs_watch", + replace_existing=True, + ) + logger.info(f"Spotify liked songs sync enabled (interval={interval} min)") + else: + logger.info("Spotify liked songs sync disabled by config") + + # Saved Albums + if spotify_cfg.get("sync_saved_albums"): + interval = int(spotify_cfg.get("saved_albums_sync_interval_minutes", 30)) + scheduler.add_job( + _saved_albums_schedule_tick, + "interval", + minutes=interval, + id="spotify_saved_albums_watch", + replace_existing=True, + ) + logger.info(f"Spotify saved albums sync enabled (interval={interval} min)") + else: + logger.info("Spotify saved albums sync disabled by config") + + # User Playlists (OAuth-based) + if spotify_cfg.get("sync_user_playlists"): + interval = int(spotify_cfg.get("user_playlists_sync_interval_minutes", 30)) + scheduler.add_job( + _user_playlists_schedule_tick, + "interval", + minutes=interval, + id="spotify_user_playlists_watch", + replace_existing=True, + ) + logger.info(f"Spotify user playlists sync enabled (interval={interval} min)") + else: + logger.info("Spotify user playlists sync disabled by config") + + # Manual playlist polling (watch_playlists) + if spotify_cfg.get("sync_user_playlists") and spotify_cfg.get("watch_playlists"): + interval = int(spotify_cfg.get("user_playlists_sync_interval_minutes", 30)) + scheduler.add_job( + _spotify_playlists_schedule_tick, + "interval", + minutes=interval, + id="spotify_playlists_watch", + replace_existing=True, + ) + logger.info("Spotify manual playlist watch enabled") + else: + logger.info("Spotify manual playlist watch disabled") + + def _apply_schedule_config(schedule): scheduler = app.state.scheduler if not scheduler: @@ -3001,6 +3624,7 @@ async def api_update_schedule(payload: ScheduleRequest): app.state.schedule_config = current _apply_schedule_config(current) + _apply_spotify_schedule(config or {}) return _schedule_response() @@ -3242,17 +3866,39 @@ async def create_search_request(request: dict = Body(...)): normalized = normalize_search_payload(raw_payload, default_sources=enabled_sources) except ValueError as exc: raise HTTPException(status_code=400, detail=str(exc)) from exc + music_candidates = [] + music_resolution = None + if bool(normalized.get("music_mode")): + music_candidates = _mb_service().search_release_groups(str(normalized.get("query") or ""), limit=5) + logger.info(f"[MUSIC] mode=ON candidates={len(music_candidates)} resolution=null") + logging.debug( + "Home search: music_mode=%s query=%s", + bool(normalized.get("music_mode")), + str(normalized.get("query") or ""), + ) if normalized["delivery_mode"] == "client" and normalized["destination_path"]: raise HTTPException(status_code=400, detail="Client delivery does not use a server destination") if normalized["delivery_mode"] == "client" and not normalized["search_only"]: raise HTTPException(status_code=400, detail="Search & Download is not available for client delivery") + intent = detect_intent(str(normalized.get("query") or "")) + if intent.type != IntentType.SEARCH: + return { + "detected_intent": intent.type.value, + "identifier": intent.identifier, + "music_mode": bool(normalized["music_mode"]), + "music_candidates": music_candidates, + "music_resolution": music_resolution, + } + if "source_priority" not in raw_payload or not raw_payload.get("source_priority"): raw_payload["source_priority"] = normalized["sources"] if "auto_enqueue" not in raw_payload: raw_payload["auto_enqueue"] = not normalized["search_only"] if "media_type" not in raw_payload: 
raw_payload["media_type"] = "music" if normalized["music_mode"] else "generic" + if "music_mode" not in raw_payload: + raw_payload["music_mode"] = normalized["music_mode"] if "destination_dir" not in raw_payload and normalized["destination"] is not None: raw_payload["destination_dir"] = normalized["destination"] @@ -3270,6 +3916,7 @@ async def create_search_request(request: dict = Body(...)): "duration_hint_sec", "quality_min_bitrate_kbps", "lossless_only", + "music_mode", "auto_enqueue", "source_priority", "max_candidates_per_source", @@ -3293,7 +3940,96 @@ async def create_search_request(request: dict = Body(...)): "destination_path": normalized["destination_path"], } logging.debug("Normalized search payload", extra={"payload": normalized, "request_id": request_id}) - return {"request_id": request_id} + return { + "request_id": request_id, + "music_mode": bool(normalized["music_mode"]), + "music_candidates": music_candidates, + "music_resolution": music_resolution, + } + + +@app.post("/api/intent/execute") +async def execute_intent(payload: dict = Body(...)): + """Execute intent requests by routing to the active ingestion pipeline.""" + intent_raw = str((payload or {}).get("intent_type") or "").strip() + identifier = str((payload or {}).get("identifier") or "").strip() + if not intent_raw: + raise HTTPException(status_code=400, detail="intent_type is required") + if not identifier: + raise HTTPException(status_code=400, detail="identifier is required") + try: + intent_type = IntentType(intent_raw) + except ValueError as exc: + raise HTTPException(status_code=400, detail="invalid intent_type") from exc + config = _read_config_or_404() + dispatcher_config = dict(config) + dispatcher_config["search_service"] = app.state.search_service + db = PlaylistSnapshotStore(app.state.paths.db_path) + queue = _IntentQueueAdapter() + spotify_client = _build_spotify_client_with_optional_oauth(config) + return await dispatch_intent( + intent_type=intent_type.value, + identifier=identifier, + config=dispatcher_config, + db=db, + queue=queue, + spotify_client=spotify_client, + ) + + +@app.post("/api/intent/preview") +async def preview_intent(payload: dict = Body(...)): + """Fetch metadata preview for supported intents (plumbing only).""" + intent_raw = str((payload or {}).get("intent_type") or "").strip() + identifier = str((payload or {}).get("identifier") or "").strip() + if not intent_raw: + raise HTTPException(status_code=400, detail="intent_type is required") + if not identifier: + raise HTTPException(status_code=400, detail="identifier is required") + try: + intent_type = IntentType(intent_raw) + except ValueError as exc: + raise HTTPException(status_code=400, detail="invalid intent_type") from exc + + if intent_type not in {IntentType.SPOTIFY_ALBUM, IntentType.SPOTIFY_PLAYLIST}: + raise HTTPException(status_code=400, detail="intent preview not supported for this intent_type") + + config = _read_config_or_404() + client = _build_spotify_client_with_optional_oauth(config) + encoded = quote(identifier, safe="") + try: + if intent_type == IntentType.SPOTIFY_ALBUM: + data = client._request_json( + f"https://api.spotify.com/v1/albums/{encoded}", + params={"fields": "name,artists(name),total_tracks"}, + ) + artists = data.get("artists") or [] + artist = artists[0].get("name") if artists and isinstance(artists[0], dict) else "" + track_count = int(data.get("total_tracks") or 0) + return { + "intent_type": intent_type.value, + "identifier": identifier, + "title": str(data.get("name") or ""), + "artist": 
str(artist or ""),
+                "track_count": track_count,
+            }
+
+        data = client._request_json(
+            f"https://api.spotify.com/v1/playlists/{encoded}",
+            params={"fields": "name,owner(display_name),tracks(total)"},
+        )
+        owner = (data.get("owner") or {}).get("display_name")
+        track_count = int(((data.get("tracks") or {}).get("total")) or 0)
+        return {
+            "intent_type": intent_type.value,
+            "identifier": identifier,
+            "title": str(data.get("name") or ""),
+            "artist": str(owner or ""),
+            "track_count": track_count,
+        }
+    except Exception as exc:
+        logging.exception("Intent preview failed for intent=%s identifier=%s", intent_type.value, identifier)
+        raise HTTPException(status_code=502, detail=f"intent preview failed: {exc}") from exc
 
 @app.get("/api/search/requests")
@@ -3330,6 +4066,134 @@ async def run_search_resolution_once():
     return {"request_id": request_id}
 
+
+@app.post("/api/music/album/download")
+def download_full_album(data: dict):
+    release_group_id = str((data or {}).get("release_group_id") or "").strip() or None
+    album_id = str((data or {}).get("album_id") or "").strip() or None
+    release_id = str((data or {}).get("release_id") or album_id or "").strip() or None
+    if not release_group_id and not album_id:
+        return {"error": "release_group_id or album_id required"}
+    logger.info(f"[MUSIC] explicit album download request release_group={release_group_id}")
+
+    selected_reason = "explicit_release_id"
+    if release_group_id:
+        prefer_country = None
+        try:
+            cfg = _read_config_or_404()
+            configured_country = str((cfg or {}).get("locale_country") or (cfg or {}).get("country") or "").strip().upper()
+            prefer_country = configured_country or None
+        except Exception:
+            prefer_country = None
+        selection = _mb_service().pick_best_release_with_reason(release_group_id, prefer_country=prefer_country)
+        release_id = str(selection.get("release_id") or "").strip() or None
+        selected_reason = str(selection.get("reason") or "release_group_selection")
+        if not release_id:
+            return {"error": "unable to select release from release_group"}
+        logger.info(f"[MUSIC] selected_release={release_id} from release_group={release_group_id} reason={selected_reason}")
+
+    tracks = _mb_service().fetch_release_tracks(release_id or "")
+    if not tracks:
+        return {"error": "unable to fetch tracks"}
+    logger.info(f"[MUSIC] Album {release_group_id or release_id} fetched {len(tracks)} tracks")
+
+    queue = _IntentQueueAdapter()
+    enqueued = 0
+
+    for track in tracks:
+        artist = track.get("artist")
+        title = track.get("title")
+        track_number_raw = track.get("track_number")
+        disc_number_raw = track.get("disc_number")
+        try:
+            track_number = int(track_number_raw) if track_number_raw is not None else None
+        except (TypeError, ValueError):
+            track_number = None
+        try:
+            disc_number = int(disc_number_raw) if disc_number_raw is not None else None
+        except (TypeError, ValueError):
+            disc_number = None
+        logger.debug(f"[MUSIC] enqueue track {artist} - {title}")
+        payload = {
+            "media_intent": "music_track",
+            "artist": artist,
+            "album": track.get("album"),
+            "track": title,
+            "track_number": track_number,
+            "disc_number": disc_number,
+            "release_date": track.get("release_date"),
+            "mb_release_id": release_id,
+            "mb_release_group_id": release_group_id,
+            "release_id": release_id,
+            "release_group_id": release_group_id,
+            "duration_ms": track.get("duration_ms"),
+            "artwork_url": track.get("artwork_url"),
+        }
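+        # One queue job per MusicBrainz track; the worker resolves playable audio and applies tagging from this payload.
+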
queue.enqueue(payload) + enqueued += 1 + logger.info(f"[MUSIC] album enqueue complete count={len(tracks)}") + + return { + "status": "ok", + "tracks_enqueued": enqueued + } + + +@app.post("/api/music/album/candidates") +def music_album_candidates(payload: dict): + query = str((payload or {}).get("query") or "").strip() + raw_candidates = _search_music_album_candidates(query, limit=10) + candidates = [ + { + "album_id": item.get("release_group_id"), + "title": item.get("title"), + "artist": item.get("artist_credit"), + "first_released": item.get("first_release_date"), + "track_count": item.get("track_count"), + "score": item.get("score"), + } + for item in raw_candidates + ] + return { + "status": "ok", + "album_candidates": candidates or [], + } + + +@app.get("/api/music/albums/search") +def music_albums_search(q: str = Query("", alias="q"), limit: int = Query(10, ge=1, le=50)): + return _search_music_album_candidates(str(q or ""), limit=int(limit)) + + +@app.get("/api/music/album/art/{album_id}") +def music_album_art(album_id: str): + album_id = str(album_id or "").strip() + if not album_id: + raise HTTPException(status_code=400, detail="album_id is required") + + cache = getattr(app.state, "music_cover_art_cache", None) + now = time.time() + if isinstance(cache, dict): + cached = cache.get(album_id) + if isinstance(cached, dict): + cached_at = float(cached.get("cached_at") or 0) + if now - cached_at < COVER_ART_CACHE_TTL_SECONDS: + return {"status": "ok", "cover_url": cached.get("cover_url")} + + cover_url = _mb_service().fetch_release_group_cover_art_url(album_id, timeout=8) + + if isinstance(cache, dict): + cache[album_id] = { + "cover_url": cover_url, + "cached_at": now, + } + return {"status": "ok", "cover_url": cover_url} + + @app.post("/api/spotify/playlists/import") async def import_spotify_playlist(payload: SpotifyPlaylistImportPayload): playlist_url = (payload.playlist_url or "").strip() @@ -3397,6 +4261,184 @@ async def spotify_playlist_status(): return {"statuses": app.state.spotify_import_status} +@app.get("/api/spotify/oauth/connect") +async def spotify_oauth_connect(): + """Build Spotify OAuth connect URL and store anti-CSRF state in memory.""" + config = _read_config_or_404() + spotify_cfg = (config.get("spotify") or {}) if isinstance(config, dict) else {} + client_id = ( + str(spotify_cfg.get("client_id") or config.get("SPOTIFY_CLIENT_ID") or "").strip() + if isinstance(config, dict) + else "" + ) + redirect_uri = ( + str(spotify_cfg.get("redirect_uri") or config.get("SPOTIFY_REDIRECT_URI") or "").strip() + if isinstance(config, dict) + else "" + ) + if not client_id: + raise HTTPException(status_code=400, detail="SPOTIFY_CLIENT_ID is required in config") + if not redirect_uri: + raise HTTPException(status_code=400, detail="SPOTIFY_REDIRECT_URI is required in config") + + app.state.spotify_oauth_state = None + state = str(uuid4()) + app.state.spotify_oauth_state = state + scope = "user-library-read playlist-read-private playlist-read-collaborative" + auth_url = build_auth_url( + client_id=client_id, + redirect_uri=redirect_uri, + scope=scope, + state=state, + ) + return {"auth_url": auth_url} + + +@app.get("/api/spotify/oauth/status") +async def spotify_oauth_status(): + """Return Spotify OAuth connection status without exposing sensitive tokens.""" + store = SpotifyOAuthStore(Path(app.state.paths.db_path)) + token = store.load() + if token is None: + return {"connected": False} + + scopes = [part for part in str(token.scope or "").split() if part] + payload: dict[str, 
object] = {"connected": True} + if scopes: + payload["scopes"] = scopes + payload["expires_at"] = int(token.expires_at) + return payload + + +@app.get("/api/spotify/oauth/callback") +async def spotify_oauth_callback(code: str | None = None, state: str | None = None, error: str | None = None): + """Handle Spotify OAuth callback and persist tokens.""" + if error: + raise HTTPException(status_code=400, detail=f"spotify_oauth_error: {error}") + if not code: + raise HTTPException(status_code=400, detail="missing code") + if not state: + raise HTTPException(status_code=400, detail="missing state") + + expected_state = str(getattr(app.state, "spotify_oauth_state", "") or "") + if not expected_state or state != expected_state: + raise HTTPException(status_code=400, detail="invalid oauth state") + + config = _read_config_or_404() + spotify_cfg = (config.get("spotify") or {}) if isinstance(config, dict) else {} + client_id = ( + str(spotify_cfg.get("client_id") or config.get("SPOTIFY_CLIENT_ID") or "").strip() + if isinstance(config, dict) + else "" + ) + client_secret = ( + str(spotify_cfg.get("client_secret") or config.get("SPOTIFY_CLIENT_SECRET") or "").strip() + if isinstance(config, dict) + else "" + ) + redirect_uri = ( + str(spotify_cfg.get("redirect_uri") or config.get("SPOTIFY_REDIRECT_URI") or "").strip() + if isinstance(config, dict) + else "" + ) + if not client_id: + raise HTTPException(status_code=400, detail="SPOTIFY_CLIENT_ID is required in config") + if not client_secret: + raise HTTPException(status_code=400, detail="SPOTIFY_CLIENT_SECRET is required in config") + if not redirect_uri: + raise HTTPException(status_code=400, detail="SPOTIFY_REDIRECT_URI is required in config") + + try: + token_response = requests.post( + SPOTIFY_TOKEN_URL, + data={ + "grant_type": "authorization_code", + "code": code, + "redirect_uri": redirect_uri, + "client_id": client_id, + "client_secret": client_secret, + }, + timeout=20, + ) + except Exception as exc: + raise HTTPException(status_code=400, detail=f"token exchange failed: {exc}") from exc + + if token_response.status_code != 200: + detail = (token_response.text or "").strip() or f"status={token_response.status_code}" + raise HTTPException(status_code=400, detail=f"token exchange failed: {detail}") + + payload = token_response.json() + access_token = str(payload.get("access_token") or "").strip() + refresh_token = str(payload.get("refresh_token") or "").strip() + expires_in = payload.get("expires_in") + scope = str(payload.get("scope") or "").strip() + if not access_token: + raise HTTPException(status_code=400, detail="token exchange failed: missing access_token") + if not refresh_token: + raise HTTPException(status_code=400, detail="token exchange failed: missing refresh_token") + if expires_in is None: + raise HTTPException(status_code=400, detail="token exchange failed: missing expires_in") + if not scope: + raise HTTPException(status_code=400, detail="token exchange failed: missing scope") + + try: + expires_at = int(time.time()) + int(expires_in) + except (TypeError, ValueError) as exc: + raise HTTPException(status_code=400, detail="token exchange failed: invalid expires_in") from exc + + store = SpotifyOAuthStore(Path(app.state.paths.db_path)) + store.save( + SpotifyOAuthToken( + access_token=access_token, + refresh_token=refresh_token, + expires_at=expires_at, + scope=scope, + ) + ) + try: + sync_db = PlaylistSnapshotStore(app.state.paths.db_path) + sync_queue = _IntentQueueAdapter() + sync_client = SpotifyPlaylistClient( + 
client_id=client_id, + client_secret=client_secret, + access_token=access_token, + ) + await spotify_liked_songs_watch_job( + config=config, + db=sync_db, + queue=sync_queue, + spotify_client=sync_client, + search_service=app.state.search_service, + ) + await spotify_saved_albums_watch_job( + config=config, + db=sync_db, + queue=sync_queue, + spotify_client=sync_client, + search_service=app.state.search_service, + ) + await spotify_user_playlists_watch_job( + config=config, + db=sync_db, + queue=sync_queue, + spotify_client=sync_client, + search_service=app.state.search_service, + ) + except Exception: + logging.exception("Post-OAuth immediate Spotify sync failed") + _apply_spotify_schedule(config or {}) + app.state.spotify_oauth_state = None + return RedirectResponse(url="/#config?spotify=connected", status_code=302) + + +@app.post("/api/spotify/oauth/disconnect") +async def spotify_oauth_disconnect(): + """Clear stored Spotify OAuth token state.""" + store = SpotifyOAuthStore(Path(app.state.paths.db_path)) + store.clear() + return {"status": "disconnected"} + + @app.get("/api/search/items/{item_id}/candidates") async def get_search_candidates(item_id: str): service = app.state.search_service @@ -3756,6 +4798,20 @@ async def api_get_config(): async def api_put_config(payload: dict = Body(...)): payload = _strip_deprecated_fields(payload) errors = validate_config(payload) + # Saving config should not require Spotify OAuth client credentials. + # Those are validated only when the Spotify connect flow is invoked. + errors = [ + err for err in errors + if not any( + marker in str(err) + for marker in ( + "SPOTIFY_CLIENT_ID", + "SPOTIFY_CLIENT_SECRET", + "spotify.client_id", + "spotify.client_secret", + ) + ) + ] if errors: raise HTTPException(status_code=400, detail={"errors": errors}) @@ -3785,6 +4841,7 @@ async def api_put_config(payload: dict = Body(...)): schedule = _merge_schedule_config(payload.get("schedule")) app.state.schedule_config = schedule _apply_schedule_config(schedule) + _apply_spotify_schedule(payload or {}) policy = normalize_watch_policy(payload) if getattr(normalize_watch_policy, "valid", True): app.state.watch_policy = policy diff --git a/app/musicbrainz/__init__.py b/app/musicbrainz/__init__.py new file mode 100644 index 0000000..ce7d285 --- /dev/null +++ b/app/musicbrainz/__init__.py @@ -0,0 +1,25 @@ +from metadata.services.musicbrainz_service import MUSICBRAINZ_USER_AGENT, get_musicbrainz_service + + +def search_release_groups(query: str, limit: int = 10): + return get_musicbrainz_service().search_release_groups(query, limit=limit) + + +def pick_best_release_with_reason(release_group_id: str, *, prefer_country: str | None = None): + return get_musicbrainz_service().pick_best_release_with_reason(release_group_id, prefer_country=prefer_country) + + +def pick_best_release(release_group_id: str): + return get_musicbrainz_service().pick_best_release(release_group_id) + + +def fetch_release_tracks(release_id: str): + return get_musicbrainz_service().fetch_release_tracks(release_id) + +__all__ = [ + "MUSICBRAINZ_USER_AGENT", + "search_release_groups", + "pick_best_release", + "pick_best_release_with_reason", + "fetch_release_tracks", +] diff --git a/app/musicbrainz/cache.py b/app/musicbrainz/cache.py new file mode 100644 index 0000000..4bd8f56 --- /dev/null +++ b/app/musicbrainz/cache.py @@ -0,0 +1,60 @@ +import json +import os +import threading +import time +from pathlib import Path +from typing import Any + + +class MusicBrainzCache: + def __init__(self, cache_path: str | 
None = None) -> None: + path = cache_path or os.getenv("MUSICBRAINZ_CACHE_PATH") or ".cache/musicbrainz_cache.json" + self._path = Path(path) + self._lock = threading.Lock() + self._data: dict[str, dict[str, Any]] = {} + self._loaded = False + + def _load_locked(self) -> None: + if self._loaded: + return + self._loaded = True + try: + if self._path.exists(): + payload = json.loads(self._path.read_text(encoding="utf-8")) + if isinstance(payload, dict): + self._data = payload + except Exception: + self._data = {} + + def _persist_locked(self) -> None: + self._path.parent.mkdir(parents=True, exist_ok=True) + tmp_path = self._path.with_suffix(f"{self._path.suffix}.tmp") + tmp_path.write_text(json.dumps(self._data, ensure_ascii=True, separators=(",", ":")), encoding="utf-8") + tmp_path.replace(self._path) + + def get(self, key: str) -> Any: + now = time.time() + with self._lock: + self._load_locked() + row = self._data.get(key) + if not isinstance(row, dict): + return None + expires_at = float(row.get("expires_at") or 0.0) + if expires_at <= now: + self._data.pop(key, None) + try: + self._persist_locked() + except Exception: + pass + return None + return row.get("value") + + def set(self, key: str, value: Any, ttl_seconds: int) -> None: + now = time.time() + with self._lock: + self._load_locked() + self._data[key] = { + "expires_at": now + max(1, int(ttl_seconds)), + "value": value, + } + self._persist_locked() diff --git a/config/config_sample.json b/config/config_sample.json index e1cdb56..455ebf0 100644 --- a/config/config_sample.json +++ b/config/config_sample.json @@ -46,6 +46,13 @@ ], "yt_dlp_cookies": "", "yt_dlp_opts": {}, + "youtube": { + "cookies": { + "enabled": false, + "path": "tokens/youtube_cookies.txt", + "fallback_only": true + } + }, "filename_template": "%(title)s - %(uploader)s - %(upload_date)s.%(ext)s", "music_filename_template": "%(artist)s/%(album)s/%(track_number)s - %(track)s.%(ext)s", "music_metadata": { diff --git a/config/settings.py b/config/settings.py new file mode 100644 index 0000000..9e06cd2 --- /dev/null +++ b/config/settings.py @@ -0,0 +1,9 @@ +"""Application settings constants.""" + +from __future__ import annotations + +# Toggle for enabling or disabling media-duration checks. +ENABLE_DURATION_VALIDATION = True + +# Allowed absolute difference between expected and actual media duration. 
+SPOTIFY_DURATION_TOLERANCE_SECONDS = 5.0 diff --git a/db/__init__.py b/db/__init__.py new file mode 100644 index 0000000..2a2223a --- /dev/null +++ b/db/__init__.py @@ -0,0 +1,5 @@ +"""Database helpers for Retreivr.""" + +from db.playlist_snapshots import PlaylistSnapshotStore, SnapshotWriteResult + +__all__ = ["PlaylistSnapshotStore", "SnapshotWriteResult"] diff --git a/db/downloaded_tracks.py b/db/downloaded_tracks.py new file mode 100644 index 0000000..a573390 --- /dev/null +++ b/db/downloaded_tracks.py @@ -0,0 +1,74 @@ +"""Persistence helpers for downloaded Spotify tracks by ISRC.""" + +from __future__ import annotations + +import os +import sqlite3 + +from db.migrations import ensure_downloaded_music_tracks_table + +_DEFAULT_DB_ENV_KEY = "RETREIVR_DB_PATH" + + +def _resolve_db_path() -> str: + return os.environ.get(_DEFAULT_DB_ENV_KEY, os.path.join(os.getcwd(), "retreivr.sqlite3")) + + +def _connect(db_path: str | None = None) -> sqlite3.Connection: + conn = sqlite3.connect(db_path or _resolve_db_path(), check_same_thread=False, timeout=30) + conn.row_factory = sqlite3.Row + conn.execute("PRAGMA foreign_keys = ON") + ensure_downloaded_music_tracks_table(conn) + return conn + + +def has_downloaded_isrc(playlist_id: str, isrc: str) -> bool: + """Return True when an ISRC is already recorded for a playlist.""" + pid = (playlist_id or "").strip() + track_isrc = (isrc or "").strip() + if not pid or not track_isrc: + return False + + conn = _connect() + try: + cur = conn.cursor() + cur.execute( + """ + SELECT 1 + FROM downloaded_music_tracks + WHERE playlist_id=? AND isrc=? + LIMIT 1 + """, + (pid, track_isrc), + ) + return cur.fetchone() is not None + finally: + conn.close() + + +def record_downloaded_track(playlist_id: str, isrc: str, file_path: str) -> None: + """Insert a downloaded track record for playlist/idempotency tracking.""" + pid = (playlist_id or "").strip() + track_isrc = (isrc or "").strip() + path = (file_path or "").strip() + if not pid: + raise ValueError("playlist_id is required") + if not track_isrc: + raise ValueError("isrc is required") + if not path: + raise ValueError("file_path is required") + + conn = _connect() + try: + cur = conn.cursor() + cur.execute( + """ + INSERT OR IGNORE INTO downloaded_music_tracks (playlist_id, isrc, file_path) + VALUES (?, ?, ?) 
+ """, + (pid, track_isrc, path), + ) + conn.commit() + finally: + conn.close() + diff --git a/db/migrations.py b/db/migrations.py new file mode 100644 index 0000000..a6ed48f --- /dev/null +++ b/db/migrations.py @@ -0,0 +1,75 @@ +"""SQLite migrations for Spotify playlist snapshot persistence.""" + +from __future__ import annotations + +import sqlite3 + + +def ensure_playlist_snapshot_tables(conn: sqlite3.Connection) -> None: + """Create snapshot tables and indexes when they do not already exist.""" + cur = conn.cursor() + cur.execute( + """ + CREATE TABLE IF NOT EXISTS playlist_snapshots ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + playlist_id TEXT NOT NULL, + snapshot_id TEXT NOT NULL, + timestamp TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP + ) + """ + ) + cur.execute( + "CREATE UNIQUE INDEX IF NOT EXISTS uq_playlist_snapshots_playlist_snapshot " + "ON playlist_snapshots (playlist_id, snapshot_id)" + ) + cur.execute( + "CREATE INDEX IF NOT EXISTS idx_playlist_snapshots_snapshot_lookup " + "ON playlist_snapshots (playlist_id, snapshot_id)" + ) + + cur.execute( + """ + CREATE TABLE IF NOT EXISTS playlist_snapshot_items ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + snapshot_id INTEGER NOT NULL, + spotify_track_id TEXT NOT NULL, + position INTEGER NOT NULL, + added_at TEXT, + FOREIGN KEY (snapshot_id) REFERENCES playlist_snapshots(id) ON DELETE CASCADE + ) + """ + ) + cur.execute( + "CREATE UNIQUE INDEX IF NOT EXISTS uq_playlist_snapshot_items_unique_position " + "ON playlist_snapshot_items (snapshot_id, spotify_track_id, position)" + ) + conn.commit() + + +def ensure_downloaded_music_tracks_table(conn: sqlite3.Connection) -> None: + """Create downloaded Spotify tracks table and idempotency index.""" + cur = conn.cursor() + cur.execute( + """ + CREATE TABLE IF NOT EXISTS downloaded_music_tracks ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + playlist_id TEXT NOT NULL, + isrc TEXT NOT NULL, + file_path TEXT NOT NULL, + downloaded_at DATETIME DEFAULT CURRENT_TIMESTAMP + ) + """ + ) + cur.execute( + "CREATE UNIQUE INDEX IF NOT EXISTS uq_downloaded_music_tracks_playlist_isrc " + "ON downloaded_music_tracks (playlist_id, isrc)" + ) + conn.commit() + + +def rollback_downloaded_music_tracks_table(conn: sqlite3.Connection) -> None: + """Rollback downloaded Spotify tracks table migration.""" + cur = conn.cursor() + cur.execute("DROP INDEX IF EXISTS uq_downloaded_music_tracks_playlist_isrc") + cur.execute("DROP TABLE IF EXISTS downloaded_music_tracks") + conn.commit() diff --git a/db/playlist_snapshots.py b/db/playlist_snapshots.py new file mode 100644 index 0000000..a38f86a --- /dev/null +++ b/db/playlist_snapshots.py @@ -0,0 +1,322 @@ +"""Persistence helpers for Spotify playlist snapshots.""" + +from __future__ import annotations + +import os +import sqlite3 +import hashlib +from dataclasses import dataclass +from typing import Any + +from db.migrations import ensure_playlist_snapshot_tables + +_DEFAULT_DB_ENV_KEY = "RETREIVR_DB_PATH" + + +def _resolve_db_path() -> str: + return os.environ.get(_DEFAULT_DB_ENV_KEY, os.path.join(os.getcwd(), "retreivr.sqlite3")) + + +def _connect(db_path: str | None = None) -> sqlite3.Connection: + conn = sqlite3.connect(db_path or _resolve_db_path(), check_same_thread=False, timeout=30) + conn.row_factory = sqlite3.Row + conn.execute("PRAGMA foreign_keys = ON") + ensure_playlist_snapshot_tables(conn) + return conn + + +def _normalize_snapshot_rows(items: list[dict[str, Any]]) -> list[tuple[str, int, Any]]: + rows: list[tuple[str, int, int, Any]] = [] + for idx, item in 
enumerate(items or []): + if not isinstance(item, dict): + continue + track_id = str(item.get("spotify_track_id") or "").strip() + if not track_id: + continue + try: + position = int(item.get("position", idx)) + except Exception: + position = int(idx) + rows.append((track_id, position, idx, item.get("added_at"))) + rows.sort(key=lambda row: (row[1], row[2], row[0])) + return [(track_id, position, added_at) for track_id, position, _idx, added_at in rows] + + +def _snapshot_hash_from_rows(rows: list[tuple[str, int, Any]]) -> str: + payload = "\n".join( + f"{idx}|{track_id}|{position}|{added_at or ''}" + for idx, (track_id, position, added_at) in enumerate(rows) + ).encode("utf-8") + return hashlib.sha256(payload).hexdigest() + + +@dataclass(frozen=True) +class SnapshotWriteResult: + """Result payload for class-based snapshot writes.""" + + inserted: bool + snapshot_db_id: int | None + reason: str | None = None + + +def get_latest_snapshot(playlist_id: str) -> tuple[str | None, list[dict[str, Any]]]: + """Return latest `(snapshot_id, items)` for a playlist, or `(None, [])` when missing.""" + pid = (playlist_id or "").strip() + if not pid: + return None, [] + + conn = _connect() + try: + cur = conn.cursor() + cur.execute( + """ + SELECT id, snapshot_id + FROM playlist_snapshots + WHERE playlist_id=? + ORDER BY id DESC + LIMIT 1 + """, + (pid,), + ) + row = cur.fetchone() + if not row: + return None, [] + + snapshot_row_id = int(row["id"]) + snapshot_id = str(row["snapshot_id"]) + cur.execute( + """ + SELECT spotify_track_id, position, added_at + FROM playlist_snapshot_items + WHERE snapshot_id=? + ORDER BY position ASC, id ASC + """, + (snapshot_row_id,), + ) + items = [dict(item) for item in cur.fetchall()] + return snapshot_id, items + finally: + conn.close() + + +def store_snapshot(playlist_id: str, snapshot_id: str, items: list[dict[str, Any]]) -> None: + """Store a snapshot and items only when `snapshot_id` differs from the latest snapshot.""" + pid = (playlist_id or "").strip() + sid = (snapshot_id or "").strip() + if not pid: + raise ValueError("playlist_id is required") + if not sid: + raise ValueError("snapshot_id is required") + + conn = _connect() + try: + cur = conn.cursor() + normalized_rows = _normalize_snapshot_rows(items) + current_hash = _snapshot_hash_from_rows(normalized_rows) + cur.execute("BEGIN IMMEDIATE") + cur.execute( + """ + SELECT id, snapshot_id + FROM playlist_snapshots + WHERE playlist_id=? + ORDER BY id DESC + LIMIT 1 + """, + (pid,), + ) + latest = cur.fetchone() + if latest and str(latest["snapshot_id"]) == sid: + conn.commit() + return + if latest: + cur.execute( + """ + SELECT spotify_track_id, position, added_at + FROM playlist_snapshot_items + WHERE snapshot_id=? + ORDER BY position ASC, id ASC + """, + (int(latest["id"]),), + ) + previous_rows = [ + ( + str(item["spotify_track_id"]), + int(item["position"]), + item["added_at"], + ) + for item in cur.fetchall() + ] + if _snapshot_hash_from_rows(previous_rows) == current_hash: + conn.commit() + return + + cur.execute( + """ + INSERT INTO playlist_snapshots (playlist_id, snapshot_id) + VALUES (?, ?) + """, + (pid, sid), + ) + snapshot_row_id = int(cur.lastrowid) + + rows = [(snapshot_row_id, track_id, position, added_at) for track_id, position, added_at in normalized_rows] + + if rows: + cur.executemany( + """ + INSERT INTO playlist_snapshot_items ( + snapshot_id, spotify_track_id, position, added_at + ) VALUES (?, ?, ?, ?) 
+ """, + rows, + ) + + conn.commit() + finally: + conn.close() + + +class PlaylistSnapshotStore: + """Compatibility wrapper around module-level snapshot helpers.""" + + def __init__(self, db_path: str) -> None: + self.db_path = db_path + + def _connect(self) -> sqlite3.Connection: + conn = sqlite3.connect(self.db_path, check_same_thread=False, timeout=30) + conn.row_factory = sqlite3.Row + conn.execute("PRAGMA foreign_keys = ON") + ensure_playlist_snapshot_tables(conn) + return conn + + def ensure_schema(self) -> None: + """Ensure snapshot schema exists.""" + conn = self._connect() + conn.close() + + def get_latest_snapshot(self, playlist_id: str) -> dict[str, Any] | None: + """Return latest snapshot metadata and ordered items for `playlist_id`.""" + pid = (playlist_id or "").strip() + if not pid: + return None + conn = self._connect() + try: + cur = conn.cursor() + cur.execute( + """ + SELECT id, playlist_id, snapshot_id, timestamp + FROM playlist_snapshots + WHERE playlist_id=? + ORDER BY id DESC + LIMIT 1 + """, + (pid,), + ) + row = cur.fetchone() + if not row: + return None + snapshot = dict(row) + cur.execute( + """ + SELECT spotify_track_id, position, added_at + FROM playlist_snapshot_items + WHERE snapshot_id=? + ORDER BY position ASC, id ASC + """, + (int(row["id"]),), + ) + items = [dict(item) for item in cur.fetchall()] + snapshot["items"] = items + snapshot["track_count"] = len(items) + snapshot["fetched_at"] = snapshot.get("timestamp") + snapshot["raw_json"] = None + return snapshot + finally: + conn.close() + + def store_snapshot( + self, + playlist_id: str, + snapshot_id: str, + items: list[dict[str, Any]], + ) -> SnapshotWriteResult: + """Store snapshot with fast-path skip when unchanged.""" + pid = (playlist_id or "").strip() + sid = (snapshot_id or "").strip() + if not pid: + raise ValueError("playlist_id is required") + if not sid: + raise ValueError("snapshot_id is required") + + conn = self._connect() + try: + cur = conn.cursor() + normalized_rows = _normalize_snapshot_rows(items) + current_hash = _snapshot_hash_from_rows(normalized_rows) + cur.execute("BEGIN IMMEDIATE") + cur.execute( + """ + SELECT id, snapshot_id + FROM playlist_snapshots + WHERE playlist_id=? + ORDER BY id DESC + LIMIT 1 + """, + (pid,), + ) + latest = cur.fetchone() + if latest and str(latest["snapshot_id"]) == sid: + conn.commit() + return SnapshotWriteResult( + inserted=False, + snapshot_db_id=int(latest["id"]), + reason="snapshot_unchanged", + ) + if latest: + cur.execute( + """ + SELECT spotify_track_id, position, added_at + FROM playlist_snapshot_items + WHERE snapshot_id=? + ORDER BY position ASC, id ASC + """, + (int(latest["id"]),), + ) + previous_rows = [ + ( + str(item["spotify_track_id"]), + int(item["position"]), + item["added_at"], + ) + for item in cur.fetchall() + ] + if _snapshot_hash_from_rows(previous_rows) == current_hash: + conn.commit() + return SnapshotWriteResult( + inserted=False, + snapshot_db_id=int(latest["id"]), + reason="snapshot_hash_unchanged", + ) + + cur.execute( + """ + INSERT INTO playlist_snapshots (playlist_id, snapshot_id) + VALUES (?, ?) + """, + (pid, sid), + ) + snapshot_row_id = int(cur.lastrowid) + rows = [(snapshot_row_id, track_id, position, added_at) for track_id, position, added_at in normalized_rows] + if rows: + cur.executemany( + """ + INSERT INTO playlist_snapshot_items ( + snapshot_id, spotify_track_id, position, added_at + ) VALUES (?, ?, ?, ?) 
+ """, + rows, + ) + conn.commit() + return SnapshotWriteResult(inserted=True, snapshot_db_id=snapshot_row_id) + finally: + conn.close() diff --git a/docker/docker-compose.portainer.yml.example b/docker/docker-compose.portainer.yml.example index 57fb37c..7f6140c 100644 --- a/docker/docker-compose.portainer.yml.example +++ b/docker/docker-compose.portainer.yml.example @@ -3,9 +3,9 @@ version: "3.9" services: retreivr: # Docker Hub: - image: retreivr/retreivr:0.9.1 + image: retreivr/retreivr:0.9.3 # Or GHCR: - # image: ghcr.io/retreivr/retreivr:0.9.1 + # image: ghcr.io/retreivr/retreivr:0.9.3 container_name: retreivr ports: diff --git a/docker/docker-compose.yml.example b/docker/docker-compose.yml.example index 9fd6e70..446c001 100644 --- a/docker/docker-compose.yml.example +++ b/docker/docker-compose.yml.example @@ -2,7 +2,7 @@ version: "3.9" services: retreivr: - image: ghcr.io/retreivr/retreivr:0.9.1 + image: ghcr.io/retreivr/retreivr:0.9.3 container_name: retreivr ports: - "8090:8000" @@ -14,9 +14,12 @@ services: # Map /app/data to your media storage location. - ./data:/app/data + # Example (recommended): + # - /path/on/host/downloads:/app/downloads restart: unless-stopped # Notes: # - Playlist folder paths in config.json are relative to /downloads inside the container. # - For media libraries, map ./downloads to your actual storage path on the host. +# - Default video container is MKV as of v0.9.3 (configurable in config.json). diff --git a/docs/Spofity_Integration.md b/docs/Spofity_Integration.md new file mode 100644 index 0000000..f8e9e16 --- /dev/null +++ b/docs/Spofity_Integration.md @@ -0,0 +1,279 @@ +Spotify Integration (2026) – Architecture, Requirements, and Configuration + +Overview + +Retreivr integrates with Spotify for two primary purposes: + 1. Retrieve track lists from Spotify playlists, albums, and user libraries. + 2. Retrieve metadata (artist, album, track name, etc.) to enrich downloaded media. + +As of February 2026, Spotify’s Web API access rules significantly impact how this integration works. This document explains: + • What works without OAuth + • What requires OAuth + • What may require Spotify Premium + • How Retreivr is wired + • How to configure it correctly + +⸻ + +1️⃣ Spotify Web API Reality (Feb 2026) + +Spotify introduced major restrictions to the Web API. +https://developer.spotify.com/documentation/web-api/references/changes/february-2026 + +Public Metadata (No OAuth Required) + +These endpoints still work using Client Credentials (App-only token): + • GET /albums/{id} + • GET /tracks/{id} + • GET /artists/{id} + • GET /search + • GET /playlists/{id} (metadata only, not track list) + +What this means: + • You can fetch album metadata. + • You can fetch track metadata. + • You can fetch playlist metadata (name, owner, description). + • You CANNOT fetch full playlist track lists using client credentials. 
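+
+As a quick orientation, the app-only flow looks like this (a minimal sketch using plain requests, independent of Retreivr's own Spotify client; the album ID is an arbitrary example):
+
+import requests
+
+# Client Credentials token: sufficient for /albums, /tracks, /artists and /search metadata.
+token = requests.post(
+    "https://accounts.spotify.com/api/token",
+    data={"grant_type": "client_credentials"},
+    auth=("YOUR_CLIENT_ID", "YOUR_CLIENT_SECRET"),
+    timeout=20,
+).json()["access_token"]
+
+album = requests.get(
+    "https://api.spotify.com/v1/albums/4aawyAB9vmqN3uQ7FjRGTy",
+    headers={"Authorization": f"Bearer {token}"},
+    timeout=20,
+).json()
+print(album["name"], "-", album["artists"][0]["name"])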
+
+⸻
+
+Playlist Track Lists (OAuth Required)
+
+To retrieve playlist tracks:
+GET /playlists/{id}/items
+
+You must use:
+	• OAuth user token
+	• Correct scopes
+	• App must not be restricted
+
+Required Scopes
+
+For public and private playlists:
+playlist-read-private
+playlist-read-collaborative
+
+For user libraries (optional features):
+user-library-read
+
+⸻
+
+Premium Requirement Confusion
+
+Spotify’s dashboard may show:
+
+“Your application is blocked from accessing the Web API since you do not have a Spotify Premium subscription.”
+
+In Development Mode, this restriction typically applies only to:
+	• Player endpoints
+	• Playback control
+	• Some advanced personal endpoints
+
+For Retreivr’s purposes (reading playlists and metadata):
+	• Premium is NOT required
+	• OAuth is required for playlist track lists
+	• Public metadata works without OAuth
+
+If your app is:
+	• In Development Mode
+	• Using the Web API only
+	• Not using playback endpoints
+
+It should function correctly after OAuth approval.
+
+⸻
+
+2️⃣ What Retreivr Uses Spotify For
+
+Retreivr uses Spotify in two modes:
+
+⸻
+
+A) Public Metadata Mode (No OAuth Required)
+
+Used for:
+	• Metadata enrichment during music downloads
+	• Album structure creation
+	• Search resolution
+	• Validation
+
+Works with:
+	• Client ID
+	• Client Secret
+
+No OAuth required.
+
+⸻
+
+B) Playlist / Library Sync Mode (OAuth Required)
+
+Used for:
+	• Scheduled Spotify playlist polling
+	• Liked Songs sync
+	• Saved Albums sync
+	• User Playlists sync
+
+Requires:
+	• OAuth user token
+	• Valid scopes
+	• Working redirect URI
+
+⸻
+
+3️⃣ Retreivr Spotify Modes
+
+Mode 1 – Metadata Only
+
+You provide:
+
+"spotify": {
+  "client_id": "YOUR_CLIENT_ID",
+  "client_secret": "YOUR_CLIENT_SECRET"
+}
+
+Retreivr can:
+	• Fetch album metadata
+	• Fetch track metadata
+	• Use Spotify search
+	• Structure music folders properly
+
+No OAuth required.
+
+⸻
+
+Mode 2 – Playlist Sync
+
+You provide:
+
+"spotify": {
+  "client_id": "YOUR_CLIENT_ID",
+  "client_secret": "YOUR_CLIENT_SECRET",
+  "redirect_uri": "http://127.0.0.1:8090/api/spotify/oauth/callback",
+  "sync_user_playlists": true,
+  "watch_playlists": [
+    "0oy3UMfOAENX9X7haGdRRv",
+    "https://open.spotify.com/playlist/5EZkoiqOms6HvUGPd0vMxy"
+  ]
+}
+
+Then:
+	1. Click Connect Spotify on the Config page
+	2. Complete OAuth authorization
+	3. Retreivr stores the token
+	4. Scheduler begins polling playlists
+
+⸻
+
+4️⃣ How Playlist Polling Works in Retreivr
+	1. Scheduler tick fires
+	2. Spotify OAuth token validated
+	3. Playlist IDs normalized
+	4. /playlists/{id}/items called
+	5. Track list diffed against snapshot
+	6. New tracks enqueued
+	7. M3U rebuilt (best-effort)
+	8. Completion logged
+
+If OAuth is invalid:
+	• Playlist sync fails
+	• Metadata-only mode still works
+
+⸻
+
+5️⃣ Development Mode Requirements
+
+Spotify app settings must include:
+
+APIs Used
+	• ✅ Web API
+
+Redirect URI
+
+Must exactly match config:
+http://127.0.0.1:8090/api/spotify/oauth/callback
+
+App Status
+
+Development Mode
+
+This is acceptable.
+
+You must re-authorize after:
+	• Changing scopes
+	• Changing redirect URI
+	• Resetting the token store
+
+⸻
+
+6️⃣ What Does NOT Require Premium
+
+| Feature                                | Premium Required? |
+| -------------------------------------- | ---------------- |
+| Album metadata                          | ❌ |
+| Track metadata                          | ❌ |
+| Public playlist metadata                | ❌ |
+| Public playlist track list (via OAuth)  | ❌ (in Dev Mode) |
+| Liked Songs                             | ❌ |
+| Saved Albums                            | ❌ |
+
+Premium is required primarily for:
+	• Playback endpoints
+	• Player control APIs
+
+Retreivr does not use these.
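+
+For reference, the authorization request behind Connect Spotify has roughly this shape (a sketch only; the real URL is assembled server-side by the build_auth_url helper, with a per-attempt anti-CSRF state that is validated on the callback):
+
+from urllib.parse import urlencode
+
+params = {
+    "client_id": "YOUR_CLIENT_ID",
+    "response_type": "code",
+    "redirect_uri": "http://127.0.0.1:8090/api/spotify/oauth/callback",
+    "scope": "user-library-read playlist-read-private playlist-read-collaborative",
+    "state": "random-anti-csrf-value",
+}
+auth_url = "https://accounts.spotify.com/authorize?" + urlencode(params)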
+ +⸻ + +7️⃣ Recommended Configuration Patterns + +Minimal (Metadata Only) +"spotify": { + "client_id": "xxx", + "client_secret": "xxx" +} + +Playlist Sync (Public Playlists) +"spotify": { + "client_id": "xxx", + "client_secret": "xxx", + "redirect_uri": "http://127.0.0.1:8090/api/spotify/oauth/callback", + "sync_user_playlists": true, + "watch_playlists": [ + "PLAYLIST_ID", + "https://open.spotify.com/playlist/PLAYLIST_ID" + ], + "user_playlists_sync_interval_minutes": 15 +} + + +⸻ + +8️⃣ Important Notes + • Downtime window does NOT block manual runs (by design). + • Scheduler respects downtime. + • OAuth failures log explicitly. + • Playlist track fetching requires OAuth — not client credentials. + • Metadata enrichment works independently of playlist polling. + +⸻ + +9️⃣ Summary + +As of 2026: + • OAuth is required for playlist track lists. + • Client credentials are sufficient for metadata. + • Premium is NOT required for Retreivr’s use case. + • Development Mode is acceptable. + • Redirect URI must match exactly. + • Proper scopes must be requested. + +Retreivr supports both: + • Lightweight metadata-only mode + • Full playlist synchronization mode + +Depending on user configuration diff --git a/docs/musicbrainz.md b/docs/musicbrainz.md new file mode 100644 index 0000000..0282fc9 --- /dev/null +++ b/docs/musicbrainz.md @@ -0,0 +1,68 @@ +# MusicBrainz Integration + +## Purpose + +Retreivr uses MusicBrainz for: + +- Album candidate discovery from user search queries. +- Canonical release selection for a chosen release-group. +- Canonical track ordering/metadata before enqueueing per-track download jobs. +- Canonical metadata authority during search-time resolution, with Spotify used only as gated fallback. + +This is additive to the existing search/download pipeline. It does not auto-download on search. + +## Album Download Flow + +1. User enables Music Mode and searches on Home. +2. Backend queries MusicBrainz release-groups and returns album candidates. +3. User explicitly picks a candidate and clicks download. +4. Backend resolves a best release inside the selected release-group. +5. Backend fetches tracklist for that release and enqueues one `music_track` job per track. +6. Worker resolves playable audio for each track using normal adapters and existing postprocessing/tagging. + +## Album Candidate API + +- Canonical route: `GET /api/music/albums/search?q=&limit=` + - Returns release-group candidates directly from the centralized MusicBrainzService search path. +- Compatibility route: `POST /api/music/album/candidates` + - Calls the same canonical search implementation internally and returns the legacy envelope: + - `{ "status": "ok", "album_candidates": [...] }` + +## Rate Limiting and Request Behavior + +MusicBrainz calls are centralized in `metadata/services/musicbrainz_service.py`: + +- Real User-Agent is always sent: + - `Retreivr/ (+repo/contact)` (configurable via `MUSICBRAINZ_USER_AGENT`) +- Timeout is centralized (default 10s). +- Retries are centralized for transient failures. +- Client-side rate limit is enforced: + - default `1 request/second` + - configurable via `MUSICBRAINZ_MIN_INTERVAL_SECONDS` + +## Caching + +Caching is implemented with: + +- In-memory cache (process-local reads). + +No DB schema/table changes are required. 
+ +Cache keys: + +- `album_search:` +- `release_group:` +- `release_tracks:` + +TTL: + +- Album search: 24 hours +- Release-group release listing: 24 hours +- Release tracks: 7 days + +## Known Limitations + +- Ambiguous titles/artists can still return mixed candidates. +- Regional release differences can change selected release details. +- Live/compilation/soundtrack/remix filtering is heuristic, not perfect. +- Track duration availability depends on MusicBrainz completeness. diff --git a/download/worker.py b/download/worker.py new file mode 100644 index 0000000..cd359f1 --- /dev/null +++ b/download/worker.py @@ -0,0 +1,206 @@ +"""Download worker behavior for resolved Spotify media jobs.""" + +from __future__ import annotations + +import logging +import re +import shutil +from pathlib import Path +from typing import Any, Optional, Protocol + +from config.settings import ENABLE_DURATION_VALIDATION, SPOTIFY_DURATION_TOLERANCE_SECONDS +from db.downloaded_tracks import record_downloaded_track +from media.ffprobe import get_media_duration +from media.path_builder import build_music_path, ensure_parent_dir +from media.validation import validate_duration +from metadata.normalize import normalize_music_metadata +from metadata.tagging_service import tag_file +from metadata.types import CanonicalMetadata + +logger = logging.getLogger(__name__) + +JOB_STATUS_COMPLETED = "completed" +JOB_STATUS_FAILED = "failed" +JOB_STATUS_CANCELLED = "cancelled" +JOB_STATUS_VALIDATION_FAILED = "validation_failed" +JOB_ALLOWED_STATUSES = { + JOB_STATUS_COMPLETED, + JOB_STATUS_FAILED, + JOB_STATUS_CANCELLED, + JOB_STATUS_VALIDATION_FAILED, +} + + +class _Downloader(Protocol): + def download(self, media_url: str) -> str: + """Download a media URL and return the local file path.""" + + +class DownloadWorker: + """Worker that downloads media and applies optional music metadata tagging.""" + + def __init__(self, downloader: _Downloader) -> None: + self._downloader = downloader + + def process_job(self, job: Any) -> dict[str, str | None]: + """Process one job and return a structured status/file-path result. + + Returns: + A dict with keys: + - ``status``: one of ``completed``, ``failed``, ``validation_failed``. + - ``file_path``: output path when completed, otherwise ``None``. + """ + payload = getattr(job, "payload", None) or {} + + if payload.get("music_metadata"): + # Music metadata payloads are expected to include a resolved media URL. + resolved_media = payload.get("resolved_media") or {} + media_url = resolved_media.get("media_url") + metadata = payload.get("music_metadata") + if media_url: + try: + # Download from the resolved media URL, then tag with attached metadata. + file_path = self._downloader.download(media_url) + # Optionally enforce duration validation before any file tagging/write side effects. 
+ if ENABLE_DURATION_VALIDATION: + expected_ms = None + if isinstance(metadata, dict): + expected_ms = metadata.get("expected_ms") + else: + expected_ms = getattr(metadata, "expected_ms", None) + + if expected_ms is not None: + if not validate_duration( + file_path, + int(expected_ms), + SPOTIFY_DURATION_TOLERANCE_SECONDS, + ): + expected_seconds = int(expected_ms) / 1000.0 + actual_seconds = float("nan") + try: + actual_seconds = get_media_duration(file_path) + except Exception: + logger.exception("failed to retrieve actual duration for validation log") + logger.warning( + "validation_failed actual=%.2fs expected=%.2fs tolerance=%.2f", + actual_seconds, + expected_seconds, + SPOTIFY_DURATION_TOLERANCE_SECONDS, + ) + self._set_job_status(job, payload, JOB_STATUS_VALIDATION_FAILED) + return {"status": JOB_STATUS_VALIDATION_FAILED, "file_path": None} + + metadata_obj = self._coerce_music_metadata(metadata) + normalized_metadata = normalize_music_metadata(metadata_obj) + # === Canonical Path Enforcement Starts Here === + temp_path = Path(file_path) + ext = temp_path.suffix.lstrip(".") + root_path = self._resolve_music_root(payload) + canonical_path = build_music_path(root_path, normalized_metadata, ext) + ensure_parent_dir(canonical_path) + try: + shutil.move(str(temp_path), str(canonical_path)) + except Exception: + logger.exception("failed to move file to canonical path path=%s", canonical_path) + self._set_job_status(job, payload, JOB_STATUS_FAILED) + return {"status": JOB_STATUS_FAILED, "file_path": None} + try: + tag_file(str(canonical_path), normalized_metadata) + except Exception: + logger.exception("failed to tag canonical file path=%s", canonical_path) + self._set_job_status(job, payload, JOB_STATUS_FAILED) + return {"status": JOB_STATUS_FAILED, "file_path": None} + # Record idempotency state only after download and tagging both succeed. + playlist_id = payload.get("playlist_id") + isrc = getattr(metadata, "isrc", None) + if not isrc and isinstance(metadata, dict): + isrc = metadata.get("isrc") + if playlist_id and isrc: + record_downloaded_track(str(playlist_id), str(isrc), str(canonical_path)) + self._set_job_status(job, payload, JOB_STATUS_COMPLETED) + return {"status": JOB_STATUS_COMPLETED, "file_path": str(canonical_path)} + except Exception: + logger.exception("music job processing failed") + self._set_job_status(job, payload, JOB_STATUS_FAILED) + return {"status": JOB_STATUS_FAILED, "file_path": None} + + # Non-music or incomplete payloads use the existing default worker behavior. 
+ file_path = self.default_download_and_tag(job) + return {"status": JOB_STATUS_COMPLETED, "file_path": file_path} + + def default_download_and_tag(self, job: Any) -> str: + """Fallback behavior implemented by existing worker flows.""" + raise NotImplementedError + + @staticmethod + def _set_job_status(job: Any, payload: Any, status: str) -> None: + """Set worker job status using the supported terminal status values.""" + if status not in JOB_ALLOWED_STATUSES: + raise ValueError(f"unsupported job status: {status}") + setattr(job, "status", status) + if isinstance(payload, dict): + payload["status"] = status + + @staticmethod + def _coerce_music_metadata(metadata: Any) -> CanonicalMetadata: + """Coerce payload metadata into ``CanonicalMetadata`` for normalization/tagging.""" + if isinstance(metadata, CanonicalMetadata): + return metadata + + payload = metadata if isinstance(metadata, dict) else {} + track_num = safe_int(payload.get("track_num")) + disc_num = safe_int(payload.get("disc_num")) + return CanonicalMetadata( + title=str(payload.get("title") or "Unknown Title"), + artist=str(payload.get("artist") or "Unknown Artist"), + album=str(payload.get("album") or "Unknown Album"), + album_artist=str(payload.get("album_artist") or payload.get("artist") or "Unknown Artist"), + track_num=track_num if track_num is not None and track_num > 0 else 1, + disc_num=disc_num if disc_num is not None and disc_num > 0 else 1, + date=str(payload.get("date") or "Unknown"), + genre=str(payload.get("genre") or "Unknown"), + isrc=(str(payload.get("isrc")).strip() if payload.get("isrc") else None), + mbid=(str(payload.get("mbid")).strip() if payload.get("mbid") else None), + artwork=payload.get("artwork"), + lyrics=(str(payload.get("lyrics")).strip() if payload.get("lyrics") else None), + ) + + @staticmethod + def _resolve_music_root(payload: dict[str, Any]) -> Path: + """Resolve music root path from existing payload/config fields.""" + config = payload.get("config") if isinstance(payload, dict) else None + root_value = ( + payload.get("music_root") + or payload.get("destination") + or payload.get("destination_dir") + or payload.get("output_dir") + or (config.get("music_download_folder") if isinstance(config, dict) else None) + or "." + ) + root = Path(str(root_value)) + # build_music_path already inserts the "Music/" segment. + if root.name.lower() == "music": + return root.parent if str(root.parent) != "" else Path(".") + return root + + +def safe_int(value: Any) -> Optional[int]: + """Parse an integer from mixed input, returning ``None`` when unavailable. + + The parser extracts the first numeric portion from string inputs, e.g. + ``"01/12" -> 1`` and ``"Disc 1" -> 1``. ``None`` and non-numeric values + return ``None``. 
+ """ + if value is None: + return None + if isinstance(value, bool): + return int(value) + if isinstance(value, int): + return value + match = re.search(r"\d+", str(value)) + if not match: + return None + try: + return int(match.group(0)) + except (TypeError, ValueError): + return None diff --git a/engine/core.py b/engine/core.py index 482ba4e..c8b0b7e 100644 --- a/engine/core.py +++ b/engine/core.py @@ -281,6 +281,26 @@ def validate_config(config): if cookie_file is not None and not isinstance(cookie_file, str): errors.append("yt_dlp_cookies must be a string") + youtube_cfg = config.get("youtube") + if youtube_cfg is not None: + if not isinstance(youtube_cfg, dict): + errors.append("youtube must be an object") + else: + cookies_cfg = youtube_cfg.get("cookies") + if cookies_cfg is not None: + if not isinstance(cookies_cfg, dict): + errors.append("youtube.cookies must be an object") + else: + enabled = cookies_cfg.get("enabled") + if enabled is not None and not isinstance(enabled, bool): + errors.append("youtube.cookies.enabled must be true/false") + fallback_only = cookies_cfg.get("fallback_only") + if fallback_only is not None and not isinstance(fallback_only, bool): + errors.append("youtube.cookies.fallback_only must be true/false") + path = cookies_cfg.get("path") + if path is not None and not isinstance(path, str): + errors.append("youtube.cookies.path must be a string") + filename_template = config.get("filename_template") if filename_template is not None and not isinstance(filename_template, str): errors.append("filename_template must be a string") @@ -835,7 +855,7 @@ def run_direct_url_self_test( *, paths: EnginePaths, url="https://youtu.be/PmtGDk0c-JM", - final_format_override="webm", + final_format_override="mkv", ): logging.info("RETREIVR_DIAG: enqueueing direct URL self-test for %s", url) status = EngineStatus() diff --git a/engine/job_queue.py b/engine/job_queue.py index b32fda9..2bd08f5 100644 --- a/engine/job_queue.py +++ b/engine/job_queue.py @@ -11,7 +11,7 @@ import threading import time import urllib.parse -from dataclasses import dataclass +from dataclasses import dataclass, replace from datetime import datetime, timedelta, timezone from typing import Optional from uuid import uuid4 @@ -22,8 +22,12 @@ from engine.json_utils import json_sanity_check, safe_json_dumps from engine.paths import EnginePaths, TOKENS_DIR, resolve_dir +from engine.search_scoring import rank_candidates, score_candidate +from metadata.naming import sanitize_component from metadata.queue import enqueue_metadata +logger = logging.getLogger(__name__) + JOB_STATUS_QUEUED = "queued" JOB_STATUS_CLAIMED = "claimed" JOB_STATUS_DOWNLOADING = "downloading" @@ -47,6 +51,10 @@ class CancelledError(Exception): class PostprocessingError(Exception): pass + +class CookieFallbackError(RuntimeError): + """Raised when the optional YouTube cookie fallback fails.""" + _FORMAT_VIDEO = ( "bestvideo[ext=webm][height<=1080]+bestaudio[ext=webm]/" "bestvideo[ext=webm][height<=720]+bestaudio[ext=webm]/" @@ -69,6 +77,7 @@ class PostprocessingError(Exception): re.IGNORECASE, ) _AUDIO_ARTIST_VEVO_RE = re.compile(r"(vevo)$", re.IGNORECASE) +_WORD_TOKEN_RE = re.compile(r"[a-z0-9]+") _YTDLP_DOWNLOAD_UNSAFE_KEYS = {"download", "skip_download", "simulate", "extract_flat"} @@ -94,6 +103,18 @@ class PostprocessingError(Exception): "user_agent", } +_MUSIC_TRACK_SOURCE_PRIORITY = ("youtube_music", "youtube", "soundcloud", "bandcamp") +_DEFAULT_MATCH_THRESHOLD = 0.92 +_MUSIC_TRACK_THRESHOLD = min(_DEFAULT_MATCH_THRESHOLD * 0.8, 0.70) 
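+# With the 0.92 default, min(0.92 * 0.8, 0.70) == min(0.736, 0.70) == 0.70, so
+# music-track matching effectively accepts candidates at a 0.70 bar instead of 0.92.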
+_MUSIC_TRACK_PENALTY_TERMS = ("live", "cover", "karaoke", "remix") +_MUSIC_TRACK_PENALIZE_TOKENS = ("live", "cover", "karaoke", "remix", "reaction", "ft.", "feat.", "instrumental") +_MUSIC_SOURCE_PRIORITY_WEIGHTS = { + "youtube_music": 10, + "youtube": 7, + "soundcloud": 4, + "bandcamp": 2, +} + @dataclass(frozen=True) class DownloadJob: @@ -195,6 +216,14 @@ def ensure_download_jobs_table(conn): "CREATE INDEX IF NOT EXISTS idx_download_jobs_source_status ON download_jobs (source, status)" ) cur.execute("CREATE INDEX IF NOT EXISTS idx_download_jobs_created ON download_jobs (created_at)") + cur.execute( + "CREATE INDEX IF NOT EXISTS idx_download_jobs_canonical_dest_status_created " + "ON download_jobs (canonical_id, resolved_destination, status, created_at DESC)" + ) + cur.execute( + "CREATE INDEX IF NOT EXISTS idx_download_jobs_url_dest_status_created " + "ON download_jobs (url, resolved_destination, status, created_at DESC)" + ) conn.commit() # --- ensure_downloads_table @@ -232,13 +261,14 @@ def ensure_download_history_table(conn): file_size_bytes INTEGER, input_url TEXT, canonical_url TEXT, - external_id TEXT + external_id TEXT, + channel_id TEXT ) """ ) cur.execute("PRAGMA table_info(download_history)") existing_columns = {row[1] for row in cur.fetchall()} - for column in ("input_url", "canonical_url", "external_id", "source"): + for column in ("input_url", "canonical_url", "external_id", "source", "channel_id"): if column not in existing_columns: cur.execute(f"ALTER TABLE download_history ADD COLUMN {column} TEXT") cur.execute( @@ -415,14 +445,25 @@ def find_duplicate_job(self, *, canonical_id=None, url=None, destination=None): return None query = f""" SELECT * FROM download_jobs - WHERE ({' OR '.join(clauses)}) AND status=? + WHERE ({' OR '.join(clauses)}) + AND status IN (?, ?, ?, ?, ?) ORDER BY created_at DESC LIMIT 1 """ - params.append(JOB_STATUS_COMPLETED) + params.extend( + [ + JOB_STATUS_COMPLETED, + JOB_STATUS_QUEUED, + JOB_STATUS_CLAIMED, + JOB_STATUS_DOWNLOADING, + JOB_STATUS_POSTPROCESSING, + ] + ) cur.execute(query, tuple(params)) row = cur.fetchone() - if not row or not self._row_has_valid_output(row): + if not row: + return None + if row["status"] == JOB_STATUS_COMPLETED and not self._row_has_valid_output(row): return None return self._row_to_job(row) finally: @@ -773,6 +814,7 @@ def __init__( *, retry_delay_seconds=30, adapters=None, + search_service=None, ): self.db_path = db_path self.config = config or {} @@ -780,6 +822,7 @@ def __init__( self.retry_delay_seconds = retry_delay_seconds self.store = DownloadJobStore(db_path) self.adapters = adapters or default_adapters() + self.search_service = search_service # Ensure required DB tables exist (idempotent). 
conn = sqlite3.connect(self.db_path, check_same_thread=False) try: @@ -792,6 +835,267 @@ def __init__( self._cancel_flags = {} self._cancel_lock = threading.Lock() + def _extract_resolved_candidate(self, resolved): + if not resolved: + return None, None + if isinstance(resolved, dict): + return resolved.get("url"), resolved.get("source") + url = getattr(resolved, "url", None) + source = getattr(resolved, "source", None) + return url, source + + def _music_tokens(self, value): + return _WORD_TOKEN_RE.findall(str(value or "").lower()) + + def _music_track_is_live(self, artist, track, album): + combined = " ".join([str(artist or ""), str(track or ""), str(album or "")]).lower() + return " live " in f" {combined} " + + def _normalize_score_100(self, candidate): + raw_score = candidate.get("adapter_score") + if raw_score is None: + raw_score = candidate.get("raw_score") + if raw_score is None: + raw_score = candidate.get("final_score") + max_score = candidate.get("adapter_max_possible") + if max_score is None: + max_score = candidate.get("max_score") + if max_score is None: + max_score = 1.0 + try: + raw_value = float(raw_score or 0.0) + max_value = float(max_score or 0.0) + if max_value <= 0: + return 0.0 + normalized = (raw_value / max_value) * 100.0 + return max(0.0, min(100.0, normalized)) + except Exception: + return 0.0 + + def _build_music_track_query(self, artist, track, album=None, *, is_live=False): + search_terms = [f'"{artist}"', f'"{track}"'] + if album: + search_terms.append(f'"{album}"') + search_terms.extend(["audio", "official", "topic"]) + return " ".join(part for part in search_terms if part).strip() + + def _music_track_adjust_score(self, expected, candidate, *, allow_live=False): + title = str(candidate.get("title") or "") + uploader = str(candidate.get("uploader") or candidate.get("artist_detected") or "") + source = str(candidate.get("source") or "") + title_tokens = self._music_tokens(title) + uploader_tokens = set(self._music_tokens(uploader)) + track_tokens = set(self._music_tokens(expected.get("track"))) + artist_tokens = set(self._music_tokens(expected.get("artist"))) + expected_track_tokens = self._music_tokens(expected.get("track")) + candidate_title_tokens = self._music_tokens(title) + + adjustment = 0.0 + reasons = [] + + if track_tokens and track_tokens.issubset(set(title_tokens)): + adjustment += 12.0 + reasons.append("exact_track_tokens") + title_match_increment = 0.0 + if expected_track_tokens and candidate_title_tokens: + if expected_track_tokens == candidate_title_tokens: + title_match_increment = 25.0 + else: + shared_count = len(set(expected_track_tokens) & set(candidate_title_tokens)) + title_match_increment = float(shared_count * 2) + adjustment += title_match_increment + reasons.append(f"title_match_{title_match_increment:.0f}") + logger.debug( + f"[MUSIC] title_match score_increase={title_match_increment:.0f} " + f"for candidate={candidate.get('url')}" + ) + + if artist_tokens and uploader_tokens: + overlap = len(artist_tokens & uploader_tokens) / max(len(artist_tokens), 1) + if overlap >= 0.60: + adjustment += 10.0 + reasons.append("artist_uploader_overlap_high") + elif overlap >= 0.30: + adjustment += 5.0 + reasons.append("artist_uploader_overlap") + + expected_duration = expected.get("duration_hint_sec") + candidate_duration = candidate.get("duration_sec") + try: + if expected_duration is not None and candidate_duration is not None: + diff_ms = abs((int(candidate_duration) * 1000) - (int(expected_duration) * 1000)) + duration_increment = 0.0 + if 
diff_ms <= 3000: + duration_increment = 20.0 + elif diff_ms <= 8000: + duration_increment = 10.0 + elif diff_ms <= 15000: + duration_increment = 5.0 + if duration_increment > 0.0: + adjustment += duration_increment + reasons.append(f"duration_bonus_{duration_increment:.0f}") + logger.debug( + f"[MUSIC] duration_bonus diff={diff_ms} score={duration_increment:.0f}" + ) + except Exception: + pass + + title_lower = title.lower() + if "provided to youtube" in title_lower: + adjustment += 8.0 + reasons.append("provided_to_youtube") + if "topic" in uploader.lower() and source in {"youtube", "youtube_music"}: + adjustment += 8.0 + reasons.append("topic_channel") + if "lyrics" in title_lower: + adjustment += 2.0 + reasons.append("lyrics_hint") + + for token in _MUSIC_TRACK_PENALIZE_TOKENS: + if allow_live and token == "live": + continue + if token in title_lower: + adjustment -= 10.0 + reasons.append(f"penalty_{token}") + logger.debug( + f"[MUSIC] penalizing token={token} new_score={adjustment:.0f} " + f"for {candidate.get('url')}" + ) + return adjustment, reasons + + def _resolve_music_track_with_adapters(self, artist, track, album=None, *, duration_hint_sec=None, allow_live=False): + expected = { + "artist": artist, + "track": track, + "album": album, + "duration_hint_sec": duration_hint_sec, + } + scored = [] + source_priority = [name for name in _MUSIC_TRACK_SOURCE_PRIORITY if name in self.adapters] + source_priority.extend([name for name in self.adapters.keys() if name not in source_priority]) + for source in source_priority: + adapter = self.adapters.get(source) + if not adapter: + continue + query = self._build_music_track_query(artist, track, album, is_live=allow_live) + try: + if hasattr(adapter, "_search"): + candidates = adapter._search(query, 6) + else: + candidates = adapter.search_track(artist, track, album, 6) + except Exception: + logging.exception("Music track search adapter failed source=%s", source) + continue + for candidate in candidates or []: + url = candidate.get("url") if isinstance(candidate, dict) else None + if not _is_http_url(url): + continue + candidate = dict(candidate) + candidate["source"] = candidate.get("source") or source + modifier = adapter.source_modifier(candidate) + candidate.update(score_candidate(expected, candidate, source_modifier=modifier)) + base_score = self._normalize_score_100(candidate) + source_weight = int(_MUSIC_SOURCE_PRIORITY_WEIGHTS.get(source, 0)) + logger.debug(f"[MUSIC] source_priority={source} weight={source_weight}") + adjustment, reasons = self._music_track_adjust_score(expected, candidate, allow_live=allow_live) + if source_weight: + adjustment += float(source_weight) + reasons.append(f"source_priority_{source_weight}") + candidate["music_adjustment"] = adjustment + candidate["music_adjustment_reasons"] = ",".join(reasons) + candidate["base_score"] = base_score + candidate["final_score_100"] = max(0.0, min(100.0, base_score + adjustment)) + candidate["final_score"] = candidate["final_score_100"] / 100.0 + scored.append(candidate) + if not scored: + return None + ranked = rank_candidates(scored, source_priority=source_priority) + for candidate in ranked: + candidate_score = float(candidate.get("final_score") or 0.0) + logger.info(f"[MUSIC] threshold_used={_MUSIC_TRACK_THRESHOLD:.2f} candidate_score={candidate_score:.3f}") + if candidate_score >= _MUSIC_TRACK_THRESHOLD: + return candidate + logger.warning(f"[MUSIC] top 5 candidates for track={track} scores:") + for candidate in ranked[:5]: + logger.warning( + " score=%.3f source=%s 
url=%s title=%s", + float(candidate.get("final_score") or 0.0), + candidate.get("source"), + candidate.get("url"), + candidate.get("title"), + ) + return None + + def _resolve_music_track_job(self, job): + payload = job.output_template if isinstance(job.output_template, dict) else {} + canonical = payload.get("canonical_metadata") if isinstance(payload.get("canonical_metadata"), dict) else {} + artist = str(payload.get("artist") or canonical.get("artist") or "").strip() + track = str(payload.get("track") or canonical.get("track") or canonical.get("title") or "").strip() + album = str(payload.get("album") or canonical.get("album") or "").strip() or None + duration_ms_raw = payload.get("duration_ms") + if duration_ms_raw is None: + duration_ms_raw = canonical.get("duration_ms") + if duration_ms_raw is None: + duration_ms_raw = canonical.get("duration") + duration_hint_sec = None + try: + if duration_ms_raw is not None: + duration_hint_sec = max(int(duration_ms_raw) // 1000, 1) + except Exception: + duration_hint_sec = None + allow_live = self._music_track_is_live(artist, track, album) + if not artist or not track: + logging.error("Music track search failed") + raise RuntimeError("music_track_metadata_missing") + logger.info(f"[WORKER] processing music_track artist={artist} track={track}") + + search_query = self._build_music_track_query(artist, track, album, is_live=allow_live) + logger.debug(f"[MUSIC] built search_query={search_query} for music_track") + resolved = None + if self.search_service and hasattr(self.search_service, "search_best_match"): + try: + resolved = self.search_service.search_best_match( + search_query, + threshold=_MUSIC_TRACK_THRESHOLD, + ) + except TypeError: + resolved = None + except Exception: + logging.exception("Music track search service failed query=%s", search_query) + if not resolved: + resolved = self._resolve_music_track_with_adapters( + artist, + track, + album, + duration_hint_sec=duration_hint_sec, + allow_live=allow_live, + ) + + resolved_url, resolved_source = self._extract_resolved_candidate(resolved) + if not _is_http_url(resolved_url): + logging.error("Music track search failed") + raise RuntimeError("music_track_no_candidate_above_threshold") + selected_score = None + if isinstance(resolved, dict): + selected_score = resolved.get("final_score") + logger.info( + f"[MUSIC] threshold={_MUSIC_TRACK_THRESHOLD:.2f} " + f"selected_score={selected_score if selected_score is not None else 'n/a'} " + f"candidate={resolved_url}" + ) + + source = resolved_source or resolve_source(resolved_url) + external_id = extract_video_id(resolved_url) if source in {"youtube", "youtube_music"} else None + canonical_url = canonicalize_url(source, resolved_url, external_id) + return replace( + job, + source=source, + url=resolved_url, + input_url=resolved_url, + canonical_url=canonical_url, + external_id=external_id, + ) + def run_once(self, *, stop_event=None): sources = self.store.list_sources_with_queued_jobs() threads = [] @@ -888,6 +1192,14 @@ def _run_source_once(self, source, lock, stop_event): lock.release() def _execute_job(self, job, *, stop_event=None): + if hasattr(job, "keys"): + job_keys = list(job.keys()) + else: + try: + job_keys = list(vars(job).keys()) + except Exception: + job_keys = [] + logger.debug(f"[WORKER] received job payload keys={job_keys}") if job.status != JOB_STATUS_CLAIMED: _log_event( logging.ERROR, @@ -911,6 +1223,17 @@ def _execute_job(self, job, *, stop_event=None): media_intent=job.media_intent, ) return + if hasattr(job, "get"): + intent 
= job.get("media_intent") or job.get("payload", {}).get("media_intent") + else: + payload = getattr(job, "payload", {}) or {} + if not isinstance(payload, dict): + payload = {} + intent = getattr(job, "media_intent", None) or payload.get("media_intent") + + if intent == "music_track": + logger.info(f"[WORKER] processing music_track: {job}") + job = self._resolve_music_track_job(job) adapter = self.adapters.get(job.source) if not adapter: _log_event( @@ -1085,7 +1408,7 @@ def execute(self, job, config, paths, *, stop_event=None, cancel_check=None, can if audio_mode: ext = final_format or "mp3" elif not ext: - ext = final_format or "webm" + ext = final_format or "mkv" template = audio_template if audio_mode else filename_template cleaned_name = build_output_filename(meta, video_id, ext, template, audio_mode) @@ -1097,8 +1420,10 @@ def execute(self, job, config, paths, *, stop_event=None, cancel_check=None, can embed_metadata(local_file, meta, video_id, paths.thumbs_dir) final_path = os.path.join(resolved_dir, cleaned_name) + final_path = resolve_collision_path(final_path) os.makedirs(os.path.dirname(final_path), exist_ok=True) atomic_move(local_file, final_path) + logger.info(f"[MUSIC] finalized file: {final_path}") shutil.rmtree(temp_dir, ignore_errors=True) size = None @@ -1154,6 +1479,69 @@ def resolve_cookie_file(config): return resolved +def resolve_youtube_cookie_fallback_file(config): + youtube_cfg = (config or {}).get("youtube") + if not isinstance(youtube_cfg, dict): + return None + cookies_cfg = youtube_cfg.get("cookies") + if not isinstance(cookies_cfg, dict): + return None + if not cookies_cfg.get("enabled"): + return None + if not cookies_cfg.get("fallback_only"): + return None + path = cookies_cfg.get("path") + if not isinstance(path, str) or not path.strip(): + return None + try: + resolved = resolve_dir(path, TOKENS_DIR) + except ValueError as exc: + logging.error("Invalid youtube cookies path: %s", exc) + return None + if not os.path.exists(resolved): + logging.warning("youtube cookies file not found: %s", resolved) + return None + return resolved + + +def _is_youtube_access_gate(message: str | None) -> bool: + if not message: + return False + lower_msg = message.lower() + triggers = [ + "this video is not available", + "sign in to confirm your age", + "login required", + "access denied", + "age restricted", + "age-restricted", + "age restriction", + ] + blockers = [ + "timed out", + "timeout", + "connection reset", + "temporary failure", + "network error", + "couldn't download webpage", + "unable to download webpage", + "http error 403", + "http error 404", + "geo-restricted", + "geoblocked", + "geo blocked", + "country", + "region", + "format not available", + "private", + "removed", + ] + if not any(trigger in lower_msg for trigger in triggers): + return False + if any(blocker in lower_msg for blocker in blockers): + return False + return True + def resolve_media_type(config, *, playlist_entry=None, url=None): media_type = None if isinstance(playlist_entry, dict): @@ -1274,6 +1662,13 @@ def is_youtube_music_url(url): return "music.youtube.com" in (parsed.netloc or "").lower() +def _is_http_url(url): + if not url or not isinstance(url, str): + return False + parsed = urllib.parse.urlparse(url) + return parsed.scheme in {"http", "https"} + + def build_ytdlp_opts(context): operation = context.get("operation") or "download" audio_mode = bool(context.get("audio_mode")) @@ -1548,6 +1943,28 @@ def _format_summary(info): } +def _select_youtube_cookie_fallback( + config, + url, + 
stderr_text, + opts, + media_type, +): + fallback_cookie = resolve_youtube_cookie_fallback_file(config) + if not fallback_cookie: + return None + if opts.get("cookiefile"): + return None + if is_music_media_type(media_type): + return None + source = resolve_source(url) + if source not in {"youtube", "youtube_music"}: + return None + if not _is_youtube_access_gate(stderr_text): + return None + return fallback_cookie + + def download_with_ytdlp( url, temp_dir, @@ -1684,7 +2101,13 @@ def _is_empty_download_error(e: Exception) -> bool: cmd_log = _argv_to_redacted_cli(cmd_argv) try: - subprocess.run(cmd_argv, check=True, stdout=DEVNULL, stderr=DEVNULL) + subprocess.run( + cmd_argv, + check=True, + stdout=DEVNULL, + stderr=subprocess.PIPE, + text=True, + ) # Log AFTER the command has been executed, per requirement. _log_event( logging.INFO, @@ -1694,7 +2117,71 @@ def _is_empty_download_error(e: Exception) -> bool: cli=cmd_log, ) except CalledProcessError as exc: - # If a cookiefile is present and yt-dlp produced no completed file in temp_dir, retry once WITHOUT cookies. + stderr_output = (exc.stderr or "").strip() + fallback_cookie = _select_youtube_cookie_fallback( + config=config, + url=url, + stderr_text=stderr_output, + opts=opts_for_run, + media_type=media_type, + ) + if fallback_cookie: + _log_event( + logging.INFO, + "YTDLP_YOUTUBE_COOKIE_FALLBACK_ATTEMPT", + job_id=job_id, + url=url, + origin=origin, + media_type=media_type, + media_intent=media_intent, + error=stderr_output, + ) + retry_opts = dict(opts_for_run) + retry_opts["cookiefile"] = fallback_cookie + cmd_retry_argv = _render_ytdlp_cli_argv(retry_opts, url) + cmd_retry_log = _argv_to_redacted_cli(cmd_retry_argv) + try: + subprocess.run( + cmd_retry_argv, + check=True, + stdout=DEVNULL, + stderr=subprocess.PIPE, + text=True, + ) + _log_event( + logging.INFO, + "YTDLP_YOUTUBE_COOKIE_FALLBACK_SUCCEEDED", + job_id=job_id, + url=url, + origin=origin, + media_type=media_type, + media_intent=media_intent, + ) + _log_event( + logging.INFO, + "YTDLP_CLI_EQUIVALENT", + job_id=job_id, + url=url, + cli=cmd_retry_log, + ) + if (stop_event and stop_event.is_set()) or ( + callable(cancel_check) and cancel_check() + ): + raise CancelledError(cancel_reason or "Cancelled by user") + return info, _select_download_output(temp_dir, info, audio_mode) + except CalledProcessError as fallback_exc: + fallback_message = (fallback_exc.stderr or "").strip() + _log_event( + logging.ERROR, + "YTDLP_YOUTUBE_COOKIE_FALLBACK_FAILED", + job_id=job_id, + url=url, + origin=origin, + media_type=media_type, + media_intent=media_intent, + error=fallback_message, + ) + raise CookieFallbackError(f"yt_dlp_cookie_fallback_failed: {fallback_exc}") if opts.get("cookiefile"): found = False for entry in os.listdir(temp_dir): @@ -1789,7 +2276,10 @@ def _is_empty_download_error(e: Exception) -> bool: if (stop_event and stop_event.is_set()) or (callable(cancel_check) and cancel_check()): raise CancelledError(cancel_reason or "Cancelled by user") + return info, _select_download_output(temp_dir, info, audio_mode) + +def _select_download_output(temp_dir, info, audio_mode): local_path = None if isinstance(info, dict): local_path = info.get("_filename") @@ -1799,15 +2289,12 @@ def _is_empty_download_error(e: Exception) -> bool: if local_path: break - # If yt-dlp reported a concrete output file, use it if local_path and os.path.exists(local_path) and os.path.getsize(local_path) > 0: - return info, local_path + return local_path - # Otherwise, scan temp_dir for completed artifacts 
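+    # Fall back to scanning temp_dir for completed artifacts when yt-dlp did not
+    # report a concrete output file.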
candidates = [] audio_candidates = [] for entry in os.listdir(temp_dir): - # Ignore yt-dlp temporary/partial artifacts if entry.endswith((".part", ".ytdl", ".temp")): continue candidate = os.path.join(temp_dir, entry) @@ -1820,22 +2307,27 @@ def _is_empty_download_error(e: Exception) -> bool: if size <= 0: continue candidates.append((size, candidate)) - if os.path.splitext(candidate)[1].lower() in {".m4a", ".webm", ".opus", ".aac", ".mp3", ".flac"}: + if os.path.splitext(candidate)[1].lower() in { + ".m4a", + ".webm", + ".opus", + ".aac", + ".mp3", + ".flac", + }: audio_candidates.append((size, candidate)) - # In audio_mode, we MUST have an audio-capable artifact if audio_mode: if not audio_candidates: raise PostprocessingError( "No audio stream resolved (video-only format selected)" ) audio_candidates.sort(reverse=True) - return info, audio_candidates[0][1] + return audio_candidates[0][1] - # Video mode fallback: pick the largest completed artifact if candidates: candidates.sort(reverse=True) - return info, candidates[0][1] + return candidates[0][1] raise RuntimeError("yt_dlp_no_output") @@ -1881,6 +2373,7 @@ def extract_meta(info, *, fallback_url=None): "video_id": info.get("id"), "title": info.get("title"), "channel": info.get("channel") or info.get("uploader"), + "channel_id": info.get("channel_id") or info.get("uploader_id"), "artist": info.get("artist") or info.get("uploader"), "album": info.get("album"), "album_artist": info.get("album_artist"), @@ -1899,7 +2392,7 @@ def extract_meta(info, *, fallback_url=None): def sanitize_for_filesystem(name, maxlen=180): if not name: return "" - safe = re.sub(r"[\\/\\?%*:|\"<>]", "_", str(name)).strip() + safe = sanitize_component(str(name)) safe = re.sub(r"\s+", " ", safe) return safe[:maxlen].strip() @@ -1907,13 +2400,8 @@ def sanitize_for_filesystem(name, maxlen=180): def pretty_filename(title, channel, upload_date): safe_title = sanitize_for_filesystem(title or "") safe_channel = sanitize_for_filesystem(channel or "") - date = upload_date or "" - if safe_channel and date: - return f"{safe_title} - {safe_channel} - {date}".strip(" -") if safe_channel: return f"{safe_title} - {safe_channel}".strip(" -") - if date: - return f"{safe_title} - {date}".strip(" -") return safe_title or "media" @@ -1960,15 +2448,13 @@ def build_audio_filename(meta, ext, *, template=None, fallback_id=None): album = sanitize_for_filesystem(_clean_audio_title(meta.get("album") or "")) track = sanitize_for_filesystem(_clean_audio_title(meta.get("track") or meta.get("title") or "")) track_number = format_track_number(meta.get("track_number")) - fallback = (fallback_id or "media")[:8] - fmt = { "artist": artist, "album": album, "track": track, "track_number": track_number, "ext": ext, - "id": fallback, + "id": "", } if template: @@ -1988,7 +2474,7 @@ def build_audio_filename(meta, ext, *, template=None, fallback_id=None): if track_number: return f"{artist}/{track_number} - {track}.{ext}" return f"{artist}/{track}.{ext}" - return f"{track or fallback}.{ext}" + return f"{track or 'media'}.{ext}" def build_output_filename(meta, fallback_id, ext, template, audio_mode): @@ -1999,15 +2485,27 @@ def build_output_filename(meta, fallback_id, ext, template, audio_mode): rendered = template % { "title": sanitize_for_filesystem(meta.get("title") or fallback_id), "uploader": sanitize_for_filesystem(meta.get("channel") or ""), - "upload_date": meta.get("upload_date") or "", + "upload_date": "", "ext": ext, - "id": fallback_id, + "id": "", } if rendered: return rendered except Exception: 
pass - return f"{pretty_filename(meta.get('title'), meta.get('channel'), meta.get('upload_date'))}_{fallback_id[:8]}.{ext}" + return f"{pretty_filename(meta.get('title'), meta.get('channel'), meta.get('upload_date'))}.{ext}" + + +def resolve_collision_path(path): + if not os.path.exists(path): + return path + stem, ext = os.path.splitext(path) + attempt = 2 + while True: + candidate = f"{stem} ({attempt}){ext}" + if not os.path.exists(candidate): + return candidate + attempt += 1 def atomic_move(src, dst): @@ -2034,6 +2532,7 @@ def embed_metadata(local_file, meta, video_id, thumbs_dir): title = meta.get("title") or video_id channel = meta.get("channel") or "" + channel_id = meta.get("channel_id") or "" artist = meta.get("artist") or channel album = meta.get("album") album_artist = meta.get("album_artist") @@ -2067,6 +2566,8 @@ def embed_metadata(local_file, meta, video_id, thumbs_dir): keywords = ", ".join([str(t) for t in tags if t]) if tags else "" comment = f"YouTubeID={video_id} URL={url}" + if channel_id: + comment = f"{comment} ChannelID={channel_id}" # Truncate potentially huge fields to avoid container/tag limits def _truncate(s: str, limit: int) -> str: @@ -2148,6 +2649,8 @@ def _add_common_metadata(cmd_list: list[str]): cmd_list.extend(["-metadata", f"date={date_tag}"]) if description: cmd_list.extend(["-metadata", f"description={description}"]) + if channel_id: + cmd_list.extend(["-metadata", f"source_channel_id={channel_id}"]) if keywords: cmd_list.extend(["-metadata", f"keywords={keywords}"]) if comment: @@ -2271,8 +2774,8 @@ def record_download_history(db_path, job, filepath, *, meta=None): INSERT INTO download_history ( video_id, title, filename, destination, source, status, created_at, completed_at, file_size_bytes, - input_url, canonical_url, external_id - ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + input_url, canonical_url, external_id, channel_id + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) """, ( video_id, @@ -2287,6 +2790,7 @@ def record_download_history(db_path, job, filepath, *, meta=None): input_url, canonical_url, external_id, + (meta or {}).get("channel_id") if isinstance(meta, dict) else None, ), ) conn.commit() @@ -2313,6 +2817,8 @@ def is_retryable_error(error): return False if isinstance(error, PostprocessingError): return False + if isinstance(error, CookieFallbackError): + return False if isinstance(error, (DownloadError, ExtractorError)): message = str(error).lower() else: diff --git a/input/intent_router.py b/input/intent_router.py new file mode 100644 index 0000000..0e35324 --- /dev/null +++ b/input/intent_router.py @@ -0,0 +1,85 @@ +"""Intent routing helpers for raw homepage input.""" + +from __future__ import annotations + +from dataclasses import dataclass +from enum import Enum +from typing import Optional +from urllib.parse import parse_qs, urlparse + + +class IntentType(Enum): + SPOTIFY_ALBUM = "spotify_album" + SPOTIFY_PLAYLIST = "spotify_playlist" + SPOTIFY_TRACK = "spotify_track" + SPOTIFY_ARTIST = "spotify_artist" + YOUTUBE_PLAYLIST = "youtube_playlist" + SEARCH = "search" + + +@dataclass +class Intent: + type: IntentType + identifier: str # ID extracted or original search string + + +def detect_intent(user_input: str) -> Intent: + """Detect intent from user input without network calls. + + Rules: + - Detect Spotify URLs for album/playlist/track/artist. + - Detect YouTube playlist URLs via ``list=`` query parameter. + - Otherwise treat input as plain ``SEARCH``. + - Extract clean IDs without query strings. 
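+
+    Example (illustrative; the track ID here is made up):
+
+        >>> detect_intent("https://open.spotify.com/track/abc123?si=share")
+        Intent(type=<IntentType.SPOTIFY_TRACK: 'spotify_track'>, identifier='abc123')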
+ """ + raw = (user_input or "").strip() + if not raw: + return Intent(type=IntentType.SEARCH, identifier="") + + spotify_album = _extract_spotify_id(raw, "album") + if spotify_album: + return Intent(type=IntentType.SPOTIFY_ALBUM, identifier=spotify_album) + + spotify_playlist = _extract_spotify_id(raw, "playlist") + if spotify_playlist: + return Intent(type=IntentType.SPOTIFY_PLAYLIST, identifier=spotify_playlist) + + spotify_track = _extract_spotify_id(raw, "track") + if spotify_track: + return Intent(type=IntentType.SPOTIFY_TRACK, identifier=spotify_track) + + spotify_artist = _extract_spotify_id(raw, "artist") + if spotify_artist: + return Intent(type=IntentType.SPOTIFY_ARTIST, identifier=spotify_artist) + + youtube_playlist = _extract_youtube_playlist_id(raw) + if youtube_playlist: + return Intent(type=IntentType.YOUTUBE_PLAYLIST, identifier=youtube_playlist) + + return Intent(type=IntentType.SEARCH, identifier=raw) + + +def _extract_spotify_id(raw: str, resource: str) -> Optional[str]: + parsed = urlparse(raw) + if parsed.scheme and "spotify.com" in (parsed.netloc or "").lower(): + parts = [segment for segment in (parsed.path or "").split("/") if segment] + if len(parts) >= 2 and parts[0].lower() == resource: + return _clean_identifier(parts[1]) + return None + + +def _extract_youtube_playlist_id(raw: str) -> Optional[str]: + parsed = urlparse(raw) + if not parsed.scheme: + return None + netloc = (parsed.netloc or "").lower() + if "youtube.com" not in netloc and "youtu.be" not in netloc: + return None + values = parse_qs(parsed.query).get("list") + if not values: + return None + return _clean_identifier(values[0]) + + +def _clean_identifier(value: str) -> str: + return (value or "").split("?", 1)[0].strip().strip("/") diff --git a/media/ffprobe.py b/media/ffprobe.py new file mode 100644 index 0000000..caf9913 --- /dev/null +++ b/media/ffprobe.py @@ -0,0 +1,57 @@ +"""Wrapper utilities for retrieving media information using ffprobe.""" + +from __future__ import annotations + +import json +import subprocess + + +def get_media_duration(file_path: str) -> float: + """Return media duration in seconds using ``ffprobe`` JSON output. + + The function executes ``ffprobe`` for the provided file, parses the JSON + payload, and returns ``format.duration`` as a float. + + Raises: + RuntimeError: If ``ffprobe`` execution fails or the command is missing. + ValueError: If duration data is missing or not parseable as a float. 
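+
+    Example (illustrative; requires ffprobe on PATH, so skipped under doctest):
+
+        >>> get_media_duration("/music/track.flac")  # doctest: +SKIP
+        215.4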
+ """ + command = [ + "ffprobe", + "-v", + "error", + "-print_format", + "json", + "-show_format", + file_path, + ] + + try: + completed = subprocess.run( + command, + capture_output=True, + text=True, + check=True, + timeout=15, + ) + except FileNotFoundError as exc: + raise RuntimeError("ffprobe is not installed or not available in PATH") from exc + except subprocess.TimeoutExpired as exc: + raise RuntimeError(f"ffprobe timed out while probing: {file_path}") from exc + except subprocess.CalledProcessError as exc: + stderr_text = (exc.stderr or "").strip() + raise RuntimeError(f"ffprobe failed for {file_path}: {stderr_text or exc}") from exc + + try: + payload = json.loads(completed.stdout or "{}") + except json.JSONDecodeError as exc: + raise ValueError(f"ffprobe returned invalid JSON for {file_path}") from exc + + duration_value = (payload.get("format") or {}).get("duration") + if duration_value in (None, ""): + raise ValueError(f"ffprobe did not return a duration for {file_path}") + + try: + return float(duration_value) + except (TypeError, ValueError) as exc: + raise ValueError(f"ffprobe returned a non-numeric duration for {file_path}") from exc diff --git a/media/path_builder.py b/media/path_builder.py new file mode 100644 index 0000000..3cc2d7f --- /dev/null +++ b/media/path_builder.py @@ -0,0 +1,46 @@ +"""Canonical music path construction utilities.""" + +from __future__ import annotations + +from pathlib import Path + +from metadata.naming import build_album_directory, build_track_filename, sanitize_component +from metadata.types import CanonicalMetadata + + +def sanitize_for_filesystem(value: str) -> str: + """Return a filesystem-safe string with invalid characters removed.""" + return sanitize_component(value) + + +def build_music_path(root: Path, metadata: CanonicalMetadata, ext: str) -> Path: + """Build and return a canonical music path without creating directories. + + Layout: + Music/ + {album_artist}/ + {album} ({year})/ + Disc {disc_num}/ + {track_num:02d} - {title}.{ext} + """ + album_artist = sanitize_for_filesystem(metadata.album_artist or metadata.artist or "Unknown Artist") + album_folder = build_album_directory(metadata) + + disc_num_raw = getattr(metadata, "disc_num", None) + disc_num = int(disc_num_raw) if isinstance(disc_num_raw, int) and disc_num_raw > 0 else 1 + + extension = str(ext or "").lstrip(".") + filename = build_track_filename( + { + "title": metadata.title, + "track_num": metadata.track_num, + "ext": extension, + } + ) + + return root / "Music" / album_artist / album_folder / f"Disc {disc_num}" / filename + + +def ensure_parent_dir(path: Path) -> None: + """Ensure the parent directory for a file path exists.""" + path.parent.mkdir(parents=True, exist_ok=True) diff --git a/media/validation.py b/media/validation.py new file mode 100644 index 0000000..797a05c --- /dev/null +++ b/media/validation.py @@ -0,0 +1,41 @@ +"""Media validation helpers.""" + +from __future__ import annotations + +import logging + +from media.ffprobe import get_media_duration + +logger = logging.getLogger(__name__) + + +def validate_duration(file_path: str, expected_ms: int, tolerance_seconds: float = 5.0) -> bool: + """Validate that a media file duration is within tolerance of an expected value. + + The function resolves the actual duration in seconds by calling + :func:`media.ffprobe.get_media_duration`, converts ``expected_ms`` from + milliseconds to seconds, and compares the absolute delta. 
+ + Returns: + ``True`` when ``abs(actual_seconds - expected_seconds) <= tolerance_seconds``. + ``False`` when the duration falls outside tolerance or probing fails. + + Constraints: + - ``expected_ms`` and ``tolerance_seconds`` must be non-negative. + - Any ffprobe/probe parsing error is handled non-fatally and returns ``False``. + """ + if expected_ms < 0: + logger.warning("Duration validation failed: expected_ms must be non-negative") + return False + if tolerance_seconds < 0: + logger.warning("Duration validation failed: tolerance_seconds must be non-negative") + return False + + try: + actual_duration_seconds = get_media_duration(file_path) + except Exception: + logger.exception("Failed to probe media duration for path=%s", file_path) + return False + + expected_seconds = expected_ms / 1000.0 + return abs(actual_duration_seconds - expected_seconds) <= tolerance_seconds diff --git a/metadata/__init__.py b/metadata/__init__.py index 1fe6989..2138d87 100644 --- a/metadata/__init__.py +++ b/metadata/__init__.py @@ -1,3 +1,7 @@ -from .queue import enqueue_metadata +try: + from .queue import enqueue_metadata +except ModuleNotFoundError: # pragma: no cover - optional deps may be absent in test env + def enqueue_metadata(*_args, **_kwargs): + raise RuntimeError("metadata queue dependencies are unavailable") __all__ = ["enqueue_metadata"] diff --git a/metadata/canonical.py b/metadata/canonical.py index 16bab95..bfe4d3e 100644 --- a/metadata/canonical.py +++ b/metadata/canonical.py @@ -1,7 +1,11 @@ import os +import logging + +import requests from engine.paths import DATA_DIR from metadata.canonical_cache import JsonCache +from metadata.providers.musicbrainz import MusicBrainzMetadataProvider from metadata.providers.spotify import SpotifyMetadataProvider @@ -31,6 +35,39 @@ def _spotify_credentials(config): return client_id, client_secret +def _spotify_oauth_token(config): + token = os.environ.get("SPOTIFY_OAUTH_ACCESS_TOKEN") + if token: + return str(token).strip() or None + if not isinstance(config, dict): + return None + spotify_cfg = config.get("spotify") or {} + canonical_cfg = config.get("canonical_metadata") or {} + token = spotify_cfg.get("oauth_access_token") or canonical_cfg.get("spotify_oauth_access_token") + if token: + return str(token).strip() or None + return None + + +def _validate_spotify_premium(access_token): + token = str(access_token or "").strip() + if not token: + return False + try: + response = requests.get( + "https://api.spotify.com/v1/me", + headers={"Authorization": f"Bearer {token}"}, + timeout=10, + ) + except Exception: + logging.exception("Spotify premium validation failed") + return False + if response.status_code != 200: + return False + payload = response.json() if response.content else {} + return str(payload.get("product") or "").strip().lower() == "premium" + + def _min_confidence(config, default): if not isinstance(config, dict): return default @@ -65,17 +102,38 @@ def __init__(self, *, config=None): spotify_cache = JsonCache(os.path.join(cache_dir, "spotify.json"), ttl_seconds=ttl_seconds) spotify_id, spotify_secret = _spotify_credentials(config) + spotify_oauth_token = _spotify_oauth_token(config) spotify_min = _min_confidence(config, 0.92) + mb_min = _min_confidence(config, 0.70) + + self.musicbrainz = MusicBrainzMetadataProvider(min_confidence=mb_min) + self.spotify_enabled = bool( + spotify_id + and spotify_secret + and spotify_oauth_token + and _validate_spotify_premium(spotify_oauth_token) + ) self.spotify = SpotifyMetadataProvider( 
client_id=spotify_id, client_secret=spotify_secret, + access_token=spotify_oauth_token, cache=spotify_cache, min_confidence=spotify_min, ) def resolve_track(self, artist, track, *, album=None): - return self.spotify.resolve_track(artist, track, album=album) + mb = self.musicbrainz.resolve_track(artist, track, album=album) + if mb: + return mb + if self.spotify_enabled: + return self.spotify.resolve_track(artist, track, album=album) + return None def resolve_album(self, artist, album): - return self.spotify.resolve_album(artist, album) + mb = self.musicbrainz.resolve_album(artist, album) + if mb: + return mb + if self.spotify_enabled: + return self.spotify.resolve_album(artist, album) + return None diff --git a/metadata/merge.py b/metadata/merge.py new file mode 100644 index 0000000..45cf944 --- /dev/null +++ b/metadata/merge.py @@ -0,0 +1,140 @@ +"""Metadata merge logic for Spotify, MusicBrainz, and yt-dlp sources.""" + +from __future__ import annotations + +import logging +import re +from typing import Any + +from metadata.types import CanonicalMetadata + +_LOG = logging.getLogger(__name__) +_WS_RE = re.compile(r"\s+") +_TITLE_SPLIT_RE = re.compile(r"([\s\-\(\)\[\]/:&])") +_LOWER_WORDS = {"a", "an", "and", "as", "at", "by", "for", "in", "of", "on", "or", "the", "to", "vs"} + + +def merge_metadata(spotify: dict, mb: dict, ytdlp: dict) -> CanonicalMetadata: + """Merge metadata with precedence Spotify -> MusicBrainz -> yt-dlp and normalized outputs.""" + sp = spotify or {} + mbd = mb or {} + ytd = ytdlp or {} + + def pick(field: str, extractor) -> tuple[Any, str]: + for source_name, source in (("spotify", sp), ("musicbrainz", mbd), ("ytdlp", ytd)): + value = extractor(source) + if _has_value(value): + _LOG.info("metadata_field_source field=%s source=%s", field, source_name) + return value, source_name + _LOG.info("metadata_field_source field=%s source=missing", field) + return None, "missing" + + title, _ = pick("title", lambda s: s.get("title") or s.get("track")) + artist, _ = pick("artist", lambda s: s.get("artist")) + album, _ = pick("album", lambda s: s.get("album")) + album_artist, _ = pick("album_artist", lambda s: s.get("album_artist")) + track_num, _ = pick("track_num", lambda s: s.get("track_num") or s.get("track_number")) + disc_num, _ = pick("disc_num", lambda s: s.get("disc_num") or s.get("disc_number")) + date, _ = pick("date", lambda s: s.get("date") or s.get("release_date") or s.get("year")) + genre, _ = pick("genre", lambda s: s.get("genre")) + isrc, _ = pick("isrc", lambda s: s.get("isrc")) + mbid, _ = pick( + "mbid", + lambda s: s.get("mbid") or s.get("recording_id") or s.get("musicbrainz_recording_id"), + ) + artwork, _ = pick("artwork", lambda s: s.get("artwork")) + lyrics, _ = pick("lyrics", lambda s: s.get("lyrics")) + + return CanonicalMetadata( + title=_normalize_title(title) or "Unknown Title", + artist=_normalize_string(artist) or "Unknown Artist", + album=_normalize_title(album) or "Unknown Album", + album_artist=_normalize_string(album_artist) or _normalize_string(artist) or "Unknown Artist", + track_num=_parse_positive_int(track_num, default=1), + disc_num=_parse_positive_int(disc_num, default=1), + date=_normalize_string(date) or "Unknown", + genre=_normalize_title(_genre_to_string(genre)) or "Unknown", + isrc=_normalize_string(isrc), + mbid=_normalize_string(mbid), + artwork=_coerce_artwork_bytes(artwork), + lyrics=_normalize_string(lyrics), + ) + + +def _has_value(value: Any) -> bool: + if value is None: + return False + if isinstance(value, str): + return 
bool(value.strip()) + if isinstance(value, (bytes, bytearray)): + return len(value) > 0 + if isinstance(value, list): + return len(value) > 0 + return True + + +def _normalize_string(value: Any) -> str | None: + if value is None: + return None + text = _WS_RE.sub(" ", str(value)).strip() + return text or None + + +def _normalize_title(value: Any) -> str | None: + base = _normalize_string(value) + if not base: + return None + parts = _TITLE_SPLIT_RE.split(base) + out: list[str] = [] + major_seen = False + for token in parts: + if not token: + continue + if _TITLE_SPLIT_RE.fullmatch(token): + out.append(token) + continue + lower = token.lower() + if major_seen and lower in _LOWER_WORDS: + out.append(lower) + elif token.isupper() and len(token) > 1: + out.append(token) + else: + out.append(token[:1].upper() + token[1:].lower()) + major_seen = True + return "".join(out) + + +def _parse_positive_int(value: Any, *, default: int) -> int: + if value is None: + return default + text = str(value).strip() + if not text: + return default + if "/" in text: + text = text.split("/", 1)[0].strip() + try: + parsed = int(text) + except ValueError: + return default + return parsed if parsed > 0 else default + + +def _genre_to_string(value: Any) -> str | None: + if value is None: + return None + if isinstance(value, list): + parts = [_normalize_string(v) for v in value] + cleaned = [p for p in parts if p] + return ", ".join(cleaned) if cleaned else None + return _normalize_string(value) + + +def _coerce_artwork_bytes(value: Any) -> bytes | None: + if value is None: + return None + if isinstance(value, bytes): + return value or None + if isinstance(value, bytearray): + data = bytes(value) + return data or None + return None diff --git a/metadata/naming.py b/metadata/naming.py new file mode 100644 index 0000000..2dd43fc --- /dev/null +++ b/metadata/naming.py @@ -0,0 +1,47 @@ +"""Canonical music naming helpers used by runtime path construction.""" + +from __future__ import annotations + +import re +from typing import Any + +_INVALID_FS_CHARS_RE = re.compile(r'[<>:"/\\|?*]') +_MULTISPACE_RE = re.compile(r"\s+") + + +def _get_field(metadata: Any, field: str, default: Any = None) -> Any: + if isinstance(metadata, dict): + return metadata.get(field, default) + return getattr(metadata, field, default) + + +def sanitize_component(text: Any) -> str: + """Return an OS-safe filesystem component with stable fallback.""" + sanitized = _INVALID_FS_CHARS_RE.sub("", str(text or "")) + sanitized = _MULTISPACE_RE.sub(" ", sanitized).strip() + sanitized = sanitized.rstrip(" .") + return sanitized or "Unknown" + + +def build_album_directory(metadata: Any) -> str: + """Build canonical album directory name, including year when available.""" + album = sanitize_component(_get_field(metadata, "album") or "Unknown Album") + date_value = str(_get_field(metadata, "date") or "").strip() + year = date_value[:4] if len(date_value) >= 4 and date_value[:4].isdigit() else "" + return f"{album} ({year})" if year else album + + +def build_track_filename(metadata: Any) -> str: + """Build canonical track filename with zero-padded track number.""" + title = sanitize_component(_get_field(metadata, "title") or "Unknown Title") + + track_num_raw = _get_field(metadata, "track_num", None) + track_num = int(track_num_raw) if isinstance(track_num_raw, int) else 0 + if track_num < 0: + track_num = 0 + + ext = str(_get_field(metadata, "ext") or "").lstrip(".") + filename = f"{track_num:02d} - {title}" + if ext: + return f"{filename}.{ext}" + return filename 
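
Taken together, metadata/naming.py and media/path_builder.py pin down the canonical on-disk layout. A minimal sketch of the intended behavior (sample values are illustrative, not taken from this patch):

    from metadata.naming import build_album_directory, build_track_filename

    meta = {"album": "Blue Album", "date": "1994-08-01",
            "title": "Song / Two", "track_num": 3, "ext": "flac"}
    build_album_directory(meta)  # -> "Blue Album (1994)"; year parsed from the date prefix
    build_track_filename(meta)   # -> "03 - Song Two.flac"; "/" stripped, track zero-padded
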
diff --git a/metadata/normalize.py b/metadata/normalize.py
new file mode 100644
index 0000000..d10242a
--- /dev/null
+++ b/metadata/normalize.py
@@ -0,0 +1,220 @@
+"""Normalization helpers for structured music metadata."""
+
+from __future__ import annotations
+
+import logging
+import re
+import unicodedata
+from datetime import date
+from typing import Any
+
+from metadata.types import CanonicalMetadata
+
+logger = logging.getLogger(__name__)
+
+_WHITESPACE_RE = re.compile(r"\s+")
+_YEAR_RE = re.compile(r"^(\d{4})")
+_DATE_RE = re.compile(r"^(\d{4})[-/](\d{1,2})[-/](\d{1,2})$")
+_YEAR_MONTH_RE = re.compile(r"^(\d{4})[-/](\d{1,2})$")
+_TITLE_SUFFIX_RE = re.compile(
+    r"\s*(?:\((?:official audio|official video|audio)\)|\[(?:hd)\])\s*$",
+    re.IGNORECASE,
+)
+_TOPIC_SUFFIX_RE = re.compile(r"\s*-\s*topic\s*$", re.IGNORECASE)
+_TRAILING_HYPHENS_RE = re.compile(r"(?:\s*-\s*)+$")
+_FEAT_SPLIT_RE = re.compile(r"^(?P<main>.+?)\s+(?:feat\.|ft\.)\s+(?P<feat>.+)$", re.IGNORECASE)
+_TITLE_FEAT_RE = re.compile(r"\(\s*feat\.\s*([^)]+)\)", re.IGNORECASE)
+
+
+def normalize_music_metadata(metadata: CanonicalMetadata) -> CanonicalMetadata:
+    """Return a normalized copy of ``CanonicalMetadata`` without mutating the input.
+
+    Responsibilities:
+    - Normalize all string fields to Unicode NFC.
+      This matters for media-library grouping because visually identical Unicode
+      strings can have different binary forms; NFC avoids duplicate album/artist
+      buckets caused by mixed normalization forms.
+    - Strip leading/trailing whitespace.
+    - Collapse repeated internal whitespace to single spaces.
+    - Normalize ``track_num`` and ``disc_num`` to integers.
+    - Normalize ``date`` to ``YYYY`` or ``YYYY-MM-DD`` when parseable.
+    - Ensure ``album_artist`` is non-empty by falling back to ``artist``.
+
+    The returned value is always a newly constructed ``CanonicalMetadata`` instance.
+    """
+    # NFC normalization is applied via _normalize_text for stable player grouping.
+    title = clean_title(_normalize_text(metadata.title)) or "Unknown Title"
+    artist = _normalize_text(metadata.artist) or "Unknown Artist"
+    artist, title = normalize_featured_artists(artist, title)
+    album = _normalize_text(metadata.album) or "Unknown Album"
+    # Media players group albums by album_artist; blank/variant values fragment one album.
+    album_artist_raw = _normalize_optional_text(metadata.album_artist)
+    if not album_artist_raw:
+        # Fallback to track artist so all tracks in the same release can group together.
+        album_artist = artist
+    else:
+        album_artist = album_artist_raw
+    # When artist fields include comma-separated collaborators, keep primary artist for grouping.
+    album_artist = _primary_artist(album_artist)
+    genre = _normalize_genre(metadata.genre) or "Unknown"
+    normalized_date = _normalize_release_date(metadata.date) or "Unknown"
+
+    isrc = _normalize_optional_text(metadata.isrc)
+    mbid = _normalize_optional_text(metadata.mbid)
+    lyrics = _normalize_optional_text(metadata.lyrics)
+    artwork = bytes(metadata.artwork) if metadata.artwork is not None else None
+
+    track_num = _normalize_positive_int(metadata.track_num, default=1)
+    disc_num = _normalize_positive_int(metadata.disc_num, default=1)
+
+    return CanonicalMetadata(
+        title=title,
+        artist=artist,
+        album=album,
+        album_artist=album_artist,
+        track_num=track_num,
+        disc_num=disc_num,
+        date=normalized_date,
+        genre=genre,
+        isrc=isrc,
+        mbid=mbid,
+        artwork=artwork,
+        lyrics=lyrics,
+    )
+
+
+def clean_title(title: str) -> str:
+    """Return a deterministically cleaned track title.
+
+    Cleanup rules:
+    - Remove trailing ``(Official Audio)``, ``(Official Video)``, ``(Audio)``, and ``[HD]``.
+    - Remove trailing ``- Topic``.
+    - Remove trailing hyphen artifacts.
+    - Preserve other parenthetical context such as ``(Live)``.
+    """
+    cleaned = _normalize_text(title)
+    while True:
+        updated = _TITLE_SUFFIX_RE.sub("", cleaned)
+        updated = _TOPIC_SUFFIX_RE.sub("", updated)
+        updated = _TRAILING_HYPHENS_RE.sub("", updated)
+        updated = _normalize_text(updated) if updated else ""
+        if updated == cleaned:
+            break
+        cleaned = updated
+    return cleaned
+
+
+def normalize_featured_artists(artist: str, title: str) -> tuple[str, str]:
+    """Normalize featured artist credits between artist and title fields.
+
+    If ``artist`` includes ``feat.``/``ft.`` credits, move the featured segment
+    into ``title`` as ``(feat. X)`` and keep only the main artist name in
+    ``artist``.
Existing title feat credits are preserved and not duplicated. + Matching is case-insensitive. + """ + normalized_artist = _normalize_text(artist) + normalized_title = _normalize_text(title) + + match = _FEAT_SPLIT_RE.match(normalized_artist) + if not match: + return normalized_artist, normalized_title + + main_artist = _normalize_text(match.group("main")) + featured_segment = _normalize_text(match.group("feat")) + if not featured_segment: + return main_artist, normalized_title + + existing = {_normalize_text(item).lower() for item in _TITLE_FEAT_RE.findall(normalized_title)} + if featured_segment.lower() in existing: + return main_artist, normalized_title + + return main_artist, f"{normalized_title} (feat. {featured_segment})" + + +def _normalize_text(value: str) -> str: + return _WHITESPACE_RE.sub(" ", unicodedata.normalize("NFC", value).strip()) + + +def _normalize_optional_text(value: str | None) -> str | None: + if value is None: + return None + normalized = _normalize_text(value) + return normalized or None + + +def _primary_artist(value: str) -> str: + primary = value.split(",", 1)[0] + normalized = _normalize_text(primary) + return normalized or value + + +def _normalize_genre(value: Any) -> str | None: + if value is None: + return None + + raw_parts: list[str] + if isinstance(value, list): + raw_parts = [str(part) for part in value] + else: + raw_parts = re.split(r"[;,]", str(value)) + + seen: set[str] = set() + ordered: list[str] = [] + for part in raw_parts: + normalized = _normalize_text(part) + if not normalized: + continue + key = normalized.casefold() + if key in seen: + continue + seen.add(key) + ordered.append(normalized) + + if not ordered: + return None + return ", ".join(ordered) + + +def _normalize_positive_int(value: int, *, default: int) -> int: + try: + parsed = int(value) + except (TypeError, ValueError): + return default + return parsed if parsed > 0 else default + + +def _normalize_release_date(value: str) -> str | None: + normalized = _normalize_text(value) + if not normalized: + return None + + # YYYY + if normalized.isdigit() and len(normalized) == 4: + return normalized + + # YYYY-MM -> YYYY + year_month_match = _YEAR_MONTH_RE.match(normalized) + if year_month_match: + year_s, month_s = year_month_match.groups() + month = int(month_s) + if 1 <= month <= 12: + return year_s + + # YYYY-MM-DD (or slash-separated equivalent) -> YYYY-MM-DD + match = _DATE_RE.match(normalized) + if match: + year_s, month_s, day_s = match.groups() + try: + parsed = date(int(year_s), int(month_s), int(day_s)) + except ValueError: + return _YEAR_RE.match(normalized).group(1) if _YEAR_RE.match(normalized) else None + return parsed.isoformat() + + # Invalid formats: strip to first 4 digits when present. + year_match = _YEAR_RE.match(normalized) + if year_match: + return year_match.group(1) + + # No usable year; keep the source value but surface inconsistency. 
+ logger.warning("unparseable release date; preserving original value=%s", normalized) + return normalized diff --git a/metadata/providers/artwork.py b/metadata/providers/artwork.py index 8307f87..97154ec 100644 --- a/metadata/providers/artwork.py +++ b/metadata/providers/artwork.py @@ -1,23 +1,23 @@ import io import logging -import requests from PIL import Image +from metadata.services.musicbrainz_service import get_musicbrainz_service def fetch_artwork(release_id, max_size_px=1500): if not release_id: return None - url = f"https://coverartarchive.org/release/{release_id}/front" + service = get_musicbrainz_service() try: - response = requests.get(url, timeout=10) - if response.status_code != 200: + payload = service.fetch_cover_art(release_id, timeout=10) + if not payload: return None except Exception: logging.debug("Artwork download failed for release %s", release_id) return None - content_type = response.headers.get("Content-Type", "image/jpeg") - data = response.content + content_type = payload.get("mime", "image/jpeg") + data = payload.get("data") try: image = Image.open(io.BytesIO(data)) if max_size_px: diff --git a/metadata/providers/canonical_musicbrainz.py b/metadata/providers/canonical_musicbrainz.py deleted file mode 100644 index d5092c1..0000000 --- a/metadata/providers/canonical_musicbrainz.py +++ /dev/null @@ -1,212 +0,0 @@ -import logging - -import musicbrainzngs - -from engine.search_scoring import token_overlap_score, tokenize -from metadata.providers.base import CanonicalMetadataProvider - - -_USER_AGENT_SET = False - - -def _ensure_user_agent(): - global _USER_AGENT_SET - if _USER_AGENT_SET: - return - logging.getLogger("musicbrainzngs").setLevel(logging.WARNING) - musicbrainzngs.set_useragent( - "retreivr", - "0.9.0", - "https://github.com/Retreivr/retreivr", - ) - _USER_AGENT_SET = True - - -def _release_year(value): - if not value: - return None - return str(value).split("-")[0] - - -def _score_track_match(artist, track, album, recording): - artist_name = _extract_artist(recording) - artist_score = token_overlap_score(tokenize(artist), tokenize(artist_name)) - track_score = token_overlap_score(tokenize(track), tokenize(recording.get("title"))) - if album: - album_score = token_overlap_score(tokenize(album), tokenize(_extract_album_title(recording))) - score = 0.55 * track_score + 0.35 * artist_score + 0.10 * album_score - else: - score = 0.60 * track_score + 0.40 * artist_score - return score - - -def _score_album_match(artist, album, release): - artist_name = _extract_release_artist(release) - artist_score = token_overlap_score(tokenize(artist), tokenize(artist_name)) - album_score = token_overlap_score(tokenize(album), tokenize(release.get("title"))) - return 0.6 * album_score + 0.4 * artist_score - - -def _extract_artist(rec): - credit = rec.get("artist-credit") or [] - if credit and isinstance(credit[0], dict): - artist = credit[0].get("artist", {}).get("name") - if artist: - return artist - return rec.get("artist-credit-phrase") - - -def _extract_release_artist(release): - if not release: - return None - credit = release.get("artist-credit") or [] - if credit and isinstance(credit[0], dict): - return credit[0].get("artist", {}).get("name") - return release.get("artist-credit-phrase") - - -def _extract_album_title(rec): - release_list = rec.get("release-list") or [] - if not release_list: - return None - return release_list[0].get("title") - - -def _parse_duration(value): - try: - if value is None: - return None - return int(round(int(value) / 1000)) - except 
Exception: - return None - - -def _cover_art_url(release_id): - if not release_id: - return None - return f"https://coverartarchive.org/release/{release_id}/front" - - -class MusicBrainzMetadataProvider(CanonicalMetadataProvider): - def __init__(self, *, cache=None, min_confidence=0.90): - self.cache = cache - self.min_confidence = float(min_confidence or 0.90) - - def resolve_track(self, artist, track, album=None): - if not artist or not track: - return None - cache_key = None - if self.cache: - cache_key = f"mb:track:{artist}|{track}|{album or ''}" - cached = self.cache.get(cache_key) - if cached: - return cached - _ensure_user_agent() - query = {"artist": artist, "recording": track} - if album: - query["release"] = album - try: - result = musicbrainzngs.search_recordings(limit=8, **query) - except Exception: - logging.exception("MusicBrainz search failed") - return None - recordings = result.get("recording-list") or [] - best = None - best_score = 0.0 - for rec in recordings: - score = _score_track_match(artist, track, album, rec) - if score > best_score: - best = rec - best_score = score - if not best or best_score < self.min_confidence: - return None - release_list = best.get("release-list") or [] - release = release_list[0] if release_list else {} - release_id = release.get("id") - canonical = { - "kind": "track", - "provider": "musicbrainz", - "artist": _extract_artist(best) or artist, - "album": release.get("title") or album, - "track": best.get("title") or track, - "release_year": _release_year(release.get("date")), - "duration_sec": _parse_duration(best.get("length")), - "artwork": [ - {"url": _cover_art_url(release_id), "width": None, "height": None} - ] - if release_id - else [], - "external_ids": { - "musicbrainz_recording_id": best.get("id"), - "musicbrainz_release_id": release_id, - }, - } - if self.cache and cache_key: - self.cache.set(cache_key, canonical) - return canonical - - def resolve_album(self, artist, album): - if not artist or not album: - return None - cache_key = None - if self.cache: - cache_key = f"mb:album:{artist}|{album}" - cached = self.cache.get(cache_key) - if cached: - return cached - _ensure_user_agent() - query = {"artist": artist, "release": album} - try: - result = musicbrainzngs.search_releases(limit=5, **query) - except Exception: - logging.exception("MusicBrainz album search failed") - return None - releases = result.get("release-list") or [] - best = None - best_score = 0.0 - for release in releases: - score = _score_album_match(artist, album, release) - if score > best_score: - best = release - best_score = score - if not best or best_score < self.min_confidence: - return None - release_id = best.get("id") - tracks = [] - if release_id: - try: - release_data = musicbrainzngs.get_release_by_id(release_id, includes=["recordings"]) - media = (release_data.get("release") or {}).get("medium-list") or [] - for medium in media: - for track_data in medium.get("track-list") or []: - recording = track_data.get("recording") or {} - tracks.append( - { - "title": track_data.get("title") or recording.get("title"), - "duration_sec": _parse_duration(track_data.get("length") or recording.get("length")), - "track_number": track_data.get("position") or track_data.get("number"), - "disc_number": medium.get("position"), - } - ) - except Exception: - logging.debug("MusicBrainz release lookup failed for %s", release_id) - canonical = { - "kind": "album", - "provider": "musicbrainz", - "artist": _extract_release_artist(best) or artist, - "album": best.get("title") or 
album, - "release_year": _release_year(best.get("date")), - "artwork": [ - {"url": _cover_art_url(release_id), "width": None, "height": None} - ] - if release_id - else [], - "external_ids": { - "musicbrainz_release_id": release_id, - }, - "track_count": int(best.get("track-count") or 0) or None, - "tracks": tracks, - } - if self.cache and cache_key: - self.cache.set(cache_key, canonical) - return canonical diff --git a/metadata/providers/musicbrainz.py b/metadata/providers/musicbrainz.py index e21a6e5..274e19e 100644 --- a/metadata/providers/musicbrainz.py +++ b/metadata/providers/musicbrainz.py @@ -1,36 +1,17 @@ import logging -import musicbrainzngs +from engine.search_scoring import token_overlap_score, tokenize +from metadata.providers.base import CanonicalMetadataProvider +from metadata.services.musicbrainz_service import get_musicbrainz_service -_USER_AGENT_SET = False -_RELEASE_CACHE = {} - - -def _ensure_user_agent(): - global _USER_AGENT_SET - if _USER_AGENT_SET: - return - logging.getLogger("musicbrainzngs").setLevel(logging.WARNING) - musicbrainzngs.set_useragent( - "retreivr", - "0.9.0", - "https://github.com/Retreivr/retreivr", - ) - _USER_AGENT_SET = True def search_recordings(artist, title, album=None, limit=5): if not artist or not title: return [] - _ensure_user_agent() - query = { - "artist": artist, - "recording": title, - } - if album: - query["release"] = album + service = get_musicbrainz_service() try: - result = musicbrainzngs.search_recordings(limit=limit, **query) + result = service.search_recordings(artist, title, album=album, limit=limit) except Exception: logging.exception("MusicBrainz search failed") return [] @@ -57,8 +38,8 @@ def _recording_to_candidate(rec): release = release_list[0] release_id = release.get("id") release_date = release.get("date") - if release_id and recording_id: - track_number = _find_track_number(release_id, recording_id) + # Avoid per-candidate release lookups here; defer to best-candidate resolution. 
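+    # With a search page of N candidates this keeps the cost at one MusicBrainz
+    # call instead of up to 1 + N: only the winning candidate pays for a release
+    # lookup, via _find_track_number() in resolve_track() below.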
+ track_number = None year = release_date.split("-")[0] if release_date else None return { "recording_id": recording_id, @@ -100,20 +81,18 @@ def _parse_duration(value): return None -def _find_track_number(release_id, recording_id): - if release_id in _RELEASE_CACHE: - release_data = _RELEASE_CACHE[release_id] - else: - _ensure_user_agent() +def _find_track_number(release_id, recording_id, *, release_lookup_cache=None): + cache = release_lookup_cache if isinstance(release_lookup_cache, dict) else {} + release_data = cache.get(release_id) + service = get_musicbrainz_service() + if release_data is None: try: - release_data = musicbrainzngs.get_release_by_id( - release_id, - includes=["recordings"], - ) - _RELEASE_CACHE[release_id] = release_data + release_data = service.get_release(release_id, includes=["recordings"]) except Exception: logging.debug("MusicBrainz release lookup failed for %s", release_id) return None + if cache is not None: + cache[release_id] = release_data media = (release_data.get("release") or {}).get("medium-list") or [] for medium in media: tracks = medium.get("track-list") or [] @@ -122,3 +101,114 @@ def _find_track_number(release_id, recording_id): if recording.get("id") == recording_id: return track.get("position") or track.get("number") return None + + +def _year(value): + if not value: + return None + return str(value).split("-")[0] + + +def _score_track_match(artist, track, album, candidate): + artist_score = token_overlap_score(tokenize(artist), tokenize(candidate.get("artist"))) + track_score = token_overlap_score(tokenize(track), tokenize(candidate.get("title"))) + if album: + album_score = token_overlap_score(tokenize(album), tokenize(candidate.get("album"))) + return (0.55 * track_score) + (0.35 * artist_score) + (0.10 * album_score) + return (0.60 * track_score) + (0.40 * artist_score) + + +def _score_album_match(artist, album, candidate): + artist_score = token_overlap_score(tokenize(artist), tokenize(candidate.get("artist_credit"))) + album_score = token_overlap_score(tokenize(album), tokenize(candidate.get("title"))) + return (0.60 * album_score) + (0.40 * artist_score) + + +class MusicBrainzMetadataProvider(CanonicalMetadataProvider): + def __init__(self, *, min_confidence=0.70): + self.min_confidence = float(min_confidence or 0.70) + + def resolve_track(self, artist, track, *, album=None): + if not artist or not track: + return None + candidates = search_recordings(artist, track, album=album, limit=8) + best_item = None + best_score = 0.0 + for item in candidates: + score = _score_track_match(artist, track, album, item) + if score > best_score: + best_score = score + best_item = item + if not best_item or best_score < self.min_confidence: + return None + track_number = best_item.get("track_number") + if not track_number and best_item.get("release_id") and best_item.get("recording_id"): + track_number = _find_track_number( + best_item.get("release_id"), + best_item.get("recording_id"), + release_lookup_cache={}, + ) + return { + "kind": "track", + "provider": "musicbrainz", + "artist": best_item.get("artist") or artist, + "album": best_item.get("album") or album, + "track": best_item.get("title") or track, + "release_year": _year(best_item.get("year")), + "album_type": None, + "duration_sec": best_item.get("duration"), + "artwork": [], + "external_ids": { + "musicbrainz_recording_id": best_item.get("recording_id"), + "musicbrainz_release_id": best_item.get("release_id"), + "isrc": None, + }, + "track_number": track_number, + "disc_number": None, + 
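+            # A recording search hit carries no disc or album track totals;
+            # album-level counts come from resolve_album() when callers need them.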
"album_track_count": None, + } + + def resolve_album(self, artist, album): + if not artist or not album: + return None + service = get_musicbrainz_service() + candidates = service.search_release_groups(f"{artist} {album}", limit=5) + best_item = None + best_score = 0.0 + for item in candidates: + score = _score_album_match(artist, album, item) + if score > best_score: + best_score = score + best_item = item + if not best_item or best_score < self.min_confidence: + return None + + release_group_id = best_item.get("release_group_id") + selection = service.pick_best_release_with_reason(release_group_id) + release_id = selection.get("release_id") if isinstance(selection, dict) else None + tracks_payload = service.fetch_release_tracks(release_id) if release_id else [] + tracks = [] + for item in tracks_payload: + tracks.append( + { + "title": item.get("title"), + "duration_sec": int((item.get("duration_ms") or 0) / 1000) if item.get("duration_ms") else None, + "track_number": item.get("track_number"), + "disc_number": item.get("disc_number"), + } + ) + return { + "kind": "album", + "provider": "musicbrainz", + "artist": best_item.get("artist_credit") or artist, + "album": best_item.get("title") or album, + "release_year": _year(best_item.get("first_release_date")), + "album_type": best_item.get("primary_type"), + "artwork": [], + "external_ids": { + "musicbrainz_release_group_id": release_group_id, + "musicbrainz_release_id": release_id, + }, + "track_count": len(tracks) if tracks else None, + "tracks": tracks, + } diff --git a/metadata/providers/spotify.py b/metadata/providers/spotify.py index 525bf5e..1a3111f 100644 --- a/metadata/providers/spotify.py +++ b/metadata/providers/spotify.py @@ -42,18 +42,21 @@ def _score_album_match(artist, album, candidate): class SpotifyMetadataProvider(CanonicalMetadataProvider): - def __init__(self, *, client_id, client_secret, cache=None, min_confidence=0.92): + def __init__(self, *, client_id, client_secret, access_token=None, cache=None, min_confidence=0.92): self.client_id = client_id self.client_secret = client_secret + self.access_token = (access_token or "").strip() or None self.cache = cache self.min_confidence = float(min_confidence or 0.92) self._token = None self._token_expires_at = 0 def _has_credentials(self): - return bool(self.client_id and self.client_secret) + return bool(self.access_token or (self.client_id and self.client_secret)) def _get_token(self): + if self.access_token: + return self.access_token if not self._has_credentials(): return None now = time.time() diff --git a/metadata/queue.py b/metadata/queue.py index 417fce2..524cbaa 100644 --- a/metadata/queue.py +++ b/metadata/queue.py @@ -52,6 +52,8 @@ def enqueue_metadata(file_path, meta, config): with _LOCK: global _WORKER if _WORKER is None or not _WORKER.is_alive(): + # TODO(metadata/queue.py::enqueue_metadata): migrate MetadataWorker lifecycle into the + # unified engine download/metadata execution path (engine.job_queue postprocessing hook). 
_WORKER = MetadataWorker(_QUEUE) _WORKER.start() logging.info("Music metadata worker started") diff --git a/metadata/services/__init__.py b/metadata/services/__init__.py new file mode 100644 index 0000000..c860484 --- /dev/null +++ b/metadata/services/__init__.py @@ -0,0 +1,3 @@ +from metadata.services.musicbrainz_service import MusicBrainzService, get_musicbrainz_service + +__all__ = ["MusicBrainzService", "get_musicbrainz_service"] diff --git a/metadata/services/musicbrainz_service.py b/metadata/services/musicbrainz_service.py new file mode 100644 index 0000000..f7cde4a --- /dev/null +++ b/metadata/services/musicbrainz_service.py @@ -0,0 +1,592 @@ +import logging +import os +import re +import threading +import time +from collections import OrderedDict +from datetime import datetime + +import musicbrainzngs +import requests + + +logger = logging.getLogger(__name__) +MUSICBRAINZ_USER_AGENT = os.getenv( + "MUSICBRAINZ_USER_AGENT", + "Retreivr/1.0 (+https://github.com/retreivr/retreivr)", +) + +_DEFAULT_MAX_CACHE_ENTRIES = 512 +_DEFAULT_CACHE_TTL_SECONDS = 6 * 60 * 60 +_DEFAULT_COVER_CACHE_TTL_SECONDS = 24 * 60 * 60 +_DEFAULT_MIN_INTERVAL_SECONDS = 1.0 +_SEARCH_TTL_SECONDS = 24 * 60 * 60 +_RELEASE_GROUP_TTL_SECONDS = 24 * 60 * 60 +_RELEASE_TRACKS_TTL_SECONDS = 7 * 24 * 60 * 60 +_NOISE_WORDS = { + "album", + "full", + "official", + "audio", + "music", + "track", + "single", + "version", + "deluxe", + "remastered", + "bonus", +} + + +class _TTLCache: + def __init__(self, *, max_entries=_DEFAULT_MAX_CACHE_ENTRIES, ttl_seconds=_DEFAULT_CACHE_TTL_SECONDS): + self.max_entries = int(max_entries) + self.ttl_seconds = int(ttl_seconds) + self._lock = threading.Lock() + self._entries = OrderedDict() + + def get(self, key): + now = time.time() + with self._lock: + value = self._entries.get(key) + if not value: + return None + expires_at, payload = value + if expires_at < now: + self._entries.pop(key, None) + return None + self._entries.move_to_end(key) + return payload + + def set(self, key, payload, *, ttl_seconds=None): + ttl = self.ttl_seconds if ttl_seconds is None else max(1, int(ttl_seconds)) + expires_at = time.time() + ttl + with self._lock: + self._entries[key] = (expires_at, payload) + self._entries.move_to_end(key) + while len(self._entries) > self.max_entries: + self._entries.popitem(last=False) + + +class MusicBrainzService: + def __init__(self, *, debug=None): + self._init_lock = threading.Lock() + self._initialized = False + self._cache = _TTLCache() + self._cover_cache = _TTLCache(ttl_seconds=_DEFAULT_COVER_CACHE_TTL_SECONDS) + self._request_lock = threading.Lock() + self._last_request_ts = 0.0 + if debug is None: + env_debug = str(os.environ.get("RETREIVR_MUSICBRAINZ_DEBUG", "")).strip().lower() + self._debug = env_debug in {"1", "true", "yes", "on"} + else: + self._debug = bool(debug) + self._metrics_lock = threading.Lock() + self._metrics = { + "total_requests": 0, + "cache_hits": 0, + "cache_misses": 0, + "retries": 0, + "cover_art_requests": 0, + "cover_art_failures": 0, + } + + def _debug_log(self, message, *args): + if self._debug: + logger.debug(message, *args) + + def _inc_metric(self, key, amount=1): + with self._metrics_lock: + self._metrics[key] = int(self._metrics.get(key, 0)) + int(amount) + + def _respect_rate_limit(self): + with self._request_lock: + now = time.monotonic() + wait_for = _DEFAULT_MIN_INTERVAL_SECONDS - (now - self._last_request_ts) + if wait_for > 0: + self._debug_log("[MUSICBRAINZ] rate-limit sleep %.3fs", wait_for) + time.sleep(wait_for) + 
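+            # Stamping time.monotonic() after any sleep, while still holding
+            # _request_lock, keeps consecutive MusicBrainz requests at least
+            # _DEFAULT_MIN_INTERVAL_SECONDS apart across worker threads.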
self._last_request_ts = time.monotonic() + + def _ensure_initialized(self): + if self._initialized: + return + with self._init_lock: + if self._initialized: + return + logging.getLogger("musicbrainzngs").setLevel(logging.WARNING) + musicbrainzngs.set_useragent( + "retreivr", + "1.0", + "https://github.com/retreivr/retreivr", + ) + if hasattr(musicbrainzngs, "set_rate_limit"): + try: + musicbrainzngs.set_rate_limit(1.0, 1) + except TypeError: + try: + musicbrainzngs.set_rate_limit(limit_or_interval=1.0, new_requests=1) + except Exception: + pass + except Exception: + pass + self._initialized = True + + def _safe_int(self, value, default=0): + try: + return int(value) + except (TypeError, ValueError): + return default + + def _tokenize(self, text): + return [tok for tok in re.split(r"[^a-z0-9]+", (text or "").lower()) if tok] + + def _remove_noise_tokens(self, tokens): + return [tok for tok in tokens if tok not in _NOISE_WORDS] + + def _split_artist_album(self, query): + text = str(query or "").strip() + if not text: + return "", "" + lowered = text.lower() + for sep in (" - ", " – ", " — ", " by ", " : "): + idx = lowered.find(sep) + if idx > 0: + left = text[:idx].strip() + right = text[idx + len(sep) :].strip() + if left and right: + return left, right + raw_tokens = [tok for tok in re.split(r"\s+", text) if tok] + if len(raw_tokens) < 3: + return text, text + split_at = max(1, len(raw_tokens) // 2) + artist = " ".join(raw_tokens[:split_at]).strip() + album = " ".join(raw_tokens[split_at:]).strip() + return artist or text, album or text + + def _lucene_escape(self, text): + return str(text or "").replace("\\", "\\\\").replace('"', '\\"') + + def _artist_credit_text(self, artist_credit): + if not isinstance(artist_credit, list): + return "" + parts = [] + for part in artist_credit: + if isinstance(part, str): + parts.append(part) + continue + if isinstance(part, dict): + name = part.get("name") + if isinstance(name, str) and name.strip(): + parts.append(name.strip()) + join = part.get("joinphrase") + if isinstance(join, str) and join: + parts.append(join) + return "".join(parts).strip() + + def _token_overlap(self, query_tokens, text): + if not query_tokens or not text: + return 0.0 + a = set(query_tokens) + b = set(self._tokenize(text)) + if not a: + return 0.0 + return len(a & b) / len(a) + + def _parse_date(self, value): + text = str(value or "").strip() + if not text: + return None + for fmt in ("%Y-%m-%d", "%Y-%m", "%Y"): + try: + return datetime.strptime(text, fmt) + except ValueError: + continue + return None + + def _call_with_retry(self, fn, *, attempts=3, base_delay=0.3): + self._ensure_initialized() + last_error = None + for attempt in range(1, attempts + 1): + try: + self._respect_rate_limit() + self._inc_metric("total_requests") + return fn() + except Exception as exc: + last_error = exc + if attempt >= attempts: + break + self._inc_metric("retries") + delay = base_delay * (2 ** (attempt - 1)) + self._debug_log("[MUSICBRAINZ] retry attempt=%s delay=%.3fs error=%s", attempt, delay, exc) + time.sleep(base_delay * (2 ** (attempt - 1))) + if last_error: + raise last_error + return None + + def get_metrics(self): + with self._metrics_lock: + return dict(self._metrics) + + def cover_art_url(self, release_id): + rid = str(release_id or "").strip() + if not rid: + return None + return f"https://coverartarchive.org/release/{rid}/front" + + def search_recordings(self, artist, title, *, album=None, limit=5): + key = f"search_recordings:{artist}|{title}|{album or ''}|{int(limit or 5)}" + 
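+        # Cache-aside pattern shared by every lookup below. _TTLCache above is
+        # LRU + TTL; a minimal sketch of its contract (hypothetical values):
+        #     cache = _TTLCache(max_entries=2, ttl_seconds=60)
+        #     cache.set("a", 1); cache.set("b", 2)
+        #     cache.get("a")     # -> 1, and "a" becomes most recently used
+        #     cache.set("c", 3)  # evicts "b" (least recently used), keeps "a"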
cached = self._cache.get(key) + if cached is not None: + self._inc_metric("cache_hits") + self._debug_log("[MUSICBRAINZ] cache hit key=%s", key) + return cached + self._inc_metric("cache_misses") + self._debug_log("[MUSICBRAINZ] cache miss key=%s", key) + + query = {"artist": artist, "recording": title} + if album: + query["release"] = album + + payload = self._call_with_retry( + lambda: musicbrainzngs.search_recordings(limit=int(limit or 5), **query) + ) + self._cache.set(key, payload) + return payload + + def search_releases(self, artist, album, *, limit=5): + key = f"search_releases:{artist}|{album}|{int(limit or 5)}" + cached = self._cache.get(key) + if cached is not None: + self._inc_metric("cache_hits") + self._debug_log("[MUSICBRAINZ] cache hit key=%s", key) + return cached + self._inc_metric("cache_misses") + self._debug_log("[MUSICBRAINZ] cache miss key=%s", key) + + query = {"artist": artist, "release": album} + payload = self._call_with_retry( + lambda: musicbrainzngs.search_releases(limit=int(limit or 5), **query) + ) + self._cache.set(key, payload) + return payload + + def get_release(self, release_id, *, includes=None): + rid = str(release_id or "").strip() + if not rid: + return None + includes_tuple = tuple(includes or ()) + key = f"get_release:{rid}|{','.join(includes_tuple)}" + cached = self._cache.get(key) + if cached is not None: + self._inc_metric("cache_hits") + self._debug_log("[MUSICBRAINZ] cache hit key=%s", key) + return cached + self._inc_metric("cache_misses") + self._debug_log("[MUSICBRAINZ] cache miss key=%s", key) + + payload = self._call_with_retry( + lambda: musicbrainzngs.get_release_by_id( + rid, + includes=list(includes_tuple) if includes_tuple else [], + ) + ) + self._cache.set(key, payload) + return payload + + def get_recording(self, recording_id, *, includes=None): + rid = str(recording_id or "").strip() + if not rid: + return None + includes_tuple = tuple(includes or ()) + key = f"get_recording:{rid}|{','.join(includes_tuple)}" + cached = self._cache.get(key) + if cached is not None: + self._inc_metric("cache_hits") + self._debug_log("[MUSICBRAINZ] cache hit key=%s", key) + return cached + self._inc_metric("cache_misses") + self._debug_log("[MUSICBRAINZ] cache miss key=%s", key) + + payload = self._call_with_retry( + lambda: musicbrainzngs.get_recording_by_id( + rid, + includes=list(includes_tuple) if includes_tuple else [], + ) + ) + self._cache.set(key, payload) + return payload + + def fetch_cover_art(self, release_id, *, timeout=10): + self._inc_metric("cover_art_requests") + url = self.cover_art_url(release_id) + if not url: + self._inc_metric("cover_art_failures") + return None + + cached = self._cover_cache.get(url) + if cached is not None: + self._inc_metric("cache_hits") + self._debug_log("[MUSICBRAINZ] cache hit key=%s", url) + return cached + self._inc_metric("cache_misses") + self._debug_log("[MUSICBRAINZ] cache miss key=%s", url) + self._debug_log("[MUSICBRAINZ] cover art fetch url=%s", url) + + def _request(): + return requests.get(url, timeout=timeout) + + resp = self._call_with_retry(_request, attempts=3, base_delay=0.4) + if not resp or resp.status_code != 200: + self._inc_metric("cover_art_failures") + return None + payload = { + "url": url, + "data": resp.content, + "mime": resp.headers.get("Content-Type", "image/jpeg"), + } + self._cover_cache.set(url, payload) + return payload + + def search_release_groups(self, query, *, limit=10): + cleaned_query = str(query or "").strip() + if not cleaned_query: + return [] + query_tokens = 
self._tokenize(cleaned_query) + clean_tokens = self._remove_noise_tokens(query_tokens) or query_tokens + normalized_query = " ".join(clean_tokens).strip() or cleaned_query + artist_fragment, album_fragment = self._split_artist_album(normalized_query) + lucene_parts = ['primarytype:"album"'] + if artist_fragment: + lucene_parts.append(f'artist:"{self._lucene_escape(artist_fragment)}"') + if album_fragment: + lucene_parts.append(f'releasegroup:"{self._lucene_escape(album_fragment)}"') + else: + lucene_parts.append(f'"{self._lucene_escape(normalized_query)}"') + + cache_key = f"album_search:{cleaned_query}:{int(limit or 10)}" + cached = self._cache.get(cache_key) + if cached is not None: + self._inc_metric("cache_hits") + self._debug_log("[MUSICBRAINZ] cache hit key=%s", cache_key) + return cached + self._inc_metric("cache_misses") + self._debug_log("[MUSICBRAINZ] cache miss key=%s", cache_key) + + payload = self._call_with_retry( + lambda: musicbrainzngs.search_release_groups( + query=" AND ".join(lucene_parts), + limit=max(10, min(int(limit or 10), 100)), + ) + ) + groups = payload.get("release-group-list", []) if isinstance(payload, dict) else [] + allow_live = "live" in clean_tokens + allow_compilation = "compilation" in clean_tokens + candidates = [] + for group in groups: + if not isinstance(group, dict): + continue + secondary_types_raw = group.get("secondary-type-list") or [] + secondary_types = [str(value) for value in secondary_types_raw if isinstance(value, str)] + secondary_lower = {value.lower() for value in secondary_types} + artist_credit = self._artist_credit_text(group.get("artist-credit")) + base_score = self._safe_int(group.get("ext:score"), default=0) + overlap = self._token_overlap(clean_tokens, artist_credit) + adjusted = base_score + int(overlap * 30) + if overlap >= 0.5: + adjusted += 10 + if "live" in secondary_lower and not allow_live: + adjusted -= 25 + if "compilation" in secondary_lower and not allow_compilation: + adjusted -= 25 + if "soundtrack" in secondary_lower: + adjusted -= 20 + if "remix" in secondary_lower: + adjusted -= 20 + primary_type = str(group.get("primary-type") or "") + if primary_type and primary_type.lower() != "album": + adjusted -= 15 + adjusted = max(0, min(100, adjusted)) + candidates.append( + { + "release_group_id": group.get("id"), + "title": group.get("title"), + "artist_credit": artist_credit, + "first_release_date": group.get("first-release-date"), + "primary_type": group.get("primary-type"), + "secondary_types": secondary_types, + "score": int(adjusted), + "track_count": None, + } + ) + candidates.sort(key=lambda c: c.get("score", 0), reverse=True) + candidates = candidates[: max(1, min(int(limit or 10), 50))] + top_title = candidates[0]["title"] if candidates else "-" + top_score = candidates[0]["score"] if candidates else 0 + logger.info("[MUSIC] candidates_count=%s top=%s (%s) query=%s", len(candidates), top_title, top_score, query) + self._cache.set(cache_key, candidates, ttl_seconds=_SEARCH_TTL_SECONDS) + return candidates + + def pick_best_release_with_reason(self, release_group_id, *, prefer_country=None): + rgid = str(release_group_id or "").strip() + if not rgid: + return {"release_id": None, "reason": "missing_release_group_id"} + cache_key = f"release_group:{rgid}" + cached = self._cache.get(cache_key) + if cached is not None: + self._inc_metric("cache_hits") + self._debug_log("[MUSICBRAINZ] cache hit key=%s", cache_key) + releases = cached + else: + self._inc_metric("cache_misses") + self._debug_log("[MUSICBRAINZ] cache 
miss key=%s", cache_key) + payload = self._call_with_retry( + lambda: musicbrainzngs.search_releases( + releasegroup=rgid, + limit=100, + ) + ) + releases = payload.get("release-list", []) if isinstance(payload, dict) else [] + self._cache.set(cache_key, releases, ttl_seconds=_RELEASE_GROUP_TTL_SECONDS) + if not releases: + return {"release_id": None, "reason": "no_releases"} + parsed_dates = [(self._parse_date(r.get("date")), r) for r in releases if isinstance(r, dict)] + date_values = [d for d, _ in parsed_dates if d is not None] + earliest = min(date_values) if date_values else None + preferred_country = (prefer_country or "").strip().upper() or None + ranked = [] + for release in releases: + if not isinstance(release, dict): + continue + score = 0.0 + reasons = [] + status = str(release.get("status") or "").strip().lower() + if status == "official": + score += 40 + reasons.append("official") + elif status: + reasons.append(f"status:{status}") + release_date = self._parse_date(release.get("date")) + if release_date and earliest: + delta_days = max(0, (release_date - earliest).days) + score += max(0.0, 25.0 - (delta_days / 365.0)) + if delta_days == 0: + reasons.append("earliest") + country = str(release.get("country") or "").strip().upper() + if preferred_country and country == preferred_country: + score += 10 + reasons.append(f"country:{country}") + track_count = self._safe_int(release.get("track-count"), default=0) + if track_count > 0: + score += 1 + ranked.append((score, release, ",".join(reasons) or "fallback")) + if not ranked: + return {"release_id": None, "reason": "no_ranked_release"} + ranked.sort(key=lambda item: item[0], reverse=True) + best_score, best_release, reason = ranked[0] + return { + "release_id": best_release.get("id"), + "reason": f"{reason},score={best_score:.2f}", + "release": best_release, + } + + def pick_best_release(self, release_group_id): + return self.pick_best_release_with_reason(release_group_id).get("release_id") + + def fetch_release_tracks(self, release_id): + rid = str(release_id or "").strip() + if not rid: + return [] + cache_key = f"release_tracks:{rid}" + cached = self._cache.get(cache_key) + if cached is not None: + self._inc_metric("cache_hits") + self._debug_log("[MUSICBRAINZ] cache hit key=%s", cache_key) + return cached + self._inc_metric("cache_misses") + self._debug_log("[MUSICBRAINZ] cache miss key=%s", cache_key) + payload = self._call_with_retry( + lambda: musicbrainzngs.get_release_by_id( + rid, + includes=["recordings", "artist-credits"], + ) + ) + release_payload = payload.get("release", {}) if isinstance(payload, dict) else {} + media = release_payload.get("medium-list", []) if isinstance(release_payload, dict) else [] + artist_credit = self._artist_credit_text(release_payload.get("artist-credit")) + album_title = release_payload.get("title") + release_date = release_payload.get("date") + tracks = [] + for disc in media: + if not isinstance(disc, dict): + continue + disc_number = self._safe_int(disc.get("position"), default=0) or None + for track in disc.get("track-list", []) or []: + if not isinstance(track, dict): + continue + recording = track.get("recording") or {} + track_artist = self._artist_credit_text(recording.get("artist-credit")) or artist_credit + tracks.append( + { + "title": recording.get("title") or track.get("title"), + "track_number": self._safe_int(track.get("position"), default=0) or None, + "disc_number": disc_number, + "artist": track_artist, + "album": album_title, + "release_date": release_date, + 
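+                        # MusicBrainz reports recording "length" in milliseconds;
+                        # consumers divide by 1000 (see resolve_album() in the
+                        # provider above).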
"duration_ms": self._safe_int(recording.get("length"), default=0) or None, + "artwork_url": None, + } + ) + self._cache.set(cache_key, tracks, ttl_seconds=_RELEASE_TRACKS_TTL_SECONDS) + return tracks + + def fetch_release_group_cover_art_url(self, release_group_id, *, timeout=8): + rgid = str(release_group_id or "").strip() + if not rgid: + return None + key = f"cover_art_release_group:{rgid}" + cached = self._cover_cache.get(key) + if cached is not None: + self._inc_metric("cache_hits") + self._debug_log("[MUSICBRAINZ] cache hit key=%s", key) + return cached + self._inc_metric("cache_misses") + self._debug_log("[MUSICBRAINZ] cache miss key=%s", key) + self._inc_metric("cover_art_requests") + + def _request(): + self._debug_log("[MUSICBRAINZ] cover art fetch release_group=%s", rgid) + return requests.get( + f"https://coverartarchive.org/release-group/{rgid}", + timeout=timeout, + headers={"User-Agent": MUSICBRAINZ_USER_AGENT}, + ) + + resp = self._call_with_retry(_request, attempts=3, base_delay=0.4) + if not resp or resp.status_code != 200: + self._inc_metric("cover_art_failures") + return None + payload = resp.json() if resp.content else {} + images = payload.get("images", []) if isinstance(payload, dict) else [] + cover_url = None + if images: + first = images[0] if isinstance(images[0], dict) else {} + thumbs = first.get("thumbnails", {}) if isinstance(first.get("thumbnails"), dict) else {} + cover_url = thumbs.get("small") or thumbs.get("250") or first.get("image") + self._cover_cache.set(key, cover_url, ttl_seconds=_DEFAULT_COVER_CACHE_TTL_SECONDS) + return cover_url + + +_MUSICBRAINZ_SERVICE = None +_MUSICBRAINZ_SERVICE_LOCK = threading.Lock() + + +def get_musicbrainz_service(): + global _MUSICBRAINZ_SERVICE + if _MUSICBRAINZ_SERVICE is not None: + return _MUSICBRAINZ_SERVICE + with _MUSICBRAINZ_SERVICE_LOCK: + if _MUSICBRAINZ_SERVICE is None: + _MUSICBRAINZ_SERVICE = MusicBrainzService() + return _MUSICBRAINZ_SERVICE diff --git a/metadata/tagger.py b/metadata/tagger.py index 708fd85..2c6b99b 100644 --- a/metadata/tagger.py +++ b/metadata/tagger.py @@ -1,9 +1,18 @@ import logging import os -from mutagen import File as MutagenFile -from mutagen.id3 import APIC, ID3, TCON, TDRC, TIT2, TPE1, TPE2, TALB, TRCK, TXXX, USLT -from mutagen.mp4 import MP4, MP4Cover +try: + from mutagen import File as MutagenFile +except ImportError: # pragma: no cover - optional dependency in tests + MutagenFile = None +try: + from mutagen.id3 import APIC, ID3, TCON, TDRC, TIT2, TPE1, TPE2, TALB, TRCK, TXXX, USLT +except ImportError: # pragma: no cover - optional dependency in tests + APIC = ID3 = TCON = TDRC = TIT2 = TPE1 = TPE2 = TALB = TRCK = TXXX = USLT = None +try: + from mutagen.mp4 import MP4, MP4Cover +except ImportError: # pragma: no cover - optional dependency in tests + MP4 = MP4Cover = None def apply_tags(file_path, tags, artwork, *, source_title=None, allow_overwrite=False, dry_run=False): @@ -21,6 +30,8 @@ def apply_tags(file_path, tags, artwork, *, source_title=None, allow_overwrite=F def _apply_id3_tags(file_path, tags, artwork, source_title, allow_overwrite): + if ID3 is None: + raise RuntimeError("mutagen id3 support is required for MP3 tagging") try: audio = ID3(file_path) except Exception: @@ -43,27 +54,35 @@ def _apply_id3_tags(file_path, tags, artwork, source_title, allow_overwrite): if allow_overwrite: audio.delall("USLT") if allow_overwrite or not audio.getall("USLT"): - audio.add(USLT(encoding=3, lang="eng", desc="Lyrics", text=str(lyrics))) - changed = True + try: + 
audio.add(USLT(encoding=3, lang="eng", desc="Lyrics", text=str(lyrics))) + changed = True + except Exception: + logging.warning("Failed to write lyrics tag for %s", file_path, exc_info=True) if artwork and (allow_overwrite or not audio.getall("APIC")): if allow_overwrite: for frame in audio.getall("APIC"): audio.delall("APIC") - changed = True - audio.add( - APIC( - encoding=3, - mime=artwork.get("mime") or "image/jpeg", - type=3, - desc="cover", - data=artwork.get("data"), + try: + audio.add( + APIC( + encoding=3, + mime=artwork.get("mime") or "image/jpeg", + type=3, + desc="cover", + data=artwork.get("data"), + ) ) - ) + changed = True + except Exception: + logging.warning("Failed to embed artwork for %s", file_path, exc_info=True) if changed: audio.save(file_path) def _apply_mp4_tags(file_path, tags, artwork, source_title, allow_overwrite): + if MP4 is None: + raise RuntimeError("mutagen mp4 support is required for MP4 tagging") audio = MP4(file_path) mp4_tags = audio.tags or {} changed = False @@ -101,6 +120,8 @@ def _apply_mp4_tags(file_path, tags, artwork, source_title, allow_overwrite): def _apply_generic_tags(file_path, tags, artwork, source_title, allow_overwrite): + if MutagenFile is None: + raise RuntimeError("mutagen is required for generic tagging") audio = MutagenFile(file_path) if not audio: logging.warning("Music metadata tagging skipped: unsupported file %s", file_path) diff --git a/metadata/tagging_service.py b/metadata/tagging_service.py new file mode 100644 index 0000000..2a65925 --- /dev/null +++ b/metadata/tagging_service.py @@ -0,0 +1,62 @@ +"""Canonical tagging service for runtime music metadata writes.""" + +from __future__ import annotations + +from metadata.tagger import apply_tags as _apply_tags +from metadata.types import CanonicalMetadata + + +def apply_tags( + file_path, + tags, + artwork, + *, + source_title=None, + allow_overwrite=False, + dry_run=False, +): + """Pass-through for dict-based tag payloads used by metadata worker flows.""" + return _apply_tags( + file_path, + tags, + artwork, + source_title=source_title, + allow_overwrite=allow_overwrite, + dry_run=dry_run, + ) + + +def tag_file( + path: str, + metadata: CanonicalMetadata, + *, + source_title: str | None = None, + allow_overwrite: bool = True, + dry_run: bool = False, +) -> None: + """Apply canonical metadata tags to a media file using the unified tagger backend.""" + tags = { + "artist": metadata.artist, + "album": metadata.album, + "title": metadata.title, + "album_artist": metadata.album_artist, + "track_number": metadata.track_num, + "year": metadata.date, + "genre": metadata.genre, + "recording_id": metadata.mbid, + "lyrics": metadata.lyrics, + } + artwork = None + if metadata.artwork: + artwork = { + "data": bytes(metadata.artwork), + "mime": "image/jpeg", + } + _apply_tags( + path, + tags, + artwork, + source_title=source_title, + allow_overwrite=allow_overwrite, + dry_run=dry_run, + ) diff --git a/metadata/types.py b/metadata/types.py new file mode 100644 index 0000000..d40e2d7 --- /dev/null +++ b/metadata/types.py @@ -0,0 +1,117 @@ +"""Structured metadata types for music processing.""" + +from __future__ import annotations + + +class MusicMetadata: + """Validated, structured music metadata container.""" + + title: str + artist: str + album: str + album_artist: str + track_num: int + disc_num: int + date: str + genre: str + isrc: str | None + mbid: str | None + artwork: bytes | None + lyrics: str | None + + def __init__( + self, + *, + title: str, + artist: str, + album: str, + 
album_artist: str, + track_num: int, + disc_num: int, + date: str, + genre: str, + isrc: str | None = None, + mbid: str | None = None, + artwork: bytes | None = None, + lyrics: str | None = None, + ) -> None: + """Initialize and validate metadata values.""" + self.title = self._require_non_empty_str("title", title) + self.artist = self._require_non_empty_str("artist", artist) + self.album = self._require_non_empty_str("album", album) + self.album_artist = self._require_non_empty_str("album_artist", album_artist) + self.track_num = self._require_positive_int("track_num", track_num) + self.disc_num = self._require_positive_int("disc_num", disc_num) + self.date = self._require_non_empty_str("date", date) + self.genre = self._require_non_empty_str("genre", genre) + self.isrc = self._optional_str("isrc", isrc) + self.mbid = self._optional_str("mbid", mbid) + self.artwork = self._optional_bytes("artwork", artwork) + self.lyrics = self._optional_str("lyrics", lyrics) + + @staticmethod + def _require_non_empty_str(field: str, value: str) -> str: + if not isinstance(value, str): + raise TypeError(f"{field} must be a string") + cleaned = value.strip() + if not cleaned: + raise ValueError(f"{field} must be a non-empty string") + return cleaned + + @staticmethod + def _require_positive_int(field: str, value: int) -> int: + if not isinstance(value, int): + raise TypeError(f"{field} must be an integer") + if value <= 0: + raise ValueError(f"{field} must be > 0") + return value + + @staticmethod + def _optional_str(field: str, value: str | None) -> str | None: + if value is None: + return None + if not isinstance(value, str): + raise TypeError(f"{field} must be a string or None") + cleaned = value.strip() + return cleaned or None + + @staticmethod + def _optional_bytes(field: str, value: bytes | None) -> bytes | None: + if value is None: + return None + if not isinstance(value, (bytes, bytearray)): + raise TypeError(f"{field} must be bytes or None") + return bytes(value) + + def __repr__(self) -> str: + """Return a concise debug representation of this metadata.""" + return ( + "MusicMetadata(" + f"title={self.title!r}, artist={self.artist!r}, album={self.album!r}, " + f"album_artist={self.album_artist!r}, track_num={self.track_num!r}, " + f"disc_num={self.disc_num!r}, date={self.date!r}, genre={self.genre!r}, " + f"isrc={self.isrc!r}, mbid={self.mbid!r}, " + f"artwork={'' if self.artwork is not None else None}, " + f"lyrics={self.lyrics!r})" + ) + + +class CanonicalMetadata(MusicMetadata): + """Canonical structured metadata model used across runtime pipelines.""" + + def __repr__(self) -> str: + return ( + "CanonicalMetadata(" + f"title={self.title!r}, artist={self.artist!r}, album={self.album!r}, " + f"album_artist={self.album_artist!r}, track_num={self.track_num!r}, " + f"disc_num={self.disc_num!r}, date={self.date!r}, genre={self.genre!r}, " + f"isrc={self.isrc!r}, mbid={self.mbid!r}, " + f"artwork={'' if self.artwork is not None else None}, " + f"lyrics={self.lyrics!r})" + ) + + +# Backward-compatible alias for existing imports. 
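+# (Editorial sketch of the validation contract above; values are hypothetical:
+#  CanonicalMetadata(title="Song", artist="Artist", album="Album",
+#                    album_artist="Artist", track_num=1, disc_num=1,
+#                    date="1994", genre="Rock") constructs fine, with optional
+#  fields defaulting to None; title="  " or track_num=0 raises ValueError, and
+#  a non-int track_num such as "1" raises TypeError.)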
+MusicMetadata = CanonicalMetadata + +__all__ = ["CanonicalMetadata", "MusicMetadata"] diff --git a/metadata/worker.py b/metadata/worker.py index cee3f7b..348df7d 100644 --- a/metadata/worker.py +++ b/metadata/worker.py @@ -7,7 +7,7 @@ from .providers import acoustid as acoustid_provider from .providers import artwork as artwork_provider from .providers import musicbrainz as musicbrainz_provider -from .tagger import apply_tags +from .tagging_service import apply_tags from .lyric_enrichment import fetch_lyrics diff --git a/playlist/export.py b/playlist/export.py new file mode 100644 index 0000000..f2938bf --- /dev/null +++ b/playlist/export.py @@ -0,0 +1,62 @@ +"""Playlist export helpers.""" + +from __future__ import annotations + +import os +import re +from pathlib import Path +from typing import Iterable + +_INVALID_FS_CHARS_RE = re.compile(r'[<>:"/\\|?*]') +_MULTISPACE_RE = re.compile(r"\s+") +_DEFAULT_MUSIC_ROOT = Path("Music") + + +def write_m3u(playlist_root: Path, playlist_name: str, track_paths: Iterable[Path]) -> Path: + """Create or overwrite an M3U playlist file. + + Rules: + - Playlist files live under ``playlist_root``. + - Filename format is ``{playlist_name}.m3u``. + - Paths are written relative to configured music root. + - Missing tracks are skipped. + - Writes are atomic (temp file then replace). + """ + root = Path(playlist_root) + root.mkdir(parents=True, exist_ok=True) + + safe_name = sanitize_playlist_name(playlist_name) or "playlist" + target_path = root / f"{safe_name}.m3u" + temp_path = root / f".{safe_name}.m3u.tmp" + + music_root = _configured_music_root().resolve() + lines: list[str] = ["#EXTM3U"] + for track_path in track_paths: + candidate = Path(track_path) + if not candidate.exists(): + continue + try: + rel_path = candidate.resolve().relative_to(music_root) + except ValueError: + continue + lines.append(rel_path.as_posix()) + + content = "\n".join(lines) + "\n" + temp_path.write_text(content, encoding="utf-8") + temp_path.replace(target_path) + return target_path + + +def _configured_music_root() -> Path: + value = (os.environ.get("RETREIVR_MUSIC_ROOT") or "").strip() + if value: + return Path(value) + return _DEFAULT_MUSIC_ROOT + + +def sanitize_playlist_name(name: str) -> str: + """Return a filesystem-safe playlist name.""" + value = name + text = _INVALID_FS_CHARS_RE.sub("", str(value)) + text = _MULTISPACE_RE.sub(" ", text).strip() + return text.rstrip(" .") diff --git a/playlist/rebuild.py b/playlist/rebuild.py new file mode 100644 index 0000000..5e1a607 --- /dev/null +++ b/playlist/rebuild.py @@ -0,0 +1,50 @@ +"""Playlist rebuild helpers.""" + +from __future__ import annotations + +import os +from contextlib import contextmanager +from pathlib import Path +from typing import Iterable, Iterator + +from playlist.export import write_m3u + + +def rebuild_playlist_from_tracks( + playlist_name: str, + playlist_root: Path, + music_root: Path, + track_file_paths: Iterable[str], +) -> Path: + """Rebuild a playlist M3U file from canonical track file paths. + + Args: + playlist_name: Playlist display name used to derive M3U filename. + playlist_root: Directory where the resulting M3U file is stored. + music_root: Root directory used for relative path entries. + track_file_paths: Absolute canonical file paths loaded from storage. + + Returns: + Final path to the rebuilt M3U file. 
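+
+    Example (hypothetical paths):
+        rebuild_playlist_from_tracks(
+            "Road Trip",
+            playlist_root=Path("Music/Playlists"),
+            music_root=Path("Music"),
+            track_file_paths=["Music/Artist/Album/01 Song.mp3"],
+        )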
+ """ + normalized_playlist_name = str(playlist_name or "").strip() or "playlist" + track_paths = [Path(path) for path in track_file_paths if str(path).strip()] + with _music_root_env(music_root): + return write_m3u( + playlist_root=playlist_root, + playlist_name=normalized_playlist_name, + track_paths=track_paths, + ) + + +@contextmanager +def _music_root_env(music_root: Path) -> Iterator[None]: + previous = os.environ.get("RETREIVR_MUSIC_ROOT") + os.environ["RETREIVR_MUSIC_ROOT"] = str(music_root) + try: + yield + finally: + if previous is None: + os.environ.pop("RETREIVR_MUSIC_ROOT", None) + else: + os.environ["RETREIVR_MUSIC_ROOT"] = previous diff --git a/requirements.txt b/requirements.txt index 4e0bc46..2763b4e 100755 --- a/requirements.txt +++ b/requirements.txt @@ -16,4 +16,5 @@ pyacoustid>=1.3.0 mutagen>=1.47.0 rapidfuzz>=3.8.1 Pillow>=10.4.0 -lyricsgenius>=3.0.1 \ No newline at end of file +lyricsgenius>=3.0.1 +requests>=2.31.0 \ No newline at end of file diff --git a/retreivr.sqlite3 b/retreivr.sqlite3 new file mode 100644 index 0000000..e69de29 diff --git a/scheduler/__init__.py b/scheduler/__init__.py new file mode 100644 index 0000000..693093e --- /dev/null +++ b/scheduler/__init__.py @@ -0,0 +1,2 @@ +"""Scheduler integration package.""" + diff --git a/scheduler/jobs/__init__.py b/scheduler/jobs/__init__.py new file mode 100644 index 0000000..959bffd --- /dev/null +++ b/scheduler/jobs/__init__.py @@ -0,0 +1,2 @@ +"""Scheduler job implementations.""" + diff --git a/scheduler/jobs/spotify_playlist_watch.py b/scheduler/jobs/spotify_playlist_watch.py new file mode 100644 index 0000000..363ceea --- /dev/null +++ b/scheduler/jobs/spotify_playlist_watch.py @@ -0,0 +1,1131 @@ +"""Scheduler job for Spotify playlist snapshot monitoring.""" + +from __future__ import annotations + +import asyncio +import hashlib +import logging +import os +import sqlite3 +import time +from pathlib import Path +from dataclasses import dataclass +from datetime import datetime +from typing import Any, Callable +from urllib.parse import urlparse +from zoneinfo import ZoneInfo + +from db.downloaded_tracks import has_downloaded_isrc +from metadata.merge import merge_metadata +from playlist.rebuild import rebuild_playlist_from_tracks +from spotify.client import SpotifyPlaylistClient, get_playlist_items +from spotify.diff import diff_playlist +from spotify.oauth_store import SpotifyOAuthStore +from spotify.resolve import resolve_spotify_track + +logger = logging.getLogger(__name__) + +SPOTIFY_LIKED_SONGS_PLAYLIST_ID = "__spotify_liked_songs__" +SPOTIFY_SAVED_ALBUMS_PLAYLIST_ID = "__spotify_saved_albums__" +SPOTIFY_USER_PLAYLISTS_PLAYLIST_ID = "__spotify_user_playlists__" + + +@dataclass(frozen=True) +class PlaylistRunSummary: + added: int = 0 + skipped: int = 0 + failed: int = 0 + completed: int = 0 + + def to_dict(self) -> dict[str, int]: + return { + "added": int(self.added), + "skipped": int(self.skipped), + "failed": int(self.failed), + "completed": int(self.completed), + } + + +def normalize_spotify_playlist_identifier(value: str) -> str: + """Normalize Spotify playlist input into a bare playlist ID. 
+ + Accepts any of: + - https://open.spotify.com/playlist/{id} + - spotify:playlist:{id} + - {id} + """ + if not value: + return "" + + raw = str(value).strip() + if not raw: + return "" + + try: + parsed = urlparse(raw) + if "open.spotify.com" in parsed.netloc and "/playlist/" in parsed.path: + return parsed.path.split("/playlist/")[-1].split("?")[0] + except Exception: + pass + + if raw.startswith("spotify:playlist:"): + return raw.split(":")[-1] + + return raw + + +def _load_previous_snapshot(db: Any, playlist_id: str) -> tuple[str | None, list[dict[str, Any]]]: + if not hasattr(db, "get_latest_snapshot"): + return None, [] + latest = db.get_latest_snapshot(playlist_id) + if latest is None: + return None, [] + if isinstance(latest, tuple) and len(latest) == 2: + snapshot_id, items = latest + return snapshot_id, list(items or []) + if isinstance(latest, dict): + return latest.get("snapshot_id"), list(latest.get("items") or []) + return None, [] + + +def get_liked_songs_playlist_name() -> str: + """Return the virtual playlist display name for Spotify Liked Songs.""" + return "Spotify - Liked Songs" + + +def run_liked_songs_sync() -> None: + """Placeholder for future OAuth-based liked songs sync. + + Will: + - Fetch /me/tracks + - Diff snapshot + - Enqueue new tracks + - Rebuild M3U + Currently not implemented. + """ + logging.info("Liked Songs sync not enabled (OAuth required)") + + +def _run_async(coro): + try: + asyncio.get_running_loop() + except RuntimeError: + return asyncio.run(coro) + return None + + +def _resolve_db_path() -> str: + return os.environ.get("RETREIVR_DB_PATH", os.path.join(os.getcwd(), "retreivr.sqlite3")) + + +def _load_downloaded_track_paths(playlist_id: str) -> list[str]: + conn: sqlite3.Connection | None = None + try: + conn = sqlite3.connect(_resolve_db_path(), check_same_thread=False, timeout=30) + conn.row_factory = sqlite3.Row + cur = conn.cursor() + cur.execute( + """ + SELECT file_path + FROM downloaded_music_tracks + WHERE playlist_id=? 
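+            -- oldest first, so rebuilt M3U files keep download order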
+ ORDER BY downloaded_at ASC, id ASC + """, + (playlist_id,), + ) + rows = cur.fetchall() + return [str(row["file_path"]) for row in rows if row["file_path"]] + except sqlite3.Error: + logging.exception("Failed to load downloaded tracks for playlist %s", playlist_id) + return [] + finally: + try: + if conn is not None: + conn.close() + except Exception: + pass + + +def _load_downloaded_track_paths_for_playlist_ids(playlist_ids: list[str]) -> list[str]: + cleaned = [str(pid).strip() for pid in playlist_ids if str(pid).strip()] + if not cleaned: + return [] + + conn: sqlite3.Connection | None = None + try: + conn = sqlite3.connect(_resolve_db_path(), check_same_thread=False, timeout=30) + conn.row_factory = sqlite3.Row + cur = conn.cursor() + placeholders = ", ".join(["?"] * len(cleaned)) + cur.execute( + f""" + SELECT file_path + FROM downloaded_music_tracks + WHERE playlist_id IN ({placeholders}) + ORDER BY downloaded_at ASC, id ASC + """, + tuple(cleaned), + ) + rows = cur.fetchall() + return [str(row["file_path"]) for row in rows if row["file_path"]] + except sqlite3.Error: + logging.exception("Failed to load downloaded tracks for playlist IDs: %s", cleaned) + return [] + finally: + try: + if conn is not None: + conn.close() + except Exception: + pass + + +def _resolve_playlist_dirs(config: dict[str, Any] | None) -> tuple[Path, Path]: + cfg = config or {} + music_root = Path(str(cfg.get("music_download_folder") or "Music")) + playlist_root = Path( + str( + cfg.get("playlists_folder") + or cfg.get("playlist_export_folder") + or (music_root / "Playlists") + ) + ) + return playlist_root, music_root + + +def _spotify_client_credentials_from_config(config: dict[str, Any] | None) -> tuple[str, str]: + cfg = config or {} + spotify_cfg = (cfg.get("spotify") or {}) if isinstance(cfg, dict) else {} + client_id = str(spotify_cfg.get("client_id") or cfg.get("SPOTIFY_CLIENT_ID") or "").strip() + client_secret = str(spotify_cfg.get("client_secret") or cfg.get("SPOTIFY_CLIENT_SECRET") or "").strip() + return client_id, client_secret + + +def _resolve_db_path_from_runtime(db: Any) -> str: + if hasattr(db, "db_path"): + value = str(getattr(db, "db_path") or "").strip() + if value: + return value + return _resolve_db_path() + + +def _is_spotify_downtime_active(config: dict[str, Any] | None) -> bool: + """Return True when watch-policy downtime window is currently active.""" + cfg = config or {} + policy = cfg.get("watch_policy") or {} + if not isinstance(policy, dict): + return False + downtime = policy.get("downtime") or {} + if not isinstance(downtime, dict) or not downtime.get("enabled"): + return False + + start = str(downtime.get("start") or "").strip() + end = str(downtime.get("end") or "").strip() + if not start or not end: + return False + + tz_name = str(downtime.get("timezone") or "").strip() + now = datetime.now() + if tz_name: + try: + now = datetime.now(ZoneInfo(tz_name)) + except Exception: + now = datetime.now() + + try: + start_hour, start_minute = [int(part) for part in start.split(":", 1)] + end_hour, end_minute = [int(part) for part in end.split(":", 1)] + except Exception: + return False + + current_minutes = now.hour * 60 + now.minute + start_minutes = start_hour * 60 + start_minute + end_minutes = end_hour * 60 + end_minute + + if start_minutes == end_minutes: + return True + if start_minutes < end_minutes: + return start_minutes <= current_minutes < end_minutes + return current_minutes >= start_minutes or current_minutes < end_minutes + + +def _configured_watch_playlist_ids(config: 
dict[str, Any] | None) -> list[str]: + """Read configured Spotify watch playlists and return normalized unique IDs.""" + cfg = config or {} + spotify_cfg = (cfg.get("spotify") or {}) if isinstance(cfg, dict) else {} + raw_values = spotify_cfg.get("watch_playlists") + if raw_values is None: + raw_values = cfg.get("watch_playlists", []) if isinstance(cfg, dict) else [] + if not isinstance(raw_values, list): + return [] + + playlist_ids: list[str] = [] + seen: set[str] = set() + for raw in raw_values: + pid = normalize_spotify_playlist_identifier(str(raw or "")) + if not pid or pid in seen: + continue + seen.add(pid) + playlist_ids.append(pid) + return playlist_ids + + +def _best_effort_rebuild_playlist_m3u( + *, + playlist_id: str, + playlist_name: str, + config: dict[str, Any] | None, +) -> None: + """Rebuild playlist M3U from downloaded canonical paths without raising errors.""" + try: + track_paths = _load_downloaded_track_paths(playlist_id) + playlist_root, music_root = _resolve_playlist_dirs(config) + rebuild_playlist_from_tracks( + playlist_name=(playlist_name or playlist_id).strip() or playlist_id, + playlist_root=playlist_root, + music_root=music_root, + track_file_paths=track_paths, + ) + logging.info("Playlist M3U updated: %s (%d tracks)", playlist_name, len(track_paths)) + except Exception: + logging.exception("Playlist M3U rebuild failed for playlist %s", playlist_id) + + +def _enqueue_added_track(queue: Any, item: dict[str, Any]) -> Any: + if callable(queue): + return queue(item) + for method_name in ("enqueue", "put", "add", "enqueue_track"): + method = getattr(queue, method_name, None) + if callable(method): + return method(item) + raise TypeError("queue does not expose a supported enqueue method") + + +def _safe_position(value: Any, fallback: int) -> int: + try: + return int(value) + except Exception: + return int(fallback) + + +def _normalize_playlist_items(items: list[dict[str, Any]] | None) -> list[dict[str, Any]]: + normalized_rows: list[tuple[int, int, dict[str, Any]]] = [] + for idx, raw in enumerate(items or []): + if not isinstance(raw, dict): + continue + track_id = str(raw.get("spotify_track_id") or "").strip() + if not track_id: + continue + position = _safe_position(raw.get("position"), idx) + normalized_rows.append( + ( + position, + idx, + { + **raw, + "spotify_track_id": track_id, + "position": position, + "added_at": raw.get("added_at"), + }, + ) + ) + normalized_rows.sort(key=lambda entry: (entry[0], entry[1])) + return [entry[2] for entry in normalized_rows] + + +def _playlist_snapshot_hash(items: list[dict[str, Any]]) -> str: + digest_source = "\n".join( + f"{idx}|{item.get('spotify_track_id')}|{item.get('position')}|{item.get('added_at') or ''}" + for idx, item in enumerate(items) + ).encode("utf-8") + return hashlib.sha256(digest_source).hexdigest() + + +def _classify_enqueue_result(result: Any) -> tuple[int, int]: + if isinstance(result, tuple): + created = None + if len(result) >= 2: + created = result[1] + if created is False: + return 0, 1 + return 1, 0 + if isinstance(result, dict): + created = result.get("created") + if created is False: + return 0, 1 + return 1, 0 + + +async def spotify_playlists_watch_job( + config, + db, + queue, + spotify_client, + search_service, + ignore_downtime: bool = False, +): + """Run configured Spotify playlist sync jobs. + + If ignore_downtime is True, do not skip based on downtime. 
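+
+    Illustrative return shape (values hypothetical):
+        {"status": "updated", "synced_playlists": 2, "errors": [],
+         "run_summary": {"added": 5, "skipped": 1, "failed": 0, "completed": 4}}
+
+    Downtime example: with watch_policy.downtime = {"enabled": True,
+    "start": "23:00", "end": "06:00"}, a run at 00:30 is skipped (the window
+    wraps midnight) unless ignore_downtime is set.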
+ """ + del search_service # currently unused for playlist snapshot syncing + + cfg = config if isinstance(config, dict) else {} + downtime_cfg = ((cfg.get("watch_policy") or {}).get("downtime") or {}) if isinstance(cfg.get("watch_policy"), dict) else {} + downtime_start = str(downtime_cfg.get("start") or "").strip() or "?" + downtime_end = str(downtime_cfg.get("end") or "").strip() or "?" + downtime_active = _is_spotify_downtime_active(cfg) + aggregate_summary = PlaylistRunSummary() + started_at = time.monotonic() + if downtime_active and not ignore_downtime: + logger.info( + f"Spotify sync waiting for downtime to end " + f"(downtime {downtime_start} -> {downtime_end})" + ) + result = { + "status": "skipped", + "reason": "downtime", + "synced_playlists": 0, + "run_summary": aggregate_summary.to_dict(), + } + logger.info( + "[SCHEDULER] spotify_playlists_watch_job duration_sec=%.3f status=%s synced=%s", + time.monotonic() - started_at, + result.get("status"), + result.get("synced_playlists"), + ) + return result + + playlist_ids = _configured_watch_playlist_ids(cfg) + if not playlist_ids: + result = { + "status": "skipped", + "reason": "no_playlists", + "synced_playlists": 0, + "run_summary": aggregate_summary.to_dict(), + } + logger.info( + "[SCHEDULER] spotify_playlists_watch_job duration_sec=%.3f status=%s synced=%s", + time.monotonic() - started_at, + result.get("status"), + result.get("synced_playlists"), + ) + return result + + synced = 0 + errors: list[str] = [] + for playlist_id in playlist_ids: + try: + result = playlist_watch_job( + spotify_client=spotify_client, + db=db, + queue=queue, + playlist_id=playlist_id, + playlist_name=playlist_id, + config=cfg, + ) + summary = result.get("run_summary") if isinstance(result, dict) else None + if isinstance(summary, dict): + aggregate_summary = PlaylistRunSummary( + added=aggregate_summary.added + int(summary.get("added") or 0), + skipped=aggregate_summary.skipped + int(summary.get("skipped") or 0), + failed=aggregate_summary.failed + int(summary.get("failed") or 0), + completed=aggregate_summary.completed + int(summary.get("completed") or 0), + ) + synced += 1 + except Exception as exc: + errors.append(f"{playlist_id}: {exc}") + logging.exception("Scheduled Spotify playlist sync failed for playlist %s", playlist_id) + aggregate_summary = PlaylistRunSummary( + added=aggregate_summary.added, + skipped=aggregate_summary.skipped, + failed=aggregate_summary.failed + 1, + completed=aggregate_summary.completed, + ) + + logging.info( + "Spotify playlist sync completed: %d/%d playlists processed", + synced, + len(playlist_ids), + ) + logger.info("Spotify playlist polling sync completed") + + result = { + "status": "updated", + "synced_playlists": synced, + "errors": errors, + "run_summary": aggregate_summary.to_dict(), + } + logger.info( + "[SCHEDULER] spotify_playlists_watch_job duration_sec=%.3f status=%s synced=%s", + time.monotonic() - started_at, + result.get("status"), + result.get("synced_playlists"), + ) + return result + + +async def enqueue_spotify_track(queue, spotify_track: dict, search_service, playlist_id: str): + """Resolve a Spotify track, merge metadata, build payload, and enqueue it. + + Idempotency skip is applied only when a non-empty ISRC exists and that + `(playlist_id, isrc)` has already been recorded as downloaded. Tracks with + missing/empty ISRC are always treated as normal enqueue candidates. 
+ """ + track_isrc = str((spotify_track or {}).get("isrc") or "").strip() + if track_isrc and has_downloaded_isrc(playlist_id, track_isrc): + logging.info("skip duplicate isrc=%s playlist=%s", track_isrc, playlist_id) + return + + # TODO(scheduler/jobs/spotify_playlist_watch.py::enqueue_spotify_track): replace spotify.resolve.resolve_spotify_track + # with the unified engine search resolution path (engine.search_engine SearchResolutionService adapters). + resolved_media = await resolve_spotify_track(spotify_track, search_service) + merged_metadata = merge_metadata(spotify_track or {}, {}, resolved_media.get("extra") or {}) + payload = { + "playlist_id": playlist_id, + "spotify_track_id": (spotify_track or {}).get("spotify_track_id"), + "resolved_media": resolved_media, + "music_metadata": merged_metadata, + } + queue.enqueue(payload) + + +async def spotify_liked_songs_watch_job( + config, + db, + queue, + spotify_client, + search_service, + ignore_downtime: bool = False, +): + """Sync Spotify Liked Songs using OAuth-backed `/v1/me/tracks` snapshots.""" + cfg = config if isinstance(config, dict) else {} + downtime_cfg = ((cfg.get("watch_policy") or {}).get("downtime") or {}) if isinstance(cfg.get("watch_policy"), dict) else {} + downtime_start = str(downtime_cfg.get("start") or "").strip() or "?" + downtime_end = str(downtime_cfg.get("end") or "").strip() or "?" + downtime_active = _is_spotify_downtime_active(cfg) + if downtime_active and not ignore_downtime: + logger.info( + f"Spotify sync waiting for downtime to end " + f"(downtime {downtime_start} -> {downtime_end})" + ) + return {"status": "skipped", "reason": "downtime", "playlist_id": SPOTIFY_LIKED_SONGS_PLAYLIST_ID, "enqueued": 0} + + client_id, client_secret = _spotify_client_credentials_from_config(config if isinstance(config, dict) else None) + if not client_id or not client_secret: + logging.info("Liked Songs sync skipped: Spotify credentials not configured") + return {"status": "skipped", "playlist_id": SPOTIFY_LIKED_SONGS_PLAYLIST_ID, "enqueued": 0} + + oauth_store = SpotifyOAuthStore(Path(_resolve_db_path_from_runtime(db))) + token = oauth_store.get_valid_token(client_id, client_secret, config=config if isinstance(config, dict) else None) + if token is None: + logging.info("Liked Songs sync skipped: no valid Spotify OAuth token") + return {"status": "skipped", "playlist_id": SPOTIFY_LIKED_SONGS_PLAYLIST_ID, "enqueued": 0} + + liked_client: Any = spotify_client + if isinstance(liked_client, SpotifyPlaylistClient): + liked_client._provided_access_token = token.access_token + elif not hasattr(liked_client, "get_liked_songs"): + liked_client = SpotifyPlaylistClient( + client_id=client_id, + client_secret=client_secret, + access_token=token.access_token, + ) + + try: + current_snapshot_id, current_items = await liked_client.get_liked_songs() + except Exception as exc: + logging.exception("Liked Songs fetch failed") + return { + "status": "error", + "playlist_id": SPOTIFY_LIKED_SONGS_PLAYLIST_ID, + "error": f"spotify_fetch_failed: {exc}", + } + + try: + previous_snapshot_id, previous_items = _load_previous_snapshot(db, SPOTIFY_LIKED_SONGS_PLAYLIST_ID) + except Exception as exc: + logging.exception("Liked Songs snapshot load failed") + return { + "status": "error", + "playlist_id": SPOTIFY_LIKED_SONGS_PLAYLIST_ID, + "error": f"snapshot_read_failed: {exc}", + } + + if previous_snapshot_id == current_snapshot_id: + logger.info("Spotify liked songs sync completed") + return { + "status": "unchanged", + "playlist_id": 
SPOTIFY_LIKED_SONGS_PLAYLIST_ID, + "snapshot_id": current_snapshot_id, + "enqueued": 0, + } + + diff = diff_playlist(previous_items, current_items) + added_items = list(diff["added"]) + enqueued = 0 + enqueue_errors: list[str] = [] + for track in added_items: + try: + await enqueue_spotify_track( + queue, + track, + search_service, + playlist_id=SPOTIFY_LIKED_SONGS_PLAYLIST_ID, + ) + enqueued += 1 + except Exception as exc: + track_id = track.get("spotify_track_id") + enqueue_errors.append(f"{track_id}: {exc}") + logging.exception("Failed to enqueue Liked Songs track %s", track_id) + + try: + db.store_snapshot( + SPOTIFY_LIKED_SONGS_PLAYLIST_ID, + str(current_snapshot_id), + current_items, + ) + except Exception as exc: + logging.exception("Liked Songs snapshot store failed") + return { + "status": "error", + "playlist_id": SPOTIFY_LIKED_SONGS_PLAYLIST_ID, + "snapshot_id": current_snapshot_id, + "error": f"snapshot_store_failed: {exc}", + "enqueued": enqueued, + "added_count": len(added_items), + "removed_count": len(diff["removed"]), + "moved_count": len(diff["moved"]), + "enqueue_errors": enqueue_errors, + } + + _best_effort_rebuild_playlist_m3u( + playlist_id=SPOTIFY_LIKED_SONGS_PLAYLIST_ID, + playlist_name=get_liked_songs_playlist_name(), + config=config if isinstance(config, dict) else None, + ) + + logger.info("Spotify liked songs sync completed") + return { + "status": "updated", + "playlist_id": SPOTIFY_LIKED_SONGS_PLAYLIST_ID, + "snapshot_id": current_snapshot_id, + "enqueued": enqueued, + "added_count": len(added_items), + "removed_count": len(diff["removed"]), + "moved_count": len(diff["moved"]), + "enqueue_errors": enqueue_errors, + } + + +async def spotify_saved_albums_watch_job( + config, + db, + queue, + spotify_client, + search_service, + ignore_downtime: bool = False, +): + """Sync Spotify Saved Albums via OAuth and enqueue newly added albums.""" + cfg = config if isinstance(config, dict) else {} + downtime_cfg = ((cfg.get("watch_policy") or {}).get("downtime") or {}) if isinstance(cfg.get("watch_policy"), dict) else {} + downtime_start = str(downtime_cfg.get("start") or "").strip() or "?" + downtime_end = str(downtime_cfg.get("end") or "").strip() or "?" 
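+    # Illustrative config shape read by this guard (key names taken from the
+    # lookups above; times are placeholder values):
+    #   {"watch_policy": {"downtime": {"start": "01:00", "end": "06:30"}}}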
+ downtime_active = _is_spotify_downtime_active(cfg) + if downtime_active and not ignore_downtime: + logger.info( + f"Spotify sync waiting for downtime to end " + f"(downtime {downtime_start} -> {downtime_end})" + ) + return {"status": "skipped", "reason": "downtime", "playlist_id": SPOTIFY_SAVED_ALBUMS_PLAYLIST_ID, "enqueued": 0} + + client_id, client_secret = _spotify_client_credentials_from_config(config if isinstance(config, dict) else None) + if not client_id or not client_secret: + logging.info("Saved Albums sync skipped: Spotify credentials not configured") + return {"status": "skipped", "playlist_id": SPOTIFY_SAVED_ALBUMS_PLAYLIST_ID, "enqueued": 0} + + oauth_store = SpotifyOAuthStore(Path(_resolve_db_path_from_runtime(db))) + token = oauth_store.get_valid_token(client_id, client_secret, config=config if isinstance(config, dict) else None) + if token is None: + logging.info("Saved Albums sync skipped: no valid Spotify OAuth token") + return {"status": "skipped", "playlist_id": SPOTIFY_SAVED_ALBUMS_PLAYLIST_ID, "enqueued": 0} + + saved_albums_client: Any = spotify_client + if isinstance(saved_albums_client, SpotifyPlaylistClient): + saved_albums_client._provided_access_token = token.access_token + elif not hasattr(saved_albums_client, "get_saved_albums"): + saved_albums_client = SpotifyPlaylistClient( + client_id=client_id, + client_secret=client_secret, + access_token=token.access_token, + ) + + try: + current_snapshot_id, current_albums = await saved_albums_client.get_saved_albums() + except Exception as exc: + logging.exception("Saved Albums fetch failed") + return { + "status": "error", + "playlist_id": SPOTIFY_SAVED_ALBUMS_PLAYLIST_ID, + "error": f"spotify_fetch_failed: {exc}", + } + + current_snapshot_items: list[dict[str, Any]] = [] + album_map: dict[str, dict[str, Any]] = {} + for idx, album in enumerate(current_albums or []): + album_id = str((album or {}).get("album_id") or "").strip() + if not album_id: + continue + current_snapshot_items.append( + { + "spotify_track_id": album_id, + "position": idx, + "added_at": (album or {}).get("added_at"), + } + ) + album_map[album_id] = dict(album) + + try: + previous_snapshot_id, previous_items = _load_previous_snapshot(db, SPOTIFY_SAVED_ALBUMS_PLAYLIST_ID) + except Exception as exc: + logging.exception("Saved Albums snapshot load failed") + return { + "status": "error", + "playlist_id": SPOTIFY_SAVED_ALBUMS_PLAYLIST_ID, + "error": f"snapshot_read_failed: {exc}", + } + + if previous_snapshot_id == current_snapshot_id: + logger.info("Spotify saved albums sync completed") + return { + "status": "unchanged", + "playlist_id": SPOTIFY_SAVED_ALBUMS_PLAYLIST_ID, + "snapshot_id": current_snapshot_id, + "enqueued": 0, + } + + diff = diff_playlist(previous_items, current_snapshot_items) + added_albums = list(diff["added"]) + enqueued = 0 + enqueue_errors: list[str] = [] + + dispatcher_config = dict(config) if isinstance(config, dict) else {} + dispatcher_config["search_service"] = search_service + + # Local import avoids a module import cycle with api.intent_dispatcher. 
+ from api.intent_dispatcher import run_spotify_album_sync + + for album_item in added_albums: + album_id = str((album_item or {}).get("spotify_track_id") or "").strip() + if not album_id: + continue + try: + await run_spotify_album_sync( + album_id=album_id, + config=dispatcher_config, + db=db, + queue=queue, + spotify_client=saved_albums_client, + ) + enqueued += 1 + except Exception as exc: + enqueue_errors.append(f"{album_id}: {exc}") + logging.exception("Saved Albums enqueue failed for album %s", album_id) + + try: + db.store_snapshot( + SPOTIFY_SAVED_ALBUMS_PLAYLIST_ID, + str(current_snapshot_id), + current_snapshot_items, + ) + except Exception as exc: + logging.exception("Saved Albums snapshot store failed") + return { + "status": "error", + "playlist_id": SPOTIFY_SAVED_ALBUMS_PLAYLIST_ID, + "snapshot_id": current_snapshot_id, + "error": f"snapshot_store_failed: {exc}", + "enqueued": enqueued, + "added_count": len(added_albums), + "removed_count": len(diff["removed"]), + "moved_count": len(diff["moved"]), + "enqueue_errors": enqueue_errors, + } + + # Best effort: rebuild a virtual "Saved Albums" M3U from album-scoped downloads. + try: + album_playlist_ids = [ + f"spotify_album_{str((item or {}).get('spotify_track_id') or '').strip()}" + for item in current_snapshot_items + if str((item or {}).get("spotify_track_id") or "").strip() + ] + track_paths = _load_downloaded_track_paths_for_playlist_ids(album_playlist_ids) + playlist_root, music_root = _resolve_playlist_dirs(config if isinstance(config, dict) else None) + rebuild_playlist_from_tracks( + playlist_name="Spotify - Saved Albums", + playlist_root=playlist_root, + music_root=music_root, + track_file_paths=track_paths, + ) + logging.info("Playlist M3U updated: Spotify - Saved Albums (%d tracks)", len(track_paths)) + except Exception: + logging.exception("Saved Albums M3U rebuild failed") + + logger.info("Spotify saved albums sync completed") + return { + "status": "updated", + "playlist_id": SPOTIFY_SAVED_ALBUMS_PLAYLIST_ID, + "snapshot_id": current_snapshot_id, + "enqueued": enqueued, + "added_count": len(added_albums), + "removed_count": len(diff["removed"]), + "moved_count": len(diff["moved"]), + "enqueue_errors": enqueue_errors, + } + + +async def spotify_user_playlists_watch_job( + config, + db, + queue, + spotify_client, + search_service, + ignore_downtime: bool = False, +): + """Sync authenticated user's Spotify playlists and trigger sync for new playlists.""" + cfg = config if isinstance(config, dict) else {} + downtime_cfg = ((cfg.get("watch_policy") or {}).get("downtime") or {}) if isinstance(cfg.get("watch_policy"), dict) else {} + downtime_start = str(downtime_cfg.get("start") or "").strip() or "?" + downtime_end = str(downtime_cfg.get("end") or "").strip() or "?" 
+ downtime_active = _is_spotify_downtime_active(cfg) + if downtime_active and not ignore_downtime: + logger.info( + f"Spotify sync waiting for downtime to end " + f"(downtime {downtime_start} -> {downtime_end})" + ) + return {"status": "skipped", "reason": "downtime", "playlist_id": SPOTIFY_USER_PLAYLISTS_PLAYLIST_ID, "enqueued": 0} + + client_id, client_secret = _spotify_client_credentials_from_config(config if isinstance(config, dict) else None) + if not client_id or not client_secret: + logging.info("User Playlists sync skipped: Spotify credentials not configured") + return {"status": "skipped", "playlist_id": SPOTIFY_USER_PLAYLISTS_PLAYLIST_ID, "enqueued": 0} + + oauth_store = SpotifyOAuthStore(Path(_resolve_db_path_from_runtime(db))) + token = oauth_store.get_valid_token(client_id, client_secret, config=config if isinstance(config, dict) else None) + if token is None: + logging.info("User Playlists sync skipped: no valid Spotify OAuth token") + return {"status": "skipped", "playlist_id": SPOTIFY_USER_PLAYLISTS_PLAYLIST_ID, "enqueued": 0} + + playlists_client: Any = spotify_client + if isinstance(playlists_client, SpotifyPlaylistClient): + playlists_client._provided_access_token = token.access_token + elif not hasattr(playlists_client, "get_user_playlists"): + playlists_client = SpotifyPlaylistClient( + client_id=client_id, + client_secret=client_secret, + access_token=token.access_token, + ) + + try: + current_snapshot_id, current_playlists = await playlists_client.get_user_playlists() + except Exception as exc: + logging.exception("User Playlists fetch failed") + return { + "status": "error", + "playlist_id": SPOTIFY_USER_PLAYLISTS_PLAYLIST_ID, + "error": f"spotify_fetch_failed: {exc}", + } + + current_snapshot_items: list[dict[str, Any]] = [] + playlist_name_by_id: dict[str, str] = {} + for idx, playlist in enumerate(current_playlists or []): + playlist_id = str((playlist or {}).get("id") or "").strip() + if not playlist_id: + continue + playlist_name_by_id[playlist_id] = str((playlist or {}).get("name") or "").strip() or playlist_id + current_snapshot_items.append( + { + "spotify_track_id": playlist_id, + "position": idx, + "added_at": None, + } + ) + + try: + previous_snapshot_id, previous_items = _load_previous_snapshot(db, SPOTIFY_USER_PLAYLISTS_PLAYLIST_ID) + except Exception as exc: + logging.exception("User Playlists snapshot load failed") + return { + "status": "error", + "playlist_id": SPOTIFY_USER_PLAYLISTS_PLAYLIST_ID, + "error": f"snapshot_read_failed: {exc}", + } + + if previous_snapshot_id == current_snapshot_id: + logger.info("Spotify user playlists sync completed") + return { + "status": "unchanged", + "playlist_id": SPOTIFY_USER_PLAYLISTS_PLAYLIST_ID, + "snapshot_id": current_snapshot_id, + "enqueued": 0, + } + + diff = diff_playlist(previous_items, current_snapshot_items) + added_playlists = list(diff["added"]) + synced = 0 + sync_errors: list[str] = [] + for playlist_item in added_playlists: + playlist_id = str((playlist_item or {}).get("spotify_track_id") or "").strip() + if not playlist_id: + continue + playlist_name = playlist_name_by_id.get(playlist_id, playlist_id) + try: + playlist_watch_job( + spotify_client=playlists_client, + db=db, + queue=queue, + playlist_id=playlist_id, + playlist_name=playlist_name, + config=config if isinstance(config, dict) else None, + ) + synced += 1 + except Exception as exc: + sync_errors.append(f"{playlist_id}: {exc}") + logging.exception("User Playlists sync failed for playlist %s", playlist_id) + + try: + db.store_snapshot( 
+                SPOTIFY_USER_PLAYLISTS_PLAYLIST_ID,
+                str(current_snapshot_id),
+                current_snapshot_items,
+            )
+    except Exception as exc:
+        logging.exception("User Playlists snapshot store failed")
+        return {
+            "status": "error",
+            "playlist_id": SPOTIFY_USER_PLAYLISTS_PLAYLIST_ID,
+            "snapshot_id": current_snapshot_id,
+            "error": f"snapshot_store_failed: {exc}",
+            "enqueued": synced,
+            "added_count": len(added_playlists),
+            "removed_count": len(diff["removed"]),
+            "moved_count": len(diff["moved"]),
+            "enqueue_errors": sync_errors,
+        }
+
+    logger.info("Spotify user playlists sync completed")
+    return {
+        "status": "updated",
+        "playlist_id": SPOTIFY_USER_PLAYLISTS_PLAYLIST_ID,
+        "snapshot_id": current_snapshot_id,
+        "enqueued": synced,
+        "added_count": len(added_playlists),
+        "removed_count": len(diff["removed"]),
+        "moved_count": len(diff["moved"]),
+        "enqueue_errors": sync_errors,
+    }
+
+
+def playlist_watch_job(
+    spotify_client,
+    db,
+    queue,
+    playlist_id: str,
+    *,
+    playlist_name: str | None = None,
+    config: dict[str, Any] | None = None,
+    ignore_downtime: bool = False,
+) -> dict[str, Any]:
+    """Fetch playlist snapshot, diff with DB state, enqueue added tracks, and persist new snapshot."""
+    base_summary = PlaylistRunSummary()
+    started_at = time.monotonic()
+    cfg = config if isinstance(config, dict) else {}
+    downtime_cfg = ((cfg.get("watch_policy") or {}).get("downtime") or {}) if isinstance(cfg.get("watch_policy"), dict) else {}
+    downtime_start = str(downtime_cfg.get("start") or "").strip() or "?"
+    downtime_end = str(downtime_cfg.get("end") or "").strip() or "?"
+    downtime_active = _is_spotify_downtime_active(cfg)
+    if downtime_active and not ignore_downtime:
+        logger.info(
+            f"Spotify sync waiting for downtime to end "
+            f"(downtime {downtime_start} -> {downtime_end})"
+        )
+        result = {
+            "status": "skipped",
+            "reason": "downtime",
+            "playlist_id": playlist_id,
+            "enqueued": 0,
+            "run_summary": base_summary.to_dict(),
+        }
+        logger.info(
+            "[SCHEDULER] playlist_watch_job duration_sec=%.3f status=%s playlist_id=%s",
+            time.monotonic() - started_at,
+            result.get("status"),
+            playlist_id,
+        )
+        return result
+
+    pid = normalize_spotify_playlist_identifier(playlist_id)
+    if not pid:
+        return {"status": "error", "playlist_id": playlist_id, "error": "playlist_id is required"}
+    logging.info("Fetching Spotify playlist %s", pid)
+
+    try:
+        if hasattr(spotify_client, "get_playlist_items") and callable(spotify_client.get_playlist_items):
+            current_snapshot_id, current_items = spotify_client.get_playlist_items(pid)
+        else:
+            result = _run_async(get_playlist_items(spotify_client, pid))
+            if result is None:
+                raise RuntimeError("Cannot run async Spotify fetch inside active event loop")
+            current_snapshot_id, current_items = result
+    except Exception as exc:
+        logging.exception("Spotify fetch failed for playlist %s", pid)
+        result = {
+            "status": "error",
+            "playlist_id": pid,
+            "error": f"spotify_fetch_failed: {exc}",
+            "run_summary": PlaylistRunSummary(failed=1).to_dict(),
+        }
+        logger.info(
+            "[SCHEDULER] playlist_watch_job duration_sec=%.3f status=%s playlist_id=%s",
+            time.monotonic() - started_at,
+            result.get("status"),
+            pid,
+        )
+        return result
+
+    try:
+        previous_snapshot_id, previous_items = _load_previous_snapshot(db, pid)
+    except Exception as exc:
+        logging.exception("Snapshot load failed for playlist %s", pid)
+        result = {
+            "status": "error",
+            "playlist_id": pid,
+            "error": f"snapshot_read_failed: {exc}",
+            "run_summary":
PlaylistRunSummary(failed=1).to_dict(), + } + logger.info( + "[SCHEDULER] playlist_watch_job duration_sec=%.3f status=%s playlist_id=%s", + time.monotonic() - started_at, + result.get("status"), + pid, + ) + return result + + normalized_previous_items = _normalize_playlist_items(previous_items) + normalized_current_items = _normalize_playlist_items(current_items) + previous_hash = _playlist_snapshot_hash(normalized_previous_items) + current_hash = _playlist_snapshot_hash(normalized_current_items) + + if previous_snapshot_id == current_snapshot_id or previous_hash == current_hash: + logger.info("Spotify playlist polling sync completed") + unchanged_summary = PlaylistRunSummary( + skipped=len(normalized_current_items), + completed=0, + ) + result = { + "status": "unchanged", + "playlist_id": pid, + "snapshot_id": current_snapshot_id, + "snapshot_hash": current_hash, + "enqueued": 0, + "run_summary": unchanged_summary.to_dict(), + } + logger.info( + "[SCHEDULER] playlist_watch_job duration_sec=%.3f status=%s playlist_id=%s", + time.monotonic() - started_at, + result.get("status"), + pid, + ) + return result + + diff = diff_playlist(normalized_previous_items, normalized_current_items) + added_items = list(diff["added"]) + enqueued = 0 + skipped = 0 + enqueue_errors: list[str] = [] + for item in added_items: + try: + enqueue_result = _enqueue_added_track(queue, item) + completed_inc, skipped_inc = _classify_enqueue_result(enqueue_result) + enqueued += completed_inc + skipped += skipped_inc + except Exception as exc: + track_id = item.get("spotify_track_id") + enqueue_errors.append(f"{track_id}: {exc}") + logging.exception("Enqueue failed for added Spotify track %s", track_id) + + try: + db.store_snapshot(pid, str(current_snapshot_id), normalized_current_items) + except Exception as exc: + logging.exception("Snapshot store failed for playlist %s", pid) + summary = PlaylistRunSummary( + added=len(added_items), + skipped=skipped, + failed=len(enqueue_errors) + 1, + completed=enqueued, + ) + result = { + "status": "error", + "playlist_id": pid, + "snapshot_id": current_snapshot_id, + "snapshot_hash": current_hash, + "error": f"snapshot_store_failed: {exc}", + "enqueued": enqueued, + "added_count": len(added_items), + "removed_count": len(diff["removed"]), + "moved_count": len(diff["moved"]), + "enqueue_errors": enqueue_errors, + "run_summary": summary.to_dict(), + } + logger.info( + "[SCHEDULER] playlist_watch_job duration_sec=%.3f status=%s playlist_id=%s", + time.monotonic() - started_at, + result.get("status"), + pid, + ) + return result + + _best_effort_rebuild_playlist_m3u( + playlist_id=pid, + playlist_name=(playlist_name or pid).strip() or pid, + config=config, + ) + + logger.info("Spotify playlist polling sync completed") + summary = PlaylistRunSummary( + added=len(added_items), + skipped=skipped, + failed=len(enqueue_errors), + completed=enqueued, + ) + result = { + "status": "updated", + "playlist_id": pid, + "snapshot_id": current_snapshot_id, + "snapshot_hash": current_hash, + "enqueued": enqueued, + "added_count": len(added_items), + "removed_count": len(diff["removed"]), + "moved_count": len(diff["moved"]), + "enqueue_errors": enqueue_errors, + "run_summary": summary.to_dict(), + } + logger.info( + "[SCHEDULER] playlist_watch_job duration_sec=%.3f status=%s playlist_id=%s", + time.monotonic() - started_at, + result.get("status"), + pid, + ) + return result + + +def run_spotify_playlist_watch_job( + *, + playlist_id: str, + spotify_client: SpotifyPlaylistClient, + snapshot_store: Any, 
+    enqueue_track: Callable[[dict[str, Any]], None],
+) -> dict[str, Any]:
+    """Compatibility wrapper around `playlist_watch_job` for existing call sites."""
+    return playlist_watch_job(spotify_client, snapshot_store, enqueue_track, playlist_id)
diff --git a/spotify/__init__.py b/spotify/__init__.py
new file mode 100644
index 0000000..aef6b28
--- /dev/null
+++ b/spotify/__init__.py
@@ -0,0 +1,6 @@
+"""Spotify integration modules."""
+
+from spotify.client import SpotifyPlaylistClient
+from spotify.diff import diff_playlist
+
+__all__ = ["SpotifyPlaylistClient", "diff_playlist"]
diff --git a/spotify/client.py b/spotify/client.py
new file mode 100644
index 0000000..9f813b1
--- /dev/null
+++ b/spotify/client.py
@@ -0,0 +1,512 @@
+"""Spotify API client for playlist snapshots and normalized playlist items."""
+
+from __future__ import annotations
+
+import asyncio
+import base64
+import hashlib
+import os
+import time
+import urllib.parse
+from typing import Any, TypedDict
+
+import requests
+
+
+class NormalizedItem(TypedDict):
+    """Normalized Spotify playlist item record."""
+
+    spotify_track_id: str | None
+    position: int
+    added_at: str | None
+    artist: str | None
+    title: str | None
+    album: str | None
+    duration_ms: int | None
+    isrc: str | None
+
+
+class SpotifyPlaylistClient:
+    """Client for reading playlist snapshots and playlist items from Spotify."""
+
+    _TOKEN_URL = "https://accounts.spotify.com/api/token"
+    _PLAYLIST_URL = "https://api.spotify.com/v1/playlists/{playlist_id}"
+
+    def __init__(
+        self,
+        *,
+        client_id: str | None = None,
+        client_secret: str | None = None,
+        access_token: str | None = None,
+        timeout_sec: int = 20,
+    ) -> None:
+        self.client_id = client_id or os.environ.get("SPOTIFY_CLIENT_ID")
+        self.client_secret = client_secret or os.environ.get("SPOTIFY_CLIENT_SECRET")
+        self.timeout_sec = timeout_sec
+        self._provided_access_token = (access_token or "").strip() or None
+        self._access_token: str | None = None
+        self._access_token_expire_at: float = 0.0
+
+    def _get_access_token(self) -> str:
+        if self._provided_access_token:
+            return self._provided_access_token
+
+        if not self.client_id or not self.client_secret:
+            raise RuntimeError("Spotify credentials are required")
+
+        now = time.time()
+        if self._access_token and now < self._access_token_expire_at:
+            return self._access_token
+
+        auth_payload = f"{self.client_id}:{self.client_secret}".encode("utf-8")
+        auth_header = base64.b64encode(auth_payload).decode("ascii")
+        response = requests.post(
+            self._TOKEN_URL,
+            data={"grant_type": "client_credentials"},
+            headers={"Authorization": f"Basic {auth_header}"},
+            timeout=self.timeout_sec,
+        )
+        if response.status_code != 200:
+            raise RuntimeError(f"Spotify token request failed ({response.status_code})")
+
+        payload = response.json()
+        token = payload.get("access_token")
+        if not token:
+            raise RuntimeError("Spotify token response missing access_token")
+
+        expires_in = int(payload.get("expires_in") or 0)
+        self._access_token = token
+        self._access_token_expire_at = now + max(0, expires_in - 30)
+        return token
+
+    def _request_json(self, url: str, params: dict[str, Any] | None = None) -> dict[str, Any]:
+        token = self._get_access_token()
+        headers = {"Authorization": f"Bearer {token}"}
+        response = requests.get(url, params=params, headers=headers, timeout=self.timeout_sec)
+        if response.status_code == 401:
+            self._access_token = None
+            token =
self._get_access_token() + headers = {"Authorization": f"Bearer {token}"} + response = requests.get(url, params=params, headers=headers, timeout=self.timeout_sec) + if response.status_code != 200: + raise RuntimeError(f"Spotify request failed ({response.status_code})") + return response.json() + + def get_playlist_items(self, playlist_id: str) -> tuple[str, list[NormalizedItem]]: + """Fetch playlist `snapshot_id` and normalized items in original playlist order.""" + playlist_id = (playlist_id or "").strip() + if not playlist_id: + raise ValueError("playlist_id is required") + + encoded_id = urllib.parse.quote(playlist_id, safe="") + fields = ( + "snapshot_id," + "tracks(items(added_at,track(id,name,duration_ms,external_ids(isrc),album(name),artists(name))),next)" + ) + payload = self._request_json( + self._PLAYLIST_URL.format(playlist_id=encoded_id), + params={"fields": fields, "limit": 100}, + ) + + snapshot_id = payload.get("snapshot_id") + if not snapshot_id: + raise RuntimeError("Spotify playlist response missing snapshot_id") + + items: list[NormalizedItem] = [] + absolute_position = 0 + tracks_page = payload.get("tracks") or {} + while True: + raw_items = tracks_page.get("items") or [] + for raw in raw_items: + track = raw.get("track") + if track is None: + absolute_position += 1 + continue + artists = track.get("artists") or [] + first_artist = artists[0].get("name") if artists and isinstance(artists[0], dict) else None + album = track.get("album") or {} + external_ids = track.get("external_ids") or {} + items.append( + { + "spotify_track_id": track.get("id"), + "position": absolute_position, + "added_at": raw.get("added_at"), + "artist": first_artist, + "title": track.get("name"), + "album": album.get("name"), + "duration_ms": track.get("duration_ms"), + "isrc": external_ids.get("isrc"), + } + ) + absolute_position += 1 + + next_url = tracks_page.get("next") + if not next_url: + break + tracks_page = self._request_json(str(next_url)) + + return str(snapshot_id), items + + async def get_liked_songs(self) -> tuple[str, list[dict[str, Any]]]: + """Fetch the authenticated user's saved tracks from Spotify. + + Returns: + A tuple of ``(snapshot_id, items)`` where ``snapshot_id`` is a deterministic + SHA-256 hash of the ordered track-id sequence, and ``items`` is an ordered + list of normalized track dicts matching playlist ingestion structure. 
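+
+        Illustrative item (keys as built below; values are placeholders):
+            {"spotify_track_id": "abc123", "position": 0,
+             "added_at": "2024-01-01T00:00:00Z", "artist": "Artist",
+             "title": "Title", "album": "Album", "duration_ms": 200000,
+             "isrc": "USXXX0000001", "artists": ["Artist"],
+             "album_id": "alb123", "album_release_date": "2024-01-01"}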
+ """ + if not self._provided_access_token: + raise RuntimeError("Spotify OAuth access_token is required for liked songs") + + fields = ( + "items(added_at,track(id,name,duration_ms,external_ids(isrc)," + "artists(name),album(id,name,release_date))),next,total" + ) + offset = 0 + limit = 50 + position = 0 + items: list[dict[str, Any]] = [] + ordered_track_ids: list[str] = [] + + while True: + payload = await _request_json_with_retry( + self, + "https://api.spotify.com/v1/me/tracks", + params={"limit": limit, "offset": offset, "fields": fields}, + ) + raw_items = payload.get("items") or [] + + for raw in raw_items: + track = raw.get("track") + if not isinstance(track, dict): + continue + + track_id = track.get("id") + if not track_id: + continue + + artists = track.get("artists") or [] + artist_names = [ + str(artist.get("name")).strip() + for artist in artists + if isinstance(artist, dict) and artist.get("name") + ] + first_artist = artist_names[0] if artist_names else None + album = track.get("album") or {} + external_ids = track.get("external_ids") or {} + + items.append( + { + "spotify_track_id": track_id, + "position": position, + "added_at": raw.get("added_at"), + "artist": first_artist, + "title": track.get("name"), + "album": album.get("name"), + "duration_ms": track.get("duration_ms"), + "isrc": external_ids.get("isrc"), + "artists": artist_names, + "album_id": album.get("id"), + "album_release_date": album.get("release_date"), + } + ) + ordered_track_ids.append(str(track_id)) + position += 1 + + next_url = payload.get("next") + if not next_url: + break + offset += limit + + snapshot_source = "\n".join(ordered_track_ids).encode("utf-8") + snapshot_id = hashlib.sha256(snapshot_source).hexdigest() + return snapshot_id, items + + async def get_saved_albums(self) -> tuple[str, list[dict[str, Any]]]: + """Fetch authenticated user's saved albums from Spotify. + + Returns: + A tuple ``(snapshot_id, items)`` where: + - ``snapshot_id`` is a deterministic SHA-256 hash of ordered album IDs. + - ``items`` is an ordered list of album dicts containing album metadata + and normalized ordered track lists suitable for album sync flows. 
+ """ + if not self._provided_access_token: + raise RuntimeError("Spotify OAuth access_token is required for saved albums") + + offset = 0 + limit = 50 + saved_albums: list[dict[str, Any]] = [] + ordered_album_ids: list[str] = [] + + while True: + payload = await _request_json_with_retry( + self, + "https://api.spotify.com/v1/me/albums", + params={ + "limit": limit, + "offset": offset, + "fields": "items(added_at,album(id,name,artists(name),release_date,total_tracks)),next,total", + }, + ) + + for entry in payload.get("items") or []: + album = entry.get("album") + if not isinstance(album, dict): + continue + album_id = str(album.get("id") or "").strip() + if not album_id: + continue + ordered_album_ids.append(album_id) + saved_albums.append( + { + "album_id": album_id, + "added_at": entry.get("added_at"), + "name": album.get("name"), + "artists": [ + str(artist.get("name")).strip() + for artist in (album.get("artists") or []) + if isinstance(artist, dict) and artist.get("name") + ], + "release_date": album.get("release_date"), + "total_tracks": album.get("total_tracks"), + } + ) + + next_url = payload.get("next") + if not next_url: + break + offset += limit + + album_items: list[dict[str, Any]] = [] + for position, album_entry in enumerate(saved_albums): + album_id = str(album_entry.get("album_id") or "").strip() + encoded_album_id = urllib.parse.quote(album_id, safe="") + album_payload = await _request_json_with_retry( + self, + f"https://api.spotify.com/v1/albums/{encoded_album_id}", + params={ + "fields": ( + "id,name,artists(name),release_date,total_tracks," + "tracks(items(id,name,duration_ms,track_number,disc_number,artists(name),external_ids(isrc)),next)" + ) + }, + ) + + album_name = album_payload.get("name") or album_entry.get("name") + album_artists = [ + str(artist.get("name")).strip() + for artist in (album_payload.get("artists") or []) + if isinstance(artist, dict) and artist.get("name") + ] + tracks_page = album_payload.get("tracks") or {} + tracks: list[dict[str, Any]] = [] + track_position = 0 + while True: + for raw_track in tracks_page.get("items") or []: + if not isinstance(raw_track, dict): + continue + track_id = raw_track.get("id") + if not track_id: + continue + artists = raw_track.get("artists") or [] + first_artist = ( + artists[0].get("name") + if artists and isinstance(artists[0], dict) + else (album_artists[0] if album_artists else None) + ) + external_ids = raw_track.get("external_ids") or {} + tracks.append( + { + "spotify_track_id": track_id, + "position": track_position, + "artist": first_artist, + "title": raw_track.get("name"), + "album": album_name, + "duration_ms": raw_track.get("duration_ms"), + "isrc": external_ids.get("isrc"), + "track_num": raw_track.get("track_number"), + "disc_num": raw_track.get("disc_number"), + } + ) + track_position += 1 + + next_tracks_url = tracks_page.get("next") + if not next_tracks_url: + break + tracks_page = await _request_json_with_retry(self, str(next_tracks_url)) + + album_items.append( + { + "album_id": album_id, + "position": position, + "added_at": album_entry.get("added_at"), + "name": album_name, + "artist": album_artists[0] if album_artists else None, + "artists": album_artists, + "release_date": album_payload.get("release_date") or album_entry.get("release_date"), + "total_tracks": album_payload.get("total_tracks") or album_entry.get("total_tracks"), + "tracks": tracks, + } + ) + + snapshot_source = "\n".join(ordered_album_ids).encode("utf-8") + snapshot_id = hashlib.sha256(snapshot_source).hexdigest() + return 
snapshot_id, album_items + + async def get_user_playlists(self) -> tuple[str, list[dict[str, Any]]]: + """Fetch authenticated user's playlists from Spotify. + + Returns: + A tuple ``(snapshot_id, items)`` where: + - ``snapshot_id`` is a deterministic SHA-256 hash of ordered playlist IDs. + - ``items`` is an ordered list of normalized playlist dicts with + keys: ``id``, ``name``, and ``track_count``. + """ + if not self._provided_access_token: + raise RuntimeError("Spotify OAuth access_token is required for user playlists") + + offset = 0 + limit = 50 + ordered_playlist_ids: list[str] = [] + items: list[dict[str, Any]] = [] + + while True: + payload = await _request_json_with_retry( + self, + "https://api.spotify.com/v1/me/playlists", + params={ + "limit": limit, + "offset": offset, + "fields": "items(id,name,tracks(total)),next,total", + }, + ) + raw_items = payload.get("items") or [] + for raw in raw_items: + if not isinstance(raw, dict): + continue + playlist_id = str(raw.get("id") or "").strip() + if not playlist_id: + continue + ordered_playlist_ids.append(playlist_id) + tracks = raw.get("tracks") or {} + items.append( + { + "id": playlist_id, + "name": raw.get("name"), + "track_count": int(tracks.get("total") or 0), + } + ) + + next_url = payload.get("next") + if not next_url: + break + offset += limit + + snapshot_source = "\n".join(ordered_playlist_ids).encode("utf-8") + snapshot_id = hashlib.sha256(snapshot_source).hexdigest() + return snapshot_id, items + + +async def _request_json_with_retry( + spotify_client: SpotifyPlaylistClient, + url: str, + params: dict[str, Any] | None = None, + *, + max_rate_limit_retries: int = 3, +) -> dict[str, Any]: + """Perform a Spotify GET request and retry on HTTP 429 responses.""" + unauthorized_retry_used = False + attempts = 0 + while True: + attempts += 1 + token = await asyncio.to_thread(spotify_client._get_access_token) + headers = {"Authorization": f"Bearer {token}"} + response = await asyncio.to_thread( + requests.get, + url, + params=params, + headers=headers, + timeout=spotify_client.timeout_sec, + ) + + if response.status_code == 401 and not unauthorized_retry_used: + unauthorized_retry_used = True + spotify_client._access_token = None + continue + + if response.status_code == 429: + if attempts > max_rate_limit_retries + 1: + raise RuntimeError("Spotify request failed (429: rate limit exceeded retries)") + retry_after = response.headers.get("Retry-After", "1") + try: + sleep_sec = float(retry_after) + except (TypeError, ValueError): + sleep_sec = 1.0 + await asyncio.sleep(max(0.0, sleep_sec)) + continue + + if response.status_code != 200: + raise RuntimeError(f"Spotify request failed ({response.status_code})") + return response.json() + + +async def get_playlist_items( + spotify_client: SpotifyPlaylistClient, + playlist_id: str, +) -> tuple[str, list[dict[str, Any]]]: + """Fetch all Spotify playlist tracks with pagination and return `(snapshot_id, ordered_items)`.""" + cleaned_playlist_id = (playlist_id or "").strip() + if not cleaned_playlist_id: + raise ValueError("playlist_id is required") + + encoded_id = urllib.parse.quote(cleaned_playlist_id, safe="") + fields = ( + "snapshot_id," + "tracks(items(added_at,track(id,name,duration_ms,external_ids(isrc),album(name),artists(name))),next)" + ) + payload = await _request_json_with_retry( + spotify_client, + spotify_client._PLAYLIST_URL.format(playlist_id=encoded_id), + params={"fields": fields, "limit": 100}, + ) + + snapshot_id = payload.get("snapshot_id") + if not snapshot_id: + raise 
RuntimeError("Spotify playlist response missing snapshot_id") + + ordered_items: list[dict[str, Any]] = [] + absolute_position = 0 + tracks_page = payload.get("tracks") or {} + while True: + raw_items = tracks_page.get("items") or [] + for raw in raw_items: + track = raw.get("track") + if track is None: + absolute_position += 1 + continue + + artists = track.get("artists") or [] + first_artist = artists[0].get("name") if artists and isinstance(artists[0], dict) else None + album = track.get("album") or {} + external_ids = track.get("external_ids") or {} + ordered_items.append( + { + "spotify_track_id": track.get("id"), + "position": absolute_position, + "added_at": raw.get("added_at"), + "artist": first_artist, + "title": track.get("name"), + "album": album.get("name"), + "duration_ms": track.get("duration_ms"), + "isrc": external_ids.get("isrc"), + } + ) + absolute_position += 1 + + next_url = tracks_page.get("next") + if not next_url: + break + tracks_page = await _request_json_with_retry(spotify_client, str(next_url)) + + return str(snapshot_id), ordered_items diff --git a/spotify/diff.py b/spotify/diff.py new file mode 100644 index 0000000..5e8c97e --- /dev/null +++ b/spotify/diff.py @@ -0,0 +1,46 @@ +"""Diff helpers for Spotify playlist snapshots.""" + +from __future__ import annotations + +from collections import defaultdict, deque + +def diff_playlist(prev: list[dict], curr: list[dict]) -> dict[str, list[dict]]: + """Return duplicate-aware `added`, `removed`, and `moved` playlist items.""" + prev_occurrences: dict[str | None, deque[int]] = defaultdict(deque) + for idx, item in enumerate(prev): + prev_occurrences[item.get("spotify_track_id")].append(idx) + + matched_curr_to_prev_index: dict[int, int] = {} + added: list[dict] = [] + for curr_idx, curr_item in enumerate(curr): + item_id = curr_item.get("spotify_track_id") + remaining = prev_occurrences.get(item_id) + if remaining: + matched_curr_to_prev_index[curr_idx] = remaining.popleft() + else: + added.append(curr_item) + + matched_prev_indices = set(matched_curr_to_prev_index.values()) + removed: list[dict] = [ + prev[prev_idx] for prev_idx in range(len(prev)) if prev_idx not in matched_prev_indices + ] + + moved: list[dict] = [] + for curr_idx, curr_item in enumerate(curr): + prev_idx = matched_curr_to_prev_index.get(curr_idx) + if prev_idx is None: + continue + prev_item = prev[prev_idx] + prev_pos = int(prev_item.get("position", prev_idx)) + curr_pos = int(curr_item.get("position", curr_idx)) + if prev_pos != curr_pos: + moved.append( + { + "spotify_track_id": curr_item.get("spotify_track_id"), + "from_position": prev_pos, + "to_position": curr_pos, + "item": curr_item, + } + ) + + return {"added": added, "removed": removed, "moved": moved} diff --git a/spotify/oauth_client.py b/spotify/oauth_client.py new file mode 100644 index 0000000..cd0c481 --- /dev/null +++ b/spotify/oauth_client.py @@ -0,0 +1,53 @@ +"""Spotify OAuth client helpers.""" + +from __future__ import annotations + +import os +import time +from urllib.parse import urlencode + +import requests + +SPOTIFY_AUTH_URL = "https://accounts.spotify.com/authorize" +SPOTIFY_TOKEN_URL = "https://accounts.spotify.com/api/token" + + +def build_auth_url(client_id: str, redirect_uri: str, scope: str, state: str) -> str: + """Build Spotify authorization URL.""" + params = { + "client_id": client_id, + "response_type": "code", + "redirect_uri": redirect_uri, + "scope": scope, + "state": state, + } + return f"{SPOTIFY_AUTH_URL}?{urlencode(params)}" + + +def refresh_access_token( + 
client_id: str, + client_secret: str, + refresh_token: str, +) -> dict: + """Exchange refresh token for a new Spotify access token payload. + + Returns: + Parsed JSON token response from Spotify. + + Raises: + Exception: When request fails or response code is non-200. + """ + response = requests.post( + SPOTIFY_TOKEN_URL, + data={ + "grant_type": "refresh_token", + "refresh_token": refresh_token, + "client_id": client_id, + "client_secret": client_secret, + }, + timeout=20, + ) + if response.status_code != 200: + detail = (response.text or "").strip() or f"status={response.status_code}" + raise Exception(f"spotify refresh failed: {detail}") + return response.json() diff --git a/spotify/oauth_store.py b/spotify/oauth_store.py new file mode 100644 index 0000000..20fa7f6 --- /dev/null +++ b/spotify/oauth_store.py @@ -0,0 +1,182 @@ +"""SQLite persistence for optional Spotify OAuth tokens.""" + +from __future__ import annotations + +import sqlite3 +import time +from dataclasses import dataclass +from datetime import datetime, timezone +from pathlib import Path +from typing import Optional + +from spotify.oauth_client import refresh_access_token + + +@dataclass +class SpotifyOAuthToken: + access_token: str + refresh_token: str + expires_at: int # epoch seconds + scope: str + + +class SpotifyOAuthStore: + """Single-row SQLite storage for Spotify OAuth tokens.""" + + def __init__(self, db_path: Path): + self.db_path = Path(db_path) + self._ensure_table() + + def _connect(self) -> sqlite3.Connection: + conn = sqlite3.connect(str(self.db_path), check_same_thread=False, timeout=30) + conn.row_factory = sqlite3.Row + return conn + + def _ensure_table(self): + """Create token table when it does not already exist.""" + conn = self._connect() + try: + cur = conn.cursor() + cur.execute( + """ + CREATE TABLE IF NOT EXISTS spotify_oauth_tokens ( + id INTEGER PRIMARY KEY, + access_token TEXT NOT NULL, + refresh_token TEXT NOT NULL, + expires_at INTEGER NOT NULL, + scope TEXT NOT NULL, + updated_at TEXT NOT NULL + ) + """ + ) + conn.commit() + finally: + conn.close() + + def save(self, token: SpotifyOAuthToken) -> None: + """Upsert a single token row using fixed key ``id=1``.""" + updated_at = datetime.now(timezone.utc).isoformat() + conn = self._connect() + try: + cur = conn.cursor() + cur.execute( + """ + INSERT INTO spotify_oauth_tokens (id, access_token, refresh_token, expires_at, scope, updated_at) + VALUES (1, ?, ?, ?, ?, ?) 
+ ON CONFLICT(id) DO UPDATE SET + access_token=excluded.access_token, + refresh_token=excluded.refresh_token, + expires_at=excluded.expires_at, + scope=excluded.scope, + updated_at=excluded.updated_at + """, + ( + token.access_token, + token.refresh_token, + int(token.expires_at), + token.scope, + updated_at, + ), + ) + conn.commit() + finally: + conn.close() + + def load(self) -> Optional[SpotifyOAuthToken]: + """Load token from row ``id=1``; return ``None`` when absent.""" + conn = self._connect() + try: + cur = conn.cursor() + cur.execute( + """ + SELECT access_token, refresh_token, expires_at, scope + FROM spotify_oauth_tokens + WHERE id=1 + LIMIT 1 + """ + ) + row = cur.fetchone() + if not row: + return None + return SpotifyOAuthToken( + access_token=str(row["access_token"]), + refresh_token=str(row["refresh_token"]), + expires_at=int(row["expires_at"]), + scope=str(row["scope"]), + ) + finally: + conn.close() + + def clear(self) -> None: + """Delete stored token row.""" + conn = self._connect() + try: + cur = conn.cursor() + cur.execute("DELETE FROM spotify_oauth_tokens WHERE id=1") + conn.commit() + finally: + conn.close() + + def get_valid_token( + self, + client_id: str, + client_secret: str, + config: Optional[dict] = None, + ) -> Optional[SpotifyOAuthToken]: + """Return a valid token, refreshing and persisting it when expired. + + Behavior: + - If no token is stored, return ``None``. + - If token is not expired, return as-is. + - If expired, attempt refresh and persist updated token. + - If refresh fails, clear stored token and return ``None``. + """ + token = self.load() + if token is None: + return None + + now = int(time.time()) + if int(token.expires_at) > now: + return token + + try: + payload = refresh_access_token( + client_id=client_id, + client_secret=client_secret, + refresh_token=token.refresh_token, + ) + new_access_token = str(payload.get("access_token") or "").strip() + expires_in = payload.get("expires_in") + if not new_access_token or expires_in is None: + raise ValueError("refresh payload missing access_token or expires_in") + refreshed = SpotifyOAuthToken( + access_token=new_access_token, + refresh_token=str(payload.get("refresh_token") or token.refresh_token), + expires_at=now + int(expires_in), + scope=str(payload.get("scope") or token.scope), + ) + self.save(refreshed) + return refreshed + except Exception: + self.clear() + telegram_cfg = (config or {}).get("telegram") if isinstance(config, dict) else None + if isinstance(telegram_cfg, dict) and bool(telegram_cfg.get("enabled")): + try: + send_telegram_message( + config, + "Spotify OAuth token expired and refresh failed. Reconnect required.", + ) + except Exception: + # Notification path is best-effort only. 
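+                    # send_telegram_message (defined below in this module) already
+                    # swallows its own errors and returns False, so this is a final guard.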
+ pass + return None + + +def send_telegram_message(config: Optional[dict], message: str) -> bool: + """Best-effort Telegram notification hook for OAuth lifecycle events.""" + try: + from engine.core import telegram_notify + + return bool(telegram_notify(config or {}, message)) + except Exception: + return False diff --git a/spotify/resolve.py b/spotify/resolve.py new file mode 100644 index 0000000..b1e2000 --- /dev/null +++ b/spotify/resolve.py @@ -0,0 +1,196 @@ +"""Spotify resolution stubs.""" + +from __future__ import annotations + +import logging +from typing import Any + +_LOG = logging.getLogger(__name__) + +_SOURCE_PRIORITY = ["youtube_music", "youtube", "soundcloud", "bandcamp"] + + +def log_resolution(spotify_id: str, best_candidate: dict, score: float, reason: str) -> None: + """Log a structured Spotify resolver decision. + + Example logging configuration: + ```python + import logging + + logging.basicConfig( + level=logging.INFO, + format="%(asctime)s %(levelname)s %(name)s %(message)s", + ) + ``` + """ + media_url = (best_candidate or {}).get("media_url") + _LOG.info( + "resolver track_id=%s best_match=%s score=%s reason=%s", + spotify_id, + media_url, + score, + reason, + ) + + +def score_search_candidates(candidates: list[dict], spotify_track: dict) -> dict: + """Return the best candidate using deterministic title/artist/duration scoring. + + Scoring behavior: + - Title match: candidates whose `title` matches the Spotify track title are + preferred. Match is case-insensitive and whitespace-normalized. + - Artist match: candidates whose `artist` (or `artist_detected`) matches the + Spotify track artist are preferred with the same normalization rules. + - Duration proximity: candidates with duration closest to the Spotify track + are preferred. Duration tolerance is +/- 3 seconds (higher preference), + then increasing absolute difference. + + Tie-breaking strategy: + - If multiple candidates have the same score tuple, source order is used. + Lower index in `_SOURCE_PRIORITY` wins. + - If source priority is also equal, original list order is preserved. + + Expected candidate fields: + - `title` + - `artist` or `artist_detected` + - `duration` (seconds) or `duration_sec` or `duration_ms` + - `source` + + The returned value is the selected candidate dictionary. If `candidates` is + empty, an empty dictionary is returned. 
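+
+    Worked example (illustrative): given the Spotify track
+    ``{"title": "One More Time", "artist": "Daft Punk", "duration_ms": 320000}``
+    and two candidates matching on title and artist with ``duration`` 320, one
+    with ``source="youtube"`` and one with ``source="youtube_music"``, both
+    score ``(1, 1, 1, 0)`` and the ``youtube_music`` candidate wins on
+    ``_SOURCE_PRIORITY`` rank.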
+ """ + if not candidates: + return {} + + expected_title = _normalize_text(spotify_track.get("title") or spotify_track.get("name")) + expected_artist = _normalize_text(spotify_track.get("artist")) + expected_duration_sec = _to_seconds(spotify_track) + + scored: list[tuple[tuple[int, int, int, int], int, dict]] = [] + for idx, candidate in enumerate(candidates): + candidate_title = _normalize_text(candidate.get("title")) + candidate_artist = _normalize_text(candidate.get("artist") or candidate.get("artist_detected")) + candidate_duration_sec = _to_seconds(candidate) + + title_exact = int(bool(expected_title and candidate_title == expected_title)) + artist_exact = int(bool(expected_artist and candidate_artist == expected_artist)) + + if expected_duration_sec is None or candidate_duration_sec is None: + duration_delta = 10**9 + else: + duration_delta = abs(candidate_duration_sec - expected_duration_sec) + within_tolerance = int(duration_delta <= 3) + + source_rank = _source_rank(candidate.get("source")) + score_tuple = (title_exact, artist_exact, within_tolerance, -duration_delta) + scored.append((score_tuple, source_rank, candidate)) + + # Stable sort ensures original order for identical score + source rank. + scored.sort(key=lambda item: (item[0], item[1]), reverse=True) + return scored[0][2] + + +async def execute_search(search_service, query: str) -> list[dict]: + """Run an async search and return normalized result dictionaries. + + This helper calls `search_service.search(query)`, catches/logs search + failures, and returns a normalized `list[dict]` where every item contains: + `media_url`, `title`, `duration`, `source_id`, and `extra`. + """ + try: + raw_results = await search_service.search(query) + except Exception: + _LOG.exception("Search execution failed for query=%r", query) + return [] + + if not isinstance(raw_results, list): + return [] + + normalized: list[dict] = [] + for item in raw_results: + if not isinstance(item, dict): + continue + normalized.append( + { + "media_url": item.get("media_url"), + "title": item.get("title"), + "duration": item.get("duration"), + "source_id": item.get("source_id"), + "extra": item.get("extra"), + } + ) + return normalized + + +async def resolve_spotify_track(spotify_track: dict, search_service) -> dict: + """Resolve a Spotify track dictionary into the best available media candidate. + + This function builds a deterministic query from Spotify artist/title, runs + async search execution, scores candidates, and returns the best candidate. + If no candidates are returned, it returns an empty dictionary. + """ + artist = str(spotify_track.get("artist") or "").strip() + title = str(spotify_track.get("title") or spotify_track.get("name") or "").strip() + query = f"{artist} - {title} official audio".strip() + _LOG.info("Resolving Spotify track using query=%r", query) + + results = await execute_search(search_service, query) + if not results: + _LOG.info("No search results for query=%r", query) + return {} + + # `score_search_candidates` expects `source`, while execute_search output + # uses `source_id`; map for deterministic tie-breaking compatibility. + scoring_results = [ + {**candidate, "source": candidate.get("source_id")} for candidate in results + ] + best = score_search_candidates(scoring_results, spotify_track) + if not best: + _LOG.info("No candidate selected for query=%r", query) + return {} + + # Preserve the execute_search output key shape. 
+ best_out = { + "media_url": best.get("media_url"), + "title": best.get("title"), + "duration": best.get("duration"), + "source_id": best.get("source_id"), + "extra": best.get("extra"), + } + _LOG.info( + "Resolved Spotify track query=%r source_id=%r media_url=%r", + query, + best_out.get("source_id"), + best_out.get("media_url"), + ) + return best_out + + +def _normalize_text(value: Any) -> str: + if value is None: + return "" + return " ".join(str(value).casefold().strip().split()) + + +def _to_seconds(data: dict) -> int | None: + if "duration_ms" in data and data.get("duration_ms") is not None: + try: + return int(round(float(data["duration_ms"]) / 1000.0)) + except (TypeError, ValueError): + return None + for key in ("duration", "duration_sec"): + if data.get(key) is None: + continue + try: + return int(round(float(data[key]))) + except (TypeError, ValueError): + return None + return None + + +def _source_rank(source: Any) -> int: + src = _normalize_text(source) + try: + return len(_SOURCE_PRIORITY) - _SOURCE_PRIORITY.index(src) + except ValueError: + return 0 diff --git a/spotify/search_queries.py b/spotify/search_queries.py new file mode 100644 index 0000000..8f2563c --- /dev/null +++ b/spotify/search_queries.py @@ -0,0 +1,51 @@ +"""Deterministic search-query builders for Spotify track lookups.""" + +from __future__ import annotations + + +def build_search_query(spotify_track: dict, prefer_official: bool = True) -> str: + """Build a deterministic search query in the form `Artist - Title {keywords}`. + + Behavior: + - Always starts with `Artist - Title`. + - Appends `official audio` when `prefer_official` is `True`. + - Appends `official music video` when `prefer_official` is `False`. + + Examples: + - `build_search_query({"artist": "Daft Punk", "title": "One More Time"})` + -> `"Daft Punk - One More Time official audio"` + - `build_search_query({"artist": "Daft Punk", "title": "One More Time"}, prefer_official=False)` + -> `"Daft Punk - One More Time official music video"` + """ + track = spotify_track or {} + artist = _extract_artist(track) or "Unknown Artist" + title = _extract_title(track) or "Unknown Title" + keywords = "official audio" if prefer_official else "official music video" + return f"{artist} - {title} {keywords}" + + +def _extract_artist(track: dict) -> str | None: + artists = track.get("artists") + if isinstance(artists, list): + names = [] + for entry in artists: + if isinstance(entry, dict): + name = entry.get("name") + else: + name = entry + if isinstance(name, str) and name.strip(): + names.append(name.strip()) + if names: + return ", ".join(names) + artist = track.get("artist") + if isinstance(artist, str) and artist.strip(): + return artist.strip() + return None + + +def _extract_title(track: dict) -> str | None: + for key in ("title", "name", "track"): + value = track.get(key) + if isinstance(value, str) and value.strip(): + return value.strip() + return None diff --git a/tests/test_album_consistency.py b/tests/test_album_consistency.py new file mode 100644 index 0000000..626c020 --- /dev/null +++ b/tests/test_album_consistency.py @@ -0,0 +1,52 @@ +from __future__ import annotations + +from metadata.normalize import normalize_music_metadata +from metadata.types import MusicMetadata + + +def test_album_download_metadata_normalization_consistency() -> None: + track_three = MusicMetadata( + title="Song Three - Topic", + artist="Main Artist", + album="Album Name", + album_artist="Main Artist", + track_num=3, + disc_num=1, + date="2024/07/11", + genre="Pop", + 
isrc="USAAA1111113", + ) + track_three.album_artist = "" + + tracks = [ + MusicMetadata( + title="Song One (Official Audio)", + artist="Main Artist", + album="Album Name", + album_artist="Main Artist", + track_num=1, + disc_num=1, + date="2024-07", + genre="Pop", + isrc="USAAA1111111", + ), + MusicMetadata( + title="Song Two [HD]", + artist="Main Artist", + album="Album Name", + album_artist="Main Artist, Guest Artist", + track_num=2, + disc_num=1, + date="2024", + genre="Pop", + isrc="USAAA1111112", + ), + track_three, + ] + + normalized = [normalize_music_metadata(track) for track in tracks] + + assert {track.album_artist for track in normalized} == {"Main Artist"} + assert [track.title for track in normalized] == ["Song One", "Song Two", "Song Three"] + assert [track.date for track in normalized] == ["2024", "2024", "2024-07-11"] + assert [track.track_num for track in normalized] == [1, 2, 3] diff --git a/tests/test_api_intent_execute.py b/tests/test_api_intent_execute.py new file mode 100644 index 0000000..b9c01c9 --- /dev/null +++ b/tests/test_api_intent_execute.py @@ -0,0 +1,67 @@ +from __future__ import annotations + +import importlib +import sys + +import pytest + +fastapi = pytest.importorskip("fastapi") +from fastapi.testclient import TestClient + + +def _build_client(monkeypatch) -> TestClient: + monkeypatch.setattr(sys, "version_info", (3, 11, 0, "final", 0), raising=False) + monkeypatch.setattr(sys, "version", "3.11.9", raising=False) + sys.modules.pop("api.main", None) + module = importlib.import_module("api.main") + module.app.router.on_startup.clear() + module.app.router.on_shutdown.clear() + return TestClient(module.app) + + +def test_api_intent_execute_delegates_to_dispatcher(monkeypatch) -> None: + client = _build_client(monkeypatch) + + expected = { + "status": "accepted", + "intent_type": "spotify_album", + "identifier": "album-123", + "message": "album sync completed", + "enqueued_count": 7, + } + + async def _fake_dispatch_intent(*, intent_type, identifier, config, db, queue, spotify_client): + assert intent_type == "spotify_album" + assert identifier == "album-123" + assert config is not None + assert db is not None + assert queue is not None + assert spotify_client is not None + return expected + + monkeypatch.setattr("api.main.dispatch_intent", _fake_dispatch_intent) + + response = client.post( + "/api/intent/execute", + json={ + "intent_type": "spotify_album", + "identifier": "album-123", + }, + ) + + assert response.status_code == 200 + assert response.json() == expected + + +def test_api_intent_execute_invalid_intent_type_returns_400(monkeypatch) -> None: + client = _build_client(monkeypatch) + + response = client.post( + "/api/intent/execute", + json={ + "intent_type": "invalid_intent", + "identifier": "abc", + }, + ) + + assert response.status_code == 400 diff --git a/tests/test_canonical_resolver.py b/tests/test_canonical_resolver.py new file mode 100644 index 0000000..80f5a53 --- /dev/null +++ b/tests/test_canonical_resolver.py @@ -0,0 +1,134 @@ +from __future__ import annotations + +from metadata.canonical import CanonicalMetadataResolver + + +def test_canonical_resolver_prefers_musicbrainz_first(monkeypatch) -> None: + calls = {"mb": 0, "sp": 0} + + class _MB: + def __init__(self, *, min_confidence=0.70): + pass + + def resolve_track(self, artist, track, *, album=None): + calls["mb"] += 1 + return {"provider": "musicbrainz", "artist": artist, "track": track} + + def resolve_album(self, artist, album): + return None + + class _SP: + def __init__(self, 
**kwargs): + pass + + def resolve_track(self, artist, track, album=None): + calls["sp"] += 1 + return {"provider": "spotify", "artist": artist, "track": track} + + def resolve_album(self, artist, album): + return None + + monkeypatch.setattr("metadata.canonical.MusicBrainzMetadataProvider", _MB) + monkeypatch.setattr("metadata.canonical.SpotifyMetadataProvider", _SP) + monkeypatch.setattr("metadata.canonical._validate_spotify_premium", lambda _token: True) + + resolver = CanonicalMetadataResolver( + config={ + "spotify": { + "client_id": "id", + "client_secret": "secret", + "oauth_access_token": "oauth-token", + } + } + ) + out = resolver.resolve_track("Artist", "Track") + + assert out["provider"] == "musicbrainz" + assert calls["mb"] == 1 + assert calls["sp"] == 0 + + +def test_canonical_resolver_does_not_use_spotify_without_premium_validated_oauth(monkeypatch) -> None: + calls = {"sp": 0} + + class _MB: + def __init__(self, *, min_confidence=0.70): + pass + + def resolve_track(self, artist, track, *, album=None): + return None + + def resolve_album(self, artist, album): + return None + + class _SP: + def __init__(self, **kwargs): + pass + + def resolve_track(self, artist, track, album=None): + calls["sp"] += 1 + return {"provider": "spotify", "artist": artist, "track": track} + + def resolve_album(self, artist, album): + return None + + monkeypatch.setattr("metadata.canonical.MusicBrainzMetadataProvider", _MB) + monkeypatch.setattr("metadata.canonical.SpotifyMetadataProvider", _SP) + monkeypatch.setattr("metadata.canonical._validate_spotify_premium", lambda _token: False) + + resolver = CanonicalMetadataResolver( + config={ + "spotify": { + "client_id": "id", + "client_secret": "secret", + "oauth_access_token": "oauth-token", + } + } + ) + out = resolver.resolve_track("Artist", "Track") + + assert out is None + assert calls["sp"] == 0 + + +def test_canonical_resolver_uses_spotify_fallback_when_oauth_and_premium_valid(monkeypatch) -> None: + calls = {"sp": 0} + + class _MB: + def __init__(self, *, min_confidence=0.70): + pass + + def resolve_track(self, artist, track, *, album=None): + return None + + def resolve_album(self, artist, album): + return None + + class _SP: + def __init__(self, **kwargs): + pass + + def resolve_track(self, artist, track, album=None): + calls["sp"] += 1 + return {"provider": "spotify", "artist": artist, "track": track} + + def resolve_album(self, artist, album): + return None + + monkeypatch.setattr("metadata.canonical.MusicBrainzMetadataProvider", _MB) + monkeypatch.setattr("metadata.canonical.SpotifyMetadataProvider", _SP) + monkeypatch.setattr("metadata.canonical._validate_spotify_premium", lambda _token: True) + + resolver = CanonicalMetadataResolver( + config={ + "spotify": { + "client_id": "id", + "client_secret": "secret", + "oauth_access_token": "oauth-token", + } + } + ) + out = resolver.resolve_track("Artist", "Track") + + assert out["provider"] == "spotify" + assert calls["sp"] == 1 diff --git a/tests/test_canonical_resolver_behavior.py b/tests/test_canonical_resolver_behavior.py new file mode 100644 index 0000000..8929328 --- /dev/null +++ b/tests/test_canonical_resolver_behavior.py @@ -0,0 +1,195 @@ +from __future__ import annotations + +import pytest + +pytest.importorskip("google.auth") + +from metadata.canonical import CanonicalMetadataResolver + + +def test_musicbrainz_only_resolution_without_spotify(monkeypatch) -> None: + calls = {"mb": 0, "sp": 0} + + class _MockMusicBrainzService: + def __init__(self, *, min_confidence=0.70): + 
self.min_confidence = min_confidence + + def resolve_track(self, artist, track, *, album=None): + calls["mb"] += 1 + return { + "provider": "musicbrainz", + "artist": artist, + "title": track, + "album": album, + "musicbrainz_recording_id": "mb-rec-1", + } + + def resolve_album(self, artist, album): + return None + + class _MockSpotifyProvider: + def __init__(self, **kwargs): + pass + + def resolve_track(self, artist, track, album=None): + calls["sp"] += 1 + return {"provider": "spotify", "artist": artist, "title": track} + + def resolve_album(self, artist, album): + return None + + monkeypatch.setattr("metadata.canonical.MusicBrainzMetadataProvider", _MockMusicBrainzService) + monkeypatch.setattr("metadata.canonical.SpotifyMetadataProvider", _MockSpotifyProvider) + monkeypatch.setattr("metadata.canonical._validate_spotify_premium", lambda _token: True) + + resolver = CanonicalMetadataResolver(config={}) + resolved = resolver.resolve_track("Artist Name", "Song Name", album="Album Name") + + assert resolved is not None + assert resolved["provider"] == "musicbrainz" + assert resolved["musicbrainz_recording_id"] == "mb-rec-1" + assert calls["mb"] == 1 + assert calls["sp"] == 0 + + +def test_spotify_fallback_only_when_oauth_and_premium_present(monkeypatch) -> None: + calls = {"mb": 0, "sp": 0} + + class _MockMusicBrainzService: + def __init__(self, *, min_confidence=0.70): + self.min_confidence = min_confidence + + def resolve_track(self, artist, track, *, album=None): + calls["mb"] += 1 + return None + + def resolve_album(self, artist, album): + return None + + class _MockSpotifyProvider: + def __init__(self, **kwargs): + pass + + def resolve_track(self, artist, track, album=None): + calls["sp"] += 1 + return { + "provider": "spotify", + "artist": artist, + "title": track, + "album": album, + "spotify_id": "sp-track-1", + } + + def resolve_album(self, artist, album): + return None + + monkeypatch.setattr("metadata.canonical.MusicBrainzMetadataProvider", _MockMusicBrainzService) + monkeypatch.setattr("metadata.canonical.SpotifyMetadataProvider", _MockSpotifyProvider) + monkeypatch.setattr("metadata.canonical._validate_spotify_premium", lambda _token: True) + + resolver = CanonicalMetadataResolver( + config={ + "spotify": { + "client_id": "client-id", + "client_secret": "client-secret", + "oauth_access_token": "oauth-token", + } + } + ) + resolved = resolver.resolve_track("Artist Name", "Song Name", album="Album Name") + + assert resolved is not None + assert resolved["provider"] == "spotify" + assert resolved["spotify_id"] == "sp-track-1" + assert calls["mb"] == 1 + assert calls["sp"] == 1 + + +def test_spotify_fallback_rejected_when_oauth_missing(monkeypatch) -> None: + calls = {"mb": 0, "sp": 0} + + class _MockMusicBrainzService: + def __init__(self, *, min_confidence=0.70): + self.min_confidence = min_confidence + + def resolve_track(self, artist, track, *, album=None): + calls["mb"] += 1 + return None + + def resolve_album(self, artist, album): + return None + + class _MockSpotifyProvider: + def __init__(self, **kwargs): + pass + + def resolve_track(self, artist, track, album=None): + calls["sp"] += 1 + return {"provider": "spotify", "artist": artist, "title": track} + + def resolve_album(self, artist, album): + return None + + monkeypatch.setattr("metadata.canonical.MusicBrainzMetadataProvider", _MockMusicBrainzService) + monkeypatch.setattr("metadata.canonical.SpotifyMetadataProvider", _MockSpotifyProvider) + monkeypatch.setattr("metadata.canonical._validate_spotify_premium", lambda 
_token: True) + + resolver = CanonicalMetadataResolver( + config={ + "spotify": { + "client_id": "client-id", + "client_secret": "client-secret", + "oauth_access_token": "", + } + } + ) + resolved = resolver.resolve_track("Artist Name", "Song Name", album="Album Name") + + assert resolved is None + assert calls["mb"] == 1 + assert calls["sp"] == 0 + + +def test_spotify_fallback_rejected_when_non_premium(monkeypatch) -> None: + calls = {"mb": 0, "sp": 0} + + class _MockMusicBrainzService: + def __init__(self, *, min_confidence=0.70): + self.min_confidence = min_confidence + + def resolve_track(self, artist, track, *, album=None): + calls["mb"] += 1 + return None + + def resolve_album(self, artist, album): + return None + + class _MockSpotifyProvider: + def __init__(self, **kwargs): + pass + + def resolve_track(self, artist, track, album=None): + calls["sp"] += 1 + return {"provider": "spotify", "artist": artist, "title": track} + + def resolve_album(self, artist, album): + return None + + monkeypatch.setattr("metadata.canonical.MusicBrainzMetadataProvider", _MockMusicBrainzService) + monkeypatch.setattr("metadata.canonical.SpotifyMetadataProvider", _MockSpotifyProvider) + monkeypatch.setattr("metadata.canonical._validate_spotify_premium", lambda _token: False) + + resolver = CanonicalMetadataResolver( + config={ + "spotify": { + "client_id": "client-id", + "client_secret": "client-secret", + "oauth_access_token": "oauth-token", + } + } + ) + resolved = resolver.resolve_track("Artist Name", "Song Name", album="Album Name") + + assert resolved is None + assert calls["mb"] == 1 + assert calls["sp"] == 0 diff --git a/tests/test_db_migration_channel_id.py b/tests/test_db_migration_channel_id.py new file mode 100644 index 0000000..18c3a3b --- /dev/null +++ b/tests/test_db_migration_channel_id.py @@ -0,0 +1,132 @@ +from __future__ import annotations + +import sqlite3 +import sys +from pathlib import Path +from types import ModuleType +from types import SimpleNamespace + + +engine_pkg = ModuleType("engine") +engine_pkg.__path__ = [str(Path(__file__).resolve().parents[1] / "engine")] +sys.modules.setdefault("engine", engine_pkg) + +metadata_pkg = ModuleType("metadata") +metadata_pkg.__path__ = [str(Path(__file__).resolve().parents[1] / "metadata")] +sys.modules.setdefault("metadata", metadata_pkg) +metadata_queue_mod = ModuleType("metadata.queue") +setattr(metadata_queue_mod, "enqueue_metadata", lambda *_args, **_kwargs: None) +sys.modules.setdefault("metadata.queue", metadata_queue_mod) + +google_mod = ModuleType("google") +google_auth_mod = ModuleType("google.auth") +google_auth_ex_mod = ModuleType("google.auth.exceptions") +setattr(google_auth_ex_mod, "RefreshError", Exception) +sys.modules.setdefault("google", google_mod) +sys.modules.setdefault("google.auth", google_auth_mod) +sys.modules.setdefault("google.auth.exceptions", google_auth_ex_mod) + +from engine.job_queue import ensure_download_history_table, record_download_history + + +def test_migration_adds_channel_id_and_preserves_history(tmp_path) -> None: + db_path = tmp_path / "legacy_history.db" + conn = sqlite3.connect(str(db_path)) + try: + cur = conn.cursor() + # Legacy schema without channel_id column. 
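+        # ensure_download_history_table() must add the column in place and keep existing rows intact.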
+ cur.execute( + """ + CREATE TABLE download_history ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + video_id TEXT, + title TEXT, + filename TEXT, + destination TEXT, + source TEXT, + status TEXT, + created_at TEXT, + completed_at TEXT, + file_size_bytes INTEGER, + input_url TEXT, + canonical_url TEXT, + external_id TEXT + ) + """ + ) + cur.execute( + """ + INSERT INTO download_history ( + video_id, title, filename, destination, source, status, + created_at, completed_at, file_size_bytes, + input_url, canonical_url, external_id + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + """, + ( + "legacy123", + "Legacy Title", + "Legacy File.mp3", + str(tmp_path), + "youtube", + "completed", + "2026-01-01T00:00:00Z", + "2026-01-01T00:00:00Z", + 1234, + "https://www.youtube.com/watch?v=legacy123", + "https://www.youtube.com/watch?v=legacy123", + "legacy123", + ), + ) + conn.commit() + + ensure_download_history_table(conn) + + columns = [row[1] for row in conn.execute("PRAGMA table_info(download_history)").fetchall()] + assert "channel_id" in columns + + # Legacy row remains and channel_id is NULL/default. + legacy_row = conn.execute( + "SELECT video_id, title, channel_id FROM download_history WHERE video_id = ?", + ("legacy123",), + ).fetchone() + assert legacy_row == ("legacy123", "Legacy Title", None) + + preserved_count = conn.execute("SELECT COUNT(*) FROM download_history").fetchone()[0] + assert preserved_count == 1 + finally: + conn.close() + + # New writes should populate channel_id. + output_file = tmp_path / "new-track.mp3" + output_file.write_bytes(b"audio-bytes") + job = SimpleNamespace( + id="job-new-1", + url="https://www.youtube.com/watch?v=new123", + input_url="https://www.youtube.com/watch?v=new123", + external_id="new123", + source="youtube", + canonical_url="https://www.youtube.com/watch?v=new123", + origin="single", + origin_id="", + ) + meta = {"video_id": "new123", "title": "New Track", "channel_id": "UC_NEW_CHANNEL"} + record_download_history(str(db_path), job, str(output_file), meta=meta) + + conn = sqlite3.connect(str(db_path)) + try: + total_count = conn.execute("SELECT COUNT(*) FROM download_history").fetchone()[0] + assert total_count == 2 + + new_row = conn.execute( + """ + SELECT video_id, external_id, channel_id + FROM download_history + WHERE video_id = ? 
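+            -- newest row wins if the same video_id was ever recorded twice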
+ ORDER BY id DESC + LIMIT 1 + """, + ("new123",), + ).fetchone() + assert new_row == ("new123", "new123", "UC_NEW_CHANNEL") + finally: + conn.close() diff --git a/tests/test_download_jobs_indexes.py b/tests/test_download_jobs_indexes.py new file mode 100644 index 0000000..8b466b6 --- /dev/null +++ b/tests/test_download_jobs_indexes.py @@ -0,0 +1,42 @@ +from __future__ import annotations + +import sqlite3 +import sys +from pathlib import Path +from types import ModuleType + + +engine_pkg = ModuleType("engine") +engine_pkg.__path__ = [str(Path(__file__).resolve().parents[1] / "engine")] +sys.modules.setdefault("engine", engine_pkg) + +metadata_pkg = ModuleType("metadata") +metadata_pkg.__path__ = [str(Path(__file__).resolve().parents[1] / "metadata")] +sys.modules.setdefault("metadata", metadata_pkg) +metadata_queue_mod = ModuleType("metadata.queue") +setattr(metadata_queue_mod, "enqueue_metadata", lambda *_args, **_kwargs: None) +sys.modules.setdefault("metadata.queue", metadata_queue_mod) + +google_mod = ModuleType("google") +google_auth_mod = ModuleType("google.auth") +google_auth_ex_mod = ModuleType("google.auth.exceptions") +setattr(google_auth_ex_mod, "RefreshError", Exception) +sys.modules.setdefault("google", google_mod) +sys.modules.setdefault("google.auth", google_auth_mod) +sys.modules.setdefault("google.auth.exceptions", google_auth_ex_mod) + +from engine.job_queue import ensure_download_jobs_table + + +def test_download_job_duplicate_detection_indexes_exist(tmp_path) -> None: + db_path = tmp_path / "jobs.sqlite" + conn = sqlite3.connect(str(db_path)) + try: + ensure_download_jobs_table(conn) + rows = conn.execute("PRAGMA index_list(download_jobs)").fetchall() + finally: + conn.close() + + index_names = {row[1] for row in rows} + assert "idx_download_jobs_canonical_dest_status_created" in index_names + assert "idx_download_jobs_url_dest_status_created" in index_names diff --git a/tests/test_downloaded_tracks.py b/tests/test_downloaded_tracks.py new file mode 100644 index 0000000..4c6b6f8 --- /dev/null +++ b/tests/test_downloaded_tracks.py @@ -0,0 +1,29 @@ +from db.downloaded_tracks import has_downloaded_isrc, record_downloaded_track + + +def test_record_downloaded_track_and_lookup(tmp_path, monkeypatch) -> None: + db_path = tmp_path / "downloaded_tracks.sqlite" + monkeypatch.setenv("RETREIVR_DB_PATH", str(db_path)) + + record_downloaded_track( + playlist_id="playlist-a", + isrc="USABC1234567", + file_path="/music/playlist-a/01 - Track.mp3", + ) + + assert has_downloaded_isrc("playlist-a", "USABC1234567") is True + + +def test_has_downloaded_isrc_false_for_other_playlist_or_isrc(tmp_path, monkeypatch) -> None: + db_path = tmp_path / "downloaded_tracks.sqlite" + monkeypatch.setenv("RETREIVR_DB_PATH", str(db_path)) + + record_downloaded_track( + playlist_id="playlist-a", + isrc="USABC1234567", + file_path="/music/playlist-a/01 - Track.mp3", + ) + + assert has_downloaded_isrc("playlist-b", "USABC1234567") is False + assert has_downloaded_isrc("playlist-a", "USZZZ9999999") is False + diff --git a/tests/test_full_validation_pipeline.py b/tests/test_full_validation_pipeline.py new file mode 100644 index 0000000..81024b7 --- /dev/null +++ b/tests/test_full_validation_pipeline.py @@ -0,0 +1,96 @@ +from __future__ import annotations + +import asyncio +from types import SimpleNamespace +from typing import Any + +from db.downloaded_tracks import has_downloaded_isrc +from download.worker import DownloadWorker, JOB_STATUS_VALIDATION_FAILED +from scheduler.jobs.spotify_playlist_watch import 
enqueue_spotify_track + + +class _MockQueue: + def __init__(self) -> None: + self.items: list[dict[str, Any]] = [] + + def enqueue(self, payload: dict[str, Any]) -> None: + self.items.append(payload) + + +class _MockSearchService: + def __init__(self, results: list[dict[str, Any]]) -> None: + self._results = results + self.calls: list[str] = [] + + async def search(self, query: str) -> list[dict[str, Any]]: + self.calls.append(query) + return list(self._results) + + +class _MockDownloader: + def download(self, media_url: str) -> str: + return "/tmp/resolved-track.mp3" + + +def test_full_pipeline_validation_failure_does_not_enable_idempotent_skip(tmp_path, monkeypatch) -> None: + db_path = tmp_path / "validation_pipeline.sqlite" + monkeypatch.setenv("RETREIVR_DB_PATH", str(db_path)) + + # Force worker validation to fail before tagging/recording. + monkeypatch.setattr("download.worker.validate_duration", lambda *_args, **_kwargs: False) + monkeypatch.setattr("download.worker.get_media_duration", lambda _path: 1.0) + monkeypatch.setattr("download.worker.tag_file", lambda _path, _metadata: None) + + # Ensure queued payload metadata includes expected_ms for validation gating. + monkeypatch.setattr( + "scheduler.jobs.spotify_playlist_watch.merge_metadata", + lambda spotify_data, _mb, _ytdlp: { + "title": spotify_data.get("title", "Unknown"), + "artist": spotify_data.get("artist", "Unknown"), + "album": "Unknown", + "album_artist": spotify_data.get("artist", "Unknown"), + "track_num": 1, + "disc_num": 1, + "date": "Unknown", + "genre": "Unknown", + "isrc": spotify_data.get("isrc"), + "expected_ms": spotify_data.get("duration_ms"), + }, + ) + + playlist_id = "playlist-validation" + spotify_track = { + "spotify_track_id": "sp-track-1", + "artist": "Artist One", + "title": "Track One", + "isrc": "USVAL1234567", + "duration_ms": 200_000, + } + search_service = _MockSearchService( + [ + { + "media_url": "https://example.test/track-one", + "title": "Track One", + "duration": 200, + "source_id": "youtube_music", + "extra": {}, + } + ] + ) + queue = _MockQueue() + + # First pass: enqueue + worker processing with forced validation failure. + asyncio.run(enqueue_spotify_track(queue, spotify_track, search_service, playlist_id)) + assert len(queue.items) == 1 + + worker = DownloadWorker(_MockDownloader()) + job = SimpleNamespace(payload=queue.items[0]) + worker.process_job(job) + assert job.status == JOB_STATUS_VALIDATION_FAILED + assert has_downloaded_isrc(playlist_id, spotify_track["isrc"]) is False + first_pass_calls = list(search_service.calls) + + # Second pass: should not be skipped because ISRC was never recorded. 
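+    # A failed validation leaves no ISRC record, so the track must stay eligible for retry.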
+    asyncio.run(enqueue_spotify_track(queue, spotify_track, search_service, playlist_id))
+    assert len(queue.items) == 2
+    assert len(search_service.calls) == len(first_pass_calls) + 1
diff --git a/tests/test_idempotency_full_pipeline.py b/tests/test_idempotency_full_pipeline.py
new file mode 100644
index 0000000..0389489
--- /dev/null
+++ b/tests/test_idempotency_full_pipeline.py
@@ -0,0 +1,109 @@
+from __future__ import annotations
+
+import asyncio
+import tempfile
+from pathlib import Path
+from types import SimpleNamespace
+from typing import Any
+
+from db.downloaded_tracks import has_downloaded_isrc
+from download.worker import DownloadWorker
+from scheduler.jobs.spotify_playlist_watch import enqueue_spotify_track
+
+
+class _MockQueue:
+    def __init__(self) -> None:
+        self.items: list[dict[str, Any]] = []
+
+    def enqueue(self, payload: dict[str, Any]) -> None:
+        self.items.append(payload)
+
+
+class _MockSearchService:
+    def __init__(self, results_by_query: dict[str, list[dict[str, Any]]]) -> None:
+        self._results_by_query = results_by_query
+        self.calls: list[str] = []
+
+    async def search(self, query: str) -> list[dict[str, Any]]:
+        self.calls.append(query)
+        return list(self._results_by_query.get(query, []))
+
+
+class _MockDownloader:
+    def __init__(self) -> None:
+        self.calls: list[str] = []
+
+    def download(self, media_url: str) -> str:
+        self.calls.append(media_url)
+        tail = media_url.rsplit("/", 1)[-1] or "track"
+        # Write into the platform temp dir rather than a hard-coded /tmp so the mock stays portable.
+        path = Path(tempfile.gettempdir()) / f"{tail}.mp3"
+        path.write_bytes(b"mock-audio")
+        return str(path)
+
+
+def test_idempotency_full_pipeline_two_tracks(tmp_path, monkeypatch) -> None:
+    db_path = tmp_path / "idempotency.sqlite"
+    monkeypatch.setenv("RETREIVR_DB_PATH", str(db_path))
+    monkeypatch.setattr("download.worker.tag_file", lambda _file_path, _metadata: None)
+
+    playlist_id = "playlist-42"
+    tracks = [
+        {
+            "spotify_track_id": "sp-track-1",
+            "artist": "Artist One",
+            "title": "Track One",
+            "isrc": "USAAA1111111",
+            "duration_ms": 200_000,
+        },
+        {
+            "spotify_track_id": "sp-track-2",
+            "artist": "Artist Two",
+            "title": "Track Two",
+            "isrc": "USBBB2222222",
+            "duration_ms": 210_000,
+        },
+    ]
+    search_service = _MockSearchService(
+        {
+            "Artist One - Track One official audio": [
+                {
+                    "media_url": "https://example.test/one",
+                    "title": "Track One",
+                    "duration": 200,
+                    "source_id": "youtube_music",
+                    "extra": {},
+                }
+            ],
+            "Artist Two - Track Two official audio": [
+                {
+                    "media_url": "https://example.test/two",
+                    "title": "Track Two",
+                    "duration": 210,
+                    "source_id": "youtube_music",
+                    "extra": {},
+                }
+            ],
+        }
+    )
+    queue = _MockQueue()
+
+    # First pass: enqueue and process both tracks, recording downloaded ISRCs.
+    for track in tracks:
+        asyncio.run(enqueue_spotify_track(queue, track, search_service, playlist_id))
+    assert len(queue.items) == 2
+
+    downloader = _MockDownloader()
+    worker = DownloadWorker(downloader)
+    for payload in list(queue.items):
+        worker.process_job(SimpleNamespace(payload=payload))
+
+    assert has_downloaded_isrc(playlist_id, "USAAA1111111") is True
+    assert has_downloaded_isrc(playlist_id, "USBBB2222222") is True
+    first_pass_queries = list(search_service.calls)
+    assert len(first_pass_queries) == 2
+
+    # Second pass: same playlist + ISRC should be skipped before resolve/enqueue.
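+    # has_downloaded_isrc() should short-circuit each track before any search or enqueue work.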
+ for track in tracks: + asyncio.run(enqueue_spotify_track(queue, track, search_service, playlist_id)) + + assert len(queue.items) == 2 + assert search_service.calls == first_pass_queries diff --git a/tests/test_idempotency_skip.py b/tests/test_idempotency_skip.py new file mode 100644 index 0000000..507f28b --- /dev/null +++ b/tests/test_idempotency_skip.py @@ -0,0 +1,105 @@ +from __future__ import annotations + +import asyncio +from typing import Any + +from scheduler.jobs.spotify_playlist_watch import enqueue_spotify_track + + +class _MockQueue: + def __init__(self) -> None: + self.enqueued: list[dict[str, Any]] = [] + + def enqueue(self, payload: dict[str, Any]) -> None: + self.enqueued.append(payload) + + +class _MockSearchService: + def __init__(self, results: list[dict[str, Any]]) -> None: + self._results = results + self.calls: list[str] = [] + + async def search(self, query: str) -> list[dict[str, Any]]: + self.calls.append(query) + return list(self._results) + + +def test_enqueue_spotify_track_skips_when_isrc_already_downloaded(monkeypatch) -> None: + monkeypatch.setattr( + "scheduler.jobs.spotify_playlist_watch.has_downloaded_isrc", + lambda playlist_id, isrc: True, + ) + queue = _MockQueue() + search_service = _MockSearchService( + [ + { + "media_url": "https://example.com/track", + "title": "Track One", + "duration": 210, + "source_id": "youtube_music", + "extra": {"lyrics": "la la"}, + } + ] + ) + spotify_track = { + "spotify_track_id": "sp-track-1", + "artist": "Artist One", + "title": "Track One", + "isrc": "USABC1234567", + "duration_ms": 210000, + } + + asyncio.run( + enqueue_spotify_track( + queue=queue, + spotify_track=spotify_track, + search_service=search_service, + playlist_id="playlist-a", + ) + ) + + assert queue.enqueued == [] + assert search_service.calls == [] + + +def test_enqueue_spotify_track_enqueues_when_isrc_not_downloaded(monkeypatch) -> None: + monkeypatch.setattr( + "scheduler.jobs.spotify_playlist_watch.has_downloaded_isrc", + lambda playlist_id, isrc: False, + ) + queue = _MockQueue() + search_service = _MockSearchService( + [ + { + "media_url": "https://example.com/track", + "title": "Track Two", + "duration": 205, + "source_id": "youtube_music", + "extra": {"genre": "Pop"}, + } + ] + ) + spotify_track = { + "spotify_track_id": "sp-track-2", + "artist": "Artist Two", + "title": "Track Two", + "isrc": "USZZZ9999999", + "duration_ms": 205000, + } + + asyncio.run( + enqueue_spotify_track( + queue=queue, + spotify_track=spotify_track, + search_service=search_service, + playlist_id="playlist-b", + ) + ) + + assert len(queue.enqueued) == 1 + payload = queue.enqueued[0] + assert payload["playlist_id"] == "playlist-b" + assert payload["spotify_track_id"] == "sp-track-2" + assert payload["resolved_media"]["media_url"] == "https://example.com/track" + assert search_service.calls == ["Artist Two - Track Two official audio"] + diff --git a/tests/test_integration_full_music_flow.py b/tests/test_integration_full_music_flow.py new file mode 100644 index 0000000..4ed8a04 --- /dev/null +++ b/tests/test_integration_full_music_flow.py @@ -0,0 +1,282 @@ +from __future__ import annotations + +import importlib +import json +import sqlite3 +import sys +from pathlib import Path +from types import SimpleNamespace + +import pytest + +from download.worker import DownloadWorker, JOB_STATUS_COMPLETED + +fastapi = pytest.importorskip("fastapi") +from fastapi.testclient import TestClient + + +@pytest.fixture() +def isolated_runtime(tmp_path: Path, monkeypatch): + db_path = tmp_path 
/ "integration_music.sqlite" + music_root = tmp_path / "library_root" + music_root.mkdir(parents=True, exist_ok=True) + monkeypatch.setenv("RETREIVR_DB_PATH", str(db_path)) + + return { + "tmp_path": tmp_path, + "db_path": db_path, + "music_root": music_root, + } + + +@pytest.fixture() +def api_module(monkeypatch): + monkeypatch.setattr(sys, "version_info", (3, 11, 0, "final", 0), raising=False) + monkeypatch.setattr(sys, "version", "3.11.9", raising=False) + sys.modules.pop("api.main", None) + module = importlib.import_module("api.main") + module.app.router.on_startup.clear() + module.app.router.on_shutdown.clear() + return module + + +@pytest.fixture() +def api_client(api_module) -> TestClient: + return TestClient(api_module.app) + + +def test_integration_full_music_pipeline( + isolated_runtime, + api_module, + api_client: TestClient, + monkeypatch, +) -> None: + db_path = str(isolated_runtime["db_path"]) + music_root = isolated_runtime["music_root"] + conn = sqlite3.connect(db_path) + try: + api_module.ensure_download_jobs_table(conn) + finally: + conn.close() + + store = api_module.DownloadJobStore(db_path) + api_module.app.state.worker_engine = SimpleNamespace(store=store) + api_module.app.state.search_request_overrides = {} + api_module.app.state.music_cover_art_cache = {} + + canonical_metadata = { + "title": "My Song", + "artist": "The Artist", + "album": "The Album", + "album_artist": "The Artist", + "track_num": 1, + "disc_num": 1, + "date": "2024-01-15", + "genre": "Rock", + "mbid": "mbid-track-123", + "isrc": "USABC1234567", + "lyrics": None, + "artwork": None, + } + + class _FakeMusicBrainzService: + def search_release_groups(self, query, limit=5): + return [ + { + "release_group_id": "rg-123", + "title": "The Album", + "artist_credit": "The Artist", + "first_release_date": "2024-01-15", + "primary_type": "Album", + "secondary_types": [], + "score": 99, + "track_count": 10, + } + ] + + def search_recordings(self, artist, title, *, album=None, limit=1): + return { + "recording-list": [ + { + "id": "rec-123", + "title": title, + "artist-credit": [{"artist": {"name": artist}}], + "release-list": [{"id": "rel-123", "title": album or "The Album", "date": "2024-01-15"}], + } + ] + } + + def get_release(self, release_id, *, includes=None): + return { + "release": { + "id": release_id, + "medium-list": [ + { + "track-list": [ + {"position": "1", "recording": {"id": "rec-123"}}, + ] + } + ], + } + } + + fake_mb = _FakeMusicBrainzService() + monkeypatch.setattr(api_module, "_mb_service", lambda: fake_mb) + + class _FakeSearchService: + def __init__(self, module): + self.adapters = {"youtube_music": object()} + self._module = module + + def create_search_request(self, payload): + _ = fake_mb.search_recordings( + payload.get("artist"), + payload.get("track") or payload.get("album") or "", + album=payload.get("album"), + limit=1, + ) + _ = fake_mb.get_release("rel-123", includes=["recordings"]) + self._module._IntentQueueAdapter().enqueue( + { + "media_intent": "music_track", + "artist": canonical_metadata["artist"], + "track": canonical_metadata["title"], + "album": canonical_metadata["album"], + "track_number": canonical_metadata["track_num"], + "disc_number": canonical_metadata["disc_num"], + "release_date": canonical_metadata["date"], + "duration_ms": 210000, + "playlist_id": "integration_playlist", + } + ) + return "req-integration-1" + + api_module.app.state.search_service = _FakeSearchService(api_module) + + response = api_client.post( + "/api/search/requests", + json={ + "query": 
"The Artist My Song", + "intent": "track", + "artist": "The Artist", + "track": "My Song", + "music_mode": True, + "search_only": False, + }, + ) + assert response.status_code == 200 + payload = response.json() + assert payload["request_id"] == "req-integration-1" + assert payload["music_mode"] is True + assert isinstance(payload["music_candidates"], list) + + conn = sqlite3.connect(db_path) + try: + row = conn.execute( + """ + SELECT id, source, media_intent, output_template + FROM download_jobs + ORDER BY created_at DESC + LIMIT 1 + """ + ).fetchone() + finally: + conn.close() + assert row is not None + assert row[1] == "youtube_music" + assert row[2] == "music_track" + output_template = json.loads(row[3]) if row[3] else {} + assert "spotify" not in json.dumps(output_template).lower() + + claimed = store.claim_next_job("youtube_music") + assert claimed is not None + + class _FakeDownloader: + def __init__(self, root: Path): + self._root = root + + def download(self, media_url: str) -> str: + temp_file = self._root / "tmp-yt-source-abc123.mp3" + temp_file.write_bytes(b"fake-audio") + return str(temp_file) + + captured_tags = {} + + def _capture_tag_file(path: str, metadata_obj) -> None: + captured_tags["path"] = path + captured_tags["title"] = metadata_obj.title + captured_tags["artist"] = metadata_obj.artist + captured_tags["album"] = metadata_obj.album + captured_tags["year"] = metadata_obj.date + captured_tags["mbid"] = metadata_obj.mbid + captured_tags["isrc"] = metadata_obj.isrc + + monkeypatch.setattr("download.worker.tag_file", _capture_tag_file) + + job_for_worker = SimpleNamespace( + payload={ + "playlist_id": "integration_playlist", + "music_root": str(music_root), + "resolved_media": {"media_url": claimed.url}, + "music_metadata": canonical_metadata, + } + ) + worker = DownloadWorker(_FakeDownloader(isolated_runtime["tmp_path"])) + worker_result = worker.process_job(job_for_worker) + + assert worker_result["status"] == JOB_STATUS_COMPLETED + output_file = Path(worker_result["file_path"]) + assert output_file.exists() is True + assert "spotify" not in output_file.name.lower() + assert "youtube" not in output_file.name.lower() + assert "abc123" not in output_file.name.lower() + assert output_file.name == "01 - My Song.mp3" + assert captured_tags["title"] == "My Song" + assert captured_tags["artist"] == "The Artist" + assert captured_tags["album"] == "The Album" + assert captured_tags["year"] == "2024-01-15" + assert captured_tags["mbid"] == "mbid-track-123" + assert captured_tags["isrc"] == "USABC1234567" + assert "spotify" not in json.dumps(captured_tags).lower() + + api_module.record_download_history( + db_path, + claimed, + str(output_file), + meta={ + "title": canonical_metadata["title"], + "video_id": "vid-canonical-001", + }, + ) + + conn = sqlite3.connect(db_path) + try: + history_row = conn.execute( + """ + SELECT title, filename, source, status + FROM download_history + ORDER BY id DESC + LIMIT 1 + """ + ).fetchone() + downloaded_row = conn.execute( + """ + SELECT playlist_id, isrc, file_path + FROM downloaded_music_tracks + ORDER BY id DESC + LIMIT 1 + """ + ).fetchone() + finally: + conn.close() + + assert history_row is not None + assert history_row[0] == "My Song" + assert history_row[1] == "01 - My Song.mp3" + assert history_row[2] == "youtube_music" + assert history_row[3] == "completed" + + assert downloaded_row is not None + assert downloaded_row[0] == "integration_playlist" + assert downloaded_row[1] == "USABC1234567" + assert downloaded_row[2].endswith("01 - My 
Song.mp3") diff --git a/tests/test_integration_spotify_intent.py b/tests/test_integration_spotify_intent.py new file mode 100644 index 0000000..6b821e5 --- /dev/null +++ b/tests/test_integration_spotify_intent.py @@ -0,0 +1,161 @@ +from __future__ import annotations + +import importlib +import json +import sqlite3 +import sys +from pathlib import Path +from types import SimpleNamespace + +import pytest + +fastapi = pytest.importorskip("fastapi") +from fastapi.testclient import TestClient + + +@pytest.fixture() +def api_module(monkeypatch, tmp_path: Path): + db_path = tmp_path / "spotify_intent.sqlite" + monkeypatch.setenv("RETREIVR_DB_PATH", str(db_path)) + monkeypatch.setattr(sys, "version_info", (3, 11, 0, "final", 0), raising=False) + monkeypatch.setattr(sys, "version", "3.11.9", raising=False) + sys.modules.pop("api.main", None) + module = importlib.import_module("api.main") + module.app.router.on_startup.clear() + module.app.router.on_shutdown.clear() + module.app.state.paths = SimpleNamespace(db_path=str(db_path)) + module.app.state.worker_engine = SimpleNamespace(store=module.DownloadJobStore(str(db_path))) + module.app.state.search_service = SimpleNamespace() + module.app.state.search_request_overrides = {} + module.app.state.music_cover_art_cache = {} + return module + + +@pytest.fixture() +def api_client(api_module) -> TestClient: + return TestClient(api_module.app) + + +def test_spotify_playlist_intent_ingestion_enqueues_music_track_jobs( + api_module, + api_client: TestClient, + monkeypatch, +) -> None: + class _FakeSpotifyClient: + def get_playlist_items(self, playlist_id: str): + assert playlist_id == "PL12345678" + return "snapshot-1", [ + { + "spotify_track_id": "sp-track-1", + "position": 0, + "added_at": "2026-02-17T00:00:00Z", + "artist": "Intent Artist", + "title": "Intent Song", + "album": "Intent Album", + "duration_ms": 212000, + "isrc": "USINT1234567", + } + ] + + monkeypatch.setattr(api_module, "_read_config_or_404", lambda: {"spotify_playlists": []}) + monkeypatch.setattr(api_module, "_build_spotify_client_with_optional_oauth", lambda _cfg: _FakeSpotifyClient()) + + response = api_client.post( + "/api/intent/execute", + json={"intent_type": "spotify_playlist", "identifier": "PL12345678"}, + ) + assert response.status_code == 200 + body = response.json() + assert body["status"] == "accepted" + assert body["intent_type"] == "spotify_playlist" + assert body["enqueued_count"] == 1 + + conn = sqlite3.connect(api_module.app.state.paths.db_path) + try: + row = conn.execute( + """ + SELECT origin, origin_id, media_intent, source, url, output_template + FROM download_jobs + ORDER BY created_at DESC + LIMIT 1 + """ + ).fetchone() + finally: + conn.close() + + assert row is not None + assert row[0] == "spotify_playlist" + assert row[1] == "PL12345678" + assert row[2] == "music_track" + assert row[3] == "youtube_music" + assert row[4].startswith("https://music.youtube.com/search?q=") + + output_template = json.loads(row[5]) if row[5] else {} + assert output_template.get("artist") == "Intent Artist" + assert output_template.get("track") == "Intent Song" + assert output_template.get("album") == "Intent Album" + assert output_template.get("duration_ms") == 212000 + assert "spotify" not in json.dumps(output_template).lower() + + +def test_spotify_oauth_premium_still_prefers_musicbrainz_first(monkeypatch) -> None: + from metadata.canonical import CanonicalMetadataResolver + + calls = {"mb": 0, "spotify": 0} + + class _FakeMusicBrainzProvider: + def __init__(self, *, 
min_confidence=0.70): + _ = min_confidence + + def resolve_track(self, artist, track, *, album=None): + calls["mb"] += 1 + return { + "kind": "track", + "provider": "musicbrainz", + "artist": artist, + "track": track, + "album": album, + "external_ids": {"musicbrainz_recording_id": "mbid-123"}, + } + + def resolve_album(self, artist, album): + return None + + class _FakeSpotifyProvider: + def __init__(self, **kwargs): + _ = kwargs + + def resolve_track(self, artist, track, album=None): + calls["spotify"] += 1 + return { + "kind": "track", + "provider": "spotify", + "artist": artist, + "track": track, + "album": album, + "external_ids": {"spotify_id": "sp-123"}, + } + + def resolve_album(self, artist, album): + return None + + monkeypatch.setattr("metadata.canonical.MusicBrainzMetadataProvider", _FakeMusicBrainzProvider) + monkeypatch.setattr("metadata.canonical.SpotifyMetadataProvider", _FakeSpotifyProvider) + monkeypatch.setattr("metadata.canonical._validate_spotify_premium", lambda _token: True) + + resolver = CanonicalMetadataResolver( + config={ + "spotify": { + "client_id": "client-id", + "client_secret": "client-secret", + "oauth_access_token": "premium-oauth-token", + } + } + ) + + result = resolver.resolve_track("Intent Artist", "Intent Song", album="Intent Album") + assert result is not None + assert result.get("provider") == "musicbrainz" + assert calls["mb"] == 1 + assert calls["spotify"] == 0 + assert "spotify" not in json.dumps(result).lower() diff --git a/tests/test_integration_youtube_download.py b/tests/test_integration_youtube_download.py new file mode 100644 index 0000000..582b81c --- /dev/null +++ b/tests/test_integration_youtube_download.py @@ -0,0 +1,174 @@ +from __future__ import annotations + +import importlib +import sqlite3 +import sys +from pathlib import Path +from types import SimpleNamespace + +import pytest + +fastapi = pytest.importorskip("fastapi") +from fastapi.testclient import TestClient + + +@pytest.fixture() +def api_module(monkeypatch, tmp_path: Path): + db_path = tmp_path / "youtube_integration.sqlite" + downloads_dir = tmp_path / "downloads" + downloads_dir.mkdir(parents=True, exist_ok=True) + + monkeypatch.setenv("RETREIVR_DB_PATH", str(db_path)) + monkeypatch.setattr(sys, "version_info", (3, 11, 0, "final", 0), raising=False) + monkeypatch.setattr(sys, "version", "3.11.9", raising=False) + sys.modules.pop("api.main", None) + module = importlib.import_module("api.main") + module.app.router.on_startup.clear() + module.app.router.on_shutdown.clear() + module.app.state.paths = SimpleNamespace( + db_path=str(db_path), + temp_downloads_dir=str(tmp_path / "temp"), + thumbs_dir=str(tmp_path / "thumbs"), + ) + module.app.state.run_id = None + return module, db_path, downloads_dir, tmp_path + + +@pytest.fixture() +def api_client(api_module) -> TestClient: + module, _db, _downloads, _tmp = api_module + return TestClient(module.app) + + +def test_direct_youtube_download_naming_collision_and_history_persist( + api_module, + api_client: TestClient, + monkeypatch, +) -> None: + module, db_path, downloads_dir, tmp_path = api_module + source_url = "https://www.youtube.com/watch?v=abc123xyz99" + video_id = "abc123xyz99" + channel_id = "UC_FAKE_CHANNEL_001" + + monkeypatch.setattr(module, "_read_config_or_404", lambda: {"final_format": "mp3"}) + + sanitize_calls = {"count": 0} + from engine import job_queue as jq + + original_sanitize = jq.sanitize_for_filesystem + + def _sanitize_spy(value, maxlen=180): + sanitize_calls["count"] += 1 + return 
original_sanitize(value, maxlen=maxlen) + + monkeypatch.setattr("engine.job_queue.sanitize_for_filesystem", _sanitize_spy) + + def _fake_download_with_ytdlp(*args, **kwargs): + raw_temp_output = tmp_path / "raw-ytdlp-output.mp3" + raw_temp_output.write_bytes(b"fake-mp3") + meta = { + "title": "Great Song (Official Video)", + "channel": "Artist/Channel:Name", + "upload_date": "20240201", + "video_id": video_id, + "channel_id": channel_id, + } + return str(raw_temp_output), meta + + monkeypatch.setattr(module, "download_with_ytdlp", _fake_download_with_ytdlp) + + async def _fake_start_run_with_config( + config, + *, + single_url=None, + playlist_id=None, + playlist_account=None, + playlist_mode=None, + destination=None, + final_format_override=None, + js_runtime=None, + music_mode=None, + run_source="api", + skip_downtime=False, + run_id_override=None, + now=None, + delivery_mode=None, + ): + assert single_url == source_url + assert delivery_mode == "server" + raw_temp_output_str, meta = module.download_with_ytdlp(single_url) + raw_temp_output = Path(raw_temp_output_str) + + # This naming path must strip source IDs and upload date by design. + clean_name = module.build_output_filename( + meta, + fallback_id=video_id, + ext="mp3", + template=None, + audio_mode=False, + ) + base_target = downloads_dir / clean_name + base_target.parent.mkdir(parents=True, exist_ok=True) + base_target.write_bytes(b"existing-file") + final_target = Path(module.resolve_collision_path(str(base_target))) + + module.atomic_move(str(raw_temp_output), str(final_target)) + + job = SimpleNamespace( + id="job-youtube-integration-1", + url=single_url, + input_url=single_url, + source="youtube", + external_id=video_id, + canonical_url=module.canonicalize_url("youtube", single_url, video_id), + origin="manual", + origin_id="manual", + ) + module.record_download_history( + str(db_path), + job, + str(final_target), + meta=meta, + ) + + module.app.state.run_id = "run-youtube-integration-1" + return "started", None + + monkeypatch.setattr(module, "_start_run_with_config", _fake_start_run_with_config) + + response = api_client.post( + "/api/run", + json={ + "single_url": source_url, + "delivery_mode": "server", + "final_format_override": "mp3", + }, + ) + assert response.status_code == 202 + body = response.json() + assert body["status"] == "started" + assert body["run_id"] == "run-youtube-integration-1" + + conn = sqlite3.connect(str(db_path)) + try: + row = conn.execute( + """ + SELECT video_id, channel_id, input_url, canonical_url, filename + FROM download_history + ORDER BY id DESC + LIMIT 1 + """ + ).fetchone() + finally: + conn.close() + + assert row is not None + assert row[0] == video_id + assert row[1] == channel_id + assert row[2] == source_url + assert row[3] == module.canonicalize_url("youtube", source_url, video_id) + assert row[4].endswith(" (2).mp3") + assert video_id not in row[4] + assert "20240201" not in row[4] + + assert sanitize_calls["count"] > 0 diff --git a/tests/test_intent_dispatcher.py b/tests/test_intent_dispatcher.py new file mode 100644 index 0000000..bacff3c --- /dev/null +++ b/tests/test_intent_dispatcher.py @@ -0,0 +1,161 @@ +from __future__ import annotations + +import asyncio +from typing import Any + +from api.intent_dispatcher import execute_intent + + +def test_execute_intent_spotify_artist_requires_selection() -> None: + result = asyncio.run( + execute_intent( + intent_type="spotify_artist", + identifier="artist-123", + config={}, + db=object(), + queue=object(), + spotify_client=object(), + 
) + ) + + assert result["status"] == "accepted" + assert result["intent_type"] == "spotify_artist" + assert result["identifier"] == "artist-123" + assert "selection" in result["message"].lower() + assert result["enqueued_count"] == 0 + + +def test_execute_intent_spotify_playlist_triggers_playlist_sync(monkeypatch) -> None: + calls: list[dict[str, Any]] = [] + + def _fake_playlist_watch_job(spotify_client, db, queue, playlist_id, *, playlist_name=None, config=None): + calls.append( + { + "spotify_client": spotify_client, + "db": db, + "queue": queue, + "playlist_id": playlist_id, + "playlist_name": playlist_name, + "config": config, + } + ) + return {"status": "updated", "enqueued": 2} + + monkeypatch.setattr("api.intent_dispatcher.playlist_watch_job", _fake_playlist_watch_job) + + db = object() + queue = object() + spotify_client = object() + result = asyncio.run( + execute_intent( + intent_type="spotify_playlist", + identifier="playlist-abc", + config={"spotify_playlists": []}, + db=db, + queue=queue, + spotify_client=spotify_client, + ) + ) + + assert len(calls) == 1 + assert calls[0]["playlist_id"] == "playlist-abc" + assert calls[0]["db"] is db + assert calls[0]["queue"] is queue + assert calls[0]["spotify_client"] is spotify_client + assert result["status"] == "accepted" + assert result["enqueued_count"] == 2 + + +def test_execute_intent_spotify_album_triggers_album_sync(monkeypatch) -> None: + calls: list[dict[str, Any]] = [] + + async def _fake_album_sync(album_id, config, db, queue, spotify_client): + calls.append( + { + "album_id": album_id, + "config": config, + "db": db, + "queue": queue, + "spotify_client": spotify_client, + } + ) + return { + "status": "accepted", + "intent_type": "spotify_album", + "identifier": album_id, + "message": "album sync completed", + "enqueued_count": 4, + } + + monkeypatch.setattr("api.intent_dispatcher.run_spotify_album_sync", _fake_album_sync) + + db = object() + queue = object() + spotify_client = object() + result = asyncio.run( + execute_intent( + intent_type="spotify_album", + identifier="album-xyz", + config={"search_service": object()}, + db=db, + queue=queue, + spotify_client=spotify_client, + ) + ) + + assert len(calls) == 1 + assert calls[0]["album_id"] == "album-xyz" + assert calls[0]["db"] is db + assert calls[0]["queue"] is queue + assert calls[0]["spotify_client"] is spotify_client + assert result["status"] == "accepted" + assert result["enqueued_count"] == 4 + + +def test_execute_intent_spotify_track_enqueues_once(monkeypatch) -> None: + calls: list[dict[str, Any]] = [] + + monkeypatch.setattr( + "api.intent_dispatcher._fetch_spotify_track", + lambda _spotify_client, track_id: { + "spotify_track_id": track_id, + "artist": "Artist", + "title": "Title", + "album": "Album", + "duration_ms": 123000, + "isrc": "USABC1234567", + }, + ) + + async def _fake_enqueue_spotify_track(*, queue, spotify_track, search_service, playlist_id): + calls.append( + { + "queue": queue, + "spotify_track": spotify_track, + "search_service": search_service, + "playlist_id": playlist_id, + } + ) + + monkeypatch.setattr("api.intent_dispatcher.enqueue_spotify_track", _fake_enqueue_spotify_track) + + queue = object() + search_service = object() + result = asyncio.run( + execute_intent( + intent_type="spotify_track", + identifier="track-777", + config={"search_service": search_service}, + db=object(), + queue=queue, + spotify_client=object(), + ) + ) + + assert len(calls) == 1 + assert calls[0]["queue"] is queue + assert 
calls[0]["spotify_track"]["spotify_track_id"] == "track-777" + assert calls[0]["search_service"] is search_service + assert calls[0]["playlist_id"] == "spotify_track_track-777" + assert result["status"] == "accepted" + assert result["enqueued_count"] == 1 diff --git a/tests/test_intent_execute_endpoint.py b/tests/test_intent_execute_endpoint.py new file mode 100644 index 0000000..5a7a7da --- /dev/null +++ b/tests/test_intent_execute_endpoint.py @@ -0,0 +1,51 @@ +from __future__ import annotations + +import importlib +import sys + +import pytest + +fastapi = pytest.importorskip("fastapi") +from fastapi.testclient import TestClient + + +def _build_client(monkeypatch) -> TestClient: + monkeypatch.setattr(sys, "version_info", (3, 11, 0, "final", 0), raising=False) + monkeypatch.setattr(sys, "version", "3.11.9", raising=False) + sys.modules.pop("api.main", None) + module = importlib.import_module("api.main") + module.app.router.on_startup.clear() + module.app.router.on_shutdown.clear() + return TestClient(module.app) + + +def test_intent_execute_accepts_valid_spotify_album_intent(monkeypatch) -> None: + client = _build_client(monkeypatch) + + response = client.post( + "/api/intent/execute", + json={ + "intent_type": "spotify_album", + "identifier": "1A2B3C4D5E", + }, + ) + + assert response.status_code == 200 + payload = response.json() + assert payload["status"] == "accepted" + assert payload["intent_type"] == "spotify_album" + assert payload["identifier"] == "1A2B3C4D5E" + + +def test_intent_execute_rejects_invalid_intent_type(monkeypatch) -> None: + client = _build_client(monkeypatch) + + response = client.post( + "/api/intent/execute", + json={ + "intent_type": "not_real_intent", + "identifier": "abc", + }, + ) + + assert response.status_code == 400 diff --git a/tests/test_intent_queue_adapter.py b/tests/test_intent_queue_adapter.py new file mode 100644 index 0000000..209ca80 --- /dev/null +++ b/tests/test_intent_queue_adapter.py @@ -0,0 +1,105 @@ +from __future__ import annotations + +import importlib +import sys +from types import SimpleNamespace + +import pytest + +pytest.importorskip("fastapi") + + +def _load_module(monkeypatch): + monkeypatch.setattr(sys, "version_info", (3, 11, 0, "final", 0), raising=False) + monkeypatch.setattr(sys, "version", "3.11.9", raising=False) + sys.modules.pop("api.main", None) + return importlib.import_module("api.main") + + +def test_intent_queue_adapter_enqueues_resolved_media_payload(monkeypatch) -> None: + module = _load_module(monkeypatch) + captured = [] + + class _Store: + def enqueue_job(self, **kwargs): + captured.append(kwargs) + return "job-1" + + module.app.state.worker_engine = SimpleNamespace(store=_Store()) + adapter = module._IntentQueueAdapter() + adapter.enqueue( + { + "playlist_id": "pl-1", + "spotify_track_id": "trk-1", + "resolved_media": { + "media_url": "https://example.test/audio", + "source_id": "youtube", + "duration_ms": 180000, + }, + "music_metadata": { + "title": "Song", + "artist": "Artist", + "album": "Album", + "track_num": 1, + "disc_num": 1, + "isrc": "USABC123", + }, + } + ) + + assert len(captured) == 1 + job = captured[0] + assert job["origin"] == "spotify_playlist" + assert job["origin_id"] == "pl-1" + assert job["url"] == "https://example.test/audio" + assert job["media_intent"] == "track" + assert job["media_type"] == "music" + assert job["output_template"]["track"] == "Song" + + +def test_intent_queue_adapter_converts_watch_payload_to_music_track_job(monkeypatch) -> None: + module = _load_module(monkeypatch) + 
captured = [] + + class _Store: + def enqueue_job(self, **kwargs): + captured.append(kwargs) + return "job-2" + + module.app.state.worker_engine = SimpleNamespace(store=_Store()) + adapter = module._IntentQueueAdapter() + adapter.enqueue( + { + "playlist_id": "pl-2", + "spotify_track_id": "trk-2", + "artist": "Example Artist", + "title": "Example Track", + "album": "Example Album", + "duration_ms": 205000, + } + ) + + assert len(captured) == 1 + job = captured[0] + assert job["origin"] == "spotify_playlist" + assert job["origin_id"] == "pl-2" + assert job["media_intent"] == "music_track" + assert job["source"] == "youtube_music" + assert job["url"].startswith("https://music.youtube.com/search?q=") + assert job["output_template"]["artist"] == "Example Artist" + assert job["output_template"]["track"] == "Example Track" + assert job["output_template"]["album"] == "Example Album" + + +def test_intent_queue_adapter_skips_non_searchable_payload(monkeypatch, caplog) -> None: + module = _load_module(monkeypatch) + + class _Store: + def enqueue_job(self, **kwargs): # pragma: no cover - should not be called + raise AssertionError("enqueue_job should not be called") + + module.app.state.worker_engine = SimpleNamespace(store=_Store()) + adapter = module._IntentQueueAdapter() + adapter.enqueue({"playlist_id": "pl-3"}) + + assert "no media URL or searchable artist/title available" in caplog.text diff --git a/tests/test_intent_router.py b/tests/test_intent_router.py new file mode 100644 index 0000000..08bb0fb --- /dev/null +++ b/tests/test_intent_router.py @@ -0,0 +1,45 @@ +from __future__ import annotations + +from input.intent_router import IntentType, detect_intent + + +def test_detect_spotify_album_with_query_string() -> None: + intent = detect_intent("https://open.spotify.com/album/1A2B3C4D5E?si=abc123") + assert intent.type == IntentType.SPOTIFY_ALBUM + assert intent.identifier == "1A2B3C4D5E" + + +def test_detect_spotify_playlist_url() -> None: + intent = detect_intent("https://open.spotify.com/playlist/37i9dQZF1DX1lVhptIYRda") + assert intent.type == IntentType.SPOTIFY_PLAYLIST + assert intent.identifier == "37i9dQZF1DX1lVhptIYRda" + + +def test_detect_spotify_track_url() -> None: + intent = detect_intent("https://open.spotify.com/track/6rqhFgbbKwnb9MLmUQDhG6") + assert intent.type == IntentType.SPOTIFY_TRACK + assert intent.identifier == "6rqhFgbbKwnb9MLmUQDhG6" + + +def test_detect_spotify_artist_url() -> None: + intent = detect_intent("https://open.spotify.com/artist/1dfeR4HaWDbWqFHLkxsg1d") + assert intent.type == IntentType.SPOTIFY_ARTIST + assert intent.identifier == "1dfeR4HaWDbWqFHLkxsg1d" + + +def test_detect_youtube_playlist_url() -> None: + intent = detect_intent("https://www.youtube.com/watch?v=abc123&list=PL1234567890XYZ") + assert intent.type == IntentType.YOUTUBE_PLAYLIST + assert intent.identifier == "PL1234567890XYZ" + + +def test_detect_plain_text_search() -> None: + intent = detect_intent("best synthwave tracks") + assert intent.type == IntentType.SEARCH + assert intent.identifier == "best synthwave tracks" + + +def test_detect_malformed_url_falls_back_to_search() -> None: + intent = detect_intent("https://open.spotify.com/album") + assert intent.type == IntentType.SEARCH + assert intent.identifier == "https://open.spotify.com/album" diff --git a/tests/test_job_queue_naming.py b/tests/test_job_queue_naming.py new file mode 100644 index 0000000..36d7109 --- /dev/null +++ b/tests/test_job_queue_naming.py @@ -0,0 +1,102 @@ +from __future__ import annotations + +import sqlite3 
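+# The ModuleType stubs below keep engine.job_queue importable without its optional runtime dependencies.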
+import sys
+from pathlib import Path
+from types import SimpleNamespace
+from types import ModuleType
+
+engine_pkg = ModuleType("engine")
+engine_pkg.__path__ = [str(Path(__file__).resolve().parents[1] / "engine")]
+sys.modules.setdefault("engine", engine_pkg)
+
+metadata_pkg = ModuleType("metadata")
+metadata_pkg.__path__ = [str(Path(__file__).resolve().parents[1] / "metadata")]
+sys.modules.setdefault("metadata", metadata_pkg)
+metadata_queue_mod = ModuleType("metadata.queue")
+setattr(metadata_queue_mod, "enqueue_metadata", lambda *_args, **_kwargs: None)
+sys.modules.setdefault("metadata.queue", metadata_queue_mod)
+
+google_mod = ModuleType("google")
+google_auth_mod = ModuleType("google.auth")
+google_auth_ex_mod = ModuleType("google.auth.exceptions")
+setattr(google_auth_ex_mod, "RefreshError", Exception)
+sys.modules.setdefault("google", google_mod)
+sys.modules.setdefault("google.auth", google_auth_mod)
+sys.modules.setdefault("google.auth.exceptions", google_auth_ex_mod)
+
+from engine.job_queue import (
+    build_output_filename,
+    record_download_history,
+    resolve_collision_path,
+)
+
+
+def test_video_filename_omits_id_and_upload_date() -> None:
+    name = build_output_filename(
+        {
+            "title": "Example Track",
+            "channel": "Artist Channel",
+            "upload_date": "20240131",
+        },
+        "abc12345",
+        "mp4",
+        None,
+        False,
+    )
+    assert name == "Example Track - Artist Channel.mp4"
+    assert "abc12345" not in name
+    assert "20240131" not in name
+
+
+def test_template_id_and_date_tokens_are_blank() -> None:
+    name = build_output_filename(
+        {"title": "Song", "channel": "Artist", "upload_date": "20240131"},
+        "vid123",
+        "webm",
+        "%(title)s__%(id)s__%(upload_date)s.%(ext)s",
+        False,
+    )
+    assert name == "Song____.webm"
+    assert "vid123" not in name
+    assert "20240131" not in name
+
+
+def test_collision_path_appends_counter(tmp_path) -> None:
+    first = tmp_path / "Track.mp3"
+    first.write_bytes(b"a")
+    second = tmp_path / "Track (2).mp3"
+    second.write_bytes(b"b")
+
+    resolved = resolve_collision_path(str(first))
+    assert resolved == str(tmp_path / "Track (3).mp3")
+
+
+def test_record_download_history_persists_channel_id(tmp_path) -> None:
+    db_path = str(tmp_path / "downloads.db")
+    file_path = tmp_path / "out.mp3"
+    file_path.write_bytes(b"audio")
+
+    job = SimpleNamespace(
+        id="job1",
+        url="https://www.youtube.com/watch?v=xyz987",
+        input_url="https://www.youtube.com/watch?v=xyz987",
+        external_id="xyz987",
+        source="youtube",
+        canonical_url="https://www.youtube.com/watch?v=xyz987",
+        origin="single",
+        origin_id="",
+    )
+    meta = {"video_id": "xyz987", "title": "Song", "channel_id": "UC123456"}
+
+    record_download_history(db_path, job, str(file_path), meta=meta)
+
+    conn = sqlite3.connect(db_path)
+    try:
+        row = conn.execute(
+            "SELECT video_id, external_id, channel_id FROM download_history ORDER BY id DESC LIMIT 1"
+        ).fetchone()
+    finally:
+        conn.close()
+
+    assert row == ("xyz987", "xyz987", "UC123456")
diff --git a/tests/test_liked_songs_virtual_playlist.py b/tests/test_liked_songs_virtual_playlist.py
new file mode 100644
index 0000000..932ec6c
--- /dev/null
+++ b/tests/test_liked_songs_virtual_playlist.py
@@ -0,0 +1,30 @@
+from __future__ import annotations
+
+from scheduler.jobs.spotify_playlist_watch import SPOTIFY_LIKED_SONGS_PLAYLIST_ID
+from playlist.rebuild import rebuild_playlist_from_tracks
+
+
+def test_liked_songs_virtual_playlist_rebuild_creates_m3u(tmp_path) -> None:
+    assert SPOTIFY_LIKED_SONGS_PLAYLIST_ID == "__spotify_liked_songs__"
+
+    music_root = tmp_path / "Music"
+    playlist_root = tmp_path / "Playlists"
+    track_one = music_root / "Artist A" / "Album A (2020)" / "Disc 1" / "01 - Song One.mp3"
+    track_two = music_root / "Artist B" / "Album B (2021)" / "Disc 1" / "02 - Song Two.mp3"
+    track_one.parent.mkdir(parents=True, exist_ok=True)
+    track_two.parent.mkdir(parents=True, exist_ok=True)
+    track_one.write_bytes(b"a")
+    track_two.write_bytes(b"b")
+
+    result_path = rebuild_playlist_from_tracks(
+        playlist_name="Spotify - Liked Songs",
+        playlist_root=playlist_root,
+        music_root=music_root,
+        track_file_paths=[str(track_one), str(track_two)],
+    )
+
+    assert result_path.exists() is True
+    assert result_path.name == "Spotify - Liked Songs.m3u"
+    content = result_path.read_text(encoding="utf-8")
+    assert "Artist A/Album A (2020)/Disc 1/01 - Song One.mp3" in content
+    assert "Artist B/Album B (2021)/Disc 1/02 - Song Two.mp3" in content
diff --git a/tests/test_media_validation.py b/tests/test_media_validation.py
new file mode 100644
index 0000000..f9aca71
--- /dev/null
+++ b/tests/test_media_validation.py
@@ -0,0 +1,47 @@
+from __future__ import annotations
+
+import shutil
+import wave
+from pathlib import Path
+
+import pytest
+
+from media.validation import validate_duration
+
+_FFPROBE_AVAILABLE = shutil.which("ffprobe") is not None
+
+
+def _write_silent_wav(path: Path, duration_seconds: float, sample_rate: int = 44_100) -> None:
+    nframes = int(duration_seconds * sample_rate)
+    with wave.open(str(path), "wb") as wav_file:
+        wav_file.setnchannels(1)
+        wav_file.setsampwidth(2)
+        wav_file.setframerate(sample_rate)
+        wav_file.writeframes(b"\x00\x00" * nframes)
+
+
+@pytest.mark.skipif(not _FFPROBE_AVAILABLE, reason="ffprobe is required for duration probe tests")
+def test_validate_duration_returns_true_within_tolerance(tmp_path: Path) -> None:
+    audio_path = tmp_path / "short.wav"
+    _write_silent_wav(audio_path, duration_seconds=1.0)
+
+    assert validate_duration(str(audio_path), expected_ms=1_000, tolerance_seconds=0.5) is True
+
+
+@pytest.mark.skipif(not _FFPROBE_AVAILABLE, reason="ffprobe is required for duration probe tests")
+def test_validate_duration_returns_false_when_duration_differs_significantly(tmp_path: Path) -> None:
+    audio_path = tmp_path / "short.wav"
+    _write_silent_wav(audio_path, duration_seconds=1.0)
+
+    assert validate_duration(str(audio_path), expected_ms=10_000, tolerance_seconds=1.0) is False
+
+
+def test_validate_duration_returns_false_when_probe_fails(monkeypatch, tmp_path: Path) -> None:
+    audio_path = tmp_path / "missing-or-invalid.wav"
+
+    def _raise_probe_error(_file_path: str) -> float:
+        raise RuntimeError("ffprobe failed")
+
+    monkeypatch.setattr("media.validation.get_media_duration", _raise_probe_error)
+
+    assert validate_duration(str(audio_path), expected_ms=1_000, tolerance_seconds=0.5) is False
diff --git a/tests/test_metadata_naming.py b/tests/test_metadata_naming.py
new file mode 100644
index 0000000..933142c
--- /dev/null
+++ b/tests/test_metadata_naming.py
@@ -0,0 +1,22 @@
+from __future__ import annotations
+
+from metadata.naming import build_album_directory, build_track_filename, sanitize_component
+
+
+def test_sanitize_component_strips_unsafe_chars_and_trailing_dot_space() -> None:
+    assert sanitize_component(' A<>:"/\\|?*rtist. ') == "Artist"
+
+
+def test_build_track_filename_zero_pads_track_number() -> None:
+    filename = build_track_filename({"title": "Song", "track_num": 7, "ext": "mp3"})
+    assert filename == "07 - Song.mp3"
+
+
+def test_build_track_filename_missing_track_number_defaults_to_00() -> None:
+    filename = build_track_filename({"title": "Song", "track_num": None, "ext": "flac"})
+    assert filename == "00 - Song.flac"
+
+
+def test_build_album_directory_missing_album_and_year_fields() -> None:
+    album_dir = build_album_directory({"album": None, "date": ""})
+    assert album_dir == "Unknown Album"
diff --git a/tests/test_metadata_normalization.py b/tests/test_metadata_normalization.py
new file mode 100644
index 0000000..03cd102
--- /dev/null
+++ b/tests/test_metadata_normalization.py
@@ -0,0 +1,97 @@
+from __future__ import annotations
+
+import unicodedata
+
+from metadata.normalize import normalize_music_metadata
+from metadata.types import MusicMetadata
+
+
+def _metadata(**overrides) -> MusicMetadata:
+    base = {
+        "title": "Song",
+        "artist": "Artist",
+        "album": "Album",
+        "album_artist": "Album Artist",
+        "track_num": 1,
+        "disc_num": 1,
+        "date": "2024",
+        "genre": "Pop",
+        "isrc": "USABC1234567",
+        "mbid": "mbid-1",
+        "artwork": None,
+        "lyrics": None,
+    }
+    base.update(overrides)
+    return MusicMetadata(**base)
+
+
+def test_title_cleanup_rules() -> None:
+    metadata = _metadata(title=" Song Name (Official Audio) - Topic - ")
+
+    normalized = normalize_music_metadata(metadata)
+
+    assert normalized.title == "Song Name"
+
+
+def test_featured_artist_moves_into_title() -> None:
+    metadata = _metadata(artist="Main Artist ft. Guest Artist", title="My Track")
+
+    normalized = normalize_music_metadata(metadata)
+
+    assert normalized.artist == "Main Artist"
+    assert normalized.title == "My Track (feat. Guest Artist)"
+
+
+def test_album_artist_fallback_and_primary_artist_grouping() -> None:
+    missing_album_artist = _metadata(artist="Lead Artist")
+    missing_album_artist.album_artist = ""
+
+    normalized_missing = normalize_music_metadata(missing_album_artist)
+    assert normalized_missing.album_artist == "Lead Artist"
+
+    multi_album_artist = _metadata(album_artist="Lead Artist, Guest One, Guest Two")
+    normalized_multi = normalize_music_metadata(multi_album_artist)
+    assert normalized_multi.album_artist == "Lead Artist"
+
+
+def test_date_normalization_cases() -> None:
+    year_only = normalize_music_metadata(_metadata(date="2024"))
+    year_month = normalize_music_metadata(_metadata(date="2024-07"))
+    full_date = normalize_music_metadata(_metadata(date="2024-07-09"))
+    invalid_with_year = normalize_music_metadata(_metadata(date="2024-99-99"))
+    invalid_no_year = normalize_music_metadata(_metadata(date="Unknown date string"))
+
+    assert year_only.date == "2024"
+    assert year_month.date == "2024"
+    assert full_date.date == "2024-07-09"
+    assert invalid_with_year.date == "2024"
+    assert invalid_no_year.date == "Unknown date string"
+
+
+def test_genre_deduplication_and_casing_from_first_occurrence() -> None:
+    metadata = _metadata(genre=" Pop ; pop, ROCK, Rock , Jazz ")
+
+    normalized = normalize_music_metadata(metadata)
+
+    assert normalized.genre == "Pop, ROCK, Jazz"
+
+
+def test_unicode_normalization_nfc_applies_to_core_grouping_fields() -> None:
+    decomposed = "Cafe\u0301"
+    metadata = _metadata(
+        title=f"{decomposed} Song",
+        artist=decomposed,
+        album=decomposed,
+        album_artist=decomposed,
+        genre=decomposed,
+    )
+
+    normalized = normalize_music_metadata(metadata)
+
+    expected = unicodedata.normalize("NFC", decomposed)
+    assert expected == "Café"
+    assert normalized.title == "Café Song"
+    assert normalized.artist == "Café"
+    assert normalized.album == "Café"
+    assert normalized.album_artist == "Café"
+    assert normalized.genre == "Café"
diff --git a/tests/test_metadata_tagging.py b/tests/test_metadata_tagging.py
new file mode 100644
index 0000000..6566feb
--- /dev/null
+++ b/tests/test_metadata_tagging.py
@@ -0,0 +1,145 @@
+from __future__ import annotations
+
+from pathlib import Path
+
+from metadata.tagging_service import tag_file
+from metadata.types import MusicMetadata
+
+
+def _metadata(*, artwork: bytes | None = b"img", lyrics: str | None = "line one") -> MusicMetadata:
+    return MusicMetadata(
+        title="Test Title",
+        artist="Test Artist",
+        album="Test Album",
+        album_artist="Test Artist",
+        track_num=1,
+        disc_num=1,
+        date="2026-02-16",
+        genre="Rock",
+        isrc="USABC1234567",
+        mbid="mbid-123",
+        artwork=artwork,
+        lyrics=lyrics,
+    )
+
+
+def test_tag_file_writes_expected_id3_frames(monkeypatch, tmp_path: Path) -> None:
+    path = tmp_path / "track.mp3"
+    path.write_bytes(b"")
+
+    import metadata.tagger as tagging
+
+    class FakeAudio:
+        def __init__(self) -> None:
+            self.frames = []
+            self.saved = None
+
+        def add(self, frame) -> None:
+            self.frames.append(frame)
+
+        def getall(self, frame_id: str):
+            return [frame for frame in self.frames if frame.name == frame_id]
+
+        def delall(self, frame_id: str) -> None:
+            self.frames = [frame for frame in self.frames if frame.name != frame_id]
+
+        def save(self, save_path: str) -> None:
+            self.saved = save_path
+
+    class FakeFrame:
+        def __init__(self, name: str, **kwargs) -> None:
+            self.name = name
+            for key, value in kwargs.items():
+                setattr(self, key, value)
+
+    def _factory(name: str):
+        def _ctor(**kwargs):
+            return FakeFrame(name, **kwargs)
+
+        return _ctor
+
+    audio = FakeAudio()
+
+    monkeypatch.setattr(tagging, "ID3", lambda: audio)
+    monkeypatch.setattr(tagging, "TIT2", _factory("TIT2"))
+    monkeypatch.setattr(tagging, "TPE1", _factory("TPE1"))
+    monkeypatch.setattr(tagging, "TALB", _factory("TALB"))
+    monkeypatch.setattr(tagging, "TPE2", _factory("TPE2"))
+    monkeypatch.setattr(tagging, "TRCK", _factory("TRCK"))
+    monkeypatch.setattr(tagging, "TDRC", _factory("TDRC"))
+    monkeypatch.setattr(tagging, "TCON", _factory("TCON"))
+    monkeypatch.setattr(tagging, "TXXX", _factory("TXXX"))
+    monkeypatch.setattr(tagging, "USLT", _factory("USLT"))
+    monkeypatch.setattr(tagging, "APIC", _factory("APIC"))
+
+    tag_file(str(path), _metadata())
+
+    by_name = {frame.name: frame for frame in audio.frames}
+    assert by_name["TIT2"].text[0] == "Test Title"
+    assert by_name["TPE1"].text[0] == "Test Artist"
+    assert by_name["TALB"].text[0] == "Test Album"
+    assert by_name["TRCK"].text[0] == "1"
+    txxx_descs = {frame.desc for frame in audio.frames if frame.name == "TXXX"}
+    assert "SOURCE" in txxx_descs
+    assert "MBID" in txxx_descs
+    assert by_name["USLT"].text == "line one"
+    assert by_name["APIC"].data == b"img"
+    assert audio.saved == str(path)
+
+
+def test_tag_file_lyrics_and_artwork_fail_non_fatally(monkeypatch, tmp_path: Path) -> None:
+    path = tmp_path / "track.mp3"
+    path.write_bytes(b"")
+
+    import metadata.tagger as tagging
+
+    class FakeAudio:
+        def __init__(self) -> None:
+            self.frames = []
+            self.saved = False
+
+        def add(self, frame) -> None:
+            self.frames.append(frame)
+
+        def getall(self, frame_id: str):
+            return [frame for frame in self.frames if frame.name == frame_id]
+
+        def delall(self, frame_id: str) -> None:
+            self.frames = [frame for frame in self.frames if frame.name != frame_id]
+
+        def save(self, save_path: str) -> None:
+            self.saved = True
+
+    class FakeFrame:
+        def __init__(self, name: str, **kwargs) -> None:
+            self.name = name
+            for key, value in kwargs.items():
+                setattr(self, key, value)
+
+    def _factory(name: str):
+        def _ctor(**kwargs):
+            return FakeFrame(name, **kwargs)
+
+        return _ctor
+
+    def _raise(*_args, **_kwargs):
+        raise RuntimeError("frame failure")
+
+    audio = FakeAudio()
+    monkeypatch.setattr(tagging, "ID3", lambda: audio)
+    monkeypatch.setattr(tagging, "TIT2", _factory("TIT2"))
+    monkeypatch.setattr(tagging, "TPE1", _factory("TPE1"))
+    monkeypatch.setattr(tagging, "TALB", _factory("TALB"))
+    monkeypatch.setattr(tagging, "TPE2", _factory("TPE2"))
+    monkeypatch.setattr(tagging, "TRCK", _factory("TRCK"))
+    monkeypatch.setattr(tagging, "TDRC", _factory("TDRC"))
+    monkeypatch.setattr(tagging, "TCON", _factory("TCON"))
+    monkeypatch.setattr(tagging, "TXXX", _factory("TXXX"))
+    monkeypatch.setattr(tagging, "USLT", _raise)
+    monkeypatch.setattr(tagging, "APIC", _raise)
+
+    # Should not raise even when lyrics/artwork frame construction fails.
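+    # (USLT and APIC are patched to raise; the save must still complete with the core frames intact.)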
+    tag_file(str(path), _metadata())
+
+    assert audio.saved is True
+    assert any(frame.name == "TIT2" for frame in audio.frames)
diff --git a/tests/test_music_album_candidates_endpoints.py b/tests/test_music_album_candidates_endpoints.py
new file mode 100644
index 0000000..0ac163c
--- /dev/null
+++ b/tests/test_music_album_candidates_endpoints.py
@@ -0,0 +1,74 @@
+from __future__ import annotations
+
+import importlib
+import sys
+
+import pytest
+
+fastapi = pytest.importorskip("fastapi")
+from fastapi.testclient import TestClient
+
+
+def _build_client(monkeypatch) -> TestClient:
+    monkeypatch.setattr(sys, "version_info", (3, 11, 0, "final", 0), raising=False)
+    monkeypatch.setattr(sys, "version", "3.11.9", raising=False)
+    sys.modules.pop("api.main", None)
+    module = importlib.import_module("api.main")
+    module.app.router.on_startup.clear()
+    module.app.router.on_shutdown.clear()
+    return TestClient(module.app)
+
+
+def test_album_candidates_endpoints_share_canonical_search(monkeypatch) -> None:
+    client = _build_client(monkeypatch)
+
+    payload = [
+        {
+            "release_group_id": "rg-1",
+            "title": "Album One",
+            "artist_credit": "Artist One",
+            "first_release_date": "2001-01-01",
+            "primary_type": "Album",
+            "secondary_types": [],
+            "score": 95,
+            "track_count": None,
+        }
+    ]
+
+    monkeypatch.setattr("api.main._search_music_album_candidates", lambda query, limit: payload if query else [])
+
+    get_resp = client.get("/api/music/albums/search", params={"q": "Album One", "limit": 10})
+    assert get_resp.status_code == 200
+    assert get_resp.json() == payload
+
+    post_resp = client.post("/api/music/album/candidates", json={"query": "Album One"})
+    assert post_resp.status_code == 200
+    assert post_resp.json() == {
+        "status": "ok",
+        "album_candidates": [
+            {
+                "album_id": "rg-1",
+                "title": "Album One",
+                "artist": "Artist One",
+                "first_released": "2001-01-01",
+                "track_count": None,
+                "score": 95,
+            }
+        ],
+    }
+
+
+def test_album_search_get_empty_query_returns_empty_list(monkeypatch) -> None:
+    client = _build_client(monkeypatch)
+    monkeypatch.setattr("api.main._search_music_album_candidates", lambda query, limit: [])
+    resp = client.get("/api/music/albums/search", params={"q": ""})
+    assert resp.status_code == 200
+    assert resp.json() == []
+
+
+def test_album_candidates_post_empty_query_returns_legacy_envelope(monkeypatch) -> None:
+    client = _build_client(monkeypatch)
+    monkeypatch.setattr("api.main._search_music_album_candidates", lambda query, limit: [])
+    resp = client.post("/api/music/album/candidates", json={"query": ""})
+    assert resp.status_code == 200
+    assert resp.json() == {"status": "ok", "album_candidates": []}
diff --git a/tests/test_music_album_download_endpoint.py b/tests/test_music_album_download_endpoint.py
new file mode 100644
index 0000000..f99239a
--- /dev/null
+++ b/tests/test_music_album_download_endpoint.py
@@ -0,0 +1,86 @@
+from __future__ import annotations
+
+import importlib
+import sys
+
+import pytest
+
+fastapi = pytest.importorskip("fastapi")
+from fastapi.testclient import TestClient
+
+
+def _build_client(monkeypatch) -> TestClient:
+    monkeypatch.setattr(sys, "version_info", (3, 11, 0, "final", 0), raising=False)
+    monkeypatch.setattr(sys, "version", "3.11.9", raising=False)
+    sys.modules.pop("api.main", None)
+    module = importlib.import_module("api.main")
+    module.app.router.on_startup.clear()
+    module.app.router.on_shutdown.clear()
+    return TestClient(module.app)
+
+
+def test_album_download_returns_error_when_no_tracks_from_musicbrainz(monkeypatch) -> None:
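+    # When MusicBrainz yields no tracks, the endpoint must report an error instead of enqueuing jobs.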
+    client = _build_client(monkeypatch)
+
+    class _MB:
+        def fetch_release_tracks(self, _rid):
+            return None
+
+    monkeypatch.setattr("api.main._mb_service", lambda: _MB())
+
+    response = client.post(
+        "/api/music/album/download",
+        json={"release_group_id": "rg-1"},
+    )
+
+    assert response.status_code == 200
+    assert response.json() == {"error": "unable to fetch tracks"}
+
+
+def test_album_download_enqueues_tracks_without_legacy_fallback(monkeypatch) -> None:
+    client = _build_client(monkeypatch)
+    class _MB:
+        def pick_best_release_with_reason(self, _rgid, prefer_country=None):
+            return {"release_id": "rel-1", "reason": "test"}
+
+        def fetch_release_tracks(self, _rid):
+            return [
+                {
+                    "artist": "Artist",
+                    "album": "Album",
+                    "title": "Track A",
+                    "track_number": 1,
+                    "disc_number": 1,
+                    "release_date": "2024-01-01",
+                    "duration_ms": 123000,
+                    "artwork_url": None,
+                },
+                {
+                    "artist": "Artist",
+                    "album": "Album",
+                    "title": "Track B",
+                    "track_number": 2,
+                    "disc_number": 1,
+                    "release_date": "2024-01-01",
+                    "duration_ms": 125000,
+                    "artwork_url": None,
+                },
+            ]
+
+    monkeypatch.setattr("api.main._mb_service", lambda: _MB())
+
+    enqueued: list[dict] = []
+
+    def _capture_enqueue(self, payload: dict) -> None:
+        enqueued.append(payload)
+
+    monkeypatch.setattr("api.main._IntentQueueAdapter.enqueue", _capture_enqueue)
+
+    response = client.post(
+        "/api/music/album/download",
+        json={"release_group_id": "rg-1"},
+    )
+
+    assert response.status_code == 200
+    assert response.json() == {"status": "ok", "tracks_enqueued": 2}
+    assert len(enqueued) == 2
+    assert all(item.get("media_intent") == "music_track" for item in enqueued)
diff --git a/tests/test_music_metadata_merge.py b/tests/test_music_metadata_merge.py
new file mode 100644
index 0000000..24bb1ef
--- /dev/null
+++ b/tests/test_music_metadata_merge.py
@@ -0,0 +1,93 @@
+from __future__ import annotations
+
+from metadata.merge import merge_metadata
+
+
+def test_merge_metadata_precedence_and_normalization(caplog) -> None:
+    spotify = {
+        "title": ' Song:/Name* ',
+        "artists": [{"name": "Main Artist"}, {"name": "Guest One"}],
+        "album": " Album ",
+        "album_artist": " Main Artist ",
+        "track_number": "03/10",
+        "disc_number": "1/2",
+        "release_date": "2025-01-01",
+        "genre": [" Pop ", "Dance "],
+        "isrc": "usabc1234567",
+        "artwork_url": "https://img.example/cover.jpg",
+    }
+    mb = {
+        "title": "MB Title",
+        "artist": "MB Artist",
+        "mbid": "mbid-1",
+        "lyrics": "MB lyrics",
+    }
+    ytdlp = {
+        "title": "YT Title",
+        "artist": "YT Artist",
+        "lyrics": "YT lyrics",
+    }
+
+    with caplog.at_level("INFO"):
+        merged = merge_metadata(spotify, mb, ytdlp)
+
+    assert merged.title == "Song:/Name*"
+    assert merged.artist == "MB Artist"
+    assert merged.album == "Album"
+    assert merged.album_artist == "Main Artist"
+    assert merged.track_num == 3
+    assert merged.disc_num == 1
+    assert merged.date == "2025-01-01"
+    assert merged.genre == "Pop, Dance"
+    assert merged.isrc == "usabc1234567"
+    assert merged.mbid == "mbid-1"
+    assert merged.artwork is None
+    assert merged.lyrics == "MB lyrics"
+
+    # Verify source logging happens per merged field.
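+    # (12 fields are merged above, so exactly 12 per-field source records are expected)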
+    field_logs = [r.message for r in caplog.records if "metadata_field_source field=" in r.message]
+    assert len(field_logs) == 12
+    assert any("field=title source=spotify" in msg for msg in field_logs)
+    assert any("field=mbid source=musicbrainz" in msg for msg in field_logs)
+    assert any("field=lyrics source=musicbrainz" in msg for msg in field_logs)
+
+
+def test_merge_metadata_fallback_and_featured_artist_parsing() -> None:
+    spotify = {
+        "title": "",
+        "artists": [],
+        "album": None,
+        "album_artist": None,
+    }
+    mb = {
+        "title": None,
+        "artist": "",
+        "album": "",
+        "album_artist": "",
+        "genre": "",
+    }
+    ytdlp = {
+        "title": " Live Track ",
+        "artist": "Lead Artist ft. Guest A & Guest B",
+        "album": "YT Album",
+        "album_artist": "Lead Artist",
+        "date": "2024",
+        "genre": "Alt / Rock",
+        "isrc": "gbxyz7654321",
+        "recording_id": "mb-recording-xyz",
+        "thumbnail": "https://cdn.example/a:b.jpg",
+        "lyrics": " line1 \n line2 ",
+    }
+
+    merged = merge_metadata(spotify, mb, ytdlp)
+
+    assert merged.title == "Live Track"
+    assert merged.artist == "Lead Artist ft. Guest A & Guest B"
+    assert merged.album == "YT Album"
+    assert merged.album_artist == "Lead Artist"
+    assert merged.date == "2024"
+    assert merged.genre == "Alt / Rock"
+    assert merged.isrc == "gbxyz7654321"
+    assert merged.mbid == "mb-recording-xyz"
+    assert merged.artwork is None
+    assert merged.lyrics == "line1 line2"
diff --git a/tests/test_musicbrainz_provider_efficiency.py b/tests/test_musicbrainz_provider_efficiency.py
new file mode 100644
index 0000000..8166b75
--- /dev/null
+++ b/tests/test_musicbrainz_provider_efficiency.py
@@ -0,0 +1,77 @@
+from __future__ import annotations
+
+import sys
+from pathlib import Path
+from types import ModuleType
+
+import pytest
+
+engine_pkg = ModuleType("engine")
+engine_pkg.__path__ = [str(Path(__file__).resolve().parents[1] / "engine")]
+sys.modules.setdefault("engine", engine_pkg)
+
+google_mod = ModuleType("google")
+google_auth_mod = ModuleType("google.auth")
+google_auth_ex_mod = ModuleType("google.auth.exceptions")
+setattr(google_auth_ex_mod, "RefreshError", Exception)
+sys.modules.setdefault("google", google_mod)
+sys.modules.setdefault("google.auth", google_auth_mod)
+sys.modules.setdefault("google.auth.exceptions", google_auth_ex_mod)
+
+pytest.importorskip("musicbrainzngs")
+
+from metadata.providers.musicbrainz import MusicBrainzMetadataProvider
+
+
+def test_resolve_track_defers_release_lookup_until_best_candidate(monkeypatch) -> None:
+    calls = {"search_recordings": 0, "get_release": 0}
+
+    class _FakeService:
+        def search_recordings(self, artist, title, *, album=None, limit=5):
+            calls["search_recordings"] += 1
+            # Two candidates from different releases; only best candidate should trigger get_release.
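+            # (get_release should fire exactly once, for the winning candidate; asserted at the end)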
+            return {
+                "recording-list": [
+                    {
+                        "id": "rec-1",
+                        "title": "Target Track",
+                        "artist-credit": [{"artist": {"name": "Target Artist"}}],
+                        "release-list": [{"id": "rel-1", "title": "Target Album", "date": "2020-01-01"}],
+                    },
+                    {
+                        "id": "rec-2",
+                        "title": "Target Track Live",
+                        "artist-credit": [{"artist": {"name": "Other Artist"}}],
+                        "release-list": [{"id": "rel-2", "title": "Live Album", "date": "2021-01-01"}],
+                    },
+                ]
+            }
+
+        def get_release(self, release_id, *, includes=None):
+            calls["get_release"] += 1
+            return {
+                "release": {
+                    "medium-list": [
+                        {
+                            "track-list": [
+                                {
+                                    "position": "3",
+                                    "recording": {"id": "rec-1"},
+                                }
+                            ]
+                        }
+                    ]
+                }
+            }
+
+    fake = _FakeService()
+    monkeypatch.setattr("metadata.providers.musicbrainz.get_musicbrainz_service", lambda: fake)
+
+    provider = MusicBrainzMetadataProvider(min_confidence=0.1)
+    resolved = provider.resolve_track("Target Artist", "Target Track", album="Target Album")
+
+    assert resolved is not None
+    assert resolved["provider"] == "musicbrainz"
+    assert resolved["track_number"] == "3"
+    assert calls["search_recordings"] == 1
+    assert calls["get_release"] == 1
diff --git a/tests/test_path_builder.py b/tests/test_path_builder.py
new file mode 100644
index 0000000..c1d0000
--- /dev/null
+++ b/tests/test_path_builder.py
@@ -0,0 +1,84 @@
+from __future__ import annotations
+
+from pathlib import Path
+
+from media.path_builder import build_music_path
+from metadata.types import MusicMetadata
+
+
+def _metadata(**overrides) -> MusicMetadata:
+    base = {
+        "title": "Track Title",
+        "artist": "Artist Name",
+        "album": "Album Name",
+        "album_artist": "Artist Name",
+        "track_num": 1,
+        "disc_num": 1,
+        "date": "2024-01-10",
+        "genre": "Pop",
+    }
+    base.update(overrides)
+    return MusicMetadata(**base)
+
+
+def test_single_disc_album_with_year() -> None:
+    path = build_music_path(Path("/library"), _metadata(), "mp3")
+
+    assert path == Path("/library/Music/Artist Name/Album Name (2024)/Disc 1/01 - Track Title.mp3")
+
+
+def test_multi_disc_album() -> None:
+    path = build_music_path(Path("/library"), _metadata(disc_num=2, track_num=7), "flac")
+
+    assert path == Path("/library/Music/Artist Name/Album Name (2024)/Disc 2/07 - Track Title.flac")
+
+
+def test_missing_year_omits_parentheses() -> None:
+    metadata = _metadata()
+    metadata.date = ""
+
+    path = build_music_path(Path("/library"), metadata, "m4a")
+
+    assert path == Path("/library/Music/Artist Name/Album Name/Disc 1/01 - Track Title.m4a")
+
+
+def test_missing_disc_num_defaults_to_disc_1() -> None:
+    metadata = _metadata()
+    metadata.disc_num = None  # type: ignore[assignment]
+
+    path = build_music_path(Path("/library"), metadata, "mp3")
+
+    assert path == Path("/library/Music/Artist Name/Album Name (2024)/Disc 1/01 - Track Title.mp3")
+
+
+def test_missing_track_num_defaults_to_00() -> None:
+    metadata = _metadata()
+    metadata.track_num = None  # type: ignore[assignment]
+
+    path = build_music_path(Path("/library"), metadata, "mp3")
+
+    assert path == Path("/library/Music/Artist Name/Album Name (2024)/Disc 1/00 - Track Title.mp3")
+
+
+def test_unicode_characters_are_preserved() -> None:
+    metadata = _metadata(
+        album_artist="Beyoncé",
+        title="Café del Mar",
+        album="Été",
+    )
+
+    path = build_music_path(Path("/library"), metadata, "mp3")
+
+    assert path == Path("/library/Music/Beyoncé/Été (2024)/Disc 1/01 - Café del Mar.mp3")
+
+
+def test_invalid_filesystem_characters_are_removed() -> None:
+    metadata = _metadata(
+        album_artist='A<>:"/\\|?*rtist',
+        album='Alb<>:"/\\|?*um',
+        title='Ti<>:"/\\|?*tle',
+    )
+
+    path = build_music_path(Path("/library"), metadata, "mp3")
+
+    assert path == Path("/library/Music/Artist/Album (2024)/Disc 1/01 - Title.mp3")
diff --git a/tests/test_playlist_diff.py b/tests/test_playlist_diff.py
new file mode 100644
index 0000000..c18048f
--- /dev/null
+++ b/tests/test_playlist_diff.py
@@ -0,0 +1,35 @@
+from spotify.diff import diff_playlist
+
+
+def test_diff_playlist_added_removed_and_moved() -> None:
+    prev = ["a", "b", "c"]
+    curr = ["b", "a", "d"]
+
+    changes = diff_playlist(prev, curr)
+
+    assert changes["added"] == ["d"]
+    assert changes["removed"] == ["c"]
+    assert changes["moved"] == [
+        {"uri": "a", "from": 0, "to": 1},
+        {"uri": "b", "from": 1, "to": 0},
+    ]
+
+
+def test_diff_playlist_honors_duplicates() -> None:
+    prev = ["x", "y", "x"]
+    curr = ["x", "x", "y", "x"]
+
+    changes = diff_playlist(prev, curr)
+
+    assert changes["added"] == ["x"]
+    assert changes["removed"] == []
+    assert changes["moved"] == [
+        {"uri": "x", "from": 2, "to": 1},
+        {"uri": "y", "from": 1, "to": 2},
+    ]
+
+
+def test_diff_playlist_handles_empty_lists() -> None:
+    changes = diff_playlist([], [])
+
+    assert changes == {"added": [], "removed": [], "moved": []}
diff --git a/tests/test_playlist_export.py b/tests/test_playlist_export.py
new file mode 100644
index 0000000..d3c6188
--- /dev/null
+++ b/tests/test_playlist_export.py
@@ -0,0 +1,44 @@
+from __future__ import annotations
+
+from pathlib import Path
+
+from playlist.export import write_m3u
+
+
+def test_write_m3u_writes_relative_paths_skips_missing_and_overwrites(tmp_path, monkeypatch) -> None:
+    music_root = tmp_path / "Music"
+    monkeypatch.setenv("RETREIVR_MUSIC_ROOT", str(music_root))
+
+    track_one = music_root / "Artist A" / "Album A (2020)" / "Disc 1" / "01 - Song One.mp3"
+    track_two = music_root / "Artist A" / "Album A (2020)" / "Disc 1" / "02 - Song Two.mp3"
+    missing = music_root / "Artist A" / "Album A (2020)" / "Disc 1" / "03 - Missing.mp3"
+    track_one.parent.mkdir(parents=True, exist_ok=True)
+    track_one.write_bytes(b"a")
+    track_two.write_bytes(b"b")
+
+    playlist_root = tmp_path / "playlists"
+
+    first_path = write_m3u(
+        playlist_root=playlist_root,
+        playlist_name="My: Playlist",
+        track_paths=[track_one, missing, track_two],
+    )
+
+    assert first_path.exists() is True
+    assert first_path.name == "My Playlist.m3u"
+    first_content = first_path.read_text(encoding="utf-8")
+    assert "#EXTM3U" in first_content
+    assert "Artist A/Album A (2020)/Disc 1/01 - Song One.mp3" in first_content
+    assert "Artist A/Album A (2020)/Disc 1/02 - Song Two.mp3" in first_content
+    assert "03 - Missing.mp3" not in first_content
+
+    second_path = write_m3u(
+        playlist_root=playlist_root,
+        playlist_name="My: Playlist",
+        track_paths=[track_two],
+    )
+
+    assert second_path == first_path
+    second_content = second_path.read_text(encoding="utf-8")
+    assert "01 - Song One.mp3" not in second_content
+    assert "02 - Song Two.mp3" in second_content
diff --git a/tests/test_playlist_rebuild.py b/tests/test_playlist_rebuild.py
new file mode 100644
index 0000000..0e8983e
--- /dev/null
+++ b/tests/test_playlist_rebuild.py
@@ -0,0 +1,31 @@
+from __future__ import annotations
+
+from pathlib import Path
+
+from playlist.rebuild import rebuild_playlist_from_tracks
+
+
+def test_rebuild_playlist_from_tracks_writes_existing_relative_entries(tmp_path) -> None:
+    music_root = tmp_path / "Music"
+    playlist_root = tmp_path / "Playlists"
+
+    track_one = music_root / "Artist" / "Album (2020)" / "Disc 1" / "01 - Song One.mp3"
+    track_two = music_root / "Artist" / "Album (2020)" / "Disc 1" / "02 - Song Two.mp3"
+    missing = music_root / "Artist" / "Album (2020)" / "Disc 1" / "03 - Missing.mp3"
+    track_one.parent.mkdir(parents=True, exist_ok=True)
+    track_one.write_bytes(b"a")
+    track_two.write_bytes(b"b")
+
+    result_path = rebuild_playlist_from_tracks(
+        playlist_name="My Playlist",
+        playlist_root=playlist_root,
+        music_root=music_root,
+        track_file_paths=[str(track_one), str(missing), str(track_two)],
+    )
+
+    assert result_path.exists() is True
+    content = result_path.read_text(encoding="utf-8")
+    assert "#EXTM3U" in content
+    assert "Artist/Album (2020)/Disc 1/01 - Song One.mp3" in content
+    assert "Artist/Album (2020)/Disc 1/02 - Song Two.mp3" in content
+    assert "03 - Missing.mp3" not in content
diff --git a/tests/test_playlist_snapshot_diff_integration.py b/tests/test_playlist_snapshot_diff_integration.py
new file mode 100644
index 0000000..4bf42d9
--- /dev/null
+++ b/tests/test_playlist_snapshot_diff_integration.py
@@ -0,0 +1,138 @@
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Any
+
+from db.playlist_snapshots import PlaylistSnapshotStore
+from scheduler.jobs.spotify_playlist_watch import playlist_watch_job
+
+
+def _item(track_id: str, position: int) -> dict[str, Any]:
+    return {
+        "spotify_track_id": track_id,
+        "position": position,
+        "added_at": f"2026-02-17T00:0{position}:00Z",
+        "artist": f"artist-{track_id}",
+        "title": f"title-{track_id}",
+        "album": f"album-{track_id}",
+        "duration_ms": 1000 + position,
+        "isrc": f"isrc-{track_id}",
+    }
+
+
+class _StaticClient:
+    def __init__(self, snapshot_id: str, items: list[dict[str, Any]]) -> None:
+        self.snapshot_id = snapshot_id
+        self.items = list(items)
+
+    def get_playlist_items(self, playlist_id: str) -> tuple[str, list[dict[str, Any]]]:
+        return self.snapshot_id, list(self.items)
+
+
+def test_reordered_playlist_produces_no_new_jobs(tmp_path: Path) -> None:
+    db_path = tmp_path / "snapshots.sqlite"
+    store = PlaylistSnapshotStore(str(db_path))
+    playlist_id = "playlist-1234"
+    previous_items = [_item("a", 0), _item("b", 1), _item("c", 2)]
+    store.store_snapshot(playlist_id, "snap-old", previous_items)
+
+    current_items = [_item("b", 0), _item("a", 1), _item("c", 2)]
+    client = _StaticClient("snap-new", current_items)
+    enqueued: list[str] = []
+
+    result = playlist_watch_job(
+        client,
+        store,
+        lambda item: enqueued.append(item["spotify_track_id"]),
+        playlist_id,
+        config={
+            "music_download_folder": str(tmp_path / "Music"),
+            "playlists_folder": str(tmp_path / "Playlists"),
+        },
+    )
+
+    assert result["status"] == "updated"
+    assert result["added_count"] == 0
+    assert result["moved_count"] == 2
+    assert result["enqueued"] == 0
+    assert enqueued == []
+    assert result["run_summary"]["added"] == 0
+    assert result["run_summary"]["completed"] == 0
+
+
+def test_removed_track_does_not_delete_local_files_unless_explicitly_configured(
+    tmp_path: Path, monkeypatch
+) -> None:
+    db_path = tmp_path / "snapshots.sqlite"
+    store = PlaylistSnapshotStore(str(db_path))
+    playlist_id = "playlist-5678"
+    store.store_snapshot(playlist_id, "snap-prev", [_item("a", 0), _item("b", 1)])
+    client = _StaticClient("snap-next", [_item("a", 0)])
+
+    delete_calls: list[Path] = []
+    original_unlink = Path.unlink
+
+    def _spy_unlink(path_self: Path, *args, **kwargs):
+        delete_calls.append(path_self)
+        return original_unlink(path_self, *args, **kwargs)
+
+    monkeypatch.setattr(Path, "unlink", _spy_unlink)
+    monkeypatch.setattr(
"scheduler.jobs.spotify_playlist_watch._load_downloaded_track_paths", + lambda _playlist_id: [], + ) + monkeypatch.setattr( + "scheduler.jobs.spotify_playlist_watch._resolve_playlist_dirs", + lambda _config: (tmp_path / "Playlists", tmp_path / "Music"), + ) + + result = playlist_watch_job(client, store, lambda _item: None, playlist_id, playlist_name="NoDelete") + + assert result["status"] == "updated" + assert result["removed_count"] == 1 + assert result["enqueued"] == 0 + assert delete_calls == [] + + +def test_crash_restart_recovery_is_idempotent_after_snapshot_persist(tmp_path: Path) -> None: + db_path = tmp_path / "snapshots.sqlite" + playlist_id = "playlist-9012" + first_store = PlaylistSnapshotStore(str(db_path)) + first_store.store_snapshot(playlist_id, "snap-prev", [_item("a", 0)]) + + client = _StaticClient("snap-next", [_item("a", 0), _item("b", 1)]) + first_enqueued: list[str] = [] + first_result = playlist_watch_job( + client, + first_store, + lambda item: first_enqueued.append(item["spotify_track_id"]), + playlist_id, + config={ + "music_download_folder": str(tmp_path / "Music"), + "playlists_folder": str(tmp_path / "Playlists"), + }, + ) + + assert first_result["status"] == "updated" + assert first_enqueued == ["b"] + assert first_result["enqueued"] == 1 + + # Simulate process restart by creating fresh store/client instances. + second_store = PlaylistSnapshotStore(str(db_path)) + second_client = _StaticClient("snap-next", [_item("a", 0), _item("b", 1)]) + second_enqueued: list[str] = [] + second_result = playlist_watch_job( + second_client, + second_store, + lambda item: second_enqueued.append(item["spotify_track_id"]), + playlist_id, + config={ + "music_download_folder": str(tmp_path / "Music"), + "playlists_folder": str(tmp_path / "Playlists"), + }, + ) + + assert second_result["status"] == "unchanged" + assert second_result["enqueued"] == 0 + assert second_enqueued == [] + assert second_result["run_summary"]["completed"] == 0 diff --git a/tests/test_playlist_snapshot_store.py b/tests/test_playlist_snapshot_store.py new file mode 100644 index 0000000..ee46b0a --- /dev/null +++ b/tests/test_playlist_snapshot_store.py @@ -0,0 +1,101 @@ +import sqlite3 + +from db.playlist_snapshots import PlaylistSnapshotStore + + +def _sample_items() -> list[dict[str, object]]: + return [ + { + "uri": "spotify:track:1", + "track_id": "1", + "added_at": "2026-02-09T00:00:00+00:00", + "added_by": "user_a", + "is_local": False, + "name": "Track One", + }, + { + "uri": "spotify:track:2", + "track_id": "2", + "added_at": "2026-02-09T00:01:00+00:00", + "added_by": "user_b", + "is_local": False, + "name": "Track Two", + }, + ] + + +def test_snapshot_store_inserts_snapshot_and_items(tmp_path) -> None: + db_path = tmp_path / "snapshots.sqlite" + store = PlaylistSnapshotStore(str(db_path)) + + result = store.insert_snapshot( + source="spotify", + playlist_id="playlist-1", + snapshot_id="snap-1", + items=_sample_items(), + ) + + assert result.inserted is True + latest = store.get_latest_snapshot("spotify", "playlist-1") + assert latest is not None + assert latest["snapshot_id"] == "snap-1" + assert latest["track_count"] == 2 + assert [item["track_uri"] for item in latest["items"]] == [ + "spotify:track:1", + "spotify:track:2", + ] + + +def test_snapshot_store_fast_path_for_same_snapshot_id(tmp_path) -> None: + db_path = tmp_path / "snapshots.sqlite" + store = PlaylistSnapshotStore(str(db_path)) + store.insert_snapshot( + source="spotify", + playlist_id="playlist-1", + snapshot_id="snap-1", + 
+        items=_sample_items(),
+    )
+
+    second = store.insert_snapshot(
+        source="spotify",
+        playlist_id="playlist-1",
+        snapshot_id="snap-1",
+        items=_sample_items(),
+    )
+
+    assert second.inserted is False
+    assert second.reason == "snapshot_unchanged"
+
+    with sqlite3.connect(db_path) as conn:
+        count = conn.execute("SELECT COUNT(*) FROM playlist_snapshots").fetchone()[0]
+        assert count == 1
+
+
+def test_snapshot_store_tracks_latest_snapshot_uris(tmp_path) -> None:
+    db_path = tmp_path / "snapshots.sqlite"
+    store = PlaylistSnapshotStore(str(db_path))
+    store.insert_snapshot(
+        source="spotify",
+        playlist_id="playlist-2",
+        snapshot_id="snap-1",
+        items=_sample_items(),
+    )
+    updated_items = _sample_items() + [
+        {
+            "uri": "spotify:track:3",
+            "track_id": "3",
+            "added_at": "2026-02-09T00:02:00+00:00",
+            "added_by": "user_c",
+            "is_local": False,
+            "name": "Track Three",
+        }
+    ]
+    store.insert_snapshot(
+        source="spotify",
+        playlist_id="playlist-2",
+        snapshot_id="snap-2",
+        items=updated_items,
+    )
+
+    latest_uris = store.get_latest_track_uris("spotify", "playlist-2")
+    assert latest_uris == ["spotify:track:1", "spotify:track:2", "spotify:track:3"]
diff --git a/tests/test_playlist_watcher_m3u.py b/tests/test_playlist_watcher_m3u.py
new file mode 100644
index 0000000..9e6100d
--- /dev/null
+++ b/tests/test_playlist_watcher_m3u.py
@@ -0,0 +1,108 @@
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Any
+
+from playlist.rebuild import rebuild_playlist_from_tracks as _real_rebuild
+from scheduler.jobs.spotify_playlist_watch import playlist_watch_job
+
+
+def _item(track_id: str, position: int) -> dict[str, Any]:
+    return {
+        "spotify_track_id": track_id,
+        "position": position,
+        "added_at": f"2026-02-16T00:0{position}:00Z",
+        "artist": f"artist-{track_id}",
+        "title": f"title-{track_id}",
+        "album": f"album-{track_id}",
+        "duration_ms": 1000 + position,
+        "isrc": f"isrc-{track_id}",
+    }
+
+
+class _MockSpotifyClient:
+    def __init__(self, snapshot_id: str, items: list[dict[str, Any]]) -> None:
+        self.snapshot_id = snapshot_id
+        self.items = items
+
+    def get_playlist_items(self, playlist_id: str) -> tuple[str, list[dict[str, Any]]]:
+        return self.snapshot_id, list(self.items)
+
+
+class _MockSnapshotStore:
+    def __init__(self, latest_snapshot: dict[str, Any] | None) -> None:
+        self.latest_snapshot = latest_snapshot
+        self.store_calls: list[tuple[str, str, list[dict[str, Any]]]] = []
+
+    def get_latest_snapshot(self, playlist_id: str) -> dict[str, Any] | None:
+        return self.latest_snapshot
+
+    def store_snapshot(self, playlist_id: str, snapshot_id: str, items: list[dict[str, Any]]) -> Any:
+        self.store_calls.append((playlist_id, snapshot_id, list(items)))
+        return type("WriteResult", (), {"snapshot_db_id": 42})()
+
+
+def test_playlist_watch_job_rebuilds_m3u_after_successful_sync(tmp_path, monkeypatch) -> None:
+    music_root = tmp_path / "Music"
+    playlist_root = tmp_path / "Playlists"
+    track_paths: list[str] = []
+    for n in (1, 2, 3):
+        track = music_root / "Artist A" / "Album A (2020)" / "Disc 1" / f"{n:02d} - Song {n}.mp3"
+        track.parent.mkdir(parents=True, exist_ok=True)
+        track.write_bytes(b"x")
+        track_paths.append(str(track))
+
+    monkeypatch.setattr(
+        "scheduler.jobs.spotify_playlist_watch._load_downloaded_track_paths",
+        lambda playlist_id: list(track_paths),
+    )
+    monkeypatch.setattr(
+        "scheduler.jobs.spotify_playlist_watch._resolve_playlist_dirs",
+        lambda config: (playlist_root, music_root),
+    )
+
+    calls: list[dict[str, Any]] = []
+
+    def _spy_rebuild(playlist_name, playlist_root, music_root, track_file_paths):
+        calls.append(
+            {
+                "playlist_name": playlist_name,
+                "playlist_root": Path(playlist_root),
+                "music_root": Path(music_root),
+                "track_file_paths": list(track_file_paths),
+            }
+        )
+        return _real_rebuild(
+            playlist_name=playlist_name,
+            playlist_root=Path(playlist_root),
+            music_root=Path(music_root),
+            track_file_paths=track_file_paths,
+        )
+
+    monkeypatch.setattr("scheduler.jobs.spotify_playlist_watch.rebuild_playlist_from_tracks", _spy_rebuild)
+
+    prev_items = [_item("a", 0)]
+    curr_items = [_item("a", 0), _item("b", 1)]
+    store = _MockSnapshotStore({"snapshot_id": "snap-1", "items": prev_items})
+    client = _MockSpotifyClient("snap-2", curr_items)
+    enqueued: list[str] = []
+
+    result = playlist_watch_job(
+        client,
+        store,
+        lambda item: enqueued.append(str(item["spotify_track_id"])),
+        "playlist-1",
+        playlist_name="Country Bangers",
+    )
+
+    assert result["status"] == "updated"
+    assert len(calls) == 1
+    assert calls[0]["playlist_name"] == "Country Bangers"
+    assert calls[0]["track_file_paths"] == track_paths
+
+    m3u_path = playlist_root / "Country Bangers.m3u"
+    assert m3u_path.exists() is True
+    content = m3u_path.read_text(encoding="utf-8")
+    assert "Artist A/Album A (2020)/Disc 1/01 - Song 1.mp3" in content
+    assert "Artist A/Album A (2020)/Disc 1/02 - Song 2.mp3" in content
+    assert "Artist A/Album A (2020)/Disc 1/03 - Song 3.mp3" in content
diff --git a/tests/test_resolver.py b/tests/test_resolver.py
new file mode 100644
index 0000000..aac9bce
--- /dev/null
+++ b/tests/test_resolver.py
@@ -0,0 +1,109 @@
+import asyncio
+
+from spotify.resolve import resolve_spotify_track, score_search_candidates
+
+
+def test_score_search_candidates_exact_match() -> None:
+    spotify_track = {"artist": "Artist A", "title": "Track A", "duration_ms": 200000}
+    candidates = [
+        {"title": "Track A", "artist": "Artist A", "duration": 200, "source": "youtube"},
+        {"title": "Track A live", "artist": "Artist A", "duration": 200, "source": "youtube_music"},
+    ]
+
+    best = score_search_candidates(candidates, spotify_track)
+
+    assert best["title"] == "Track A"
+    assert best["artist"] == "Artist A"
+
+
+def test_score_search_candidates_duration_mismatch() -> None:
+    spotify_track = {"artist": "Artist B", "title": "Track B", "duration_ms": 180000}
+    candidates = [
+        {"title": "Track B", "artist": "Artist B", "duration": 181, "source": "youtube"},
+        {"title": "Track B", "artist": "Artist B", "duration": 240, "source": "youtube_music"},
+    ]
+
+    best = score_search_candidates(candidates, spotify_track)
+
+    assert best["duration"] == 181
+
+
+def test_score_search_candidates_tie_behavior_source_priority() -> None:
+    spotify_track = {"artist": "Artist C", "title": "Track C", "duration_ms": 210000}
+    candidates = [
+        {"title": "Track C", "artist": "Artist C", "duration": 210, "source": "soundcloud"},
+        {"title": "Track C", "artist": "Artist C", "duration": 210, "source": "youtube_music"},
+    ]
+
+    best = score_search_candidates(candidates, spotify_track)
+
+    assert best["source"] == "youtube_music"
+
+
+class _MockSearchService:
+    def __init__(self, results):
+        self._results = results
+        self.calls = []
+
+    async def search(self, query: str):
+        self.calls.append(query)
+        return self._results
+
+
+def test_resolve_spotify_track_no_results() -> None:
+    search_service = _MockSearchService([])
+    spotify_track = {"artist": "Artist D", "title": "Track D", "duration_ms": 180000}
+
+    resolved = asyncio.run(resolve_spotify_track(spotify_track, search_service))
+
+    assert resolved == {}
+    assert search_service.calls == ["Artist D - Track D official audio"]
+
+
+def test_resolve_spotify_track_single_result() -> None:
+    results = [
+        {
+            "media_url": "https://example.com/one",
+            "title": "Track E",
+            "duration": 200,
+            "source_id": "youtube",
+            "extra": {"id": "1"},
+        }
+    ]
+    search_service = _MockSearchService(results)
+    spotify_track = {"artist": "Artist E", "title": "Track E", "duration_ms": 200000}
+
+    resolved = asyncio.run(resolve_spotify_track(spotify_track, search_service))
+
+    assert resolved["media_url"] == "https://example.com/one"
+    assert resolved["title"] == "Track E"
+    assert resolved["source_id"] == "youtube"
+
+
+def test_resolve_spotify_track_multiple_results_best_match_chosen() -> None:
+    results = [
+        {
+            "media_url": "https://example.com/bad",
+            "title": "Track F (live)",
+            "duration": 260,
+            "source_id": "youtube_music",
+            "extra": {"id": "bad"},
+            "artist": "Artist F",
+        },
+        {
+            "media_url": "https://example.com/best",
+            "title": "Track F",
+            "duration": 210,
+            "source_id": "youtube",
+            "extra": {"id": "best"},
+            "artist": "Artist F",
+        },
+    ]
+    search_service = _MockSearchService(results)
+    spotify_track = {"artist": "Artist F", "title": "Track F", "duration_ms": 210000}
+
+    resolved = asyncio.run(resolve_spotify_track(spotify_track, search_service))
+
+    assert resolved["media_url"] == "https://example.com/best"
+    assert resolved["title"] == "Track F"
+    assert resolved["source_id"] == "youtube"
diff --git a/tests/test_resolver_scoring.py b/tests/test_resolver_scoring.py
new file mode 100644
index 0000000..07cf503
--- /dev/null
+++ b/tests/test_resolver_scoring.py
@@ -0,0 +1,36 @@
+from spotify.resolve import score_search_candidates
+
+
+def test_exact_match_favored() -> None:
+    spotify_track = {"title": "Track One", "artist": "Artist A", "duration_ms": 200000}
+    candidates = [
+        {"title": "Track One", "artist": "Artist A", "duration": 200, "source": "youtube"},
+        {"title": "Track One (live)", "artist": "Artist A", "duration": 200, "source": "youtube_music"},
+    ]
+
+    best = score_search_candidates(candidates, spotify_track)
+    assert best["title"] == "Track One"
+    assert best["artist"] == "Artist A"
+
+
+def test_duration_mismatch_deprioritized() -> None:
+    spotify_track = {"title": "Track Two", "artist": "Artist B", "duration_ms": 180000}
+    candidates = [
+        {"title": "Track Two", "artist": "Artist B", "duration": 181, "source": "youtube"},
+        {"title": "Track Two", "artist": "Artist B", "duration": 220, "source": "youtube_music"},
+    ]
+
+    best = score_search_candidates(candidates, spotify_track)
+    assert best["duration"] == 181
+
+
+def test_tie_broken_in_source_order() -> None:
+    spotify_track = {"title": "Track Three", "artist": "Artist C", "duration_ms": 210000}
+    candidates = [
+        {"title": "Track Three", "artist": "Artist C", "duration": 210, "source": "soundcloud"},
+        {"title": "Track Three", "artist": "Artist C", "duration": 210, "source": "youtube_music"},
+    ]
+
+    best = score_search_candidates(candidates, spotify_track)
+    assert best["source"] == "youtube_music"
+
diff --git a/tests/test_retreivr_playlist_snapshot_store.py b/tests/test_retreivr_playlist_snapshot_store.py
new file mode 100644
index 0000000..455f8ae
--- /dev/null
+++ b/tests/test_retreivr_playlist_snapshot_store.py
@@ -0,0 +1,81 @@
+import sqlite3
+
+from db.playlist_snapshots import PlaylistSnapshotStore
+
+
+def _items() -> list[dict[str, object]]:
+    return [
+        {
+            "spotify_track_id": "track-1",
+            "position": 0,
+            "added_at": "2026-02-16T00:00:00Z",
+            "artist": "Artist 1",
+            "title": "Title 1",
+            "album": "Album 1",
+            "duration_ms": 1000,
+            "isrc": "ISRC1",
+        },
+        {
+            "spotify_track_id": "track-2",
+            "position": 1,
+            "added_at": "2026-02-16T00:01:00Z",
+            "artist": "Artist 2",
+            "title": "Title 2",
+            "album": "Album 2",
+            "duration_ms": 2000,
+            "isrc": "ISRC2",
+        },
+    ]
+
+
+def test_store_snapshot_inserts_rows_and_preserves_positions(tmp_path) -> None:
+    db_path = tmp_path / "snapshots.sqlite"
+    store = PlaylistSnapshotStore(str(db_path))
+
+    result = store.store_snapshot("playlist-a", "snapshot-1", _items())
+
+    assert result.inserted is True
+    latest = store.get_latest_snapshot("playlist-a")
+    assert latest is not None
+    assert latest["snapshot_id"] == "snapshot-1"
+    assert latest["track_count"] == 2
+    assert [item["spotify_track_id"] for item in latest["items"]] == ["track-1", "track-2"]
+    assert [item["position"] for item in latest["items"]] == [0, 1]
+
+
+def test_store_snapshot_fast_path_when_snapshot_unchanged(tmp_path) -> None:
+    db_path = tmp_path / "snapshots.sqlite"
+    store = PlaylistSnapshotStore(str(db_path))
+
+    first = store.store_snapshot("playlist-a", "snapshot-1", _items())
+    second = store.store_snapshot("playlist-a", "snapshot-1", _items())
+
+    assert first.inserted is True
+    assert second.inserted is False
+    assert second.reason == "snapshot_unchanged"
+    assert first.snapshot_db_id == second.snapshot_db_id
+
+    with sqlite3.connect(db_path) as conn:
+        snapshot_count = conn.execute("SELECT COUNT(*) FROM playlist_snapshots").fetchone()[0]
+        item_count = conn.execute("SELECT COUNT(*) FROM playlist_snapshot_items").fetchone()[0]
+        assert snapshot_count == 1
+        assert item_count == 2
+
+
+def test_store_snapshot_fast_path_when_hash_unchanged_even_if_snapshot_id_changes(tmp_path) -> None:
+    db_path = tmp_path / "snapshots.sqlite"
+    store = PlaylistSnapshotStore(str(db_path))
+
+    first = store.store_snapshot("playlist-a", "snapshot-1", _items())
+    second = store.store_snapshot("playlist-a", "snapshot-2", _items())
+
+    assert first.inserted is True
+    assert second.inserted is False
+    assert second.reason == "snapshot_hash_unchanged"
+    assert first.snapshot_db_id == second.snapshot_db_id
+
+    with sqlite3.connect(db_path) as conn:
+        snapshot_count = conn.execute("SELECT COUNT(*) FROM playlist_snapshots").fetchone()[0]
+        item_count = conn.execute("SELECT COUNT(*) FROM playlist_snapshot_items").fetchone()[0]
+        assert snapshot_count == 1
+        assert item_count == 2
diff --git a/tests/test_spotify_diff.py b/tests/test_spotify_diff.py
new file mode 100644
index 0000000..8df53a9
--- /dev/null
+++ b/tests/test_spotify_diff.py
@@ -0,0 +1,92 @@
+from __future__ import annotations
+
+from spotify.diff import diff_playlist
+
+
+def _item(track_id: str, position: int, *, added_at: str = "2026-02-16T00:00:00Z") -> dict:
+    return {
+        "spotify_track_id": track_id,
+        "position": position,
+        "added_at": added_at,
+        "artist": f"artist-{track_id}",
+        "title": f"title-{track_id}",
+        "album": f"album-{track_id}",
+        "duration_ms": 1000,
+        "isrc": f"isrc-{track_id}",
+    }
+
+
+def test_diff_playlist_no_change() -> None:
+    prev = [_item("a", 0), _item("b", 1)]
+    curr = [_item("a", 0), _item("b", 1)]
+
+    diff = diff_playlist(prev, curr)
+
+    assert diff["added"] == []
+    assert diff["removed"] == []
+    assert diff["moved"] == []
+
+
+def test_diff_playlist_simple_add() -> None:
+    prev = [_item("a", 0)]
+    curr = [_item("a", 0), _item("b", 1)]
+
+    diff = diff_playlist(prev, curr)
+
+    assert [item["spotify_track_id"] for item in diff["added"]] == ["b"]
+    assert diff["removed"] == []
+    assert diff["moved"] == []
+
+
+def test_diff_playlist_simple_remove() -> None:
+    prev = [_item("a", 0), _item("b", 1)]
+    curr = [_item("a", 0)]
+
+    diff = diff_playlist(prev, curr)
+
+    assert diff["added"] == []
+    assert [item["spotify_track_id"] for item in diff["removed"]] == ["b"]
+    assert diff["moved"] == []
+
+
+def test_diff_playlist_moved_only() -> None:
+    prev = [_item("a", 0), _item("b", 1), _item("c", 2)]
+    curr = [_item("b", 0), _item("a", 1), _item("c", 2)]
+
+    diff = diff_playlist(prev, curr)
+
+    assert diff["added"] == []
+    assert diff["removed"] == []
+    moved = diff["moved"]
+    assert [entry["spotify_track_id"] for entry in moved] == ["b", "a"]
+    assert moved[0]["from_position"] == 1
+    assert moved[0]["to_position"] == 0
+    assert moved[1]["from_position"] == 0
+    assert moved[1]["to_position"] == 1
+
+
+def test_diff_playlist_combination_add_remove_move_with_duplicates() -> None:
+    prev = [
+        _item("a", 0, added_at="2026-02-16T00:00:00Z"),
+        _item("x", 1, added_at="2026-02-16T00:01:00Z"),
+        _item("a", 2, added_at="2026-02-16T00:02:00Z"),
+        _item("b", 3, added_at="2026-02-16T00:03:00Z"),
+    ]
+    curr = [
+        _item("a", 0, added_at="2026-02-16T00:10:00Z"),
+        _item("a", 1, added_at="2026-02-16T00:11:00Z"),
+        _item("c", 2, added_at="2026-02-16T00:12:00Z"),
+        _item("x", 3, added_at="2026-02-16T00:13:00Z"),
+    ]
+
+    diff = diff_playlist(prev, curr)
+
+    assert [item["spotify_track_id"] for item in diff["added"]] == ["c"]
+    assert [item["spotify_track_id"] for item in diff["removed"]] == ["b"]
+    moved = diff["moved"]
+    assert [entry["spotify_track_id"] for entry in moved] == ["a", "x"]
+    assert moved[0]["from_position"] == 2
+    assert moved[0]["to_position"] == 1
+    assert moved[1]["from_position"] == 1
+    assert moved[1]["to_position"] == 3
+
diff --git a/tests/test_spotify_liked_songs_sync.py b/tests/test_spotify_liked_songs_sync.py
new file mode 100644
index 0000000..2e47c5a
--- /dev/null
+++ b/tests/test_spotify_liked_songs_sync.py
@@ -0,0 +1,163 @@
+from __future__ import annotations
+
+import asyncio
+from types import SimpleNamespace
+from typing import Any
+
+from scheduler.jobs.spotify_playlist_watch import (
+    SPOTIFY_LIKED_SONGS_PLAYLIST_ID,
+    get_liked_songs_playlist_name,
+    spotify_liked_songs_watch_job,
+)
+
+
+class _FakeSpotifyClient:
+    def __init__(self, snapshot_id: str, items: list[dict[str, Any]]) -> None:
+        self.snapshot_id = snapshot_id
+        self.items = items
+
+    async def get_liked_songs(self) -> tuple[str, list[dict[str, Any]]]:
+        return self.snapshot_id, list(self.items)
+
+
+class _FakeSnapshotStore:
+    def __init__(self, latest_snapshot: dict[str, Any] | None) -> None:
+        self.latest_snapshot = latest_snapshot
+        self.store_calls: list[tuple[str, str, list[dict[str, Any]]]] = []
+
+    def get_latest_snapshot(self, playlist_id: str) -> dict[str, Any] | None:
+        if self.latest_snapshot is None:
+            return None
+        return self.latest_snapshot
+
+    def store_snapshot(self, playlist_id: str, snapshot_id: str, items: list[dict[str, Any]]) -> None:
+        self.store_calls.append((playlist_id, snapshot_id, list(items)))
+
+
+def _item(track_id: str, position: int) -> dict[str, Any]:
+    return {
+        "spotify_track_id": track_id,
+        "position": position,
+        "added_at": f"2026-02-16T00:0{position}:00Z",
+        "artist": f"Artist {track_id}",
+        "title": f"Title {track_id}",
+        "album": "Album",
+        "duration_ms": 123000,
+        "isrc": f"ISRC{track_id}",
+    }
+
+
+def test_liked_songs_sync_enqueues_added_tracks_and_rebuilds_m3u(monkeypatch) -> None:
config = {"spotify": {"client_id": "cid", "client_secret": "csec"}} + db = _FakeSnapshotStore(latest_snapshot=None) + queue = object() + search_service = object() + spotify_client = _FakeSpotifyClient("snap-liked-1", [_item("a", 0), _item("b", 1)]) + + class _FakeOAuthStore: + def __init__(self, _db_path): + pass + + def get_valid_token(self, _client_id, _client_secret, config=None): + return SimpleNamespace(access_token="oauth-token") + + enqueue_calls: list[tuple[str, dict[str, Any]]] = [] + + async def _spy_enqueue_spotify_track(queue, spotify_track: dict, search_service, playlist_id: str): + enqueue_calls.append((playlist_id, dict(spotify_track))) + + rebuild_calls: list[dict[str, Any]] = [] + + def _spy_rebuild_playlist_from_tracks(playlist_name, playlist_root, music_root, track_file_paths): + rebuild_calls.append( + { + "playlist_name": playlist_name, + "playlist_root": playlist_root, + "music_root": music_root, + "track_file_paths": list(track_file_paths), + } + ) + return playlist_root / f"{playlist_name}.m3u" + + monkeypatch.setattr("scheduler.jobs.spotify_playlist_watch.SpotifyOAuthStore", _FakeOAuthStore) + monkeypatch.setattr( + "scheduler.jobs.spotify_playlist_watch.enqueue_spotify_track", + _spy_enqueue_spotify_track, + ) + monkeypatch.setattr( + "scheduler.jobs.spotify_playlist_watch._load_downloaded_track_paths", + lambda _playlist_id: [], + ) + monkeypatch.setattr( + "scheduler.jobs.spotify_playlist_watch.rebuild_playlist_from_tracks", + _spy_rebuild_playlist_from_tracks, + ) + + result = asyncio.run( + spotify_liked_songs_watch_job( + config=config, + db=db, + queue=queue, + spotify_client=spotify_client, + search_service=search_service, + ) + ) + + assert result["status"] == "updated" + assert result["playlist_id"] == SPOTIFY_LIKED_SONGS_PLAYLIST_ID + assert result["enqueued"] == 2 + + assert [call[0] for call in enqueue_calls] == [ + SPOTIFY_LIKED_SONGS_PLAYLIST_ID, + SPOTIFY_LIKED_SONGS_PLAYLIST_ID, + ] + assert [call[1]["spotify_track_id"] for call in enqueue_calls] == ["a", "b"] + + assert len(db.store_calls) == 1 + assert db.store_calls[0][0] == SPOTIFY_LIKED_SONGS_PLAYLIST_ID + assert db.store_calls[0][1] == "snap-liked-1" + + assert len(rebuild_calls) == 1 + assert rebuild_calls[0]["playlist_name"] == get_liked_songs_playlist_name() + + +def test_liked_songs_sync_exits_cleanly_when_oauth_token_missing(monkeypatch) -> None: + config = {"spotify": {"client_id": "cid", "client_secret": "csec"}} + db = _FakeSnapshotStore(latest_snapshot=None) + queue = object() + search_service = object() + spotify_client = _FakeSpotifyClient("snap-liked-1", [_item("a", 0)]) + + class _FakeOAuthStore: + def __init__(self, _db_path): + pass + + def get_valid_token(self, _client_id, _client_secret, config=None): + return None + + enqueue_calls: list[dict[str, Any]] = [] + + async def _spy_enqueue_spotify_track(queue, spotify_track: dict, search_service, playlist_id: str): + enqueue_calls.append(dict(spotify_track)) + + monkeypatch.setattr("scheduler.jobs.spotify_playlist_watch.SpotifyOAuthStore", _FakeOAuthStore) + monkeypatch.setattr( + "scheduler.jobs.spotify_playlist_watch.enqueue_spotify_track", + _spy_enqueue_spotify_track, + ) + + result = asyncio.run( + spotify_liked_songs_watch_job( + config=config, + db=db, + queue=queue, + spotify_client=spotify_client, + search_service=search_service, + ) + ) + + assert result["status"] == "skipped" + assert result["playlist_id"] == SPOTIFY_LIKED_SONGS_PLAYLIST_ID + assert result["enqueued"] == 0 + assert enqueue_calls == [] + assert 
db.store_calls == [] diff --git a/tests/test_spotify_oauth_endpoints.py b/tests/test_spotify_oauth_endpoints.py new file mode 100644 index 0000000..4ccfd05 --- /dev/null +++ b/tests/test_spotify_oauth_endpoints.py @@ -0,0 +1,91 @@ +from __future__ import annotations + +import importlib +import sys +from types import SimpleNamespace + +import pytest + +pytest.importorskip("fastapi") +from fastapi.testclient import TestClient + +from spotify.oauth_store import SpotifyOAuthStore + + +def _build_client(monkeypatch, tmp_path) -> tuple[TestClient, object]: + monkeypatch.setattr(sys, "version_info", (3, 11, 0, "final", 0), raising=False) + monkeypatch.setattr(sys, "version", "3.11.9", raising=False) + sys.modules.pop("api.main", None) + module = importlib.import_module("api.main") + module.app.router.on_startup.clear() + module.app.router.on_shutdown.clear() + + db_path = tmp_path / "oauth_endpoints.sqlite" + module.app.state.paths = SimpleNamespace(db_path=str(db_path)) + module.app.state.spotify_oauth_state = None + monkeypatch.setattr( + module, + "_read_config_or_404", + lambda: { + "spotify": { + "client_id": "test-client-id", + "client_secret": "test-client-secret", + "redirect_uri": "http://localhost/callback", + } + }, + ) + return TestClient(module.app), module + + +def test_oauth_connect_returns_auth_url_with_client_id(monkeypatch, tmp_path) -> None: + client, _module = _build_client(monkeypatch, tmp_path) + + response = client.get("/api/spotify/oauth/connect") + + assert response.status_code == 200 + payload = response.json() + auth_url = payload["auth_url"] + assert "accounts.spotify.com/authorize" in auth_url + assert "client_id=test-client-id" in auth_url + + +def test_oauth_callback_stores_token_and_returns_connected(monkeypatch, tmp_path) -> None: + client, module = _build_client(monkeypatch, tmp_path) + module.app.state.spotify_oauth_state = "state-123" + + class _FakeResponse: + status_code = 200 + text = "" + + @staticmethod + def json(): + return { + "access_token": "access-token", + "refresh_token": "refresh-token", + "expires_in": 3600, + "scope": "user-library-read", + } + + monkeypatch.setattr("api.main.requests.post", lambda *args, **kwargs: _FakeResponse()) + + response = client.get("/api/spotify/oauth/callback?code=abc&state=state-123") + + assert response.status_code == 200 + assert response.json() == {"status": "connected"} + + store = SpotifyOAuthStore(tmp_path / "oauth_endpoints.sqlite") + token = store.load() + assert token is not None + assert token.access_token == "access-token" + assert token.refresh_token == "refresh-token" + assert token.scope == "user-library-read" + assert token.expires_at > 0 + + +def test_oauth_callback_invalid_state_returns_400(monkeypatch, tmp_path) -> None: + client, module = _build_client(monkeypatch, tmp_path) + module.app.state.spotify_oauth_state = "expected-state" + + response = client.get("/api/spotify/oauth/callback?code=abc&state=wrong-state") + + assert response.status_code == 400 diff --git a/tests/test_spotify_oauth_injection.py b/tests/test_spotify_oauth_injection.py new file mode 100644 index 0000000..6e27289 --- /dev/null +++ b/tests/test_spotify_oauth_injection.py @@ -0,0 +1,90 @@ +from __future__ import annotations + +import importlib +import sys +from types import SimpleNamespace + +import pytest + +pytest.importorskip("fastapi") + + +class _FakeSpotifyClient: + def __init__(self, **kwargs): + self.kwargs = kwargs + + +class _FakeStoreWithToken: + def __init__(self, _db_path): + self._token = 
SimpleNamespace(access_token="oauth-access-token") + + def load(self): + return self._token + + def get_valid_token(self, _client_id, _client_secret, config=None): + return self._token + + +class _FakeStoreNoToken: + def __init__(self, _db_path): + self._token = None + + def load(self): + return self._token + + def get_valid_token(self, _client_id, _client_secret, config=None): + return None + + +def _import_api_main(monkeypatch): + monkeypatch.setattr(sys, "version_info", (3, 11, 0, "final", 0), raising=False) + monkeypatch.setattr(sys, "version", "3.11.9", raising=False) + sys.modules.pop("api.main", None) + module = importlib.import_module("api.main") + module.app.router.on_startup.clear() + module.app.router.on_shutdown.clear() + return module + + +def test_build_spotify_client_uses_oauth_access_token_when_valid(monkeypatch, tmp_path) -> None: + module = _import_api_main(monkeypatch) + module.app.state.paths = SimpleNamespace(db_path=str(tmp_path / "oauth.sqlite")) + + monkeypatch.setattr(module, "SpotifyOAuthStore", _FakeStoreWithToken) + monkeypatch.setattr(module, "SpotifyPlaylistClient", _FakeSpotifyClient) + + client = module._build_spotify_client_with_optional_oauth( + { + "spotify": { + "client_id": "client-id", + "client_secret": "client-secret", + } + } + ) + + assert isinstance(client, _FakeSpotifyClient) + assert client.kwargs["client_id"] == "client-id" + assert client.kwargs["client_secret"] == "client-secret" + assert client.kwargs["access_token"] == "oauth-access-token" + + +def test_build_spotify_client_falls_back_to_public_mode_when_no_token(monkeypatch, tmp_path) -> None: + module = _import_api_main(monkeypatch) + module.app.state.paths = SimpleNamespace(db_path=str(tmp_path / "oauth.sqlite")) + + monkeypatch.setattr(module, "SpotifyOAuthStore", _FakeStoreNoToken) + monkeypatch.setattr(module, "SpotifyPlaylistClient", _FakeSpotifyClient) + + client = module._build_spotify_client_with_optional_oauth( + { + "spotify": { + "client_id": "client-id", + "client_secret": "client-secret", + } + } + ) + + assert isinstance(client, _FakeSpotifyClient) + assert client.kwargs["client_id"] == "client-id" + assert client.kwargs["client_secret"] == "client-secret" + assert "access_token" not in client.kwargs diff --git a/tests/test_spotify_oauth_refresh.py b/tests/test_spotify_oauth_refresh.py new file mode 100644 index 0000000..2a68d79 --- /dev/null +++ b/tests/test_spotify_oauth_refresh.py @@ -0,0 +1,79 @@ +from __future__ import annotations + +import time + +from spotify.oauth_store import SpotifyOAuthStore, SpotifyOAuthToken + + +def test_get_valid_token_returns_original_when_not_expired(tmp_path) -> None: + store = SpotifyOAuthStore(tmp_path / "oauth_refresh.sqlite") + token = SpotifyOAuthToken( + access_token="access-current", + refresh_token="refresh-current", + expires_at=int(time.time()) + 3600, + scope="user-library-read", + ) + store.save(token) + + result = store.get_valid_token(client_id="cid", client_secret="secret") + + assert result is not None + assert result.access_token == "access-current" + assert result.refresh_token == "refresh-current" + assert result.scope == "user-library-read" + + +def test_get_valid_token_refreshes_and_updates_db_when_expired(tmp_path, monkeypatch) -> None: + store = SpotifyOAuthStore(tmp_path / "oauth_refresh.sqlite") + old = SpotifyOAuthToken( + access_token="old-access", + refresh_token="old-refresh", + expires_at=int(time.time()) - 10, + scope="user-library-read", + ) + store.save(old) + + monkeypatch.setattr( + 
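+        # Assumption: refresh_access_token is resolved from spotify.oauth_store at call
+        # time, so patching that module-level name lets the stub return a rotated token pair.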
"spotify.oauth_store.refresh_access_token", + lambda client_id, client_secret, refresh_token: { + "access_token": "new-access", + "refresh_token": "new-refresh", + "expires_in": 7200, + "scope": "user-library-read playlist-read-private", + }, + ) + + result = store.get_valid_token(client_id="cid", client_secret="secret") + + assert result is not None + assert result.access_token == "new-access" + assert result.refresh_token == "new-refresh" + assert result.scope == "user-library-read playlist-read-private" + assert result.expires_at > int(time.time()) + + persisted = store.load() + assert persisted is not None + assert persisted.access_token == "new-access" + assert persisted.refresh_token == "new-refresh" + assert persisted.scope == "user-library-read playlist-read-private" + + +def test_get_valid_token_clears_token_when_refresh_fails(tmp_path, monkeypatch) -> None: + store = SpotifyOAuthStore(tmp_path / "oauth_refresh.sqlite") + token = SpotifyOAuthToken( + access_token="expired-access", + refresh_token="expired-refresh", + expires_at=int(time.time()) - 10, + scope="user-library-read", + ) + store.save(token) + + def _raise_refresh_error(client_id, client_secret, refresh_token): + raise RuntimeError("refresh failed") + + monkeypatch.setattr("spotify.oauth_store.refresh_access_token", _raise_refresh_error) + + result = store.get_valid_token(client_id="cid", client_secret="secret") + + assert result is None + assert store.load() is None diff --git a/tests/test_spotify_oauth_store.py b/tests/test_spotify_oauth_store.py new file mode 100644 index 0000000..8979dbe --- /dev/null +++ b/tests/test_spotify_oauth_store.py @@ -0,0 +1,41 @@ +from __future__ import annotations + +from spotify.oauth_store import SpotifyOAuthStore, SpotifyOAuthToken + + +def test_spotify_oauth_store_lifecycle(tmp_path) -> None: + db_path = tmp_path / "spotify_oauth.sqlite" + store = SpotifyOAuthStore(db_path) + + first = SpotifyOAuthToken( + access_token="access-1", + refresh_token="refresh-1", + expires_at=1_800_000_000, + scope="user-library-read", + ) + store.save(first) + + loaded_first = store.load() + assert loaded_first is not None + assert loaded_first.access_token == first.access_token + assert loaded_first.refresh_token == first.refresh_token + assert loaded_first.expires_at == first.expires_at + assert loaded_first.scope == first.scope + + second = SpotifyOAuthToken( + access_token="access-2", + refresh_token="refresh-2", + expires_at=1_900_000_000, + scope="user-library-read playlist-read-private", + ) + store.save(second) + + loaded_second = store.load() + assert loaded_second is not None + assert loaded_second.access_token == second.access_token + assert loaded_second.refresh_token == second.refresh_token + assert loaded_second.expires_at == second.expires_at + assert loaded_second.scope == second.scope + + store.clear() + assert store.load() is None diff --git a/tests/test_spotify_playlist_client.py b/tests/test_spotify_playlist_client.py new file mode 100644 index 0000000..d9dd855 --- /dev/null +++ b/tests/test_spotify_playlist_client.py @@ -0,0 +1,128 @@ +from __future__ import annotations + +from typing import Any + +from spotify.client import SpotifyPlaylistClient + + +def test_get_playlist_items_empty_playlist(monkeypatch) -> None: + client = SpotifyPlaylistClient(client_id="id", client_secret="secret") + + def fake_request_json(url: str, params: dict[str, Any] | None = None) -> dict[str, Any]: + return {"snapshot_id": "snap-empty", "tracks": {"items": [], "next": None}} + + 
monkeypatch.setattr(client, "_request_json", fake_request_json) + + snapshot_id, items = client.get_playlist_items("playlist-empty") + + assert snapshot_id == "snap-empty" + assert items == [] + + +def test_get_playlist_items_preserves_duplicates_and_order(monkeypatch) -> None: + client = SpotifyPlaylistClient(client_id="id", client_secret="secret") + + page_one = { + "snapshot_id": "snap-dup", + "tracks": { + "items": [ + { + "added_at": "2026-02-01T00:00:00Z", + "track": { + "id": "track-1", + "name": "Song A", + "duration_ms": 1000, + "external_ids": {"isrc": "ISRC_A"}, + "album": {"name": "Album A"}, + "artists": [{"name": "Artist A"}], + }, + }, + { + "added_at": "2026-02-01T00:01:00Z", + "track": { + "id": "track-1", + "name": "Song A", + "duration_ms": 1000, + "external_ids": {"isrc": "ISRC_A"}, + "album": {"name": "Album A"}, + "artists": [{"name": "Artist A"}], + }, + }, + ], + "next": "https://api.spotify.com/v1/playlists/p/tracks?offset=2&limit=2", + }, + } + page_two = { + "items": [ + { + "added_at": "2026-02-01T00:02:00Z", + "track": { + "id": "track-2", + "name": "Song B", + "duration_ms": 2000, + "external_ids": {"isrc": "ISRC_B"}, + "album": {"name": "Album B"}, + "artists": [{"name": "Artist B"}], + }, + } + ], + "next": None, + } + + calls: list[str] = [] + + def fake_request_json(url: str, params: dict[str, Any] | None = None) -> dict[str, Any]: + calls.append(url) + if "playlists" in url and "offset=2" not in url: + return page_one + return page_two + + monkeypatch.setattr(client, "_request_json", fake_request_json) + + snapshot_id, items = client.get_playlist_items("playlist-dup") + + assert snapshot_id == "snap-dup" + assert [item["spotify_track_id"] for item in items] == ["track-1", "track-1", "track-2"] + assert [item["position"] for item in items] == [0, 1, 2] + assert items[0]["artist"] == "Artist A" + assert items[2]["title"] == "Song B" + assert items[2]["album"] == "Album B" + assert items[2]["duration_ms"] == 2000 + assert items[2]["isrc"] == "ISRC_B" + assert len(calls) == 2 + + +def test_get_playlist_items_drops_null_track_entries(monkeypatch) -> None: + client = SpotifyPlaylistClient(client_id="id", client_secret="secret") + + def fake_request_json(url: str, params: dict[str, Any] | None = None) -> dict[str, Any]: + return { + "snapshot_id": "snap-null", + "tracks": { + "items": [ + {"added_at": "2026-02-01T00:00:00Z", "track": None}, + { + "added_at": "2026-02-01T00:01:00Z", + "track": { + "id": "track-3", + "name": "Song C", + "duration_ms": 3000, + "external_ids": {"isrc": "ISRC_C"}, + "album": {"name": "Album C"}, + "artists": [{"name": "Artist C"}], + }, + }, + ], + "next": None, + }, + } + + monkeypatch.setattr(client, "_request_json", fake_request_json) + + snapshot_id, items = client.get_playlist_items("playlist-null") + + assert snapshot_id == "snap-null" + assert len(items) == 1 + assert items[0]["spotify_track_id"] == "track-3" + assert items[0]["position"] == 1 + diff --git a/tests/test_spotify_playlist_integration.py b/tests/test_spotify_playlist_integration.py new file mode 100644 index 0000000..0f05088 --- /dev/null +++ b/tests/test_spotify_playlist_integration.py @@ -0,0 +1,136 @@ +from __future__ import annotations + +import asyncio +from typing import Any + +from metadata.types import MusicMetadata +from scheduler.jobs.spotify_playlist_watch import enqueue_spotify_track + + +class _MockSpotifyClient: + def __init__(self, snapshot_id: str, items: list[dict[str, Any]]) -> None: + self.snapshot_id = snapshot_id + self.items = items + + def 
get_playlist_items(self, _playlist_id: str) -> tuple[str, list[dict[str, Any]]]: + return self.snapshot_id, list(self.items) + + +class _MockSearchService: + def __init__(self, results: list[dict[str, Any]]) -> None: + self._results = results + self.queries: list[str] = [] + + async def search(self, query: str) -> list[dict[str, Any]]: + self.queries.append(query) + return list(self._results) + + +class _MockQueue: + def __init__(self) -> None: + self.items: list[dict[str, Any]] = [] + + def enqueue(self, payload: dict[str, Any]) -> None: + self.items.append(payload) + + +def test_enqueue_spotify_track_integration_single_result() -> None: + spotify_client = _MockSpotifyClient( + "snap-1", + [ + { + "spotify_track_id": "sp-track-1", + "artist": "Artist One", + "title": "Track One", + "duration_ms": 200000, + } + ], + ) + _snapshot_id, tracks = spotify_client.get_playlist_items("playlist-1") + spotify_track = tracks[0] + + search_service = _MockSearchService( + [ + { + "media_url": "https://example.com/media-1", + "title": "Track One", + "duration": 200, + "source_id": "youtube_music", + "extra": {"lyrics": "la la"}, + } + ] + ) + queue = _MockQueue() + + asyncio.run( + enqueue_spotify_track( + queue=queue, + spotify_track=spotify_track, + search_service=search_service, + playlist_id="playlist-1", + ) + ) + + assert len(queue.items) == 1 + payload = queue.items[0] + assert payload["playlist_id"] == "playlist-1" + assert payload["spotify_track_id"] == "sp-track-1" + assert payload["resolved_media"]["media_url"] == "https://example.com/media-1" + assert isinstance(payload["music_metadata"], MusicMetadata) + assert payload["music_metadata"].title == "Track One" + assert payload["music_metadata"].artist == "Artist One" + assert search_service.queries == ["Artist One - Track One official audio"] + + +def test_enqueue_spotify_track_integration_best_result_selected() -> None: + spotify_client = _MockSpotifyClient( + "snap-2", + [ + { + "spotify_track_id": "sp-track-2", + "artist": "Artist Two", + "title": "Track Two", + "duration_ms": 210000, + } + ], + ) + _snapshot_id, tracks = spotify_client.get_playlist_items("playlist-2") + spotify_track = tracks[0] + + search_service = _MockSearchService( + [ + { + "media_url": "https://example.com/worse", + "title": "Track Two (live)", + "duration": 260, + "source_id": "youtube_music", + "extra": {}, + }, + { + "media_url": "https://example.com/best", + "title": "Track Two", + "duration": 210, + "source_id": "youtube", + "extra": {"genre": "Rock"}, + }, + ] + ) + queue = _MockQueue() + + asyncio.run( + enqueue_spotify_track( + queue=queue, + spotify_track=spotify_track, + search_service=search_service, + playlist_id="playlist-2", + ) + ) + + assert len(queue.items) == 1 + payload = queue.items[0] + assert payload["resolved_media"]["media_url"] == "https://example.com/best" + assert payload["resolved_media"]["source_id"] == "youtube" + assert isinstance(payload["music_metadata"], MusicMetadata) + assert payload["music_metadata"].title == "Track Two" + assert payload["music_metadata"].artist == "Artist Two" + diff --git a/tests/test_spotify_playlist_watch_job.py b/tests/test_spotify_playlist_watch_job.py new file mode 100644 index 0000000..31ba56a --- /dev/null +++ b/tests/test_spotify_playlist_watch_job.py @@ -0,0 +1,121 @@ +from __future__ import annotations + +from typing import Any + +from scheduler.jobs.spotify_playlist_watch import run_spotify_playlist_watch_job + + +def _item(track_id: str, position: int) -> dict[str, Any]: + return { + 
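+        # Every field below derives deterministically from track_id/position;
+        # the added_at minute encoding assumes position < 10, which holds for these tests.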
"spotify_track_id": track_id, + "position": position, + "added_at": f"2026-02-16T00:0{position}:00Z", + "artist": f"artist-{track_id}", + "title": f"title-{track_id}", + "album": f"album-{track_id}", + "duration_ms": 1000 + position, + "isrc": f"isrc-{track_id}", + } + + +class _MockSpotifyClient: + def __init__(self, snapshot_id: str, items: list[dict[str, Any]]) -> None: + self.snapshot_id = snapshot_id + self.items = items + self.calls: list[str] = [] + + def get_playlist_items(self, playlist_id: str) -> tuple[str, list[dict[str, Any]]]: + self.calls.append(playlist_id) + return self.snapshot_id, list(self.items) + + +class _MockSnapshotStore: + def __init__(self, latest_snapshot: dict[str, Any] | None) -> None: + self.latest_snapshot = latest_snapshot + self.get_calls: list[str] = [] + self.store_calls: list[tuple[str, str, list[dict[str, Any]]]] = [] + + def get_latest_snapshot(self, playlist_id: str) -> dict[str, Any] | None: + self.get_calls.append(playlist_id) + return self.latest_snapshot + + def store_snapshot(self, playlist_id: str, snapshot_id: str, items: list[dict[str, Any]]) -> Any: + self.store_calls.append((playlist_id, snapshot_id, list(items))) + return type("WriteResult", (), {"snapshot_db_id": 42})() + + +def _isolate_playlist_paths(monkeypatch, tmp_path) -> None: + monkeypatch.setattr( + "scheduler.jobs.spotify_playlist_watch._resolve_playlist_dirs", + lambda _config: (tmp_path / "Playlists", tmp_path / "Music"), + ) + monkeypatch.setattr( + "scheduler.jobs.spotify_playlist_watch._load_downloaded_track_paths", + lambda _playlist_id: [], + ) + + +def test_watch_job_unchanged_snapshot_exits_without_enqueue(monkeypatch, tmp_path) -> None: + _isolate_playlist_paths(monkeypatch, tmp_path) + prev_items = [_item("a", 0)] + store = _MockSnapshotStore({"snapshot_id": "snap-1", "items": prev_items}) + client = _MockSpotifyClient("snap-1", [_item("a", 0), _item("b", 1)]) + enqueued: list[str] = [] + + result = run_spotify_playlist_watch_job( + playlist_id="playlist-1", + spotify_client=client, + snapshot_store=store, + enqueue_track=lambda item: enqueued.append(str(item["spotify_track_id"])), + ) + + assert result["status"] == "unchanged" + assert result["enqueued"] == 0 + assert enqueued == [] + assert store.store_calls == [] + + +def test_watch_job_enqueues_only_added_items_in_order(monkeypatch, tmp_path) -> None: + _isolate_playlist_paths(monkeypatch, tmp_path) + prev_items = [_item("a", 0), _item("b", 1)] + curr_items = [_item("a", 0), _item("b", 1), _item("c", 2), _item("d", 3)] + store = _MockSnapshotStore({"snapshot_id": "snap-1", "items": prev_items}) + client = _MockSpotifyClient("snap-2", curr_items) + enqueued: list[str] = [] + + result = run_spotify_playlist_watch_job( + playlist_id="playlist-1", + spotify_client=client, + snapshot_store=store, + enqueue_track=lambda item: enqueued.append(str(item["spotify_track_id"])), + ) + + assert result["status"] == "updated" + assert result["added_count"] == 2 + assert result["enqueued"] == 2 + assert enqueued == ["c", "d"] + assert len(store.store_calls) == 1 + assert store.store_calls[0][1] == "snap-2" + + +def test_watch_job_moved_items_do_not_enqueue(monkeypatch, tmp_path) -> None: + _isolate_playlist_paths(monkeypatch, tmp_path) + prev_items = [_item("a", 0), _item("b", 1), _item("c", 2)] + curr_items = [_item("b", 0), _item("a", 1), _item("c", 2)] + store = _MockSnapshotStore({"snapshot_id": "snap-1", "items": prev_items}) + client = _MockSpotifyClient("snap-2", curr_items) + enqueued: list[str] = [] + + result = 
run_spotify_playlist_watch_job( + playlist_id="playlist-1", + spotify_client=client, + snapshot_store=store, + enqueue_track=lambda item: enqueued.append(str(item["spotify_track_id"])), + ) + + assert result["status"] == "updated" + assert result["added_count"] == 0 + assert result["moved_count"] == 2 + assert result["enqueued"] == 0 + assert enqueued == [] + assert len(store.store_calls) == 1 diff --git a/tests/test_spotify_saved_albums_sync.py b/tests/test_spotify_saved_albums_sync.py new file mode 100644 index 0000000..740be4e --- /dev/null +++ b/tests/test_spotify_saved_albums_sync.py @@ -0,0 +1,163 @@ +from __future__ import annotations + +import asyncio +from types import SimpleNamespace +from typing import Any + +from scheduler.jobs.spotify_playlist_watch import ( + SPOTIFY_SAVED_ALBUMS_PLAYLIST_ID, + spotify_saved_albums_watch_job, +) + + +class _FakeSpotifyClient: + def __init__(self, snapshot_id: str, albums: list[dict[str, Any]]) -> None: + self.snapshot_id = snapshot_id + self.albums = albums + + async def get_saved_albums(self) -> tuple[str, list[dict[str, Any]]]: + return self.snapshot_id, list(self.albums) + + +class _FakeSnapshotStore: + def __init__(self, latest_snapshot: dict[str, Any] | None) -> None: + self.latest_snapshot = latest_snapshot + self.store_calls: list[tuple[str, str, list[dict[str, Any]]]] = [] + + def get_latest_snapshot(self, playlist_id: str) -> dict[str, Any] | None: + return self.latest_snapshot + + def store_snapshot(self, playlist_id: str, snapshot_id: str, items: list[dict[str, Any]]) -> None: + self.store_calls.append((playlist_id, snapshot_id, list(items))) + + +def _album(album_id: str, position: int) -> dict[str, Any]: + return { + "album_id": album_id, + "position": position, + "added_at": f"2026-02-16T00:0{position}:00Z", + "name": f"Album {album_id}", + "artist": f"Artist {album_id}", + "artists": [f"Artist {album_id}"], + "release_date": "2024-01-01", + "total_tracks": 10, + "tracks": [], + } + + +def test_saved_albums_sync_triggers_album_sync_only_for_new_albums(monkeypatch) -> None: + config = {"spotify": {"client_id": "cid", "client_secret": "csec"}} + previous_items = [ + {"spotify_track_id": "album-a", "position": 0, "added_at": "2026-02-16T00:00:00Z"}, + ] + db = _FakeSnapshotStore({"snapshot_id": "snap-prev", "items": previous_items}) + queue = object() + search_service = object() + spotify_client = _FakeSpotifyClient( + "snap-next", + [_album("album-a", 0), _album("album-b", 1), _album("album-c", 2)], + ) + + class _FakeOAuthStore: + def __init__(self, _db_path): + pass + + def get_valid_token(self, _client_id, _client_secret, config=None): + return SimpleNamespace(access_token="oauth-token") + + album_sync_calls: list[str] = [] + + async def _spy_run_spotify_album_sync(*, album_id, config, db, queue, spotify_client): + album_sync_calls.append(str(album_id)) + return { + "status": "accepted", + "intent_type": "spotify_album", + "identifier": str(album_id), + "message": "ok", + "enqueued_count": 1, + } + + rebuild_calls: list[dict[str, Any]] = [] + + def _spy_rebuild_playlist_from_tracks(playlist_name, playlist_root, music_root, track_file_paths): + rebuild_calls.append( + { + "playlist_name": playlist_name, + "playlist_root": playlist_root, + "music_root": music_root, + "track_file_paths": list(track_file_paths), + } + ) + return playlist_root / f"{playlist_name}.m3u" + + monkeypatch.setattr("scheduler.jobs.spotify_playlist_watch.SpotifyOAuthStore", _FakeOAuthStore) + 
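+    # Assumption: the watch job resolves run_spotify_album_sync from api.intent_dispatcher
+    # at call time, so patching it on that module intercepts each per-album sync.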
monkeypatch.setattr("api.intent_dispatcher.run_spotify_album_sync", _spy_run_spotify_album_sync) + monkeypatch.setattr( + "scheduler.jobs.spotify_playlist_watch._load_downloaded_track_paths_for_playlist_ids", + lambda _playlist_ids: [], + ) + monkeypatch.setattr( + "scheduler.jobs.spotify_playlist_watch.rebuild_playlist_from_tracks", + _spy_rebuild_playlist_from_tracks, + ) + + result = asyncio.run( + spotify_saved_albums_watch_job( + config=config, + db=db, + queue=queue, + spotify_client=spotify_client, + search_service=search_service, + ) + ) + + assert result["status"] == "updated" + assert result["playlist_id"] == SPOTIFY_SAVED_ALBUMS_PLAYLIST_ID + assert album_sync_calls == ["album-b", "album-c"] + + assert len(db.store_calls) == 1 + assert db.store_calls[0][0] == SPOTIFY_SAVED_ALBUMS_PLAYLIST_ID + assert db.store_calls[0][1] == "snap-next" + + assert len(rebuild_calls) == 1 + assert rebuild_calls[0]["playlist_name"] == "Spotify - Saved Albums" + + +def test_saved_albums_sync_skips_when_oauth_token_missing(monkeypatch) -> None: + config = {"spotify": {"client_id": "cid", "client_secret": "csec"}} + db = _FakeSnapshotStore(latest_snapshot=None) + queue = object() + search_service = object() + spotify_client = _FakeSpotifyClient("snap-next", [_album("album-a", 0)]) + + class _FakeOAuthStore: + def __init__(self, _db_path): + pass + + def get_valid_token(self, _client_id, _client_secret, config=None): + return None + + album_sync_calls: list[str] = [] + + async def _spy_run_spotify_album_sync(*, album_id, config, db, queue, spotify_client): + album_sync_calls.append(str(album_id)) + return {"status": "accepted", "enqueued_count": 1} + + monkeypatch.setattr("scheduler.jobs.spotify_playlist_watch.SpotifyOAuthStore", _FakeOAuthStore) + monkeypatch.setattr("api.intent_dispatcher.run_spotify_album_sync", _spy_run_spotify_album_sync) + + result = asyncio.run( + spotify_saved_albums_watch_job( + config=config, + db=db, + queue=queue, + spotify_client=spotify_client, + search_service=search_service, + ) + ) + + assert result["status"] == "skipped" + assert result["playlist_id"] == SPOTIFY_SAVED_ALBUMS_PLAYLIST_ID + assert result["enqueued"] == 0 + assert album_sync_calls == [] + assert db.store_calls == [] diff --git a/tests/test_spotify_user_playlists_sync.py b/tests/test_spotify_user_playlists_sync.py new file mode 100644 index 0000000..7d4ef40 --- /dev/null +++ b/tests/test_spotify_user_playlists_sync.py @@ -0,0 +1,138 @@ +from __future__ import annotations + +import asyncio +from types import SimpleNamespace +from typing import Any + +from scheduler.jobs.spotify_playlist_watch import ( + SPOTIFY_USER_PLAYLISTS_PLAYLIST_ID, + spotify_user_playlists_watch_job, +) + + +class _FakeSpotifyClient: + def __init__(self, snapshot_id: str, playlists: list[dict[str, Any]]) -> None: + self.snapshot_id = snapshot_id + self.playlists = playlists + + async def get_user_playlists(self) -> tuple[str, list[dict[str, Any]]]: + return self.snapshot_id, list(self.playlists) + + +class _FakeSnapshotStore: + def __init__(self, latest_snapshot: dict[str, Any] | None) -> None: + self.latest_snapshot = latest_snapshot + self.store_calls: list[tuple[str, str, list[dict[str, Any]]]] = [] + + def get_latest_snapshot(self, playlist_id: str) -> dict[str, Any] | None: + return self.latest_snapshot + + def store_snapshot(self, playlist_id: str, snapshot_id: str, items: list[dict[str, Any]]) -> None: + self.store_calls.append((playlist_id, snapshot_id, list(items))) + + +def _playlist(playlist_id: str, name: str, 
track_count: int) -> dict[str, Any]: + return { + "id": playlist_id, + "name": name, + "track_count": track_count, + } + + +def test_user_playlists_sync_triggers_existing_watch_job_for_new_playlists(monkeypatch) -> None: + config = {"spotify": {"client_id": "cid", "client_secret": "csec"}} + previous_items = [ + {"spotify_track_id": "pl-a", "position": 0, "added_at": None}, + ] + db = _FakeSnapshotStore({"snapshot_id": "snap-prev", "items": previous_items}) + queue = object() + search_service = object() + spotify_client = _FakeSpotifyClient( + "snap-next", + [_playlist("pl-a", "Existing", 10), _playlist("pl-b", "New One", 20), _playlist("pl-c", "New Two", 30)], + ) + + class _FakeOAuthStore: + def __init__(self, _db_path): + pass + + def get_valid_token(self, _client_id, _client_secret, config=None): + return SimpleNamespace(access_token="oauth-token") + + watch_calls: list[dict[str, Any]] = [] + + def _spy_playlist_watch_job(*, spotify_client, db, queue, playlist_id, playlist_name=None, config=None): + watch_calls.append( + { + "playlist_id": playlist_id, + "playlist_name": playlist_name, + "config": config, + } + ) + return { + "status": "updated", + "playlist_id": playlist_id, + "enqueued": 0, + } + + monkeypatch.setattr("scheduler.jobs.spotify_playlist_watch.SpotifyOAuthStore", _FakeOAuthStore) + monkeypatch.setattr("scheduler.jobs.spotify_playlist_watch.playlist_watch_job", _spy_playlist_watch_job) + + result = asyncio.run( + spotify_user_playlists_watch_job( + config=config, + db=db, + queue=queue, + spotify_client=spotify_client, + search_service=search_service, + ) + ) + + assert result["status"] == "updated" + assert result["playlist_id"] == SPOTIFY_USER_PLAYLISTS_PLAYLIST_ID + assert [call["playlist_id"] for call in watch_calls] == ["pl-b", "pl-c"] + assert [call["playlist_name"] for call in watch_calls] == ["New One", "New Two"] + + assert len(db.store_calls) == 1 + assert db.store_calls[0][0] == SPOTIFY_USER_PLAYLISTS_PLAYLIST_ID + assert db.store_calls[0][1] == "snap-next" + + +def test_user_playlists_sync_skips_cleanly_when_oauth_token_missing(monkeypatch) -> None: + config = {"spotify": {"client_id": "cid", "client_secret": "csec"}} + db = _FakeSnapshotStore(latest_snapshot=None) + queue = object() + search_service = object() + spotify_client = _FakeSpotifyClient("snap-next", [_playlist("pl-a", "Any", 1)]) + + class _FakeOAuthStore: + def __init__(self, _db_path): + pass + + def get_valid_token(self, _client_id, _client_secret, config=None): + return None + + watch_calls: list[str] = [] + + def _spy_playlist_watch_job(*, spotify_client, db, queue, playlist_id, playlist_name=None, config=None): + watch_calls.append(playlist_id) + return {"status": "updated", "playlist_id": playlist_id, "enqueued": 0} + + monkeypatch.setattr("scheduler.jobs.spotify_playlist_watch.SpotifyOAuthStore", _FakeOAuthStore) + monkeypatch.setattr("scheduler.jobs.spotify_playlist_watch.playlist_watch_job", _spy_playlist_watch_job) + + result = asyncio.run( + spotify_user_playlists_watch_job( + config=config, + db=db, + queue=queue, + spotify_client=spotify_client, + search_service=search_service, + ) + ) + + assert result["status"] == "skipped" + assert result["playlist_id"] == SPOTIFY_USER_PLAYLISTS_PLAYLIST_ID + assert result["enqueued"] == 0 + assert watch_calls == [] + assert db.store_calls == [] diff --git a/tests/test_validation_config.py b/tests/test_validation_config.py new file mode 100644 index 0000000..a3066df --- /dev/null +++ b/tests/test_validation_config.py @@ -0,0 +1,74 @@ +from 
__future__ import annotations + +from pathlib import Path +from types import SimpleNamespace + +from download.worker import ( + DownloadWorker, + JOB_STATUS_COMPLETED, + JOB_STATUS_VALIDATION_FAILED, +) + + +class _MockDownloader: + def download(self, media_url: str) -> str: + path = Path("/tmp/mock-audio.mp3") + path.write_bytes(b"mock-audio") + return str(path) + + +def _job() -> SimpleNamespace: + return SimpleNamespace( + payload={ + "playlist_id": "playlist-1", + "spotify_track_id": "track-1", + "resolved_media": {"media_url": "https://example.test/audio"}, + "music_metadata": { + "title": "Track", + "artist": "Artist", + "isrc": "USABC1234567", + "expected_ms": 1_000, + }, + } + ) + + +def test_duration_tolerance_config_changes_validation_outcome(monkeypatch) -> None: + recorded: list[tuple[str, str, str]] = [] + + monkeypatch.setattr("download.worker.ENABLE_DURATION_VALIDATION", True) + monkeypatch.setattr( + "download.worker.record_downloaded_track", + lambda playlist_id, isrc, file_path: recorded.append((playlist_id, isrc, file_path)), + ) + monkeypatch.setattr("download.worker.tag_file", lambda _path, _metadata: None) + + # Deterministic validator model: actual=1.20s, expected=1.00s (delta=0.20s). + monkeypatch.setattr( + "download.worker.validate_duration", + lambda _file_path, expected_ms, tolerance_seconds: abs(1.2 - (expected_ms / 1000.0)) + <= tolerance_seconds, + ) + + worker = DownloadWorker(_MockDownloader()) + + # Baseline tolerance: passes. + monkeypatch.setattr("download.worker.SPOTIFY_DURATION_TOLERANCE_SECONDS", 0.30) + first_job = _job() + worker.process_job(first_job) + assert first_job.status == JOB_STATUS_COMPLETED + assert len(recorded) == 1 + + # Very small tolerance: same track now fails validation. + monkeypatch.setattr("download.worker.SPOTIFY_DURATION_TOLERANCE_SECONDS", 0.05) + second_job = _job() + worker.process_job(second_job) + assert second_job.status == JOB_STATUS_VALIDATION_FAILED + assert len(recorded) == 1 + + # Increased tolerance again: track passes. 
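+    # Re-running the same payload confirms the tolerance is evaluated per job,
+    # not cached from the failed attempt.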
+ monkeypatch.setattr("download.worker.SPOTIFY_DURATION_TOLERANCE_SECONDS", 0.30) + third_job = _job() + worker.process_job(third_job) + assert third_job.status == JOB_STATUS_COMPLETED + assert len(recorded) == 2 diff --git a/tests/test_webui_smoke.py b/tests/test_webui_smoke.py new file mode 100644 index 0000000..887706d --- /dev/null +++ b/tests/test_webui_smoke.py @@ -0,0 +1,275 @@ +from __future__ import annotations + +import socket +import threading +import time +from pathlib import Path +from typing import Any + +import pytest + +fastapi = pytest.importorskip("fastapi") +pytest.importorskip("uvicorn") +requests = pytest.importorskip("requests") +playwright_sync = pytest.importorskip("playwright.sync_api") + +from fastapi import FastAPI +from fastapi.responses import PlainTextResponse +from fastapi.staticfiles import StaticFiles +from playwright.sync_api import Error as PlaywrightError +from playwright.sync_api import sync_playwright +import uvicorn + + +def _free_port() -> int: + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + try: + sock.bind(("127.0.0.1", 0)) + return int(sock.getsockname()[1]) + finally: + sock.close() + + +def _build_webui_test_app() -> FastAPI: + app = FastAPI() + state: dict[str, Any] = { + "request_id": "req-1", + "item_id": "item-1", + "candidate_id": "cand-1", + "job_id": "job-1", + "request_status": "completed", + "job_status": "", + } + + @app.get("/api/paths") + def api_paths() -> dict[str, str]: + return {"config_dir": "config", "downloads_dir": "downloads", "tokens_dir": "tokens"} + + @app.get("/api/version") + def api_version() -> dict[str, str]: + return {"app_version": "0.0.0-test"} + + @app.get("/api/status") + def api_status() -> dict[str, Any]: + return { + "running": False, + "run_id": None, + "started_at": None, + "finished_at": None, + "watcher": {"enabled": False, "paused": False}, + "scheduler": {"enabled": False}, + "status": {"run_successes": [], "run_failures": []}, + "watcher_status": {"state": "idle", "pending_playlists_count": 0, "batch_active": False}, + } + + @app.get("/api/spotify/status") + def api_spotify_status() -> dict[str, Any]: + return {"oauth_connected": False} + + @app.get("/api/metrics") + def api_metrics() -> dict[str, Any]: + return { + "downloads_files": 0, + "downloads_bytes": 0, + "disk_free_bytes": 1024 * 1024 * 1024, + "disk_total_bytes": 2 * 1024 * 1024 * 1024, + "disk_free_percent": 50, + } + + @app.get("/api/schedule") + def api_schedule() -> dict[str, Any]: + return {"schedule": {"enabled": False, "interval_hours": 6, "run_on_startup": False}} + + @app.get("/api/logs") + def api_logs() -> PlainTextResponse: + return PlainTextResponse("ok\n") + + @app.get("/api/files") + def api_files() -> list[dict[str, Any]]: + return [] + + @app.get("/api/history") + def api_history() -> list[dict[str, Any]]: + return [] + + @app.get("/api/download_jobs") + def api_download_jobs(limit: int = 50) -> dict[str, Any]: + if state["job_status"]: + return { + "jobs": [ + { + "id": state["job_id"], + "origin": "search", + "origin_id": state["request_id"], + "url": "https://www.youtube.com/watch?v=stub123", + "status": state["job_status"], + } + ] + } + return {"jobs": []} + + @app.post("/api/search/requests") + def api_create_search_request(_payload: dict[str, Any]) -> dict[str, Any]: + return { + "request_id": state["request_id"], + "music_mode": False, + "music_resolution": None, + "music_candidates": [], + } + + @app.get("/api/search/requests") + def api_list_search_requests() -> dict[str, Any]: + return { + "requests": 
[ + { + "id": state["request_id"], + "status": state["request_status"], + "created_at": "2026-01-01T00:00:00Z", + "media_type": "video", + } + ] + } + + @app.get("/api/search/requests/{request_id}") + def api_get_search_request(request_id: str) -> dict[str, Any]: + return { + "request": { + "id": request_id, + "status": state["request_status"], + "media_type": "video", + "resolved_destination": "downloads", + }, + "items": [ + { + "id": state["item_id"], + "request_id": state["request_id"], + "status": "candidate_found", + "candidate_count": 1, + "media_type": "video", + "position": 1, + "allow_download": True, + } + ], + } + + @app.get("/api/search/items/{item_id}/candidates") + def api_get_candidates(item_id: str) -> dict[str, Any]: + if item_id != state["item_id"]: + return {"candidates": []} + return { + "candidates": [ + { + "id": state["candidate_id"], + "url": "https://www.youtube.com/watch?v=stub123", + "title": "Smoke Candidate", + "source": "youtube", + "allow_download": True, + "final_score": 99, + "job_status": state["job_status"] or None, + } + ] + } + + @app.post("/api/search/items/{item_id}/enqueue") + def api_enqueue_candidate(item_id: str, payload: dict[str, Any]) -> dict[str, Any]: + if item_id != state["item_id"] or payload.get("candidate_id") != state["candidate_id"]: + return {"created": False, "job_id": None} + state["job_status"] = "queued" + return {"created": True, "job_id": state["job_id"]} + + @app.post("/api/search/resolve/once") + def api_resolve_once() -> dict[str, Any]: + return {"request_id": state["request_id"]} + + @app.get("/api/search/queue") + def api_search_queue() -> dict[str, Any]: + return {"jobs": []} + + app.mount("/", StaticFiles(directory=str(Path("webUI").resolve()), html=True), name="webui") + return app + + +@pytest.fixture() +def webui_server() -> str: + app = _build_webui_test_app() + port = _free_port() + base_url = f"http://127.0.0.1:{port}" + config = uvicorn.Config(app=app, host="127.0.0.1", port=port, log_level="error") + server = uvicorn.Server(config=config) + thread = threading.Thread(target=server.run, daemon=True) + thread.start() + + deadline = time.time() + 10 + while time.time() < deadline: + try: + response = requests.get(base_url, timeout=0.25) + if response.status_code == 200: + break + except Exception: + pass + time.sleep(0.1) + else: + server.should_exit = True + thread.join(timeout=5) + pytest.fail("Failed to start local FastAPI test server for WebUI smoke test.") + + try: + yield base_url + finally: + server.should_exit = True + thread.join(timeout=5) + + +@pytest.fixture() +def page(): + with sync_playwright() as pw: + try: + browser = pw.chromium.launch(headless=True) + except PlaywrightError as exc: + pytest.skip(f"Playwright browser not available: {exc}") + context = browser.new_context() + page = context.new_page() + try: + yield page + finally: + context.close() + browser.close() + + +def test_webui_home_search_download_status_without_legacy_run_errors(webui_server: str, page) -> None: + console_errors: list[str] = [] + page_errors: list[str] = [] + + def on_console(msg) -> None: + if msg.type == "error": + console_errors.append(msg.text) + + page.on("console", on_console) + page.on("pageerror", lambda err: page_errors.append(str(err))) + + page.goto(webui_server, wait_until="networkidle") + page.fill("#home-search-input", "smoke test query") + page.click("#home-search-only") + + page.wait_for_selector("#home-results .home-result-card", timeout=10000) + page.wait_for_selector('button[data-action="home-download"]', 
timeout=10000) + page.click('button[data-action="home-download"]') + + page.wait_for_function( + """() => { + const el = document.querySelector("#home-search-message"); + return !!el && /Enqueued job/i.test(el.textContent || ""); + }""", + timeout=10000, + ) + page.wait_for_function( + """() => { + const state = document.querySelector(".home-candidate-state"); + return !!state && /queued/i.test(state.textContent || ""); + }""", + timeout=10000, + ) + + assert not page_errors, f"Page JS errors detected: {page_errors}" + assert not console_errors, f"Console errors detected: {console_errors}" + assert not any("legacy-run" in msg.lower() or "#run-" in msg.lower() for msg in console_errors) diff --git a/tests/test_worker_canonical_path.py b/tests/test_worker_canonical_path.py new file mode 100644 index 0000000..af9f730 --- /dev/null +++ b/tests/test_worker_canonical_path.py @@ -0,0 +1,47 @@ +from __future__ import annotations + +from pathlib import Path +from types import SimpleNamespace + +from download.worker import DownloadWorker, JOB_STATUS_COMPLETED + + +class _MockDownloader: + def __init__(self, temp_path: Path) -> None: + self.temp_path = temp_path + + def download(self, media_url: str) -> str: + self.temp_path.write_bytes(b"mock-audio") + return str(self.temp_path) + + +def test_worker_moves_to_canonical_path_and_returns_it(tmp_path, monkeypatch) -> None: + root = tmp_path / "Music" + temp_file = tmp_path / "download-temp.mp3" + + monkeypatch.setattr("download.worker.tag_file", lambda _path, _metadata: None) + + worker = DownloadWorker(_MockDownloader(temp_file)) + job = SimpleNamespace( + payload={ + "music_root": str(root), + "resolved_media": {"media_url": "https://example.test/audio"}, + "music_metadata": { + "album_artist": "Artist", + "artist": "Artist", + "album": "Album", + "date": "2020", + "disc_num": 2, + "track_num": 3, + "title": "Song", + "genre": "Pop", + }, + } + ) + + result = worker.process_job(job) + + expected = root / "Artist" / "Album (2020)" / "Disc 2" / "03 - Song.mp3" + assert result == {"status": JOB_STATUS_COMPLETED, "file_path": str(expected)} + assert expected.exists() is True + assert temp_file.exists() is False diff --git a/tests/test_worker_return_contract.py b/tests/test_worker_return_contract.py new file mode 100644 index 0000000..d81a0be --- /dev/null +++ b/tests/test_worker_return_contract.py @@ -0,0 +1,51 @@ +from __future__ import annotations + +from types import SimpleNamespace + +from download.worker import JOB_STATUS_VALIDATION_FAILED, DownloadWorker, safe_int + + +class _MockDownloader: + def download(self, media_url: str) -> str: + return "/tmp/mock-track.mp3" + + +def test_process_job_returns_validation_failed_without_file_path(monkeypatch) -> None: + recorded: list[tuple[str, str, str]] = [] + + monkeypatch.setattr("download.worker.validate_duration", lambda *_args, **_kwargs: False) + monkeypatch.setattr("download.worker.get_media_duration", lambda _path: 1.0) + monkeypatch.setattr("download.worker.tag_file", lambda _path, _metadata: None) + monkeypatch.setattr( + "download.worker.record_downloaded_track", + lambda playlist_id, isrc, file_path: recorded.append((playlist_id, isrc, file_path)), + ) + + worker = DownloadWorker(_MockDownloader()) + job = SimpleNamespace( + payload={ + "playlist_id": "playlist-1", + "spotify_track_id": "track-1", + "resolved_media": {"media_url": "https://example.test/audio"}, + "music_metadata": { + "title": "Track One", + "artist": "Artist One", + "isrc": "USABC1234567", + "expected_ms": 180_000, + }, + } + ) 
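+    # validate_duration is patched to fail above, so the worker contract is:
+    # VALIDATION_FAILED status, no file_path in the result, and nothing recorded.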
+ + result = worker.process_job(job) + + assert result == {"status": JOB_STATUS_VALIDATION_FAILED, "file_path": None} + assert job.status == JOB_STATUS_VALIDATION_FAILED + assert recorded == [] + + +def test_safe_int_parses_or_returns_none_for_malformed_values() -> None: + assert safe_int("01/12") == 1 + assert safe_int("") is None + assert safe_int(None) is None + assert safe_int("Disc 1") == 1 + assert safe_int("no number") is None diff --git a/tests/test_worker_validation.py b/tests/test_worker_validation.py new file mode 100644 index 0000000..5c134ab --- /dev/null +++ b/tests/test_worker_validation.py @@ -0,0 +1,56 @@ +from __future__ import annotations + +import wave +from pathlib import Path +from types import SimpleNamespace + +from download.worker import DownloadWorker, JOB_STATUS_VALIDATION_FAILED + + +class _MockDownloader: + def __init__(self, output_path: Path) -> None: + self.output_path = output_path + + def download(self, media_url: str) -> str: + with wave.open(str(self.output_path), "wb") as wav_file: + wav_file.setnchannels(1) + wav_file.setsampwidth(2) + wav_file.setframerate(44_100) + wav_file.writeframes(b"\x00\x00" * 44_100) # 1 second of silence + return str(self.output_path) + + +def test_worker_sets_validation_failed_and_skips_record(monkeypatch, tmp_path: Path) -> None: + recorded_calls: list[tuple[str, str, str]] = [] + + monkeypatch.setattr( + "download.worker.record_downloaded_track", + lambda playlist_id, isrc, file_path: recorded_calls.append((playlist_id, isrc, file_path)), + ) + monkeypatch.setattr( + "download.worker.validate_duration", + lambda file_path, expected_ms, tolerance_seconds: False, + ) + monkeypatch.setattr("download.worker.get_media_duration", lambda file_path: 1.0) + monkeypatch.setattr("download.worker.tag_file", lambda _path, _metadata: None) + + file_path = tmp_path / "short.wav" + worker = DownloadWorker(_MockDownloader(file_path)) + job = SimpleNamespace( + payload={ + "playlist_id": "playlist-1", + "spotify_track_id": "track-1", + "resolved_media": {"media_url": "https://example.test/audio"}, + "music_metadata": { + "title": "Track", + "artist": "Artist", + "isrc": "USABC1234567", + "expected_ms": 180_000, # far from 1-second file + }, + } + ) + + worker.process_job(job) + + assert recorded_calls == [] + assert job.status == JOB_STATUS_VALIDATION_FAILED diff --git a/webUI/app.js b/webUI/app.js index fa5c069..738f36e 100644 --- a/webUI/app.js +++ b/webUI/app.js @@ -24,6 +24,10 @@ const state = { homeSearchRequestId: null, homeResultsTimer: null, homeSearchMode: "searchOnly", + homeMusicMode: false, + homeAlbumCandidatesRequestId: null, + homeQueuedAlbumReleaseGroups: new Set(), + homeAlbumCoverCache: {}, homeRequestContext: {}, homeBestScores: {}, homeCandidateCache: {}, @@ -40,6 +44,8 @@ const state = { homeDirectJobTimer: null, homeJobTimer: null, homeJobSnapshot: null, + spotifyOauthConnected: false, + spotifyConnectedNoticeShown: false, }; const browserState = { open: false, @@ -70,6 +76,8 @@ const GITHUB_RELEASE_PAGE = "https://github.com/Retreivr/retreivr/releases"; const RELEASE_CHECK_KEY = "yt_archiver_release_checked_at"; const RELEASE_CACHE_KEY = "yt_archiver_release_cache"; const RELEASE_VERSION_KEY = "yt_archiver_release_app_version"; +const HOME_MUSIC_MODE_KEY = "retreivr.home.music_mode"; +const HOME_MUSIC_DEBUG_KEY = "retreivr.debug.music"; const HOME_SOURCE_PRIORITY_MAP = { auto: null, youtube: ["youtube"], @@ -118,13 +126,14 @@ function normalizePageName(page) { if (!page) { return "home"; } - if (page === "search") { + 
const cleanPage = String(page).split("?")[0] || page; + if (cleanPage === "search") { return "advanced"; } - if (["downloads", "history", "logs"].includes(page)) { + if (["downloads", "history", "logs"].includes(cleanPage)) { return "status"; } - return page; + return cleanPage; } function setNotice(el, message, isError = false) { @@ -210,7 +219,13 @@ function setPage(page) { refreshLogs(); } else if (target === "config") { if (!state.config || !state.configDirty) { - loadConfig(); + loadConfig().then(async () => { + await refreshSpotifyConfig(); + if (consumeSpotifyConnectedHashFlag()) { + await refreshSpotifyConfig(); + setConfigNotice("Spotify connected successfully.", false, true); + } + }); } refreshSchedule(); } else if (target === "advanced") { @@ -238,6 +253,19 @@ function setPage(page) { } } +function consumeSpotifyConnectedHashFlag() { + const hash = window.location.hash || ""; + if (!hash.includes("spotify=connected")) { + return false; + } + if (state.spotifyConnectedNoticeShown) { + return false; + } + history.replaceState(null, "", window.location.pathname + window.location.search); + state.spotifyConnectedNoticeShown = true; + return true; +} + function isValidHttpUrl(value) { if (!value) return false; try { @@ -1078,24 +1106,6 @@ async function refreshStatus() { $("#status-video-progress-meta").textContent = "-"; } - const singleLink = $("#run-single-download"); - if (singleLink) { - const clientDeliveryId = status.client_delivery_id; - const fileId = status.last_completed_file_id; - if (clientDeliveryId) { - singleLink.href = `/api/deliveries/${clientDeliveryId}/download`; - singleLink.textContent = "Download to device"; - singleLink.setAttribute("aria-disabled", "false"); - } else if (fileId) { - singleLink.href = downloadUrl(fileId); - singleLink.textContent = "Download last"; - singleLink.setAttribute("aria-disabled", "false"); - } else { - singleLink.href = "#"; - singleLink.textContent = "Download last"; - singleLink.setAttribute("aria-disabled", "true"); - } - } const cancelBtn = $("#status-cancel"); if (cancelBtn) { cancelBtn.disabled = !data.running; @@ -1107,12 +1117,53 @@ async function refreshStatus() { await cancelJob(jobId); await refreshStatus(); } catch (err) { - setNotice($("#run-message"), `Cancel failed: ${err.message}`, true); + setNotice($("#home-search-message"), `Cancel failed: ${err.message}`, true); } }; } + + try { + const spotifyStatus = await fetchJson("/api/spotify/status"); + const oauthConnected = !!spotifyStatus.oauth_connected; + const oauthEl = $("#spotify-status-oauth"); + if (oauthEl) { + oauthEl.textContent = oauthConnected ? 
"Connected" : "Not connected"; + } + const likedEl = $("#spotify-status-liked"); + if (likedEl) { + if (spotifyStatus.liked_sync_running) { + likedEl.textContent = "Running..."; + likedEl.classList.add("running"); + } else { + likedEl.classList.remove("running"); + likedEl.textContent = formatTimestamp(spotifyStatus.last_liked_sync) || "-"; + } + } + const savedEl = $("#spotify-status-saved"); + if (savedEl) { + if (spotifyStatus.saved_sync_running) { + savedEl.textContent = "Running..."; + savedEl.classList.add("running"); + } else { + savedEl.classList.remove("running"); + savedEl.textContent = formatTimestamp(spotifyStatus.last_saved_sync) || "-"; + } + } + const playlistsEl = $("#spotify-status-playlists"); + if (playlistsEl) { + if (spotifyStatus.playlists_sync_running) { + playlistsEl.textContent = "Running..."; + playlistsEl.classList.add("running"); + } else { + playlistsEl.classList.remove("running"); + playlistsEl.textContent = formatTimestamp(spotifyStatus.last_playlists_sync) || "-"; + } + } + } catch (err) { + // Best-effort status enrichment; ignore when endpoint is unavailable. + } } catch (err) { - setNotice($("#run-message"), `Status error: ${err.message}`, true); + setNotice($("#home-search-message"), `Status error: ${err.message}`, true); } } @@ -1567,6 +1618,59 @@ function parseHomeSearchQuery(value, preferAlbum) { }; } +function homeMusicDebugEnabled() { + try { + return localStorage.getItem(HOME_MUSIC_DEBUG_KEY) === "1"; + } catch (_err) { + return false; + } +} + +function homeMusicDebugLog(...args) { + if (!homeMusicDebugEnabled()) { + return; + } + console.debug(...args); +} + +function ensureHomeMusicModeBadge() { + let badge = $("#home-music-mode-badge"); + if (badge) { + return badge; + } + const headerActions = document.querySelector(".home-results-header-actions"); + if (!headerActions) { + return null; + } + badge = document.createElement("span"); + badge.id = "home-music-mode-badge"; + badge.className = "chip idle hidden"; + badge.textContent = "Music Mode"; + headerActions.appendChild(badge); + return badge; +} + +function updateHomeMusicModeUI() { + const toggle = $("#home-music-mode"); + if (toggle) { + toggle.checked = !!state.homeMusicMode; + } + const badge = ensureHomeMusicModeBadge(); + if (badge) { + badge.classList.toggle("hidden", !state.homeMusicMode); + } +} + +function loadHomeMusicModePreference() { + const raw = localStorage.getItem(HOME_MUSIC_MODE_KEY); + state.homeMusicMode = raw === "true"; + updateHomeMusicModeUI(); +} + +function saveHomeMusicModePreference() { + localStorage.setItem(HOME_MUSIC_MODE_KEY, state.homeMusicMode ? "true" : "false"); +} + function buildHomeSearchPayload(autoEnqueue, rawQuery = "") { const preferAlbum = $("#home-prefer-albums")?.checked; const parsed = parseHomeSearchQuery($("#home-search-input")?.value, preferAlbum); @@ -1575,7 +1679,7 @@ function buildHomeSearchPayload(autoEnqueue, rawQuery = "") { } const minScoreRaw = parseFloat($("#home-min-score")?.value); const destination = $("#home-destination")?.value.trim(); - const treatAsMusic = $("#home-music-mode")?.checked ?? $("#home-treat-music")?.checked ?? 
false; + const treatAsMusic = !!state.homeMusicMode; const formatOverride = $("#home-format")?.value.trim(); const deliveryMode = ($("#home-delivery-mode")?.value || "server").toLowerCase(); const rawText = rawQuery || $("#home-search-input")?.value || ""; @@ -1719,6 +1823,260 @@ function setHomeResultsDetail(text, isError = false) { detailEl.classList.remove("hidden"); } +function clearHomeAlbumCandidates() { + const existing = document.getElementById("home-album-candidates"); + if (existing) { + existing.remove(); + } +} + +function normalizeMusicAlbumCandidates(rawCandidates) { + if (!Array.isArray(rawCandidates)) { + return []; + } + return rawCandidates + .map((item) => { + const releaseGroupId = item?.release_group_id || item?.album_id || null; + if (!releaseGroupId) { + return null; + } + return { + release_group_id: releaseGroupId, + title: item?.title || "", + artist_credit: item?.artist_credit || item?.artist || "", + first_release_date: item?.first_release_date || item?.first_released || "", + primary_type: item?.primary_type || "Album", + secondary_types: Array.isArray(item?.secondary_types) ? item.secondary_types : [], + score: Number.isFinite(Number(item?.score)) ? Number(item.score) : null, + track_count: Number.isFinite(Number(item?.track_count)) ? Number(item.track_count) : null, + }; + }) + .filter(Boolean); +} + +function uniqueMusicAlbumCandidates(candidates) { + const seen = new Set(); + return candidates.filter((candidate) => { + const key = String(candidate?.release_group_id || "").trim(); + if (!key || seen.has(key)) { + return false; + } + seen.add(key); + return true; + }); +} + +function renderHomeAlbumCandidates(candidates, query = "") { + clearHomeAlbumCandidates(); + const homeResults = document.getElementById("home-results"); + const header = homeResults?.querySelector(".home-results-header"); + if (!homeResults || !header) { + return; + } + + const showPanel = !!state.homeMusicMode && !!String(query || "").trim(); + if (!showPanel) { + return; + } + + const normalized = uniqueMusicAlbumCandidates(normalizeMusicAlbumCandidates(candidates)); + const container = document.createElement("div"); + container.id = "home-album-candidates"; + container.className = "stack"; + const panelHeader = document.createElement("div"); + panelHeader.className = "row"; + const panelTitle = document.createElement("div"); + panelTitle.className = "group-title"; + panelTitle.textContent = "Albums (MusicBrainz)"; + panelHeader.appendChild(panelTitle); + container.appendChild(panelHeader); + + if (!normalized.length) { + const empty = document.createElement("div"); + empty.className = "meta"; + empty.textContent = "No album matches found"; + container.appendChild(empty); + header.insertAdjacentElement("afterend", container); + return; + } + + normalized.forEach((candidate) => { + const card = document.createElement("div"); + card.className = "home-result-card album-card"; + + const cover = document.createElement("img"); + cover.className = "album-cover"; + cover.alt = candidate.title ? 
`${candidate.title} cover` : "Album cover"; + cover.loading = "lazy"; + cover.style.width = "64px"; + cover.style.height = "64px"; + cover.style.objectFit = "cover"; + cover.style.borderRadius = "8px"; + cover.style.display = "none"; + cover.style.flexShrink = "0"; + card.appendChild(cover); + + const body = document.createElement("div"); + body.className = "stack"; + body.style.flex = "1"; + + const title = document.createElement("span"); + title.className = "album-title home-candidate-title"; + title.textContent = candidate.title || ""; + body.appendChild(title); + + const artist = document.createElement("span"); + artist.className = "album-artist meta"; + artist.textContent = candidate.artist_credit || ""; + body.appendChild(artist); + + const date = document.createElement("span"); + date.className = "album-date meta"; + date.textContent = candidate.first_release_date || ""; + body.appendChild(date); + + const badges = document.createElement("div"); + badges.className = "row"; + const primary = document.createElement("span"); + primary.className = "chip idle"; + primary.textContent = candidate.primary_type || "Album"; + badges.appendChild(primary); + (candidate.secondary_types || []).forEach((type) => { + const secondary = document.createElement("span"); + secondary.className = "chip idle"; + secondary.textContent = String(type); + badges.appendChild(secondary); + }); + if (candidate.score !== null) { + const score = document.createElement("span"); + score.className = "meta"; + score.textContent = `Score ${candidate.score}`; + badges.appendChild(score); + } + body.appendChild(badges); + card.appendChild(body); + + const button = document.createElement("button"); + button.className = "button primary small album-download-btn"; + button.dataset.releaseGroupId = candidate.release_group_id || ""; + button.dataset.albumTitle = candidate.title || ""; + const alreadyQueued = state.homeQueuedAlbumReleaseGroups.has(candidate.release_group_id || ""); + button.textContent = alreadyQueued ? "Queued..." : "Download Album"; + button.disabled = alreadyQueued; + card.appendChild(button); + + container.appendChild(card); + }); + container.addEventListener("click", async (event) => { + const button = event.target.closest(".album-download-btn"); + if (!button) { + return; + } + const releaseGroupId = button.dataset.releaseGroupId; + if (!releaseGroupId) { + return; + } + if (state.homeQueuedAlbumReleaseGroups.has(releaseGroupId)) { + button.disabled = true; + button.textContent = "Queued..."; + return; + } + const originalLabel = button.textContent; + button.disabled = true; + try { + const payload = { + release_group_id: releaseGroupId, + destination: $("#home-destination")?.value.trim() || null, + final_format: $("#home-format")?.value.trim() || null, + music_mode: true, + }; + homeMusicDebugLog("[MUSIC UI] queue album", payload); + const result = await fetchJson("/api/music/album/download", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify(payload), + }); + state.homeQueuedAlbumReleaseGroups.add(releaseGroupId); + container.querySelectorAll(`.album-download-btn[data-release-group-id="${CSS.escape(releaseGroupId)}"]`) + .forEach((dupButton) => { + dupButton.disabled = true; + dupButton.textContent = "Queued..."; + }); + button.textContent = "Queued..."; + const count = Number.isFinite(Number(result?.tracks_enqueued)) + ? 
Number(result.tracks_enqueued) + : 0; + setNotice( + $("#home-search-message"), + `Queued album: ${button.dataset.albumTitle || "Album"} — ${count} tracks`, + false + ); + } catch (err) { + button.disabled = false; + button.textContent = originalLabel; + setNotice($("#home-search-message"), `Album queue failed: ${err.message}`, true); + } + }); + header.insertAdjacentElement("afterend", container); + const cards = Array.from(container.querySelectorAll(".album-card")); + normalized.forEach((candidate, index) => { + if (!candidate?.release_group_id) { + return; + } + const card = cards[index]; + if (!card) { + return; + } + const cover = card.querySelector(".album-cover"); + if (!cover) { + return; + } + setTimeout(async () => { + const coverUrl = await fetchHomeAlbumCoverUrl(candidate.release_group_id); + if (coverUrl) { + cover.src = coverUrl; + cover.style.display = "block"; + } + }, index * 150); + }); +} + +async function loadAndRenderHomeAlbumCandidates(query, preloadedCandidates = null) { + const normalized = (query || "").trim(); + if (!normalized) { + clearHomeAlbumCandidates(); + return; + } + let candidates = normalizeMusicAlbumCandidates(preloadedCandidates); + if (!candidates.length) { + const data = await fetchJson( + `/api/music/albums/search?q=${encodeURIComponent(normalized)}&limit=10` + ); + candidates = normalizeMusicAlbumCandidates(Array.isArray(data) ? data : data?.album_candidates); + } + homeMusicDebugLog("[MUSIC UI] album candidates", { query: normalized, count: candidates.length }); + renderHomeAlbumCandidates(candidates, normalized); +} + +async function fetchHomeAlbumCoverUrl(albumId) { + const key = String(albumId || "").trim(); + if (!key) { + return null; + } + if (Object.prototype.hasOwnProperty.call(state.homeAlbumCoverCache, key)) { + return state.homeAlbumCoverCache[key]; + } + try { + const data = await fetchJson(`/api/music/album/art/${encodeURIComponent(key)}`); + const url = typeof data?.cover_url === "string" && data.cover_url ? 
+    state.homeAlbumCoverCache[key] = url;
+    return url;
+  } catch (_err) {
+    state.homeAlbumCoverCache[key] = null;
+    return null;
+  }
+}
+
 function buildHomeResultsStatusInfo(requestId) {
   const context = state.homeRequestContext[requestId];
   if (!context) {
@@ -2028,6 +2386,12 @@ function renderHomeResultItem(item) {
     title.innerHTML = `${summary}`;
     header.appendChild(title);
     header.appendChild(renderHomeStatusBadge(item.status));
+    if (item.media_type === "music") {
+      const sourceTag = document.createElement("span");
+      sourceTag.className = "home-candidate-source-tag";
+      sourceTag.textContent = "Spotify Metadata";
+      header.appendChild(sourceTag);
+    }
     card.appendChild(header);
     // Remove destination line for Home page result cards (visual polish)
     // No destination line
@@ -2369,6 +2733,281 @@ function renderHomeDirectUrlCard(preview, status) {
   return card;
 }
 
+function formatDetectedIntentLabel(intentType) {
+  const mapping = {
+    spotify_album: "Album",
+    spotify_playlist: "Playlist",
+    spotify_track: "Track",
+    spotify_artist: "Artist",
+    youtube_playlist: "Playlist",
+  };
+  return mapping[intentType] || intentType || "Unknown";
+}
+
+function isSpotifyPreviewIntent(intentType) {
+  return intentType === "spotify_album" || intentType === "spotify_playlist";
+}
+
+// Accepts a playlist URL, a spotify:playlist: URI, or a bare id and returns the bare id.
+function normalize_spotify_playlist_identifier(value) {
+  if (!value) {
+    return "";
+  }
+  const raw = String(value).trim();
+  if (!raw) {
+    return "";
+  }
+  try {
+    const parsed = new URL(raw);
+    if (parsed.hostname === "open.spotify.com" && parsed.pathname.includes("/playlist/")) {
+      return parsed.pathname.split("/playlist/").pop().split("/")[0];
+    }
+  } catch (_err) {
+    // Not a URL; fall through to the URI and bare-id checks.
+  }
+  if (raw.startsWith("spotify:playlist:")) {
+    return raw.split(":").pop();
+  }
+  return raw;
+}
+
+// Maps an open.spotify.com URL onto an intent type; returns null for anything else.
+function detectSpotifyUrlIntent(raw) {
+  const value = (raw || "").trim();
+  if (!value) {
+    return null;
+  }
+  try {
+    const parsed = new URL(value);
+    const host = (parsed.hostname || "").toLowerCase();
+    if (host !== "open.spotify.com" && host !== "spotify.com" && host !== "www.spotify.com") {
+      return null;
+    }
+    // Regional links (e.g. /intl-de/album/...) carry a locale prefix; drop it before reading the kind.
+    const segments = parsed.pathname
+      .split("/")
+      .filter(Boolean)
+      .filter((segment) => !segment.toLowerCase().startsWith("intl-"));
+    if (segments.length < 2) {
+      return null;
+    }
+    const kind = String(segments[0] || "").toLowerCase();
+    const identifier = String(segments[1] || "").trim();
+    if (!identifier) {
+      return null;
+    }
+    const mapping = {
+      album: "spotify_album",
+      playlist: "spotify_playlist",
+      track: "spotify_track",
+      artist: "spotify_artist",
+    };
+    const intentType = mapping[kind] || null;
+    if (!intentType) {
+      return null;
+    }
+    return { intentType, identifier };
+  } catch (_err) {
+    return null;
+  }
+}
+
+// Raw input is either a Spotify URL (a concrete intent) or a plain search query.
+function detectIntent(rawInput) {
+  const spotifyIntent = detectSpotifyUrlIntent(rawInput);
+  if (spotifyIntent) {
+    return {
+      type: spotifyIntent.intentType,
+      identifier: spotifyIntent.identifier,
+    };
+  }
+  return {
+    type: "search",
+    identifier: (rawInput || "").trim(),
+  };
+}
+
+async function fetchIntentPreview(intentType, identifier) {
+  return fetchJson("/api/intent/preview", {
+    method: "POST",
+    headers: { "Content-Type": "application/json" },
+    body: JSON.stringify({
+      intent_type: intentType,
+      identifier,
+    }),
+  });
+}
+
+async function runSpotifyIntentFlow(spotifyIntent, messageEl) {
+  const intentType = spotifyIntent?.intentType || "";
+  const identifier = spotifyIntent?.identifier || "";
+  if (!intentType || !identifier) {
+    return;
+  }
+
+  state.homeSearchRequestId = null;
+  updateHomeViewAdvancedLink();
+  stopHomeResultPolling();
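+  // Reset the search UI before repurposing the results pane for the intent preview card.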
+  setHomeSearchActive(false);
+  setHomeSearchControlsEnabled(true);
+  showHomeResults(true);
+  setHomeResultsState({ hasResults: true, terminal: true });
+  setHomeResultsStatus("Intent detected");
+  setHomeResultsDetail("Preparing intent preview...", false);
+
+  const list = $("#home-results-list");
+  if (!list) {
+    return;
+  }
+  list.textContent = "";
+
+  // Album and playlist intents need a metadata preview before confirming; track and artist intents confirm directly.
+  const needsPreview = isSpotifyPreviewIntent(intentType);
+  list.appendChild(
+    renderHomeIntentCard(intentType, identifier, {
+      loading: needsPreview,
+      canConfirm: !needsPreview,
+    })
+  );
+
+  if (needsPreview) {
+    try {
+      const preview = await fetchIntentPreview(intentType, identifier);
+      list.textContent = "";
+      list.appendChild(
+        renderHomeIntentCard(intentType, identifier, {
+          preview,
+          canConfirm: true,
+        })
+      );
+      setHomeResultsStatus("Intent preview ready");
+      setHomeResultsDetail("Review metadata and confirm to continue.", false);
+      setNotice(messageEl, "Intent metadata loaded.", false);
+    } catch (previewErr) {
+      list.textContent = "";
+      list.appendChild(
+        renderHomeIntentCard(intentType, identifier, {
+          error: previewErr.message || "Failed to fetch metadata",
+          canConfirm: false,
+        })
+      );
+      setHomeResultsStatus("Intent preview failed");
+      setHomeResultsDetail("Could not fetch Spotify metadata. Please retry.", true);
+      setNotice(messageEl, `Intent preview failed: ${previewErr.message}`, true);
+      setHomeSearchControlsEnabled(true);
+    }
+  } else {
+    setHomeResultsDetail("Confirm to proceed or cancel to return to search.", false);
+    setNotice(messageEl, "Intent detected. Confirm to continue.", false);
+  }
+}
+
+function renderHomeIntentCard(intentType, identifier, options = {}) {
+  const {
+    loading = false,
+    error = "",
+    preview = null,
+    canConfirm = false,
+  } = options;
+  const card = document.createElement("article");
+  card.className = "home-result-card";
+  card.dataset.intentType = intentType || "";
+  card.dataset.intentIdentifier = identifier || "";
+
+  const header = document.createElement("div");
+  header.className = "home-result-header";
+  const title = document.createElement("div");
+  const strong = document.createElement("strong");
+  strong.textContent = `Detected: ${formatDetectedIntentLabel(intentType)}`;
+  title.appendChild(strong);
+  header.appendChild(title);
+  header.appendChild(renderHomeStatusBadge("candidate_found"));
+  card.appendChild(header);
+
+  if (loading) {
+    const loadingEl = document.createElement("div");
+    loadingEl.className = "home-candidate-title";
+    loadingEl.textContent = "Fetching Spotify metadata…";
+    card.appendChild(loadingEl);
+  } else if (error) {
+    const errorEl = document.createElement("div");
+    errorEl.className = "home-candidate-title";
+    errorEl.textContent = `Preview failed: ${error}`;
+    card.appendChild(errorEl);
+  } else if (preview) {
+    const titleEl = document.createElement("div");
+    titleEl.className = "home-candidate-title";
+    titleEl.textContent = `Title: ${preview.title || "-"}`;
+    card.appendChild(titleEl);
+
+    const artistEl = document.createElement("div");
+    artistEl.className = "home-candidate-meta";
+    artistEl.textContent = `Artist: ${preview.artist || "-"}`;
+    card.appendChild(artistEl);
+
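+    // track_count can be missing on some previews; show a dash rather than "undefined".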
+    const countEl = document.createElement("div");
+    countEl.className = "home-candidate-meta";
+    countEl.textContent = `Track count: ${Number.isFinite(preview.track_count) ? preview.track_count : "-"}`;
+    card.appendChild(countEl);
+  } else {
+    const detail = document.createElement("div");
+    detail.className = "home-candidate-title";
+    detail.textContent = `Identifier: ${identifier || "-"}`;
+    card.appendChild(detail);
+  }
+
+  const actions = document.createElement("div");
+  actions.className = "row";
+  if (canConfirm) {
+    const confirmButton = document.createElement("button");
+    confirmButton.className = "button";
+    confirmButton.dataset.action = "home-intent-confirm";
+    confirmButton.dataset.intentType = intentType || "";
+    confirmButton.dataset.identifier = identifier || "";
+    if (intentType === "spotify_album") {
+      confirmButton.textContent = "Download Album";
+    } else if (intentType === "spotify_playlist") {
+      confirmButton.textContent = "Download Playlist";
+    } else if (intentType === "spotify_track") {
+      confirmButton.textContent = "Download Track";
+    } else {
+      confirmButton.textContent = "Confirm Download";
+    }
+    actions.appendChild(confirmButton);
+  }
+
+  const cancelButton = document.createElement("button");
+  cancelButton.className = "button ghost";
+  cancelButton.dataset.action = "home-intent-cancel";
+  cancelButton.textContent = "Cancel";
+  actions.appendChild(cancelButton);
+  card.appendChild(actions);
+  return card;
+}
+
+function resetHomeIntentConfirmation() {
+  state.homeSearchRequestId = null;
+  updateHomeViewAdvancedLink();
+  stopHomeResultPolling();
+  stopHomeJobPolling();
+  setHomeSearchControlsEnabled(true);
+  setHomeSearchActive(false);
+  setHomeResultsState({ hasResults: false, terminal: false });
+  showHomeResults(false);
+  const list = $("#home-results-list");
+  if (list) {
+    list.textContent = "";
+  }
+  setHomeResultsStatus("Ready to discover media");
+  setHomeResultsDetail(
+    "Search Only is the default discovery action; use Search & Download to enqueue jobs.",
+    false
+  );
+}
+
+async function executeDetectedIntent(intentType, identifier) {
+  return fetchJson("/api/intent/execute", {
+    method: "POST",
+    headers: { "Content-Type": "application/json" },
+    body: JSON.stringify({
+      intent_type: intentType,
+      identifier,
+    }),
+  });
+}
+
 function showHomeDirectUrlError(url, message, messageEl) {
   const text = message || DIRECT_URL_PLAYLIST_ERROR;
   if (messageEl) {
@@ -2403,6 +3042,7 @@ function showHomeDirectUrlError(url, message, messageEl) {
     container.appendChild(renderHomeDirectUrlCard(state.homeDirectPreview, "failed"));
   }
   setHomeResultsState({ hasResults: true, terminal: true });
+  setHomeSearchControlsEnabled(true);
 }
 
 function showHomeDirectUrlPreview(preview) {
@@ -2541,12 +3181,16 @@ async function refreshHomeResults(requestId) {
   const container = $("#home-results-list");
   if (!container) return null;
   try {
+    const previousContext = state.homeRequestContext[requestId] || {};
     const data = await fetchJson(`/api/search/requests/${encodeURIComponent(requestId)}`);
     const requestStatus = data.request?.status || "queued";
+    const requestMediaType = data.request?.media_type || "";
     const items = data.items || [];
     state.homeRequestContext[requestId] = {
       request: data.request || {},
       items,
+      musicMode: previousContext.musicMode || false,
+      musicCandidates: previousContext.musicCandidates || [],
    };
     updateHomeResultsStatusForRequest(requestId);
     const existingCards = new Map();
@@ -2572,6 +3216,14 @@ async function refreshHomeResults(requestId) {
     setHomeSearchActive(false);
     stopHomeResultPolling();
     startHomeJobPolling(requestId);
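+      // Load album candidates exactly once per completed music request; later polls are no-ops.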
+      if (requestMediaType === "music" && state.homeAlbumCandidatesRequestId !== requestId) {
+        state.homeAlbumCandidatesRequestId = requestId;
+        const query = $("#home-search-input")?.value || "";
+        const preloaded = state.homeRequestContext[requestId]?.musicCandidates || [];
+        await loadAndRenderHomeAlbumCandidates(query, preloaded);
+      } else if (requestMediaType !== "music") {
+        clearHomeAlbumCandidates();
+      }
     }
     return requestStatus;
   }
@@ -2604,6 +3256,14 @@ async function refreshHomeResults(requestId) {
     setHomeSearchActive(false);
     stopHomeResultPolling();
     startHomeJobPolling(requestId);
+    if (requestMediaType === "music" && state.homeAlbumCandidatesRequestId !== requestId) {
+      state.homeAlbumCandidatesRequestId = requestId;
+      const query = $("#home-search-input")?.value || "";
+      const preloaded = state.homeRequestContext[requestId]?.musicCandidates || [];
+      await loadAndRenderHomeAlbumCandidates(query, preloaded);
+    } else if (requestMediaType !== "music") {
+      clearHomeAlbumCandidates();
+    }
   }
   Object.keys(state.homeCandidateCache).forEach((key) => {
     if (!currentIds.has(key)) {
@@ -2707,6 +3367,26 @@ async function submitHomeSearch(autoEnqueue) {
   state.homeDirectPreview = null;
   stopHomeJobPolling();
   state.homeCandidateData = {};
+  state.homeAlbumCandidatesRequestId = null;
+  clearHomeAlbumCandidates();
+
+  // Spotify URLs bypass the normal search flow and go straight to the intent preview.
+  const intent = detectIntent(inputValue);
+  if (intent.type !== "search") {
+    try {
+      await runSpotifyIntentFlow(
+        {
+          intentType: intent.type,
+          identifier: intent.identifier,
+        },
+        messageEl
+      );
+    } catch (spotifyIntentErr) {
+      setNotice(messageEl, `Intent preview failed: ${spotifyIntentErr.message}`, true);
+      setHomeSearchControlsEnabled(true);
+    }
+    return;
+  }
+
   if (deliveryMode === "client" && destinationValue) {
     setNotice(messageEl, "Client delivery does not use a server destination.", true);
     setHomeSearchControlsEnabled(true);
@@ -2741,7 +3421,32 @@ async function submitHomeSearch(autoEnqueue) {
       headers: { "Content-Type": "application/json" },
       body: JSON.stringify(payload),
     });
+    const responseMusicMode = !!data?.music_mode;
+    const responseMusicCandidates = normalizeMusicAlbumCandidates(data?.music_candidates || []);
+    state.homeRequestContext.pending = {
+      musicMode: responseMusicMode,
+      musicCandidates: responseMusicCandidates,
+    };
+    if (state.homeMusicMode && inputValue) {
+      await loadAndRenderHomeAlbumCandidates(inputValue, responseMusicCandidates);
+    }
+    // The server can also detect an intent in the query; defer to the intent flow when it does.
+    if (data && data.detected_intent) {
+      await runSpotifyIntentFlow(
+        {
+          intentType: data.detected_intent,
+          identifier: data.identifier || "",
+        },
+        messageEl
+      );
+      return;
+    }
     state.homeRequestContext = {};
+    state.homeRequestContext[data.request_id] = {
+      request: {},
+      items: [],
+      musicMode: responseMusicMode,
+      musicCandidates: responseMusicCandidates,
+    };
     state.homeBestScores = {};
     state.homeCandidateCache = {};
     state.homeCandidatesLoading = {};
@@ -2764,7 +3469,7 @@ async function handleHomeDirectUrl(url, destination, messageEl) {
   if (!messageEl) return;
   setHomeSearchActive(true);
   const formatOverride = $("#home-format")?.value.trim();
-  const treatAsMusic = $("#home-music-mode")?.checked ?? $("#home-treat-music")?.checked ?? false;
+  const treatAsMusic = !!state.homeMusicMode;
   const deliveryMode = ($("#home-delivery-mode")?.value || "server").toLowerCase();
   const playlistId = extractPlaylistIdFromUrl(url);
   if (playlistId) {
@@ -3269,9 +3974,9 @@ function addPlaylistRow(entry = {}) {