Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion clustered_playlists.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,4 +150,4 @@ def generate_clustered_playlists(
log_callback(f"\u2717 Failed to write {outfile}: {e}")

log_callback("✓ Clustered playlist generation finished")
return feats
return X
111 changes: 111 additions & 0 deletions controllers/genre_playlist_controller.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
"""Helpers for generating playlists grouped by genre."""

from __future__ import annotations

import os
import re
from typing import Callable, Dict, Iterable, List, Mapping

from mutagen import File as MutagenFile

from controllers.normalize_controller import normalize_genres
from playlist_generator import write_playlist

# Alias for the grouping result: genre name -> list of track paths.
GenreGroups = Dict[str, List[str]]

# Separators (";", ",", "/", "|") on which a combined genre string is split.
_SPLIT_RE = re.compile(r"[;,/|]")


def _safe_name(text: str) -> str:
    """Turn ``text`` into a string usable as a playlist file name.

    Each run of characters other than word characters, hyphens and spaces
    is collapsed into a single underscore; spaces and underscores are then
    trimmed from both ends. ``"Unknown"`` is returned when nothing is left.
    """
    sanitized = re.sub(r"[^\w\- ]+", "_", text)
    trimmed = sanitized.strip(" _")
    if not trimmed:
        return "Unknown"
    return trimmed


def read_genres(path: str, split_multi: bool = True) -> list[str]:
    """Collect the genre tags stored in the audio file at ``path``.

    Parameters
    ----------
    path:
        Audio file path.
    split_multi:
        When True, every tag value is further split on ``;``, ``,``, ``/``
        and ``|``. When False, each value is kept whole.

    Returns
    -------
    list[str]
        Whitespace-stripped, non-empty genre strings in tag order. The list
        is empty when loading the file raises, when nothing is loaded, or
        when the file has no tags or no ``genre`` entry.
    """
    try:
        audio = MutagenFile(path, easy=True)
    except Exception:
        # Best effort: a file that fails to load simply has no genres.
        return []

    if not audio or not audio.tags:
        return []

    found: list[str] = []
    for value in audio.tags.get("genre", []) or []:
        pieces = _SPLIT_RE.split(value) if split_multi else [value]
        stripped = (piece.strip() for piece in pieces)
        found.extend(item for item in stripped if item)
    return found


def group_tracks_by_genre(
    tracks: Iterable[str],
    mapping: Mapping[str, str] | None = None,
    include_unknown: bool = False,
    split_multi: bool = True,
    log_callback: Callable[[str], None] | None = None,
) -> GenreGroups:
    """Group ``tracks`` into playlists keyed by genre.

    Parameters
    ----------
    tracks:
        Audio file paths to classify.
    mapping:
        Optional genre mapping handed to ``normalize_genres``; when empty or
        ``None`` the genres are used exactly as read from the tags.
    include_unknown:
        When True, tracks without any genre are collected under ``"Unknown"``;
        otherwise they are left out of the result.
    split_multi:
        Forwarded to ``read_genres`` to control splitting of combined tags.
    log_callback:
        Optional sink for progress messages.

    Returns
    -------
    GenreGroups
        Genre name -> list of track paths. A track with several genres is
        listed under each of them, but at most once per genre.
    """
    mapping = mapping or {}
    log = log_callback or (lambda _m: None)
    grouped: GenreGroups = {}

    for track in tracks:
        genres = read_genres(track, split_multi=split_multi)
        if mapping:
            genres = normalize_genres(genres, mapping)

        # A tag such as "Rock/Rock", or two raw genres normalised to the same
        # name, would otherwise add the track to one playlist several times.
        # dict.fromkeys removes the repeats while keeping first-seen order.
        genres = list(dict.fromkeys(genres or ()))

        if not genres:
            if include_unknown:
                grouped.setdefault("Unknown", []).append(track)
            log(f"! No genre tag for {track}")
            continue

        for genre in genres:
            grouped.setdefault(genre, []).append(track)
            log(f"• {os.path.basename(track)} → {genre}")

    return grouped


def write_genre_playlists(
    grouped: Mapping[str, List[str]],
    playlists_dir: str,
    log_callback: Callable[[str], None] | None = None,
) -> Dict[str, str]:
    """Write one playlist per genre and return mapping of genre->path.

    Parameters
    ----------
    grouped:
        Genre name -> list of track paths.
    playlists_dir:
        Output directory; created if it does not exist.
    log_callback:
        Optional sink for progress messages.

    Returns
    -------
    Dict[str, str]
        Genre name -> path of the ``.m3u`` file written for it.

    Notes
    -----
    Genres are processed in case-insensitive alphabetical order. When two
    genres map to the same file name, later ones receive a numeric suffix
    (``_2``, ``_3``, ...). Names are compared case-insensitively so that
    e.g. ``Rock`` and ``rock`` do not overwrite each other on
    case-insensitive filesystems.
    """
    os.makedirs(playlists_dir, exist_ok=True)
    log = log_callback or (lambda _m: None)

    # Case-folded file names already handed out during this call.
    used_names: set[str] = set()
    out_paths: Dict[str, str] = {}
    for genre in sorted(grouped.keys(), key=str.lower):
        base = _safe_name(genre)
        name = base
        suffix = 2
        while name.casefold() in used_names:
            name = f"{base}_{suffix}"
            suffix += 1
        used_names.add(name.casefold())

        outfile = os.path.join(playlists_dir, f"{name}.m3u")
        write_playlist(grouped[genre], outfile)
        out_paths[genre] = outfile
        log(f"→ Wrote {outfile}")

    return out_paths
27 changes: 27 additions & 0 deletions docs/hdbscan_task.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Task: Fix HDBSCAN visualization & parameter handling in Clustered Playlists

## Problem
The interactive HDBSCAN view in the Playlist Creator tab often renders a single-color scatter plot because clustering is recomputed on raw, unscaled feature vectors (`np.vstack(self.features)`) while offline generation scales features with `StandardScaler`. Without normalization or dimensionality reduction, HDBSCAN frequently labels all points as noise, leading to the uninformative visualization.

## Scope
- `cluster_graph_panel.py`: interactive reclustering and parameter dialog.
- `main_gui.py`: plugin wiring for interactive HDBSCAN/KMeans panels.
- `clustered_playlists.py`: feature extraction and storage feeding the interactive view.

## Goals
1. Ensure the interactive HDBSCAN workflow uses the same preprocessing as offline generation (feature scaling and optional dimensionality reduction) so clusters can form when density exists.
2. Let users adjust key HDBSCAN parameters before running the initial interactive plot, not only after clicking “Redo Values.”
3. Keep K-Means behavior unchanged while preventing shared-state regressions between the two interactive modes.
4. Make the visualization clearly distinguish noise points from clustered points after the preprocessing fixes.

## Proposed Approach
- Pass scaled (and optionally reduced) features into the interactive panel, reusing the standardized matrix already produced for playlist generation instead of the raw feature list.
- Centralize HDBSCAN parameter defaults so both the initial render and the edit dialog share the same values, and surface the dialog before the first run (or persist last-used values from `cluster_params`).
- Update recoloring logic if needed to keep noise (`label == -1`) visibly distinct after preprocessing changes.
- Add defensive logging to confirm when HDBSCAN returns only noise versus multiple clusters to aid troubleshooting.

## Acceptance Criteria
- Interactive HDBSCAN plots use normalized feature data and, when clusters exist, show multiple colors; an all-noise result is logged explicitly rather than silently appearing unclustered.
- Users can configure `min_cluster_size`, `min_samples`, and `cluster_selection_epsilon` prior to the first interactive HDBSCAN run, with sensible defaults and validation.
- K-Means interactive behavior remains the same, and shared UI elements (buttons, dialogs) still function for both clustering modes.
- Manual test instructions are added to the issue or commit notes, describing how to verify the corrected visualization and parameter entry.
Loading