Greensand321 · Greensand321 · Dec 13, 2025 · Dec 13, 2025 · Dec 13, 2025 · Dec 13, 2025
diff --git a/clustered_playlists.py b/clustered_playlists.py
@@ -150,4 +150,4 @@ def generate_clustered_playlists(
             log_callback(f"\u2717 Failed to write {outfile}: {e}")
 
     log_callback("✓ Clustered playlist generation finished")
-    return feats
+    return X
diff --git a/controllers/genre_playlist_controller.py b/controllers/genre_playlist_controller.py
@@ -0,0 +1,111 @@
+"""Helpers for generating playlists grouped by genre."""
+
+from __future__ import annotations
+
+import os
+import re
+from typing import Callable, Dict, Iterable, List, Mapping
+
+from mutagen import File as MutagenFile
+
+from controllers.normalize_controller import normalize_genres
+from playlist_generator import write_playlist
+
+GenreGroups = Dict[str, List[str]]
+
+_SPLIT_RE = re.compile(r"[;,/|]")
+
+
+def _safe_name(text: str) -> str:
+    """Return ``text`` as a filename-safe stem, falling back to ``"Unknown"``."""
+    return re.sub(r"[^\w\- ]+", "_", text).strip(" _") or "Unknown"
+
+
+def read_genres(path: str, split_multi: bool = True) -> list[str]:
+    """Read the genre tag values stored in the audio file at ``path``.
+
+    Parameters
+    ----------
+    path:
+        Location of the audio file to inspect.
+    split_multi:
+        If True, break each tag value apart on ``;``, ``,``, ``/`` and ``|``.
+
+    Returns
+    -------
+    list[str]
+        Stripped, non-empty genre names in tag order; empty when loading
+        raises or the file carries no tags.
+    """
+    try:
+        tagged = MutagenFile(path, easy=True)
+    except Exception:
+        # Best effort: any failure while loading is treated as "no genres".
+        return []
+    if not (tagged and tagged.tags):
+        return []
+    found: list[str] = []
+    for value in tagged.tags.get("genre", []) or []:
+        pieces = _SPLIT_RE.split(value) if split_multi else [value]
+        found.extend(p.strip() for p in pieces if p.strip())
+    return found
+
+
+def group_tracks_by_genre(
+    tracks: Iterable[str],
+    mapping: Mapping[str, str] | None = None,
+    include_unknown: bool = False,
+    split_multi: bool = True,
+    log_callback: Callable[[str], None] | None = None,
+) -> GenreGroups:
+    """Group ``tracks`` into playlists keyed by genre.
+
+    Genres come from ``read_genres`` and pass through ``normalize_genres``
+    when ``mapping`` is non-empty. Tracks left with no genre are logged and
+    kept under "Unknown" only if ``include_unknown``; a track is added to a
+    given genre at most once. Returns a ``genre -> [track, ...]`` dict.
+    """
+    mapping = mapping or {}
+    log = log_callback or (lambda _m: None)
+    grouped: GenreGroups = {}
+    for track in tracks:
+        genres = read_genres(track, split_multi=split_multi)
+        if mapping:
+            genres = normalize_genres(genres, mapping)
+        if not genres:
+            if include_unknown:
+                grouped.setdefault("Unknown", []).append(track)
+            log(f"! No genre tag for {track}")
+            continue
+        for genre in dict.fromkeys(genres):  # drop repeats, keep tag order
+            grouped.setdefault(genre, []).append(track)
+            log(f"• {os.path.basename(track)} → {genre}")
+
+
+def write_genre_playlists(
+    grouped: Mapping[str, List[str]],
+    playlists_dir: str,
+    log_callback: Callable[[str], None] | None = None,
+) -> Dict[str, str]:
+    """Write one ``.m3u`` playlist per genre and return a genre -> path mapping.
+
+    Creates ``playlists_dir`` if missing; clashing file names get ``_2``, ``_3``, ...
+    """
+    os.makedirs(playlists_dir, exist_ok=True)
+    log = log_callback or (lambda _m: None)
+    # Case-folded: "Rock.m3u" and "rock.m3u" are one file on case-insensitive disks.
+    used_names: set[str] = set()
+    out_paths: Dict[str, str] = {}
+    for genre in sorted(grouped, key=str.lower):
+        base = _safe_name(genre)
+        name = base
+        suffix = 2
+        while name.casefold() in used_names:
+            name = f"{base}_{suffix}"
+            suffix += 1
+        used_names.add(name.casefold())
+        outfile = os.path.join(playlists_dir, f"{name}.m3u")
+        write_playlist(grouped[genre], outfile)
+        out_paths[genre] = outfile
+        log(f"→ Wrote {outfile}")
+    return out_paths
diff --git a/docs/hdbscan_task.md b/docs/hdbscan_task.md
@@ -0,0 +1,27 @@
+# Task: Fix HDBSCAN visualization & parameter handling in Clustered Playlists
+
+## Problem
+The interactive HDBSCAN view in the Playlist Creator tab often renders a single-color scatter plot because clustering is recomputed on raw, unscaled feature vectors (`np.vstack(self.features)`) while offline generation scales features with `StandardScaler`. Without normalization or dimensionality reduction, HDBSCAN frequently labels all points as noise, leading to the uninformative visualization.
+
+## Scope
+- `cluster_graph_panel.py`: interactive reclustering and parameter dialog.
+- `main_gui.py`: plugin wiring for interactive HDBSCAN/KMeans panels.
+- `clustered_playlists.py`: feature extraction and storage feeding the interactive view.
+
+## Goals
+1. Ensure the interactive HDBSCAN workflow uses the same preprocessing as offline generation (feature scaling and optional dimensionality reduction) so clusters can form when density exists.
+2. Let users adjust key HDBSCAN parameters before running the initial interactive plot, not only after clicking “Redo Values.”
+3. Keep K-Means behavior unchanged while preventing shared-state regressions between the two interactive modes.
+4. Make the visualization clearly distinguish noise points from clustered points once the preprocessing fixes are in place.
+
+## Proposed Approach
+- Pass scaled (and optionally reduced) features into the interactive panel, reusing the standardized matrix already produced for playlist generation instead of the raw feature list.
+- Centralize HDBSCAN parameter defaults so both the initial render and the edit dialog share the same values, and surface the dialog before the first run (or persist last-used values from `cluster_params`).
+- Update recoloring logic if needed to keep noise (`label == -1`) visibly distinct after preprocessing changes.
+- Add defensive logging to confirm when HDBSCAN returns only noise versus multiple clusters to aid troubleshooting.
+
+## Acceptance Criteria
+- Interactive HDBSCAN plots use normalized feature data and, when clusters exist, show multiple colors; an all-noise result is logged explicitly rather than silently appearing unclustered.
+- Users can configure `min_cluster_size`, `min_samples`, and `cluster_selection_epsilon` prior to the first interactive HDBSCAN run, with sensible defaults and validation.
+- K-Means interactive behavior remains the same, and shared UI elements (buttons, dialogs) still function for both clustering modes.
+- Manual test instructions are added to the issue or commit notes, describing how to verify the corrected visualization and parameter entry.