From b5c4864c06e951ecc168bc4222f909585395ddec Mon Sep 17 00:00:00 2001 From: Greensand321 Date: Sat, 10 Jan 2026 10:31:12 -0500 Subject: [PATCH 01/18] Flush duplicate finder fingerprint cache after scan --- main_gui.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/main_gui.py b/main_gui.py index 639a75e..c5e7e00 100644 --- a/main_gui.py +++ b/main_gui.py @@ -90,6 +90,7 @@ get_fingerprint, get_cached_fingerprint_metadata, store_fingerprint, + flush_fingerprint_writes, ) from simple_duplicate_finder import SUPPORTED_EXTS, _compute_fp from tag_fixer import MIN_INTERACTIVE_SCORE, FileRecord @@ -3039,6 +3040,8 @@ def _track_payload( ): failure_count += 1 + flush_fingerprint_writes(db_path) + tracks = [tracks_map[path] for path in sorted_paths if path in tracks_map] if log_callback: log_callback( From 5f9629ef4cb4a7eb92be329cf46ec70059ef3608 Mon Sep 17 00:00:00 2001 From: Greensand321 Date: Sat, 10 Jan 2026 10:34:52 -0500 Subject: [PATCH 02/18] Revert "Flush duplicate finder fingerprint cache after scan" --- main_gui.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/main_gui.py b/main_gui.py index c5e7e00..639a75e 100644 --- a/main_gui.py +++ b/main_gui.py @@ -90,7 +90,6 @@ get_fingerprint, get_cached_fingerprint_metadata, store_fingerprint, - flush_fingerprint_writes, ) from simple_duplicate_finder import SUPPORTED_EXTS, _compute_fp from tag_fixer import MIN_INTERACTIVE_SCORE, FileRecord @@ -3040,8 +3039,6 @@ def _track_payload( ): failure_count += 1 - flush_fingerprint_writes(db_path) - tracks = [tracks_map[path] for path in sorted_paths if path in tracks_map] if log_callback: log_callback( From e32ef9e1490479e7a94804100927edf15086efab Mon Sep 17 00:00:00 2001 From: Greensand321 Date: Sat, 10 Jan 2026 10:52:29 -0500 Subject: [PATCH 03/18] Avoid recompute on duplicate scan refresh --- main_gui.py | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/main_gui.py b/main_gui.py index 639a75e..fe3a7ad 100644 --- a/main_gui.py +++ b/main_gui.py @@ -90,6 +90,7 @@ get_fingerprint, get_cached_fingerprint_metadata, store_fingerprint, + flush_fingerprint_writes, ) from simple_duplicate_finder import SUPPORTED_EXTS, _compute_fp from tag_fixer import MIN_INTERACTIVE_SCORE, FileRecord @@ -2776,13 +2777,14 @@ def _gather_tracks( self, library_root: str, *, + allow_compute: bool = True, log_callback: Callable[[str], None] | None = None, status_callback: Callable[[str, float], None] | None = None, fingerprint_status_callback: Callable[[str], None] | None = None, idle_callback: Callable[[], None] | None = None, - ) -> tuple[list[dict[str, object]], int, int]: + ) -> tuple[list[dict[str, object]], int, int, int]: if not library_root: - return [], 0, 0 + return [], 0, 0, 0 docs_dir = os.path.join(library_root, "Docs") os.makedirs(docs_dir, exist_ok=True) db_path = os.path.join(docs_dir, ".duplicate_fingerprints.db") @@ -2807,7 +2809,7 @@ def _gather_tracks( fingerprint_status_callback("No eligible audio files found.") if status_callback: status_callback("Idle", progress=0) - return tracks, 0, 0 + return tracks, 0, 0, 0 if fingerprint_status_callback: fingerprint_status_callback(f"Fingerprinting 0/{total}") if status_callback: @@ -2953,6 +2955,9 @@ def _track_payload( if source in {"stat_error", "cache_error"}: failure_count += 1 completed += 1 + elif not allow_compute: + missing_count += 1 + completed += 1 else: pending_paths.append(path) now = time.monotonic() @@ -2982,7 +2987,7 @@ def _track_payload( ) max_workers = min(8, os.cpu_count() or 4) - if pending_paths: + if pending_paths and allow_compute: if log_callback: log_callback(f"Computing {len(pending_paths)} missing fingerprints…") with ThreadPoolExecutor(max_workers=max_workers) as executor: @@ -3039,6 +3044,8 @@ def _track_payload( ): failure_count += 1 + flush_fingerprint_writes(db_path) + tracks = [tracks_map[path] for path in sorted_paths if path in tracks_map] if log_callback: log_callback( @@ -3046,7 +3053,7 @@ def _track_payload( f"{cached_count} cached, {computed_count} computed, " f"{missing_count} missing, {failure_count} failures." ) - return tracks, missing_count, failure_count + return tracks, missing_count, failure_count, computed_count def _generate_plan( self, @@ -3082,13 +3089,23 @@ def report_fingerprint_status(message: str) -> None: plan_start_time = time.monotonic() logger.info("Preview plan timing start: %s", plan_start_ts) try: - tracks, missing_count, failure_count = self._gather_tracks( + tracks, missing_count, failure_count, computed_count = self._gather_tracks( library_root, log_callback=log, status_callback=status_callback, fingerprint_status_callback=fingerprint_status_callback, idle_callback=idle_callback, ) + if computed_count: + log("Fingerprinting completed; refreshing catalog from cache before grouping.") + tracks, missing_count, failure_count, _ = self._gather_tracks( + library_root, + allow_compute=False, + log_callback=log, + status_callback=status_callback, + fingerprint_status_callback=fingerprint_status_callback, + idle_callback=idle_callback, + ) if not tracks: return PlanGenerationResult( plan=None, From 1a08da7d4e72d08017a6f35e527ede5e28d01551 Mon Sep 17 00:00:00 2001 From: Greensand321 Date: Sat, 10 Jan 2026 10:56:04 -0500 Subject: [PATCH 04/18] Revert "Skip recomputing fingerprints during duplicate-scan refresh" --- main_gui.py | 29 ++++++----------------------- 1 file changed, 6 insertions(+), 23 deletions(-) diff --git a/main_gui.py b/main_gui.py index fe3a7ad..639a75e 100644 --- a/main_gui.py +++ b/main_gui.py @@ -90,7 +90,6 @@ get_fingerprint, get_cached_fingerprint_metadata, store_fingerprint, - flush_fingerprint_writes, ) from simple_duplicate_finder import SUPPORTED_EXTS, _compute_fp from tag_fixer import MIN_INTERACTIVE_SCORE, FileRecord @@ -2777,14 +2776,13 @@ def _gather_tracks( self, library_root: str, *, - allow_compute: bool = True, log_callback: Callable[[str], None] | None = None, status_callback: Callable[[str, float], None] | None = None, fingerprint_status_callback: Callable[[str], None] | None = None, idle_callback: Callable[[], None] | None = None, - ) -> tuple[list[dict[str, object]], int, int, int]: + ) -> tuple[list[dict[str, object]], int, int]: if not library_root: - return [], 0, 0, 0 + return [], 0, 0 docs_dir = os.path.join(library_root, "Docs") os.makedirs(docs_dir, exist_ok=True) db_path = os.path.join(docs_dir, ".duplicate_fingerprints.db") @@ -2809,7 +2807,7 @@ def _gather_tracks( fingerprint_status_callback("No eligible audio files found.") if status_callback: status_callback("Idle", progress=0) - return tracks, 0, 0, 0 + return tracks, 0, 0 if fingerprint_status_callback: fingerprint_status_callback(f"Fingerprinting 0/{total}") if status_callback: @@ -2955,9 +2953,6 @@ def _track_payload( if source in {"stat_error", "cache_error"}: failure_count += 1 completed += 1 - elif not allow_compute: - missing_count += 1 - completed += 1 else: pending_paths.append(path) now = time.monotonic() @@ -2987,7 +2982,7 @@ def _track_payload( ) max_workers = min(8, os.cpu_count() or 4) - if pending_paths and allow_compute: + if pending_paths: if log_callback: log_callback(f"Computing {len(pending_paths)} missing fingerprints…") with ThreadPoolExecutor(max_workers=max_workers) as executor: @@ -3044,8 +3039,6 @@ def _track_payload( ): failure_count += 1 - flush_fingerprint_writes(db_path) - tracks = [tracks_map[path] for path in sorted_paths if path in tracks_map] if log_callback: log_callback( @@ -3053,7 +3046,7 @@ def _track_payload( f"{cached_count} cached, {computed_count} computed, " f"{missing_count} missing, {failure_count} failures." ) - return tracks, missing_count, failure_count, computed_count + return tracks, missing_count, failure_count def _generate_plan( self, @@ -3089,23 +3082,13 @@ def report_fingerprint_status(message: str) -> None: plan_start_time = time.monotonic() logger.info("Preview plan timing start: %s", plan_start_ts) try: - tracks, missing_count, failure_count, computed_count = self._gather_tracks( + tracks, missing_count, failure_count = self._gather_tracks( library_root, log_callback=log, status_callback=status_callback, fingerprint_status_callback=fingerprint_status_callback, idle_callback=idle_callback, ) - if computed_count: - log("Fingerprinting completed; refreshing catalog from cache before grouping.") - tracks, missing_count, failure_count, _ = self._gather_tracks( - library_root, - allow_compute=False, - log_callback=log, - status_callback=status_callback, - fingerprint_status_callback=fingerprint_status_callback, - idle_callback=idle_callback, - ) if not tracks: return PlanGenerationResult( plan=None, From f4febef589357e93757c6807179852d7b282285f Mon Sep 17 00:00:00 2001 From: Greensand321 Date: Sat, 10 Jan 2026 11:25:36 -0500 Subject: [PATCH 05/18] Flush fingerprint cache after compute --- main_gui.py | 150 ++++++++++++++++++++++++++++++++++------------------ 1 file changed, 100 insertions(+), 50 deletions(-) diff --git a/main_gui.py b/main_gui.py index 639a75e..446ab5b 100644 --- a/main_gui.py +++ b/main_gui.py @@ -2776,6 +2776,7 @@ def _gather_tracks( self, library_root: str, *, + flush_after_compute: bool = False, log_callback: Callable[[str], None] | None = None, status_callback: Callable[[str, float], None] | None = None, fingerprint_status_callback: Callable[[str], None] | None = None, @@ -2908,6 +2909,82 @@ def _track_payload( }, } + def _load_cached_tracks( + *, + allow_pending: bool, + ) -> tuple[dict[str, dict[str, object]], list[str], int, int, int, int]: + nonlocal last_update + refreshed_map: dict[str, dict[str, object]] = {} + refreshed_pending: list[str] = [] + refreshed_cached_count = 0 + refreshed_missing_count = 0 + refreshed_failure_count = 0 + refreshed_metadata_refresh_count = 0 + refreshed_completed = 0 + for path in sorted_paths: + fingerprint_trace: dict[str, object] = {} + fp, cached_metadata = get_cached_fingerprint_metadata( + path, + db_path, + log_callback=log_callback, + trace=fingerprint_trace, + ) + if fp: + metadata = _metadata_for_payload(path, cached_metadata) + if _needs_metadata_refresh(cached_metadata): + metadata = _extract_metadata(path) + refreshed_metadata_refresh_count += 1 + if not store_fingerprint( + path, + db_path, + None, + fp, + log_callback=log_callback, + ext=str(metadata.get("ext") or ""), + bitrate=int(metadata.get("bitrate") or 0), + sample_rate=int(metadata.get("sample_rate") or 0), + bit_depth=int(metadata.get("bit_depth") or 0), + normalized_artist=metadata.get("normalized_artist"), + normalized_title=metadata.get("normalized_title"), + normalized_album=metadata.get("normalized_album"), + ): + refreshed_failure_count += 1 + refreshed_map[path] = _track_payload(path, fp, fingerprint_trace, metadata) + refreshed_cached_count += 1 + refreshed_completed += 1 + else: + source = fingerprint_trace.get("source") + if source in {"stat_error", "cache_error"}: + refreshed_failure_count += 1 + refreshed_completed += 1 + elif allow_pending: + refreshed_pending.append(path) + else: + refreshed_missing_count += 1 + now = time.monotonic() + if refreshed_completed == total or now - last_update >= update_interval: + if fingerprint_status_callback: + fingerprint_status_callback(f"Fingerprinting {refreshed_completed}/{total}") + if status_callback: + status_callback( + "Fingerprinting…", + progress=self._weighted_progress( + "fingerprinting", + 0 if total == 0 else refreshed_completed / total, + ), + ) + if idle_callback: + idle_callback() + last_update = now + return ( + refreshed_map, + refreshed_pending, + refreshed_cached_count, + refreshed_missing_count, + refreshed_failure_count, + refreshed_metadata_refresh_count, + ) + tracks_map: dict[str, dict[str, object]] = {} pending_paths: list[str] = [] completed = 0 @@ -2917,56 +2994,15 @@ def _track_payload( missing_count = 0 metadata_refresh_count = 0 sorted_paths = sorted(audio_paths) - for path in sorted_paths: - fingerprint_trace: dict[str, object] = {} - fp, cached_metadata = get_cached_fingerprint_metadata( - path, - db_path, - log_callback=log_callback, - trace=fingerprint_trace, - ) - if fp: - metadata = _metadata_for_payload(path, cached_metadata) - if _needs_metadata_refresh(cached_metadata): - metadata = _extract_metadata(path) - metadata_refresh_count += 1 - if not store_fingerprint( - path, - db_path, - None, - fp, - log_callback=log_callback, - ext=str(metadata.get("ext") or ""), - bitrate=int(metadata.get("bitrate") or 0), - sample_rate=int(metadata.get("sample_rate") or 0), - bit_depth=int(metadata.get("bit_depth") or 0), - normalized_artist=metadata.get("normalized_artist"), - normalized_title=metadata.get("normalized_title"), - normalized_album=metadata.get("normalized_album"), - ): - failure_count += 1 - tracks_map[path] = _track_payload(path, fp, fingerprint_trace, metadata) - completed += 1 - cached_count += 1 - else: - source = fingerprint_trace.get("source") - if source in {"stat_error", "cache_error"}: - failure_count += 1 - completed += 1 - else: - pending_paths.append(path) - now = time.monotonic() - if completed == total or now - last_update >= update_interval: - if fingerprint_status_callback: - fingerprint_status_callback(f"Fingerprinting {completed}/{total}") - if status_callback: - status_callback( - "Fingerprinting…", - progress=self._weighted_progress("fingerprinting", completed / total), - ) - if idle_callback: - idle_callback() - last_update = now + ( + tracks_map, + pending_paths, + cached_count, + missing_count, + failure_count, + metadata_refresh_count, + ) = _load_cached_tracks(allow_pending=True) + completed = cached_count + failure_count if not pending_paths and metadata_refresh_count == 0 and failure_count == 0: refresh_message = "Catalog up to date; using cached metadata." @@ -3039,6 +3075,19 @@ def _track_payload( ): failure_count += 1 + if flush_after_compute: + if log_callback: + log_callback("Fingerprint cache flushed; refreshing track list from cache.") + flush_fingerprint_writes(db_path) + ( + tracks_map, + _pending_paths, + cached_count, + missing_count, + failure_count, + metadata_refresh_count, + ) = _load_cached_tracks(allow_pending=False) + tracks = [tracks_map[path] for path in sorted_paths if path in tracks_map] if log_callback: log_callback( @@ -3084,6 +3133,7 @@ def report_fingerprint_status(message: str) -> None: try: tracks, missing_count, failure_count = self._gather_tracks( library_root, + flush_after_compute=True, log_callback=log, status_callback=status_callback, fingerprint_status_callback=fingerprint_status_callback, From 8be094120824055226433e458bda9218b58e7a5b Mon Sep 17 00:00:00 2001 From: Greensand321 Date: Sat, 10 Jan 2026 11:27:40 -0500 Subject: [PATCH 06/18] Revert "Flush fingerprint cache after compute and refresh cached track list" --- main_gui.py | 150 ++++++++++++++++++---------------------------------- 1 file changed, 50 insertions(+), 100 deletions(-) diff --git a/main_gui.py b/main_gui.py index 446ab5b..639a75e 100644 --- a/main_gui.py +++ b/main_gui.py @@ -2776,7 +2776,6 @@ def _gather_tracks( self, library_root: str, *, - flush_after_compute: bool = False, log_callback: Callable[[str], None] | None = None, status_callback: Callable[[str, float], None] | None = None, fingerprint_status_callback: Callable[[str], None] | None = None, @@ -2909,82 +2908,6 @@ def _track_payload( }, } - def _load_cached_tracks( - *, - allow_pending: bool, - ) -> tuple[dict[str, dict[str, object]], list[str], int, int, int, int]: - nonlocal last_update - refreshed_map: dict[str, dict[str, object]] = {} - refreshed_pending: list[str] = [] - refreshed_cached_count = 0 - refreshed_missing_count = 0 - refreshed_failure_count = 0 - refreshed_metadata_refresh_count = 0 - refreshed_completed = 0 - for path in sorted_paths: - fingerprint_trace: dict[str, object] = {} - fp, cached_metadata = get_cached_fingerprint_metadata( - path, - db_path, - log_callback=log_callback, - trace=fingerprint_trace, - ) - if fp: - metadata = _metadata_for_payload(path, cached_metadata) - if _needs_metadata_refresh(cached_metadata): - metadata = _extract_metadata(path) - refreshed_metadata_refresh_count += 1 - if not store_fingerprint( - path, - db_path, - None, - fp, - log_callback=log_callback, - ext=str(metadata.get("ext") or ""), - bitrate=int(metadata.get("bitrate") or 0), - sample_rate=int(metadata.get("sample_rate") or 0), - bit_depth=int(metadata.get("bit_depth") or 0), - normalized_artist=metadata.get("normalized_artist"), - normalized_title=metadata.get("normalized_title"), - normalized_album=metadata.get("normalized_album"), - ): - refreshed_failure_count += 1 - refreshed_map[path] = _track_payload(path, fp, fingerprint_trace, metadata) - refreshed_cached_count += 1 - refreshed_completed += 1 - else: - source = fingerprint_trace.get("source") - if source in {"stat_error", "cache_error"}: - refreshed_failure_count += 1 - refreshed_completed += 1 - elif allow_pending: - refreshed_pending.append(path) - else: - refreshed_missing_count += 1 - now = time.monotonic() - if refreshed_completed == total or now - last_update >= update_interval: - if fingerprint_status_callback: - fingerprint_status_callback(f"Fingerprinting {refreshed_completed}/{total}") - if status_callback: - status_callback( - "Fingerprinting…", - progress=self._weighted_progress( - "fingerprinting", - 0 if total == 0 else refreshed_completed / total, - ), - ) - if idle_callback: - idle_callback() - last_update = now - return ( - refreshed_map, - refreshed_pending, - refreshed_cached_count, - refreshed_missing_count, - refreshed_failure_count, - refreshed_metadata_refresh_count, - ) - tracks_map: dict[str, dict[str, object]] = {} pending_paths: list[str] = [] completed = 0 @@ -2994,15 +2917,56 @@ def _load_cached_tracks( missing_count = 0 metadata_refresh_count = 0 sorted_paths = sorted(audio_paths) - ( - tracks_map, - pending_paths, - cached_count, - missing_count, - failure_count, - metadata_refresh_count, - ) = _load_cached_tracks(allow_pending=True) - completed = cached_count + failure_count + for path in sorted_paths: + fingerprint_trace: dict[str, object] = {} + fp, cached_metadata = get_cached_fingerprint_metadata( + path, + db_path, + log_callback=log_callback, + trace=fingerprint_trace, + ) + if fp: + metadata = _metadata_for_payload(path, cached_metadata) + if _needs_metadata_refresh(cached_metadata): + metadata = _extract_metadata(path) + metadata_refresh_count += 1 + if not store_fingerprint( + path, + db_path, + None, + fp, + log_callback=log_callback, + ext=str(metadata.get("ext") or ""), + bitrate=int(metadata.get("bitrate") or 0), + sample_rate=int(metadata.get("sample_rate") or 0), + bit_depth=int(metadata.get("bit_depth") or 0), + normalized_artist=metadata.get("normalized_artist"), + normalized_title=metadata.get("normalized_title"), + normalized_album=metadata.get("normalized_album"), + ): + failure_count += 1 + tracks_map[path] = _track_payload(path, fp, fingerprint_trace, metadata) + completed += 1 + cached_count += 1 + else: + source = fingerprint_trace.get("source") + if source in {"stat_error", "cache_error"}: + failure_count += 1 + completed += 1 + else: + pending_paths.append(path) + now = time.monotonic() + if completed == total or now - last_update >= update_interval: + if fingerprint_status_callback: + fingerprint_status_callback(f"Fingerprinting {completed}/{total}") + if status_callback: + status_callback( + "Fingerprinting…", + progress=self._weighted_progress("fingerprinting", completed / total), + ) + if idle_callback: + idle_callback() + last_update = now if not pending_paths and metadata_refresh_count == 0 and failure_count == 0: refresh_message = "Catalog up to date; using cached metadata." @@ -3075,19 +3039,6 @@ def _load_cached_tracks( ): failure_count += 1 - if flush_after_compute: - if log_callback: - log_callback("Fingerprint cache flushed; refreshing track list from cache.") - flush_fingerprint_writes(db_path) - ( - tracks_map, - _pending_paths, - cached_count, - missing_count, - failure_count, - metadata_refresh_count, - ) = _load_cached_tracks(allow_pending=False) - tracks = [tracks_map[path] for path in sorted_paths if path in tracks_map] if log_callback: log_callback( @@ -3133,7 +3084,6 @@ def report_fingerprint_status(message: str) -> None: try: tracks, missing_count, failure_count = self._gather_tracks( library_root, - flush_after_compute=True, log_callback=log, status_callback=status_callback, fingerprint_status_callback=fingerprint_status_callback, From 5c7e04eba0a2045214aa5577b963daea83323329 Mon Sep 17 00:00:00 2001 From: Greensand321 Date: Sat, 10 Jan 2026 11:30:17 -0500 Subject: [PATCH 07/18] Import fingerprint cache flush helper --- main_gui.py | 151 +++++++++++++++++++++++++++++++++++----------------- 1 file changed, 101 insertions(+), 50 deletions(-) diff --git a/main_gui.py b/main_gui.py index 639a75e..cce878b 100644 --- a/main_gui.py +++ b/main_gui.py @@ -87,6 +87,7 @@ ) from fingerprint_cache import ( ensure_fingerprint_cache, + flush_fingerprint_writes, get_fingerprint, get_cached_fingerprint_metadata, store_fingerprint, @@ -2776,6 +2777,7 @@ def _gather_tracks( self, library_root: str, *, + flush_after_compute: bool = False, log_callback: Callable[[str], None] | None = None, status_callback: Callable[[str, float], None] | None = None, fingerprint_status_callback: Callable[[str], None] | None = None, @@ -2908,6 +2910,82 @@ def _track_payload( }, } + def _load_cached_tracks( + *, + allow_pending: bool, + ) -> tuple[dict[str, dict[str, object]], list[str], int, int, int, int]: + nonlocal last_update + refreshed_map: dict[str, dict[str, object]] = {} + refreshed_pending: list[str] = [] + refreshed_cached_count = 0 + refreshed_missing_count = 0 + refreshed_failure_count = 0 + refreshed_metadata_refresh_count = 0 + refreshed_completed = 0 + for path in sorted_paths: + fingerprint_trace: dict[str, object] = {} + fp, cached_metadata = get_cached_fingerprint_metadata( + path, + db_path, + log_callback=log_callback, + trace=fingerprint_trace, + ) + if fp: + metadata = _metadata_for_payload(path, cached_metadata) + if _needs_metadata_refresh(cached_metadata): + metadata = _extract_metadata(path) + refreshed_metadata_refresh_count += 1 + if not store_fingerprint( + path, + db_path, + None, + fp, + log_callback=log_callback, + ext=str(metadata.get("ext") or ""), + bitrate=int(metadata.get("bitrate") or 0), + sample_rate=int(metadata.get("sample_rate") or 0), + bit_depth=int(metadata.get("bit_depth") or 0), + normalized_artist=metadata.get("normalized_artist"), + normalized_title=metadata.get("normalized_title"), + normalized_album=metadata.get("normalized_album"), + ): + refreshed_failure_count += 1 + refreshed_map[path] = _track_payload(path, fp, fingerprint_trace, metadata) + refreshed_cached_count += 1 + refreshed_completed += 1 + else: + source = fingerprint_trace.get("source") + if source in {"stat_error", "cache_error"}: + refreshed_failure_count += 1 + refreshed_completed += 1 + elif allow_pending: + refreshed_pending.append(path) + else: + refreshed_missing_count += 1 + now = time.monotonic() + if refreshed_completed == total or now - last_update >= update_interval: + if fingerprint_status_callback: + fingerprint_status_callback(f"Fingerprinting {refreshed_completed}/{total}") + if status_callback: + status_callback( + "Fingerprinting…", + progress=self._weighted_progress( + "fingerprinting", + 0 if total == 0 else refreshed_completed / total, + ), + ) + if idle_callback: + idle_callback() + last_update = now + return ( + refreshed_map, + refreshed_pending, + refreshed_cached_count, + refreshed_missing_count, + refreshed_failure_count, + refreshed_metadata_refresh_count, + ) + tracks_map: dict[str, dict[str, object]] = {} pending_paths: list[str] = [] completed = 0 @@ -2917,56 +2995,15 @@ def _track_payload( missing_count = 0 metadata_refresh_count = 0 sorted_paths = sorted(audio_paths) - for path in sorted_paths: - fingerprint_trace: dict[str, object] = {} - fp, cached_metadata = get_cached_fingerprint_metadata( - path, - db_path, - log_callback=log_callback, - trace=fingerprint_trace, - ) - if fp: - metadata = _metadata_for_payload(path, cached_metadata) - if _needs_metadata_refresh(cached_metadata): - metadata = _extract_metadata(path) - metadata_refresh_count += 1 - if not store_fingerprint( - path, - db_path, - None, - fp, - log_callback=log_callback, - ext=str(metadata.get("ext") or ""), - bitrate=int(metadata.get("bitrate") or 0), - sample_rate=int(metadata.get("sample_rate") or 0), - bit_depth=int(metadata.get("bit_depth") or 0), - normalized_artist=metadata.get("normalized_artist"), - normalized_title=metadata.get("normalized_title"), - normalized_album=metadata.get("normalized_album"), - ): - failure_count += 1 - tracks_map[path] = _track_payload(path, fp, fingerprint_trace, metadata) - completed += 1 - cached_count += 1 - else: - source = fingerprint_trace.get("source") - if source in {"stat_error", "cache_error"}: - failure_count += 1 - completed += 1 - else: - pending_paths.append(path) - now = time.monotonic() - if completed == total or now - last_update >= update_interval: - if fingerprint_status_callback: - fingerprint_status_callback(f"Fingerprinting {completed}/{total}") - if status_callback: - status_callback( - "Fingerprinting…", - progress=self._weighted_progress("fingerprinting", completed / total), - ) - if idle_callback: - idle_callback() - last_update = now + ( + tracks_map, + pending_paths, + cached_count, + missing_count, + failure_count, + metadata_refresh_count, + ) = _load_cached_tracks(allow_pending=True) + completed = cached_count + failure_count if not pending_paths and metadata_refresh_count == 0 and failure_count == 0: refresh_message = "Catalog up to date; using cached metadata." @@ -3039,6 +3076,19 @@ def _track_payload( ): failure_count += 1 + if flush_after_compute: + if log_callback: + log_callback("Fingerprint cache flushed; refreshing track list from cache.") + flush_fingerprint_writes(db_path) + ( + tracks_map, + _pending_paths, + cached_count, + missing_count, + failure_count, + metadata_refresh_count, + ) = _load_cached_tracks(allow_pending=False) + tracks = [tracks_map[path] for path in sorted_paths if path in tracks_map] if log_callback: log_callback( @@ -3084,6 +3134,7 @@ def report_fingerprint_status(message: str) -> None: try: tracks, missing_count, failure_count = self._gather_tracks( library_root, + flush_after_compute=True, log_callback=log, status_callback=status_callback, fingerprint_status_callback=fingerprint_status_callback, From d9e69ed1830055e3261d27310b411d4b7e23e76b Mon Sep 17 00:00:00 2001 From: Greensand321 Date: Sat, 10 Jan 2026 11:33:49 -0500 Subject: [PATCH 08/18] Revert "Import fingerprint cache flush helper and refresh cached tracks after compute" --- main_gui.py | 151 +++++++++++++++++----------------------------------- 1 file changed, 50 insertions(+), 101 deletions(-) diff --git a/main_gui.py b/main_gui.py index cce878b..639a75e 100644 --- a/main_gui.py +++ b/main_gui.py @@ -87,7 +87,6 @@ ) from fingerprint_cache import ( ensure_fingerprint_cache, - flush_fingerprint_writes, get_fingerprint, get_cached_fingerprint_metadata, store_fingerprint, @@ -2777,7 +2776,6 @@ def _gather_tracks( self, library_root: str, *, - flush_after_compute: bool = False, log_callback: Callable[[str], None] | None = None, status_callback: Callable[[str, float], None] | None = None, fingerprint_status_callback: Callable[[str], None] | None = None, @@ -2910,82 +2908,6 @@ def _track_payload( }, } - def _load_cached_tracks( - *, - allow_pending: bool, - ) -> tuple[dict[str, dict[str, object]], list[str], int, int, int, int]: - nonlocal last_update - refreshed_map: dict[str, dict[str, object]] = {} - refreshed_pending: list[str] = [] - refreshed_cached_count = 0 - refreshed_missing_count = 0 - refreshed_failure_count = 0 - refreshed_metadata_refresh_count = 0 - refreshed_completed = 0 - for path in sorted_paths: - fingerprint_trace: dict[str, object] = {} - fp, cached_metadata = get_cached_fingerprint_metadata( - path, - db_path, - log_callback=log_callback, - trace=fingerprint_trace, - ) - if fp: - metadata = _metadata_for_payload(path, cached_metadata) - if _needs_metadata_refresh(cached_metadata): - metadata = _extract_metadata(path) - refreshed_metadata_refresh_count += 1 - if not store_fingerprint( - path, - db_path, - None, - fp, - log_callback=log_callback, - ext=str(metadata.get("ext") or ""), - bitrate=int(metadata.get("bitrate") or 0), - sample_rate=int(metadata.get("sample_rate") or 0), - bit_depth=int(metadata.get("bit_depth") or 0), - normalized_artist=metadata.get("normalized_artist"), - normalized_title=metadata.get("normalized_title"), - normalized_album=metadata.get("normalized_album"), - ): - refreshed_failure_count += 1 - refreshed_map[path] = _track_payload(path, fp, fingerprint_trace, metadata) - refreshed_cached_count += 1 - refreshed_completed += 1 - else: - source = fingerprint_trace.get("source") - if source in {"stat_error", "cache_error"}: - refreshed_failure_count += 1 - refreshed_completed += 1 - elif allow_pending: - refreshed_pending.append(path) - else: - refreshed_missing_count += 1 - now = time.monotonic() - if refreshed_completed == total or now - last_update >= update_interval: - if fingerprint_status_callback: - fingerprint_status_callback(f"Fingerprinting {refreshed_completed}/{total}") - if status_callback: - status_callback( - "Fingerprinting…", - progress=self._weighted_progress( - "fingerprinting", - 0 if total == 0 else refreshed_completed / total, - ), - ) - if idle_callback: - idle_callback() - last_update = now - return ( - refreshed_map, - refreshed_pending, - refreshed_cached_count, - refreshed_missing_count, - refreshed_failure_count, - refreshed_metadata_refresh_count, - ) - tracks_map: dict[str, dict[str, object]] = {} pending_paths: list[str] = [] completed = 0 @@ -2995,15 +2917,56 @@ def _load_cached_tracks( missing_count = 0 metadata_refresh_count = 0 sorted_paths = sorted(audio_paths) - ( - tracks_map, - pending_paths, - cached_count, - missing_count, - failure_count, - metadata_refresh_count, - ) = _load_cached_tracks(allow_pending=True) - completed = cached_count + failure_count + for path in sorted_paths: + fingerprint_trace: dict[str, object] = {} + fp, cached_metadata = get_cached_fingerprint_metadata( + path, + db_path, + log_callback=log_callback, + trace=fingerprint_trace, + ) + if fp: + metadata = _metadata_for_payload(path, cached_metadata) + if _needs_metadata_refresh(cached_metadata): + metadata = _extract_metadata(path) + metadata_refresh_count += 1 + if not store_fingerprint( + path, + db_path, + None, + fp, + log_callback=log_callback, + ext=str(metadata.get("ext") or ""), + bitrate=int(metadata.get("bitrate") or 0), + sample_rate=int(metadata.get("sample_rate") or 0), + bit_depth=int(metadata.get("bit_depth") or 0), + normalized_artist=metadata.get("normalized_artist"), + normalized_title=metadata.get("normalized_title"), + normalized_album=metadata.get("normalized_album"), + ): + failure_count += 1 + tracks_map[path] = _track_payload(path, fp, fingerprint_trace, metadata) + completed += 1 + cached_count += 1 + else: + source = fingerprint_trace.get("source") + if source in {"stat_error", "cache_error"}: + failure_count += 1 + completed += 1 + else: + pending_paths.append(path) + now = time.monotonic() + if completed == total or now - last_update >= update_interval: + if fingerprint_status_callback: + fingerprint_status_callback(f"Fingerprinting {completed}/{total}") + if status_callback: + status_callback( + "Fingerprinting…", + progress=self._weighted_progress("fingerprinting", completed / total), + ) + if idle_callback: + idle_callback() + last_update = now if not pending_paths and metadata_refresh_count == 0 and failure_count == 0: refresh_message = "Catalog up to date; using cached metadata." @@ -3076,19 +3039,6 @@ def _load_cached_tracks( ): failure_count += 1 - if flush_after_compute: - if log_callback: - log_callback("Fingerprint cache flushed; refreshing track list from cache.") - flush_fingerprint_writes(db_path) - ( - tracks_map, - _pending_paths, - cached_count, - missing_count, - failure_count, - metadata_refresh_count, - ) = _load_cached_tracks(allow_pending=False) - tracks = [tracks_map[path] for path in sorted_paths if path in tracks_map] if log_callback: log_callback( @@ -3134,7 +3084,6 @@ def report_fingerprint_status(message: str) -> None: try: tracks, missing_count, failure_count = self._gather_tracks( library_root, - flush_after_compute=True, log_callback=log, status_callback=status_callback, fingerprint_status_callback=fingerprint_status_callback, From 8dcda12c2dbc8ed4195fab96ffa57f5e59a7aca7 Mon Sep 17 00:00:00 2001 From: Greensand321 Date: Sat, 10 Jan 2026 11:44:00 -0500 Subject: [PATCH 09/18] Retry cache refresh with computed fingerprints --- main_gui.py | 187 ++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 137 insertions(+), 50 deletions(-) diff --git a/main_gui.py b/main_gui.py index 639a75e..2412007 100644 --- a/main_gui.py +++ b/main_gui.py @@ -87,6 +87,7 @@ ) from fingerprint_cache import ( ensure_fingerprint_cache, + flush_fingerprint_writes, get_fingerprint, get_cached_fingerprint_metadata, store_fingerprint, @@ -2776,6 +2777,7 @@ def _gather_tracks( self, library_root: str, *, + flush_after_compute: bool = False, log_callback: Callable[[str], None] | None = None, status_callback: Callable[[str, float], None] | None = None, fingerprint_status_callback: Callable[[str], None] | None = None, @@ -2908,6 +2910,82 @@ def _track_payload( }, } + def _load_cached_tracks( + *, + allow_pending: bool, + ) -> tuple[dict[str, dict[str, object]], list[str], int, int, int, int]: + nonlocal last_update + refreshed_map: dict[str, dict[str, object]] = {} + refreshed_pending: list[str] = [] + refreshed_cached_count = 0 + refreshed_missing_count = 0 + refreshed_failure_count = 0 + refreshed_metadata_refresh_count = 0 + refreshed_completed = 0 + for path in sorted_paths: + fingerprint_trace: dict[str, object] = {} + fp, cached_metadata = get_cached_fingerprint_metadata( + path, + db_path, + log_callback=log_callback, + trace=fingerprint_trace, + ) + if fp: + metadata = _metadata_for_payload(path, cached_metadata) + if _needs_metadata_refresh(cached_metadata): + metadata = _extract_metadata(path) + refreshed_metadata_refresh_count += 1 + if not store_fingerprint( + path, + db_path, + None, + fp, + log_callback=log_callback, + ext=str(metadata.get("ext") or ""), + bitrate=int(metadata.get("bitrate") or 0), + sample_rate=int(metadata.get("sample_rate") or 0), + bit_depth=int(metadata.get("bit_depth") or 0), + normalized_artist=metadata.get("normalized_artist"), + normalized_title=metadata.get("normalized_title"), + normalized_album=metadata.get("normalized_album"), + ): + refreshed_failure_count += 1 + refreshed_map[path] = _track_payload(path, fp, fingerprint_trace, metadata) + refreshed_cached_count += 1 + refreshed_completed += 1 + else: + source = fingerprint_trace.get("source") + if source in {"stat_error", "cache_error"}: + refreshed_failure_count += 1 + refreshed_completed += 1 + elif allow_pending: + refreshed_pending.append(path) + else: + refreshed_missing_count += 1 + now = time.monotonic() + if refreshed_completed == total or now - last_update >= update_interval: + if fingerprint_status_callback: + fingerprint_status_callback(f"Fingerprinting {refreshed_completed}/{total}") + if status_callback: + status_callback( + "Fingerprinting…", + progress=self._weighted_progress( + "fingerprinting", + 0 if total == 0 else refreshed_completed / total, + ), + ) + if idle_callback: + idle_callback() + last_update = now + return ( + refreshed_map, + refreshed_pending, + refreshed_cached_count, + refreshed_missing_count, + refreshed_failure_count, + refreshed_metadata_refresh_count, + ) + tracks_map: dict[str, dict[str, object]] = {} pending_paths: list[str] = [] completed = 0 @@ -2917,56 +2995,15 @@ def _track_payload( missing_count = 0 metadata_refresh_count = 0 sorted_paths = sorted(audio_paths) - for path in sorted_paths: - fingerprint_trace: dict[str, object] = {} - fp, cached_metadata = get_cached_fingerprint_metadata( - path, - db_path, - log_callback=log_callback, - trace=fingerprint_trace, - ) - if fp: - metadata = _metadata_for_payload(path, cached_metadata) - if _needs_metadata_refresh(cached_metadata): - metadata = _extract_metadata(path) - metadata_refresh_count += 1 - if not store_fingerprint( - path, - db_path, - None, - fp, - log_callback=log_callback, - ext=str(metadata.get("ext") or ""), - bitrate=int(metadata.get("bitrate") or 0), - sample_rate=int(metadata.get("sample_rate") or 0), - bit_depth=int(metadata.get("bit_depth") or 0), - normalized_artist=metadata.get("normalized_artist"), - normalized_title=metadata.get("normalized_title"), - normalized_album=metadata.get("normalized_album"), - ): - failure_count += 1 - tracks_map[path] = _track_payload(path, fp, fingerprint_trace, metadata) - completed += 1 - cached_count += 1 - else: - source = fingerprint_trace.get("source") - if source in {"stat_error", "cache_error"}: - failure_count += 1 - completed += 1 - else: - pending_paths.append(path) - now = time.monotonic() - if completed == total or now - last_update >= update_interval: - if fingerprint_status_callback: - fingerprint_status_callback(f"Fingerprinting {completed}/{total}") - if status_callback: - status_callback( - "Fingerprinting…", - progress=self._weighted_progress("fingerprinting", completed / total), - ) - if idle_callback: - idle_callback() - last_update = now + ( + tracks_map, + pending_paths, + cached_count, + missing_count, + failure_count, + metadata_refresh_count, + ) = _load_cached_tracks(allow_pending=True) + completed = cached_count + failure_count if not pending_paths and metadata_refresh_count == 0 and failure_count == 0: refresh_message = "Catalog up to date; using cached metadata." @@ -3039,6 +3076,55 @@ def _track_payload( ): failure_count += 1 + if flush_after_compute: + if log_callback: + log_callback("Fingerprint cache flushed; refreshing track list from cache.") + pre_flush_tracks_map = dict(tracks_map) + flush_fingerprint_writes(db_path) + ( + tracks_map, + _pending_paths, + cached_count, + missing_count, + failure_count, + metadata_refresh_count, + ) = _load_cached_tracks(allow_pending=False) + missing_paths = [ + path + for path in sorted_paths + if path not in tracks_map and path in pre_flush_tracks_map + ] + if missing_paths: + if log_callback: + log_callback( + "Fingerprint cache refresh missed entries; retrying cache write " + f"for {len(missing_paths)} tracks." + ) + for path in missing_paths: + payload = pre_flush_tracks_map.get(path, {}) + if not store_fingerprint( + path, + db_path, + None, + payload.get("fingerprint"), + log_callback=log_callback, + ext=str(payload.get("ext") or ""), + bitrate=int(payload.get("bitrate") or 0), + sample_rate=int(payload.get("sample_rate") or 0), + bit_depth=int(payload.get("bit_depth") or 0), + ): + if log_callback: + log_callback(f"! Cache retry failed for {path}") + flush_fingerprint_writes(db_path) + ( + tracks_map, + _pending_paths, + cached_count, + missing_count, + failure_count, + metadata_refresh_count, + ) = _load_cached_tracks(allow_pending=False) + tracks = [tracks_map[path] for path in sorted_paths if path in tracks_map] if log_callback: log_callback( @@ -3084,6 +3170,7 @@ def report_fingerprint_status(message: str) -> None: try: tracks, missing_count, failure_count = self._gather_tracks( library_root, + flush_after_compute=True, log_callback=log, status_callback=status_callback, fingerprint_status_callback=fingerprint_status_callback, From 2b2a51377db5409d36b85fb7fe221844129c3112 Mon Sep 17 00:00:00 2001 From: Greensand321 Date: Sat, 10 Jan 2026 11:57:14 -0500 Subject: [PATCH 10/18] Revert "Retry fingerprint cache refresh using computed payloads" --- main_gui.py | 187 ++++++++++++++-------------------------------------- 1 file changed, 50 insertions(+), 137 deletions(-) diff --git a/main_gui.py b/main_gui.py index 2412007..639a75e 100644 --- a/main_gui.py +++ b/main_gui.py @@ -87,7 +87,6 @@ ) from fingerprint_cache import ( ensure_fingerprint_cache, - flush_fingerprint_writes, get_fingerprint, get_cached_fingerprint_metadata, store_fingerprint, @@ -2777,7 +2776,6 @@ def _gather_tracks( self, library_root: str, *, - flush_after_compute: bool = False, log_callback: Callable[[str], None] | None = None, status_callback: Callable[[str, float], None] | None = None, fingerprint_status_callback: Callable[[str], None] | None = None, @@ -2910,82 +2908,6 @@ def _track_payload( }, } - def _load_cached_tracks( - *, - allow_pending: bool, - ) -> tuple[dict[str, dict[str, object]], list[str], int, int, int, int]: - nonlocal last_update - refreshed_map: dict[str, dict[str, object]] = {} - refreshed_pending: list[str] = [] - refreshed_cached_count = 0 - refreshed_missing_count = 0 - refreshed_failure_count = 0 - refreshed_metadata_refresh_count = 0 - refreshed_completed = 0 - for path in sorted_paths: - fingerprint_trace: dict[str, object] = {} - fp, cached_metadata = get_cached_fingerprint_metadata( - path, - db_path, - log_callback=log_callback, - trace=fingerprint_trace, - ) - if fp: - metadata = _metadata_for_payload(path, cached_metadata) - if _needs_metadata_refresh(cached_metadata): - metadata = _extract_metadata(path) - refreshed_metadata_refresh_count += 1 - if not store_fingerprint( - path, - db_path, - None, - fp, - log_callback=log_callback, - ext=str(metadata.get("ext") or ""), - bitrate=int(metadata.get("bitrate") or 0), - sample_rate=int(metadata.get("sample_rate") or 0), - bit_depth=int(metadata.get("bit_depth") or 0), - normalized_artist=metadata.get("normalized_artist"), - normalized_title=metadata.get("normalized_title"), - normalized_album=metadata.get("normalized_album"), - ): - refreshed_failure_count += 1 - refreshed_map[path] = _track_payload(path, fp, fingerprint_trace, metadata) - refreshed_cached_count += 1 - refreshed_completed += 1 - else: - source = fingerprint_trace.get("source") - if source in {"stat_error", "cache_error"}: - refreshed_failure_count += 1 - refreshed_completed += 1 - elif allow_pending: - refreshed_pending.append(path) - else: - refreshed_missing_count += 1 - now = time.monotonic() - if refreshed_completed == total or now - last_update >= update_interval: - if fingerprint_status_callback: - fingerprint_status_callback(f"Fingerprinting {refreshed_completed}/{total}") - if status_callback: - status_callback( - "Fingerprinting…", - progress=self._weighted_progress( - "fingerprinting", - 0 if total == 0 else refreshed_completed / total, - ), - ) - if idle_callback: - idle_callback() - last_update = now - return ( - refreshed_map, - refreshed_pending, - refreshed_cached_count, - refreshed_missing_count, - refreshed_failure_count, - refreshed_metadata_refresh_count, - ) - tracks_map: dict[str, dict[str, object]] = {} pending_paths: list[str] = [] completed = 0 @@ -2995,15 +2917,56 @@ def _load_cached_tracks( missing_count = 0 metadata_refresh_count = 0 sorted_paths = sorted(audio_paths) - ( - tracks_map, - pending_paths, - cached_count, - missing_count, - failure_count, - metadata_refresh_count, - ) = _load_cached_tracks(allow_pending=True) - completed = cached_count + failure_count + for path in sorted_paths: + fingerprint_trace: dict[str, object] = {} + fp, cached_metadata = get_cached_fingerprint_metadata( + path, + db_path, + log_callback=log_callback, + trace=fingerprint_trace, + ) + if fp: + metadata = _metadata_for_payload(path, cached_metadata) + if _needs_metadata_refresh(cached_metadata): + metadata = _extract_metadata(path) + metadata_refresh_count += 1 + if not store_fingerprint( + path, + db_path, + None, + fp, + log_callback=log_callback, + ext=str(metadata.get("ext") or ""), + bitrate=int(metadata.get("bitrate") or 0), + sample_rate=int(metadata.get("sample_rate") or 0), + bit_depth=int(metadata.get("bit_depth") or 0), + normalized_artist=metadata.get("normalized_artist"), + normalized_title=metadata.get("normalized_title"), + normalized_album=metadata.get("normalized_album"), + ): + failure_count += 1 + tracks_map[path] = _track_payload(path, fp, fingerprint_trace, metadata) + completed += 1 + cached_count += 1 + else: + source = fingerprint_trace.get("source") + if source in {"stat_error", "cache_error"}: + failure_count += 1 + completed += 1 + else: + pending_paths.append(path) + now = time.monotonic() + if completed == total or now - last_update >= update_interval: + if fingerprint_status_callback: + fingerprint_status_callback(f"Fingerprinting {completed}/{total}") + if status_callback: + status_callback( + "Fingerprinting…", + progress=self._weighted_progress("fingerprinting", completed / total), + ) + if idle_callback: + idle_callback() + last_update = now if not pending_paths and metadata_refresh_count == 0 and failure_count == 0: refresh_message = "Catalog up to date; using cached metadata." @@ -3076,55 +3039,6 @@ def _load_cached_tracks( ): failure_count += 1 - if flush_after_compute: - if log_callback: - log_callback("Fingerprint cache flushed; refreshing track list from cache.") - pre_flush_tracks_map = dict(tracks_map) - flush_fingerprint_writes(db_path) - ( - tracks_map, - _pending_paths, - cached_count, - missing_count, - failure_count, - metadata_refresh_count, - ) = _load_cached_tracks(allow_pending=False) - missing_paths = [ - path - for path in sorted_paths - if path not in tracks_map and path in pre_flush_tracks_map - ] - if missing_paths: - if log_callback: - log_callback( - "Fingerprint cache refresh missed entries; retrying cache write " - f"for {len(missing_paths)} tracks." - ) - for path in missing_paths: - payload = pre_flush_tracks_map.get(path, {}) - if not store_fingerprint( - path, - db_path, - None, - payload.get("fingerprint"), - log_callback=log_callback, - ext=str(payload.get("ext") or ""), - bitrate=int(payload.get("bitrate") or 0), - sample_rate=int(payload.get("sample_rate") or 0), - bit_depth=int(payload.get("bit_depth") or 0), - ): - if log_callback: - log_callback(f"! Cache retry failed for {path}") - flush_fingerprint_writes(db_path) - ( - tracks_map, - _pending_paths, - cached_count, - missing_count, - failure_count, - metadata_refresh_count, - ) = _load_cached_tracks(allow_pending=False) - tracks = [tracks_map[path] for path in sorted_paths if path in tracks_map] if log_callback: log_callback( @@ -3170,7 +3084,6 @@ def report_fingerprint_status(message: str) -> None: try: tracks, missing_count, failure_count = self._gather_tracks( library_root, - flush_after_compute=True, log_callback=log, status_callback=status_callback, fingerprint_status_callback=fingerprint_status_callback, From 8af6a66a5a9181af9e4102e34d8f5dcf840d925b Mon Sep 17 00:00:00 2001 From: Greensand321 Date: Sat, 10 Jan 2026 11:57:30 -0500 Subject: [PATCH 11/18] Fallback to in-memory fingerprints on refresh miss --- main_gui.py | 201 +++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 151 insertions(+), 50 deletions(-) diff --git a/main_gui.py b/main_gui.py index 639a75e..e654c3f 100644 --- a/main_gui.py +++ b/main_gui.py @@ -87,6 +87,7 @@ ) from fingerprint_cache import ( ensure_fingerprint_cache, + flush_fingerprint_writes, get_fingerprint, get_cached_fingerprint_metadata, store_fingerprint, @@ -2776,6 +2777,7 @@ def _gather_tracks( self, library_root: str, *, + flush_after_compute: bool = False, log_callback: Callable[[str], None] | None = None, status_callback: Callable[[str, float], None] | None = None, fingerprint_status_callback: Callable[[str], None] | None = None, @@ -2908,6 +2910,82 @@ def _track_payload( }, } + def _load_cached_tracks( + *, + allow_pending: bool, + ) -> tuple[dict[str, dict[str, object]], list[str], int, int, int, int]: + nonlocal last_update + refreshed_map: dict[str, dict[str, object]] = {} + refreshed_pending: list[str] = [] + refreshed_cached_count = 0 + refreshed_missing_count = 0 + refreshed_failure_count = 0 + refreshed_metadata_refresh_count = 0 + refreshed_completed = 0 + for path in sorted_paths: + fingerprint_trace: dict[str, object] = {} + fp, cached_metadata = get_cached_fingerprint_metadata( + path, + db_path, + log_callback=log_callback, + trace=fingerprint_trace, + ) + if fp: + metadata = _metadata_for_payload(path, cached_metadata) + if _needs_metadata_refresh(cached_metadata): + metadata = _extract_metadata(path) + refreshed_metadata_refresh_count += 1 + if not store_fingerprint( + path, + db_path, + None, + fp, + log_callback=log_callback, + ext=str(metadata.get("ext") or ""), + bitrate=int(metadata.get("bitrate") or 0), + sample_rate=int(metadata.get("sample_rate") or 0), + bit_depth=int(metadata.get("bit_depth") or 0), + normalized_artist=metadata.get("normalized_artist"), + normalized_title=metadata.get("normalized_title"), + normalized_album=metadata.get("normalized_album"), + ): + refreshed_failure_count += 1 + refreshed_map[path] = _track_payload(path, fp, fingerprint_trace, metadata) + refreshed_cached_count += 1 + refreshed_completed += 1 + else: + source = fingerprint_trace.get("source") + if source in {"stat_error", "cache_error"}: + refreshed_failure_count += 1 + refreshed_completed += 1 + elif allow_pending: + refreshed_pending.append(path) + else: + refreshed_missing_count += 1 + now = time.monotonic() + if refreshed_completed == total or now - last_update >= update_interval: + if fingerprint_status_callback: + fingerprint_status_callback(f"Fingerprinting {refreshed_completed}/{total}") + if status_callback: + status_callback( + "Fingerprinting…", + progress=self._weighted_progress( + "fingerprinting", + 0 if total == 0 else refreshed_completed / total, + ), + ) + if idle_callback: + idle_callback() + last_update = now + return ( + refreshed_map, + refreshed_pending, + refreshed_cached_count, + refreshed_missing_count, + refreshed_failure_count, + refreshed_metadata_refresh_count, + ) + tracks_map: dict[str, dict[str, object]] = {} pending_paths: list[str] = [] completed = 0 @@ -2917,56 +2995,15 @@ def _track_payload( missing_count = 0 metadata_refresh_count = 0 sorted_paths = sorted(audio_paths) - for path in sorted_paths: - fingerprint_trace: dict[str, object] = {} - fp, cached_metadata = get_cached_fingerprint_metadata( - path, - db_path, - log_callback=log_callback, - trace=fingerprint_trace, - ) - if fp: - metadata = _metadata_for_payload(path, cached_metadata) - if _needs_metadata_refresh(cached_metadata): - metadata = _extract_metadata(path) - metadata_refresh_count += 1 - if not store_fingerprint( - path, - db_path, - None, - fp, - log_callback=log_callback, - ext=str(metadata.get("ext") or ""), - bitrate=int(metadata.get("bitrate") or 0), - sample_rate=int(metadata.get("sample_rate") or 0), - bit_depth=int(metadata.get("bit_depth") or 0), - normalized_artist=metadata.get("normalized_artist"), - normalized_title=metadata.get("normalized_title"), - normalized_album=metadata.get("normalized_album"), - ): - failure_count += 1 - tracks_map[path] = _track_payload(path, fp, fingerprint_trace, metadata) - completed += 1 - cached_count += 1 - else: - source = fingerprint_trace.get("source") - if source in {"stat_error", "cache_error"}: - failure_count += 1 - completed += 1 - else: - pending_paths.append(path) - now = time.monotonic() - if completed == total or now - last_update >= update_interval: - if fingerprint_status_callback: - fingerprint_status_callback(f"Fingerprinting {completed}/{total}") - if status_callback: - status_callback( - "Fingerprinting…", - progress=self._weighted_progress("fingerprinting", completed / total), - ) - if idle_callback: - idle_callback() - last_update = now + ( + tracks_map, + pending_paths, + cached_count, + missing_count, + failure_count, + metadata_refresh_count, + ) = _load_cached_tracks(allow_pending=True) + completed = cached_count + failure_count if not pending_paths and metadata_refresh_count == 0 and failure_count == 0: refresh_message = "Catalog up to date; using cached metadata." @@ -3039,6 +3076,69 @@ def _track_payload( ): failure_count += 1 + if flush_after_compute: + if log_callback: + log_callback("Fingerprint cache flushed; refreshing track list from cache.") + pre_flush_tracks_map = dict(tracks_map) + flush_fingerprint_writes(db_path) + ( + tracks_map, + _pending_paths, + cached_count, + missing_count, + failure_count, + metadata_refresh_count, + ) = _load_cached_tracks(allow_pending=False) + missing_paths = [ + path + for path in sorted_paths + if path not in tracks_map and path in pre_flush_tracks_map + ] + if missing_paths: + if log_callback: + log_callback( + "Fingerprint cache refresh missed entries; retrying cache write " + f"for {len(missing_paths)} tracks." + ) + for path in missing_paths: + payload = pre_flush_tracks_map.get(path, {}) + if not store_fingerprint( + path, + db_path, + None, + payload.get("fingerprint"), + log_callback=log_callback, + ext=str(payload.get("ext") or ""), + bitrate=int(payload.get("bitrate") or 0), + sample_rate=int(payload.get("sample_rate") or 0), + bit_depth=int(payload.get("bit_depth") or 0), + ): + if log_callback: + log_callback(f"! Cache retry failed for {path}") + flush_fingerprint_writes(db_path) + ( + tracks_map, + _pending_paths, + cached_count, + missing_count, + failure_count, + metadata_refresh_count, + ) = _load_cached_tracks(allow_pending=False) + unresolved_paths = [ + path + for path in sorted_paths + if path not in tracks_map and path in pre_flush_tracks_map + ] + if unresolved_paths: + if log_callback: + log_callback( + "Fingerprint cache refresh still missing entries; " + f"falling back to in-memory fingerprints for {len(unresolved_paths)} tracks." + ) + for path in unresolved_paths: + tracks_map[path] = pre_flush_tracks_map[path] + missing_count = max(0, missing_count - len(unresolved_paths)) + tracks = [tracks_map[path] for path in sorted_paths if path in tracks_map] if log_callback: log_callback( @@ -3084,6 +3184,7 @@ def report_fingerprint_status(message: str) -> None: try: tracks, missing_count, failure_count = self._gather_tracks( library_root, + flush_after_compute=True, log_callback=log, status_callback=status_callback, fingerprint_status_callback=fingerprint_status_callback, From 3112c74b97aace54716b0d0eaaedbdf65992c691 Mon Sep 17 00:00:00 2001 From: Greensand321 Date: Sat, 10 Jan 2026 16:06:53 -0500 Subject: [PATCH 12/18] Revert "Fallback to in-memory fingerprints when cache refresh misses" --- main_gui.py | 201 +++++++++++++--------------------------------------- 1 file changed, 50 insertions(+), 151 deletions(-) diff --git a/main_gui.py b/main_gui.py index e654c3f..639a75e 100644 --- a/main_gui.py +++ b/main_gui.py @@ -87,7 +87,6 @@ ) from fingerprint_cache import ( ensure_fingerprint_cache, - flush_fingerprint_writes, get_fingerprint, get_cached_fingerprint_metadata, store_fingerprint, @@ -2777,7 +2776,6 @@ def _gather_tracks( self, library_root: str, *, - flush_after_compute: bool = False, log_callback: Callable[[str], None] | None = None, status_callback: Callable[[str, float], None] | None = None, fingerprint_status_callback: Callable[[str], None] | None = None, @@ -2910,82 +2908,6 @@ def _track_payload( }, } - def _load_cached_tracks( - *, - allow_pending: bool, - ) -> tuple[dict[str, dict[str, object]], list[str], int, int, int, int]: - nonlocal last_update - refreshed_map: dict[str, dict[str, object]] = {} - refreshed_pending: list[str] = [] - refreshed_cached_count = 0 - refreshed_missing_count = 0 - refreshed_failure_count = 0 - refreshed_metadata_refresh_count = 0 - refreshed_completed = 0 - for path in sorted_paths: - fingerprint_trace: dict[str, object] = {} - fp, cached_metadata = get_cached_fingerprint_metadata( - path, - db_path, - log_callback=log_callback, - trace=fingerprint_trace, - ) - if fp: - metadata = _metadata_for_payload(path, cached_metadata) - if _needs_metadata_refresh(cached_metadata): - metadata = _extract_metadata(path) - refreshed_metadata_refresh_count += 1 - if not store_fingerprint( - path, - db_path, - None, - fp, - log_callback=log_callback, - ext=str(metadata.get("ext") or ""), - bitrate=int(metadata.get("bitrate") or 0), - sample_rate=int(metadata.get("sample_rate") or 0), - bit_depth=int(metadata.get("bit_depth") or 0), - normalized_artist=metadata.get("normalized_artist"), - normalized_title=metadata.get("normalized_title"), - normalized_album=metadata.get("normalized_album"), - ): - refreshed_failure_count += 1 - refreshed_map[path] = _track_payload(path, fp, fingerprint_trace, metadata) - refreshed_cached_count += 1 - refreshed_completed += 1 - else: - source = fingerprint_trace.get("source") - if source in {"stat_error", "cache_error"}: - refreshed_failure_count += 1 - refreshed_completed += 1 - elif allow_pending: - refreshed_pending.append(path) - else: - refreshed_missing_count += 1 - now = time.monotonic() - if refreshed_completed == total or now - last_update >= update_interval: - if fingerprint_status_callback: - fingerprint_status_callback(f"Fingerprinting {refreshed_completed}/{total}") - if status_callback: - status_callback( - "Fingerprinting…", - progress=self._weighted_progress( - "fingerprinting", - 0 if total == 0 else refreshed_completed / total, - ), - ) - if idle_callback: - idle_callback() - last_update = now - return ( - refreshed_map, - refreshed_pending, - refreshed_cached_count, - refreshed_missing_count, - refreshed_failure_count, - refreshed_metadata_refresh_count, - ) - tracks_map: dict[str, dict[str, object]] = {} pending_paths: list[str] = [] completed = 0 @@ -2995,15 +2917,56 @@ def _load_cached_tracks( missing_count = 0 metadata_refresh_count = 0 sorted_paths = sorted(audio_paths) - ( - tracks_map, - pending_paths, - cached_count, - missing_count, - failure_count, - metadata_refresh_count, - ) = _load_cached_tracks(allow_pending=True) - completed = cached_count + failure_count + for path in sorted_paths: + fingerprint_trace: dict[str, object] = {} + fp, cached_metadata = get_cached_fingerprint_metadata( + path, + db_path, + log_callback=log_callback, + trace=fingerprint_trace, + ) + if fp: + metadata = _metadata_for_payload(path, cached_metadata) + if _needs_metadata_refresh(cached_metadata): + metadata = _extract_metadata(path) + metadata_refresh_count += 1 + if not store_fingerprint( + path, + db_path, + None, + fp, + log_callback=log_callback, + ext=str(metadata.get("ext") or ""), + bitrate=int(metadata.get("bitrate") or 0), + sample_rate=int(metadata.get("sample_rate") or 0), + bit_depth=int(metadata.get("bit_depth") or 0), + normalized_artist=metadata.get("normalized_artist"), + normalized_title=metadata.get("normalized_title"), + normalized_album=metadata.get("normalized_album"), + ): + failure_count += 1 + tracks_map[path] = _track_payload(path, fp, fingerprint_trace, metadata) + completed += 1 + cached_count += 1 + else: + source = fingerprint_trace.get("source") + if source in {"stat_error", "cache_error"}: + failure_count += 1 + completed += 1 + else: + pending_paths.append(path) + now = time.monotonic() + if completed == total or now - last_update >= update_interval: + if fingerprint_status_callback: + fingerprint_status_callback(f"Fingerprinting {completed}/{total}") + if status_callback: + status_callback( + "Fingerprinting…", + progress=self._weighted_progress("fingerprinting", completed / total), + ) + if idle_callback: + idle_callback() + last_update = now if not pending_paths and metadata_refresh_count == 0 and failure_count == 0: refresh_message = "Catalog up to date; using cached metadata." @@ -3076,69 +3039,6 @@ def _load_cached_tracks( ): failure_count += 1 - if flush_after_compute: - if log_callback: - log_callback("Fingerprint cache flushed; refreshing track list from cache.") - pre_flush_tracks_map = dict(tracks_map) - flush_fingerprint_writes(db_path) - ( - tracks_map, - _pending_paths, - cached_count, - missing_count, - failure_count, - metadata_refresh_count, - ) = _load_cached_tracks(allow_pending=False) - missing_paths = [ - path - for path in sorted_paths - if path not in tracks_map and path in pre_flush_tracks_map - ] - if missing_paths: - if log_callback: - log_callback( - "Fingerprint cache refresh missed entries; retrying cache write " - f"for {len(missing_paths)} tracks." - ) - for path in missing_paths: - payload = pre_flush_tracks_map.get(path, {}) - if not store_fingerprint( - path, - db_path, - None, - payload.get("fingerprint"), - log_callback=log_callback, - ext=str(payload.get("ext") or ""), - bitrate=int(payload.get("bitrate") or 0), - sample_rate=int(payload.get("sample_rate") or 0), - bit_depth=int(payload.get("bit_depth") or 0), - ): - if log_callback: - log_callback(f"! Cache retry failed for {path}") - flush_fingerprint_writes(db_path) - ( - tracks_map, - _pending_paths, - cached_count, - missing_count, - failure_count, - metadata_refresh_count, - ) = _load_cached_tracks(allow_pending=False) - unresolved_paths = [ - path - for path in sorted_paths - if path not in tracks_map and path in pre_flush_tracks_map - ] - if unresolved_paths: - if log_callback: - log_callback( - "Fingerprint cache refresh still missing entries; " - f"falling back to in-memory fingerprints for {len(unresolved_paths)} tracks." - ) - for path in unresolved_paths: - tracks_map[path] = pre_flush_tracks_map[path] - missing_count = max(0, missing_count - len(unresolved_paths)) - tracks = [tracks_map[path] for path in sorted_paths if path in tracks_map] if log_callback: log_callback( @@ -3184,7 +3084,6 @@ def report_fingerprint_status(message: str) -> None: try: tracks, missing_count, failure_count = self._gather_tracks( library_root, - flush_after_compute=True, log_callback=log, status_callback=status_callback, fingerprint_status_callback=fingerprint_status_callback, From 7a1ee6dba4d076d015c8dfe2e7b8114421fa69d3 Mon Sep 17 00:00:00 2001 From: Greensand321 Date: Sat, 10 Jan 2026 16:11:16 -0500 Subject: [PATCH 13/18] Revert "Revert "Fallback to in-memory fingerprints when cache refresh misses"" --- main_gui.py | 201 +++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 151 insertions(+), 50 deletions(-) diff --git a/main_gui.py b/main_gui.py index 639a75e..e654c3f 100644 --- a/main_gui.py +++ b/main_gui.py @@ -87,6 +87,7 @@ ) from fingerprint_cache import ( ensure_fingerprint_cache, + flush_fingerprint_writes, get_fingerprint, get_cached_fingerprint_metadata, store_fingerprint, @@ -2776,6 +2777,7 @@ def _gather_tracks( self, library_root: str, *, + flush_after_compute: bool = False, log_callback: Callable[[str], None] | None = None, status_callback: Callable[[str, float], None] | None = None, fingerprint_status_callback: Callable[[str], None] | None = None, @@ -2908,6 +2910,82 @@ def _track_payload( }, } + def _load_cached_tracks( + *, + allow_pending: bool, + ) -> tuple[dict[str, dict[str, object]], list[str], int, int, int, int]: + nonlocal last_update + refreshed_map: dict[str, dict[str, object]] = {} + refreshed_pending: list[str] = [] + refreshed_cached_count = 0 + refreshed_missing_count = 0 + refreshed_failure_count = 0 + refreshed_metadata_refresh_count = 0 + refreshed_completed = 0 + for path in sorted_paths: + fingerprint_trace: dict[str, object] = {} + fp, cached_metadata = get_cached_fingerprint_metadata( + path, + db_path, + log_callback=log_callback, + trace=fingerprint_trace, + ) + if fp: + metadata = _metadata_for_payload(path, cached_metadata) + if _needs_metadata_refresh(cached_metadata): + metadata = _extract_metadata(path) + refreshed_metadata_refresh_count += 1 + if not store_fingerprint( + path, + db_path, + None, + fp, + log_callback=log_callback, + ext=str(metadata.get("ext") or ""), + bitrate=int(metadata.get("bitrate") or 0), + sample_rate=int(metadata.get("sample_rate") or 0), + bit_depth=int(metadata.get("bit_depth") or 0), + normalized_artist=metadata.get("normalized_artist"), + normalized_title=metadata.get("normalized_title"), + normalized_album=metadata.get("normalized_album"), + ): + refreshed_failure_count += 1 + refreshed_map[path] = _track_payload(path, fp, fingerprint_trace, metadata) + refreshed_cached_count += 1 + refreshed_completed += 1 + else: + source = fingerprint_trace.get("source") + if source in {"stat_error", "cache_error"}: + refreshed_failure_count += 1 + refreshed_completed += 1 + elif allow_pending: + refreshed_pending.append(path) + else: + refreshed_missing_count += 1 + now = time.monotonic() + if refreshed_completed == total or now - last_update >= update_interval: + if fingerprint_status_callback: + fingerprint_status_callback(f"Fingerprinting {refreshed_completed}/{total}") + if status_callback: + status_callback( + "Fingerprinting…", + progress=self._weighted_progress( + "fingerprinting", + 0 if total == 0 else refreshed_completed / total, + ), + ) + if idle_callback: + idle_callback() + last_update = now + return ( + refreshed_map, + refreshed_pending, + refreshed_cached_count, + refreshed_missing_count, + refreshed_failure_count, + refreshed_metadata_refresh_count, + ) + tracks_map: dict[str, dict[str, object]] = {} pending_paths: list[str] = [] completed = 0 @@ -2917,56 +2995,15 @@ def _track_payload( missing_count = 0 metadata_refresh_count = 0 sorted_paths = sorted(audio_paths) - for path in sorted_paths: - fingerprint_trace: dict[str, object] = {} - fp, cached_metadata = get_cached_fingerprint_metadata( - path, - db_path, - log_callback=log_callback, - trace=fingerprint_trace, - ) - if fp: - metadata = _metadata_for_payload(path, cached_metadata) - if _needs_metadata_refresh(cached_metadata): - metadata = _extract_metadata(path) - metadata_refresh_count += 1 - if not store_fingerprint( - path, - db_path, - None, - fp, - log_callback=log_callback, - ext=str(metadata.get("ext") or ""), - bitrate=int(metadata.get("bitrate") or 0), - sample_rate=int(metadata.get("sample_rate") or 0), - bit_depth=int(metadata.get("bit_depth") or 0), - normalized_artist=metadata.get("normalized_artist"), - normalized_title=metadata.get("normalized_title"), - normalized_album=metadata.get("normalized_album"), - ): - failure_count += 1 - tracks_map[path] = _track_payload(path, fp, fingerprint_trace, metadata) - completed += 1 - cached_count += 1 - else: - source = fingerprint_trace.get("source") - if source in {"stat_error", "cache_error"}: - failure_count += 1 - completed += 1 - else: - pending_paths.append(path) - now = time.monotonic() - if completed == total or now - last_update >= update_interval: - if fingerprint_status_callback: - fingerprint_status_callback(f"Fingerprinting {completed}/{total}") - if status_callback: - status_callback( - "Fingerprinting…", - progress=self._weighted_progress("fingerprinting", completed / total), - ) - if idle_callback: - idle_callback() - last_update = now + ( + tracks_map, + pending_paths, + cached_count, + missing_count, + failure_count, + metadata_refresh_count, + ) = _load_cached_tracks(allow_pending=True) + completed = cached_count + failure_count if not pending_paths and metadata_refresh_count == 0 and failure_count == 0: refresh_message = "Catalog up to date; using cached metadata." @@ -3039,6 +3076,69 @@ def _track_payload( ): failure_count += 1 + if flush_after_compute: + if log_callback: + log_callback("Fingerprint cache flushed; refreshing track list from cache.") + pre_flush_tracks_map = dict(tracks_map) + flush_fingerprint_writes(db_path) + ( + tracks_map, + _pending_paths, + cached_count, + missing_count, + failure_count, + metadata_refresh_count, + ) = _load_cached_tracks(allow_pending=False) + missing_paths = [ + path + for path in sorted_paths + if path not in tracks_map and path in pre_flush_tracks_map + ] + if missing_paths: + if log_callback: + log_callback( + "Fingerprint cache refresh missed entries; retrying cache write " + f"for {len(missing_paths)} tracks." + ) + for path in missing_paths: + payload = pre_flush_tracks_map.get(path, {}) + if not store_fingerprint( + path, + db_path, + None, + payload.get("fingerprint"), + log_callback=log_callback, + ext=str(payload.get("ext") or ""), + bitrate=int(payload.get("bitrate") or 0), + sample_rate=int(payload.get("sample_rate") or 0), + bit_depth=int(payload.get("bit_depth") or 0), + ): + if log_callback: + log_callback(f"! Cache retry failed for {path}") + flush_fingerprint_writes(db_path) + ( + tracks_map, + _pending_paths, + cached_count, + missing_count, + failure_count, + metadata_refresh_count, + ) = _load_cached_tracks(allow_pending=False) + unresolved_paths = [ + path + for path in sorted_paths + if path not in tracks_map and path in pre_flush_tracks_map + ] + if unresolved_paths: + if log_callback: + log_callback( + "Fingerprint cache refresh still missing entries; " + f"falling back to in-memory fingerprints for {len(unresolved_paths)} tracks." + ) + for path in unresolved_paths: + tracks_map[path] = pre_flush_tracks_map[path] + missing_count = max(0, missing_count - len(unresolved_paths)) + tracks = [tracks_map[path] for path in sorted_paths if path in tracks_map] if log_callback: log_callback( @@ -3084,6 +3184,7 @@ def report_fingerprint_status(message: str) -> None: try: tracks, missing_count, failure_count = self._gather_tracks( library_root, + flush_after_compute=True, log_callback=log, status_callback=status_callback, fingerprint_status_callback=fingerprint_status_callback, From 9ac437d96aac48d99d8bb3131b364759f7e8bf5d Mon Sep 17 00:00:00 2001 From: Greensand321 Date: Sat, 10 Jan 2026 16:11:42 -0500 Subject: [PATCH 14/18] Revert "Revert "Revert "Fallback to in-memory fingerprints when cache refresh misses""" --- main_gui.py | 201 +++++++++++++--------------------------------------- 1 file changed, 50 insertions(+), 151 deletions(-) diff --git a/main_gui.py b/main_gui.py index e654c3f..639a75e 100644 --- a/main_gui.py +++ b/main_gui.py @@ -87,7 +87,6 @@ ) from fingerprint_cache import ( ensure_fingerprint_cache, - flush_fingerprint_writes, get_fingerprint, get_cached_fingerprint_metadata, store_fingerprint, @@ -2777,7 +2776,6 @@ def _gather_tracks( self, library_root: str, *, - flush_after_compute: bool = False, log_callback: Callable[[str], None] | None = None, status_callback: Callable[[str, float], None] | None = None, fingerprint_status_callback: Callable[[str], None] | None = None, @@ -2910,82 +2908,6 @@ def _track_payload( }, } - def _load_cached_tracks( - *, - allow_pending: bool, - ) -> tuple[dict[str, dict[str, object]], list[str], int, int, int, int]: - nonlocal last_update - refreshed_map: dict[str, dict[str, object]] = {} - refreshed_pending: list[str] = [] - refreshed_cached_count = 0 - refreshed_missing_count = 0 - refreshed_failure_count = 0 - refreshed_metadata_refresh_count = 0 - refreshed_completed = 0 - for path in sorted_paths: - fingerprint_trace: dict[str, object] = {} - fp, cached_metadata = get_cached_fingerprint_metadata( - path, - db_path, - log_callback=log_callback, - trace=fingerprint_trace, - ) - if fp: - metadata = _metadata_for_payload(path, cached_metadata) - if _needs_metadata_refresh(cached_metadata): - metadata = _extract_metadata(path) - refreshed_metadata_refresh_count += 1 - if not store_fingerprint( - path, - db_path, - None, - fp, - log_callback=log_callback, - ext=str(metadata.get("ext") or ""), - bitrate=int(metadata.get("bitrate") or 0), - sample_rate=int(metadata.get("sample_rate") or 0), - bit_depth=int(metadata.get("bit_depth") or 0), - normalized_artist=metadata.get("normalized_artist"), - normalized_title=metadata.get("normalized_title"), - normalized_album=metadata.get("normalized_album"), - ): - refreshed_failure_count += 1 - refreshed_map[path] = _track_payload(path, fp, fingerprint_trace, metadata) - refreshed_cached_count += 1 - refreshed_completed += 1 - else: - source = fingerprint_trace.get("source") - if source in {"stat_error", "cache_error"}: - refreshed_failure_count += 1 - refreshed_completed += 1 - elif allow_pending: - refreshed_pending.append(path) - else: - refreshed_missing_count += 1 - now = time.monotonic() - if refreshed_completed == total or now - last_update >= update_interval: - if fingerprint_status_callback: - fingerprint_status_callback(f"Fingerprinting {refreshed_completed}/{total}") - if status_callback: - status_callback( - "Fingerprinting…", - progress=self._weighted_progress( - "fingerprinting", - 0 if total == 0 else refreshed_completed / total, - ), - ) - if idle_callback: - idle_callback() - last_update = now - return ( - refreshed_map, - refreshed_pending, - refreshed_cached_count, - refreshed_missing_count, - refreshed_failure_count, - refreshed_metadata_refresh_count, - ) - tracks_map: dict[str, dict[str, object]] = {} pending_paths: list[str] = [] completed = 0 @@ -2995,15 +2917,56 @@ def _load_cached_tracks( missing_count = 0 metadata_refresh_count = 0 sorted_paths = sorted(audio_paths) - ( - tracks_map, - pending_paths, - cached_count, - missing_count, - failure_count, - metadata_refresh_count, - ) = _load_cached_tracks(allow_pending=True) - completed = cached_count + failure_count + for path in sorted_paths: + fingerprint_trace: dict[str, object] = {} + fp, cached_metadata = get_cached_fingerprint_metadata( + path, + db_path, + log_callback=log_callback, + trace=fingerprint_trace, + ) + if fp: + metadata = _metadata_for_payload(path, cached_metadata) + if _needs_metadata_refresh(cached_metadata): + metadata = _extract_metadata(path) + metadata_refresh_count += 1 + if not store_fingerprint( + path, + db_path, + None, + fp, + log_callback=log_callback, + ext=str(metadata.get("ext") or ""), + bitrate=int(metadata.get("bitrate") or 0), + sample_rate=int(metadata.get("sample_rate") or 0), + bit_depth=int(metadata.get("bit_depth") or 0), + normalized_artist=metadata.get("normalized_artist"), + normalized_title=metadata.get("normalized_title"), + normalized_album=metadata.get("normalized_album"), + ): + failure_count += 1 + tracks_map[path] = _track_payload(path, fp, fingerprint_trace, metadata) + completed += 1 + cached_count += 1 + else: + source = fingerprint_trace.get("source") + if source in {"stat_error", "cache_error"}: + failure_count += 1 + completed += 1 + else: + pending_paths.append(path) + now = time.monotonic() + if completed == total or now - last_update >= update_interval: + if fingerprint_status_callback: + fingerprint_status_callback(f"Fingerprinting {completed}/{total}") + if status_callback: + status_callback( + "Fingerprinting…", + progress=self._weighted_progress("fingerprinting", completed / total), + ) + if idle_callback: + idle_callback() + last_update = now if not pending_paths and metadata_refresh_count == 0 and failure_count == 0: refresh_message = "Catalog up to date; using cached metadata." @@ -3076,69 +3039,6 @@ def _load_cached_tracks( ): failure_count += 1 - if flush_after_compute: - if log_callback: - log_callback("Fingerprint cache flushed; refreshing track list from cache.") - pre_flush_tracks_map = dict(tracks_map) - flush_fingerprint_writes(db_path) - ( - tracks_map, - _pending_paths, - cached_count, - missing_count, - failure_count, - metadata_refresh_count, - ) = _load_cached_tracks(allow_pending=False) - missing_paths = [ - path - for path in sorted_paths - if path not in tracks_map and path in pre_flush_tracks_map - ] - if missing_paths: - if log_callback: - log_callback( - "Fingerprint cache refresh missed entries; retrying cache write " - f"for {len(missing_paths)} tracks." - ) - for path in missing_paths: - payload = pre_flush_tracks_map.get(path, {}) - if not store_fingerprint( - path, - db_path, - None, - payload.get("fingerprint"), - log_callback=log_callback, - ext=str(payload.get("ext") or ""), - bitrate=int(payload.get("bitrate") or 0), - sample_rate=int(payload.get("sample_rate") or 0), - bit_depth=int(payload.get("bit_depth") or 0), - ): - if log_callback: - log_callback(f"! Cache retry failed for {path}") - flush_fingerprint_writes(db_path) - ( - tracks_map, - _pending_paths, - cached_count, - missing_count, - failure_count, - metadata_refresh_count, - ) = _load_cached_tracks(allow_pending=False) - unresolved_paths = [ - path - for path in sorted_paths - if path not in tracks_map and path in pre_flush_tracks_map - ] - if unresolved_paths: - if log_callback: - log_callback( - "Fingerprint cache refresh still missing entries; " - f"falling back to in-memory fingerprints for {len(unresolved_paths)} tracks." - ) - for path in unresolved_paths: - tracks_map[path] = pre_flush_tracks_map[path] - missing_count = max(0, missing_count - len(unresolved_paths)) - tracks = [tracks_map[path] for path in sorted_paths if path in tracks_map] if log_callback: log_callback( @@ -3184,7 +3084,6 @@ def report_fingerprint_status(message: str) -> None: try: tracks, missing_count, failure_count = self._gather_tracks( library_root, - flush_after_compute=True, log_callback=log, status_callback=status_callback, fingerprint_status_callback=fingerprint_status_callback, From 8d55e8f4e0314723cdf9ef76a10ec85f6d77d79a Mon Sep 17 00:00:00 2001 From: Greensand321 Date: Sat, 10 Jan 2026 16:36:24 -0500 Subject: [PATCH 15/18] Flush fingerprint cache after compute --- main_gui.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/main_gui.py b/main_gui.py index 639a75e..2a67832 100644 --- a/main_gui.py +++ b/main_gui.py @@ -90,6 +90,7 @@ get_fingerprint, get_cached_fingerprint_metadata, store_fingerprint, + flush_fingerprint_writes, ) from simple_duplicate_finder import SUPPORTED_EXTS, _compute_fp from tag_fixer import MIN_INTERACTIVE_SCORE, FileRecord @@ -2780,6 +2781,7 @@ def _gather_tracks( status_callback: Callable[[str, float], None] | None = None, fingerprint_status_callback: Callable[[str], None] | None = None, idle_callback: Callable[[], None] | None = None, + flush_after_compute: bool = False, ) -> tuple[list[dict[str, object]], int, int]: if not library_root: return [], 0, 0 @@ -3039,6 +3041,12 @@ def _track_payload( ): failure_count += 1 + if flush_after_compute: + flush_fingerprint_writes(db_path) + if log_callback: + log_callback( + "Fingerprint cache flushed after compute; continuing with in-memory track list." + ) tracks = [tracks_map[path] for path in sorted_paths if path in tracks_map] if log_callback: log_callback( @@ -3088,6 +3096,7 @@ def report_fingerprint_status(message: str) -> None: status_callback=status_callback, fingerprint_status_callback=fingerprint_status_callback, idle_callback=idle_callback, + flush_after_compute=True, ) if not tracks: return PlanGenerationResult( From bed069a690b81d4151d4f6f44898db208ecaabcb Mon Sep 17 00:00:00 2001 From: Greensand321 Date: Sat, 10 Jan 2026 16:39:41 -0500 Subject: [PATCH 16/18] Revert "Flush fingerprint cache after compute pass (preserve in-memory track list)" --- main_gui.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/main_gui.py b/main_gui.py index 2a67832..639a75e 100644 --- a/main_gui.py +++ b/main_gui.py @@ -90,7 +90,6 @@ get_fingerprint, get_cached_fingerprint_metadata, store_fingerprint, - flush_fingerprint_writes, ) from simple_duplicate_finder import SUPPORTED_EXTS, _compute_fp from tag_fixer import MIN_INTERACTIVE_SCORE, FileRecord @@ -2781,7 +2780,6 @@ def _gather_tracks( status_callback: Callable[[str, float], None] | None = None, fingerprint_status_callback: Callable[[str], None] | None = None, idle_callback: Callable[[], None] | None = None, - flush_after_compute: bool = False, ) -> tuple[list[dict[str, object]], int, int]: if not library_root: return [], 0, 0 @@ -3041,12 +3039,6 @@ def _track_payload( ): failure_count += 1 - if flush_after_compute: - flush_fingerprint_writes(db_path) - if log_callback: - log_callback( - "Fingerprint cache flushed after compute; continuing with in-memory track list." - ) tracks = [tracks_map[path] for path in sorted_paths if path in tracks_map] if log_callback: log_callback( @@ -3096,7 +3088,6 @@ def report_fingerprint_status(message: str) -> None: status_callback=status_callback, fingerprint_status_callback=fingerprint_status_callback, idle_callback=idle_callback, - flush_after_compute=True, ) if not tracks: return PlanGenerationResult( From e1ff52f26d1ca99830f3cc3bb4fa5a2bedb847a1 Mon Sep 17 00:00:00 2001 From: Greensand321 Date: Sat, 10 Jan 2026 16:52:17 -0500 Subject: [PATCH 17/18] Refresh fingerprint cache after flush --- fingerprint_cache.py | 17 +++++++-- main_gui.py | 90 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 104 insertions(+), 3 deletions(-) diff --git a/fingerprint_cache.py b/fingerprint_cache.py index 0e6de2a..5b3d5c7 100644 --- a/fingerprint_cache.py +++ b/fingerprint_cache.py @@ -486,6 +486,9 @@ def get_cached_fingerprint_metadata( *, retries: int = 3, retry_delay: float = 0.05, + lock_retry_attempts: int = 0, + lock_retry_delay: float = 0.1, + lock_retry_backoff: float = 2.0, ) -> tuple[str | None, dict[str, object] | None]: """Return cached fingerprint and metadata for path without computing new fingerprints.""" if log_callback is None: @@ -495,7 +498,9 @@ def get_cached_fingerprint_metadata( path = ensure_long_path(path) os.makedirs(os.path.dirname(db_path), exist_ok=True) - for attempt in range(retries): + total_attempts = max(retries, 1) + max(lock_retry_attempts, 0) + lock_delay = max(lock_retry_delay, 0.0) + for attempt in range(total_attempts): conn: sqlite3.Connection | None = None try: conn = _open_readonly_connection(db_path) @@ -559,12 +564,18 @@ def get_cached_fingerprint_metadata( trace["error"] = "" return None, None except sqlite3.OperationalError as e: - if "locked" not in str(e).lower() or attempt >= retries - 1: + locked = "locked" in str(e).lower() + if not locked or attempt >= total_attempts - 1: log_callback(f"! Fingerprint cache read failed: {e}") trace["source"] = "cache_error" trace["error"] = str(e) return None, None - time.sleep(retry_delay) + if locked and attempt >= retries - 1: + trace["lock_contention"] = True + time.sleep(lock_delay) + lock_delay *= max(lock_retry_backoff, 1.0) + else: + time.sleep(retry_delay) finally: if conn is not None: conn.close() diff --git a/main_gui.py b/main_gui.py index 639a75e..5c66712 100644 --- a/main_gui.py +++ b/main_gui.py @@ -2780,6 +2780,7 @@ def _gather_tracks( status_callback: Callable[[str, float], None] | None = None, fingerprint_status_callback: Callable[[str], None] | None = None, idle_callback: Callable[[], None] | None = None, + refresh_after_flush: bool = False, ) -> tuple[list[dict[str, object]], int, int]: if not library_root: return [], 0, 0 @@ -2908,6 +2909,72 @@ def _track_payload( }, } + def _load_cached_tracks( + paths: list[str], + *, + lock_retry_attempts: int = 0, + ) -> tuple[dict[str, dict[str, object]], int, int, int, int, int]: + cached_tracks: dict[str, dict[str, object]] = {} + refresh_missing = 0 + refresh_failure = 0 + refresh_cached = 0 + refresh_metadata = 0 + lock_contention = 0 + for cached_path in paths: + fingerprint_trace: dict[str, object] = {} + fp, cached_metadata = get_cached_fingerprint_metadata( + cached_path, + db_path, + log_callback=log_callback, + trace=fingerprint_trace, + lock_retry_attempts=lock_retry_attempts, + lock_retry_delay=0.1, + lock_retry_backoff=1.5, + ) + if fingerprint_trace.get("lock_contention"): + lock_contention += 1 + if fp: + metadata = _metadata_for_payload(cached_path, cached_metadata) + if _needs_metadata_refresh(cached_metadata): + metadata = _extract_metadata(cached_path) + refresh_metadata += 1 + if not store_fingerprint( + cached_path, + db_path, + None, + fp, + log_callback=log_callback, + ext=str(metadata.get("ext") or ""), + bitrate=int(metadata.get("bitrate") or 0), + sample_rate=int(metadata.get("sample_rate") or 0), + bit_depth=int(metadata.get("bit_depth") or 0), + normalized_artist=metadata.get("normalized_artist"), + normalized_title=metadata.get("normalized_title"), + normalized_album=metadata.get("normalized_album"), + ): + refresh_failure += 1 + cached_tracks[cached_path] = _track_payload( + cached_path, + fp, + fingerprint_trace, + metadata, + ) + refresh_cached += 1 + else: + source = fingerprint_trace.get("source") + if source in {"stat_error", "cache_error"}: + refresh_failure += 1 + else: + refresh_missing += 1 + return ( + cached_tracks, + refresh_missing, + refresh_failure, + refresh_cached, + refresh_metadata, + lock_contention, + ) + tracks_map: dict[str, dict[str, object]] = {} pending_paths: list[str] = [] completed = 0 @@ -3039,6 +3106,28 @@ def _track_payload( ): failure_count += 1 + if refresh_after_flush: + if log_callback: + log_callback("Flushing fingerprint cache writes before refresh…") + flush_fingerprint_writes(db_path) + if log_callback: + log_callback("Refreshing fingerprint cache after flush…") + if fingerprint_status_callback: + fingerprint_status_callback("Refreshing fingerprint cache…") + ( + tracks_map, + missing_count, + failure_count, + cached_count, + metadata_refresh_count, + lock_contention_count, + ) = _load_cached_tracks(sorted_paths, lock_retry_attempts=8) + if lock_contention_count and log_callback: + log_callback( + "Fingerprint cache refresh encountered lock contention; " + f"retried {lock_contention_count} reads." + ) + tracks = [tracks_map[path] for path in sorted_paths if path in tracks_map] if log_callback: log_callback( @@ -3088,6 +3177,7 @@ def report_fingerprint_status(message: str) -> None: status_callback=status_callback, fingerprint_status_callback=fingerprint_status_callback, idle_callback=idle_callback, + refresh_after_flush=True, ) if not tracks: return PlanGenerationResult( From 0a8249fde199efd811fb5fe7684e7f20b7116398 Mon Sep 17 00:00:00 2001 From: Greensand321 Date: Sat, 10 Jan 2026 16:55:48 -0500 Subject: [PATCH 18/18] Revert "Refresh duplicate scan fingerprint cache after flush and add lock-aware cache reads" --- fingerprint_cache.py | 17 ++------- main_gui.py | 90 -------------------------------------------- 2 files changed, 3 insertions(+), 104 deletions(-) diff --git a/fingerprint_cache.py b/fingerprint_cache.py index 5b3d5c7..0e6de2a 100644 --- a/fingerprint_cache.py +++ b/fingerprint_cache.py @@ -486,9 +486,6 @@ def get_cached_fingerprint_metadata( *, retries: int = 3, retry_delay: float = 0.05, - lock_retry_attempts: int = 0, - lock_retry_delay: float = 0.1, - lock_retry_backoff: float = 2.0, ) -> tuple[str | None, dict[str, object] | None]: """Return cached fingerprint and metadata for path without computing new fingerprints.""" if log_callback is None: @@ -498,9 +495,7 @@ def get_cached_fingerprint_metadata( path = ensure_long_path(path) os.makedirs(os.path.dirname(db_path), exist_ok=True) - total_attempts = max(retries, 1) + max(lock_retry_attempts, 0) - lock_delay = max(lock_retry_delay, 0.0) - for attempt in range(total_attempts): + for attempt in range(retries): conn: sqlite3.Connection | None = None try: conn = _open_readonly_connection(db_path) @@ -564,18 +559,12 @@ def get_cached_fingerprint_metadata( trace["error"] = "" return None, None except sqlite3.OperationalError as e: - locked = "locked" in str(e).lower() - if not locked or attempt >= total_attempts - 1: + if "locked" not in str(e).lower() or attempt >= retries - 1: log_callback(f"! Fingerprint cache read failed: {e}") trace["source"] = "cache_error" trace["error"] = str(e) return None, None - if locked and attempt >= retries - 1: - trace["lock_contention"] = True - time.sleep(lock_delay) - lock_delay *= max(lock_retry_backoff, 1.0) - else: - time.sleep(retry_delay) + time.sleep(retry_delay) finally: if conn is not None: conn.close() diff --git a/main_gui.py b/main_gui.py index 5c66712..639a75e 100644 --- a/main_gui.py +++ b/main_gui.py @@ -2780,7 +2780,6 @@ def _gather_tracks( status_callback: Callable[[str, float], None] | None = None, fingerprint_status_callback: Callable[[str], None] | None = None, idle_callback: Callable[[], None] | None = None, - refresh_after_flush: bool = False, ) -> tuple[list[dict[str, object]], int, int]: if not library_root: return [], 0, 0 @@ -2909,72 +2908,6 @@ def _track_payload( }, } - def _load_cached_tracks( - paths: list[str], - *, - lock_retry_attempts: int = 0, - ) -> tuple[dict[str, dict[str, object]], int, int, int, int, int]: - cached_tracks: dict[str, dict[str, object]] = {} - refresh_missing = 0 - refresh_failure = 0 - refresh_cached = 0 - refresh_metadata = 0 - lock_contention = 0 - for cached_path in paths: - fingerprint_trace: dict[str, object] = {} - fp, cached_metadata = get_cached_fingerprint_metadata( - cached_path, - db_path, - log_callback=log_callback, - trace=fingerprint_trace, - lock_retry_attempts=lock_retry_attempts, - lock_retry_delay=0.1, - lock_retry_backoff=1.5, - ) - if fingerprint_trace.get("lock_contention"): - lock_contention += 1 - if fp: - metadata = _metadata_for_payload(cached_path, cached_metadata) - if _needs_metadata_refresh(cached_metadata): - metadata = _extract_metadata(cached_path) - refresh_metadata += 1 - if not store_fingerprint( - cached_path, - db_path, - None, - fp, - log_callback=log_callback, - ext=str(metadata.get("ext") or ""), - bitrate=int(metadata.get("bitrate") or 0), - sample_rate=int(metadata.get("sample_rate") or 0), - bit_depth=int(metadata.get("bit_depth") or 0), - normalized_artist=metadata.get("normalized_artist"), - normalized_title=metadata.get("normalized_title"), - normalized_album=metadata.get("normalized_album"), - ): - refresh_failure += 1 - cached_tracks[cached_path] = _track_payload( - cached_path, - fp, - fingerprint_trace, - metadata, - ) - refresh_cached += 1 - else: - source = fingerprint_trace.get("source") - if source in {"stat_error", "cache_error"}: - refresh_failure += 1 - else: - refresh_missing += 1 - return ( - cached_tracks, - refresh_missing, - refresh_failure, - refresh_cached, - refresh_metadata, - lock_contention, - ) - tracks_map: dict[str, dict[str, object]] = {} pending_paths: list[str] = [] completed = 0 @@ -3106,28 +3039,6 @@ def _load_cached_tracks( ): failure_count += 1 - if refresh_after_flush: - if log_callback: - log_callback("Flushing fingerprint cache writes before refresh…") - flush_fingerprint_writes(db_path) - if log_callback: - log_callback("Refreshing fingerprint cache after flush…") - if fingerprint_status_callback: - fingerprint_status_callback("Refreshing fingerprint cache…") - ( - tracks_map, - missing_count, - failure_count, - cached_count, - metadata_refresh_count, - lock_contention_count, - ) = _load_cached_tracks(sorted_paths, lock_retry_attempts=8) - if lock_contention_count and log_callback: - log_callback( - "Fingerprint cache refresh encountered lock contention; " - f"retried {lock_contention_count} reads." - ) - tracks = [tracks_map[path] for path in sorted_paths if path in tracks_map] if log_callback: log_callback( @@ -3177,7 +3088,6 @@ def report_fingerprint_status(message: str) -> None: status_callback=status_callback, fingerprint_status_callback=fingerprint_status_callback, idle_callback=idle_callback, - refresh_after_flush=True, ) if not tracks: return PlanGenerationResult(