# Memories-on-repeat/track_uris.py at main · Edwin574/Memories-on-repeat · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import math

import pandas as pd

from read_data import read_google_sheet
from spotify_service import get_spotify_client


# CSV-export URL of the Google Sheet; default `sheet_url` for build_track_uri_csv.
SHEET_URL = "https://docs.google.com/spreadsheets/d/1BL6G4fhTS8fGfumrVIFml1lICXLkKLhYmjJ_vlXK6Ik/export?format=csv"
# Default number of songs handled per batch in build_track_uri_csv.
BATCH_SIZE = 50
# Default output path used by save_results_to_csv and build_track_uri_csv.
DEFAULT_CSV = "spotify_results.csv"


def ordinal(n: int) -> str:
    """Return *n* followed by its English ordinal suffix (``1st``, ``2nd``, ``11th``)."""
    last_two = n % 100
    if last_two < 10 or last_two > 20:
        # Outside the 10-20 band the final digit alone picks the suffix.
        suffix = {1: "st", 2: "nd", 3: "rd"}.get(n % 10, "th")
    else:
        # 10 through 20 (notably 11, 12, 13) always take "th".
        suffix = "th"
    return str(n) + suffix


def fetch_spotify_track_data(sp, songs):
    """Search Spotify for each (title, artist) pair and collect the first hit's URI.

    Args:
        sp: Client object exposing ``search(q=..., type=..., limit=...)``.
        songs: Sized iterable of ``(title, artist)`` pairs.

    Returns:
        A list of dicts with keys ``title``, ``artist`` and ``spotify_uri``,
        one per song that had a match, in input order. Songs without a
        match are reported on stdout and left out.
    """
    matches = []

    for title, artist in songs:
        response = sp.search(q=f"{title} {artist}", type="track", limit=1)
        items = response['tracks']['items']

        if not items:
            print(f"Skipped '{title}' by '{artist}': track not found on Spotify.")
            continue

        # Only the top-ranked hit is kept.
        matches.append({"title": title, "artist": artist, "spotify_uri": items[0]['uri']})

    print(f"Searched {len(songs)} tracks. Found {len(matches)} Spotify matches.")
    return matches


def save_results_to_csv(results, filename=DEFAULT_CSV):
    """Write *results* to *filename* as a CSV with a fixed column order.

    Args:
        results: Records to write; the columns ``title``, ``artist`` and
            ``spotify_uri`` are selected from them.
        filename: Destination path; any existing file is overwritten.
    """
    column_order = ["title", "artist", "spotify_uri"]
    frame = pd.DataFrame(results, columns=column_order)
    # index=False keeps the DataFrame's row index out of the file.
    frame.to_csv(filename, index=False)
    row_count = len(results)
    print(f"Saved {row_count} rows to {filename}.")


def build_track_uri_csv(sheet_url=SHEET_URL, batch_size=BATCH_SIZE, csv_filename=DEFAULT_CSV):
    """Look up every song from the Google Sheet on Spotify and write the matches to a CSV.

    Songs are processed in slices of ``batch_size``. After each batch that
    yields at least one match, the CSV is rewritten with all matches found so
    far, so the file on disk is refreshed as batches complete.

    Args:
        sheet_url: Location passed to ``read_google_sheet``.
        batch_size: Number of songs searched per batch; must be at least 1.
        csv_filename: Path of the CSV file to write.

    Returns:
        ``csv_filename``, unchanged. The file is only written when at least
        one Spotify match is found.

    Raises:
        ValueError: If there are songs to process and ``batch_size`` is
            smaller than 1.
    """
    # read_google_sheet returns a pair; only the first element is used here.
    songs, _ = read_google_sheet(sheet_url)
    total_songs = len(songs)

    if total_songs == 0:
        print("No songs found in the Google Sheet. Nothing to process.")
        return csv_filename

    # A step of zero makes the division and range() below fail with an opaque
    # error, and a negative step yields an empty range, which would skip every
    # song while still reporting success.
    if batch_size < 1:
        raise ValueError(f"batch_size must be at least 1, got {batch_size!r}")

    sp = get_spotify_client()
    accumulated_results = []
    total_batches = math.ceil(total_songs / batch_size)

    batch_starts = range(0, total_songs, batch_size)
    for batch_number, batch_start in enumerate(batch_starts, start=1):
        batch = songs[batch_start:batch_start + batch_size]

        batch_results = fetch_spotify_track_data(sp, batch)

        if batch_results:
            accumulated_results.extend(batch_results)
            # Rewrite the whole file with everything found so far.
            save_results_to_csv(accumulated_results, filename=csv_filename)
            print(f"{ordinal(batch_number)} batch URIs loaded to csv... ({len(batch_results)} new tracks)")
        else:
            print(f"{ordinal(batch_number)} batch produced no Spotify matches; CSV unchanged.")

    print(f"Finished processing {total_songs} songs across {total_batches} batches.")
    return csv_filename


def main() -> None:
    """Run build_track_uri_csv with its default arguments, discarding the returned path."""
    build_track_uri_csv()


# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()