billboard

from selenium import webdriver

from selenium.webdriver.chrome.service import Service

from selenium.webdriver.chrome.options import Options

from selenium.webdriver.common.by import By

from selenium.webdriver.support.ui import WebDriverWait

from selenium.webdriver.support import expected_conditions as EC

import pandas as pd

from webdriver_manager.chrome import ChromeDriverManager

from bs4 import BeautifulSoup

import os

from datetime import datetime, timedelta



def generate_date_range(start_date, end_date):
    """Return weekly 'YYYY-MM-DD' date strings from start_date through end_date.

    Dates are stepped seven days at a time starting at ``start_date``. If the
    last weekly step does not land exactly on ``end_date``, ``end_date`` itself
    is appended so the range is always closed on the requested end.

    Args:
        start_date: First date of the range (a ``datetime``/``date`` object).
        end_date: Last date of the range (same type as ``start_date``).

    Returns:
        A list of date strings in ascending order. The list is empty when
        ``start_date`` is later than ``end_date``.
    """
    # An inverted range has no dates; returning early also avoids indexing
    # into an empty list further down.
    if start_date > end_date:
        return []

    date_list = []
    current_date = start_date
    while current_date <= end_date:
        date_list.append(current_date.strftime('%Y-%m-%d'))
        current_date += timedelta(days=7)

    # Make the range inclusive of end_date even when it is not a whole
    # number of weeks after start_date.
    end_label = end_date.strftime('%Y-%m-%d')
    if date_list[-1] != end_label:
        date_list.append(end_label)
    return date_list



def fetch_data(date):
    """Load the Billboard Hot 100 page for ``date`` in headless Chrome.

    Args:
        date: Chart date string that is interpolated into the chart URL
            (the callers in this file pass 'YYYY-MM-DD' strings).

    Returns:
        The rendered page source as a string, or ``None`` when waiting for
        the first chart title element fails (for example, it does not appear
        within 20 seconds).
    """
    url = f'https://www.billboard.com/charts/hot-100/{date}/'
    print(f'Fetching data for {date}...')

    options = Options()
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')
    service = Service(ChromeDriverManager().install())
    driver = webdriver.Chrome(service=service, options=options)

    # try/finally guarantees the browser process is shut down on every path,
    # including when navigation or reading the page source raises.
    try:
        driver.get(url)

        try:
            # Block until at least one chart title has been rendered.
            WebDriverWait(driver, 20).until(
                EC.presence_of_element_located(
                    (By.CSS_SELECTOR, 'li.o-chart-results-list__item h3.c-title')
                )
            )
        except Exception as e:
            # Best effort: report the failure and let the caller skip this date.
            print(f"Page did not load in time: {e}")
            return None

        return driver.page_source
    finally:
        driver.quit()



def parse_data(html, tracked_artist):
    """Extract the chart entries whose artist label contains ``tracked_artist``.

    Args:
        html: Page source of a chart page.
        tracked_artist: Artist name to look for; matched case-insensitively
            as a substring of each entry's artist label.

    Returns:
        A list of ``[position, "Title (Artist)"]`` entries, where position is
        the entry's 1-based index among the selected title elements.
    """
    document = BeautifulSoup(html, 'html.parser')
    title_nodes = document.select('li.o-chart-results-list__item h3.c-title')
    print(f"Found {len(title_nodes)} chart items.")

    matches = []
    for position, title_node in enumerate(title_nodes, start=1):
        title = title_node.text.strip()

        # The artist label is taken from the next c-label span after the title.
        label_node = title_node.find_next('span', class_='c-label')
        if label_node:
            artist = label_node.text.strip()
        else:
            artist = ''

        if tracked_artist.lower() not in artist.lower():
            continue

        full_title = f"{title} ({artist})"
        matches.append([position, full_title])
        print(f"Found song: {full_title} at position {position}")

    return matches



def save_to_excel(all_songs, date_range):
    """Write one row per chart date and one column per tracked song to Excel.

    The workbook is saved as ``Tracked_Songs_Hot_100.xlsx`` in the user's
    Desktop folder. Each cell holds the song's chart position for that date,
    or 'N/A' when the song was not found on that date's chart.

    Args:
        all_songs: One list per chart date; each inner entry is
            ``[position, "Title (Artist)"]``.
        date_range: Chart date labels, paired positionally with ``all_songs``.

    Returns:
        None. Success or failure is reported with ``print``.
    """
    filename = os.path.join(os.path.expanduser('~/Desktop'), 'Tracked_Songs_Hot_100.xlsx')

    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # columns come out in the same order on every run (a set would reorder
    # them between runs because of string hash randomisation).
    tracked_headers = list(
        dict.fromkeys(song[1] for songs in all_songs for song in songs)
    )
    columns = ['Chart Date'] + tracked_headers

    all_data = []
    for date, songs in zip(date_range, all_songs):
        row = {'Chart Date': date}
        # Start every song at 'N/A', then fill in the positions seen this week.
        row.update(dict.fromkeys(tracked_headers, 'N/A'))
        for song in songs:
            row[song[1]] = song[0]
        all_data.append(row)

    df = pd.DataFrame(all_data, columns=columns)

    if df.empty:
        print("No data to save.")
        return

    try:
        df.to_excel(filename, index=False)
        print(f'Data saved to {filename}')
    except Exception as e:
        # Best effort: a missing Desktop folder or Excel engine should not
        # crash the script after all pages have been scraped.
        print(f'Error saving file: {e}')



def main():
    """Scrape each weekly chart in the configured range and export the results.

    For every date produced by ``generate_date_range`` the chart page is
    fetched and parsed for songs by the tracked artist. A date whose page
    could not be fetched contributes an empty result so that results stay
    aligned with the date list. The collected positions are written with
    ``save_to_excel`` when at least one song was found.
    """
    start_date = datetime(2024, 12, 14)  # New range: from December 14, 2024
    end_date = datetime(2025, 6, 21)     # to June 21, 2025
    date_range = generate_date_range(start_date, end_date)

    tracked_artist = "Sabrina Carpenter"

    all_songs = []
    for date in date_range:
        html_data = fetch_data(date)
        # Keep one entry per date, even on a failed fetch, so all_songs stays
        # positionally aligned with date_range.
        all_songs.append(parse_data(html_data, tracked_artist) if html_data else [])

    # all_songs holds one (possibly empty) list per date, so check whether any
    # week actually produced a match rather than whether the list is non-empty.
    if any(all_songs):
        save_to_excel(all_songs, date_range)
    else:
        print("No songs by the artist were found in any chart entry.")



# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':

    main()

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

billboard #8350

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

billboard #8350

Description

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue actions