Skip to content

billboard #8350

@lucatronca

Description

@lucatronca

from selenium import webdriver

from selenium.webdriver.chrome.service import Service

from selenium.webdriver.chrome.options import Options

from selenium.webdriver.common.by import By

from selenium.webdriver.support.ui import WebDriverWait

from selenium.webdriver.support import expected_conditions as EC

import pandas as pd

from webdriver_manager.chrome import ChromeDriverManager

from bs4 import BeautifulSoup

import os

from datetime import datetime, timedelta

def generate_date_range(start_date, end_date):
    """Return weekly dates from start_date to end_date (inclusive) as strings.

    Dates are formatted as 'YYYY-MM-DD' and are spaced seven days apart,
    starting at ``start_date``. If the weekly stepping does not land exactly
    on ``end_date``, ``end_date`` is appended as a final entry so the range
    always finishes on it.

    Args:
        start_date: First date of the range (a ``datetime``/``date`` object).
        end_date: Last date of the range (a ``datetime``/``date`` object).

    Returns:
        A list of date strings; empty when ``start_date`` is after
        ``end_date``.
    """
    date_list = []
    current_date = start_date
    while current_date <= end_date:
        date_list.append(current_date.strftime('%Y-%m-%d'))
        current_date += timedelta(days=7)

    # Check for a non-empty list first: with start_date > end_date the loop
    # never runs and date_list[-1] would raise IndexError.
    end_label = end_date.strftime('%Y-%m-%d')
    if date_list and date_list[-1] != end_label:
        date_list.append(end_label)

    return date_list

def fetch_data(date):
    """Load the Billboard Hot 100 page for one chart date and return its HTML.

    A headless Chrome session is started for the request and is always shut
    down before this function returns or raises.

    Args:
        date: Chart date string that is interpolated into the chart URL
            (main() passes the 'YYYY-MM-DD' strings from
            generate_date_range()).

    Returns:
        The page source as a string, or None when no chart title element
        was located within the 20-second wait.
    """
    url = f'https://www.billboard.com/charts/hot-100/{date}/'
    print(f'Fetching data for {date}...')

    options = Options()
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')
    service = Service(ChromeDriverManager().install())
    driver = webdriver.Chrome(service=service, options=options)

    # try/finally guarantees the browser process is released even when
    # driver.get() or reading page_source raises.
    try:
        driver.get(url)

        try:
            # Wait until at least one chart title has been rendered.
            WebDriverWait(driver, 20).until(
                EC.presence_of_element_located(
                    (By.CSS_SELECTOR, 'li.o-chart-results-list__item h3.c-title')
                )
            )
        except Exception as e:
            # Best effort: report the failure and let the caller skip this date.
            print(f"Page did not load in time: {e}")
            return None

        return driver.page_source
    finally:
        driver.quit()

def parse_data(html, tracked_artist):
    """Extract the chart entries whose artist label contains tracked_artist.

    Args:
        html: HTML source of a chart page.
        tracked_artist: Artist name to look for; matched case-insensitively
            as a substring of each entry's artist label.

    Returns:
        A list of ``[position, "Title (Artist)"]`` lists in page order, where
        position is the 1-based index of the title among the selected chart
        title elements.
    """
    soup = BeautifulSoup(html, 'html.parser')
    songs = []

    chart_items = soup.select('li.o-chart-results-list__item h3.c-title')
    print(f"Found {len(chart_items)} chart items.")

    # Lower-case the search term once instead of on every iteration.
    wanted = tracked_artist.lower()

    for position, item in enumerate(chart_items, start=1):
        title = item.text.strip()
        # The artist label is taken from the first 'c-label' span that
        # follows the title element in the document.
        artist_element = item.find_next('span', class_='c-label')
        artist = artist_element.text.strip() if artist_element else ''

        if wanted in artist.lower():
            full_title = f"{title} ({artist})"
            songs.append([position, full_title])
            print(f"Found song: {full_title} at position {position}")

    return songs

def save_to_excel(all_songs, date_range):
    """Write a date-by-song table of chart positions to an Excel file.

    The file is saved as 'Tracked_Songs_Hot_100.xlsx' in the user's Desktop
    directory. Each row is one chart date; each remaining column is one
    "Title (Artist)" string, holding the song's position on that date or
    'N/A' when it was not found.

    Args:
        all_songs: One list per chart date, each containing
            ``[position, title_with_artist]`` pairs.
        date_range: Chart dates, paired positionally with ``all_songs``.

    Returns:
        None. Success or failure of the write is reported via print().
    """
    filename = os.path.join(
        os.path.expanduser('~/Desktop'), 'Tracked_Songs_Hot_100.xlsx'
    )

    # Build the headers from every title seen on any date. Sorted so the
    # column order is reproducible; iterating a set gives an arbitrary order.
    tracked_headers = sorted(
        {song[1] for songs in all_songs for song in songs}
    )
    columns = ['Chart Date'] + tracked_headers

    all_data = []
    for date, songs in zip(date_range, all_songs):
        row = {'Chart Date': date}
        # Start every tracked song at 'N/A', then fill in what charted.
        row.update(dict.fromkeys(tracked_headers, 'N/A'))
        for song in songs:
            position, title_with_artist = song[0], song[1]
            row[title_with_artist] = position
        all_data.append(row)

    df = pd.DataFrame(all_data, columns=columns)

    if df.empty:
        print("No data to save.")
        return

    try:
        df.to_excel(filename, index=False)
        print(f'Data saved to {filename}')
    except Exception as e:
        # Best effort: report the failure instead of aborting the script.
        print(f'Error saving file: {e}')

def main():
    """Scrape weekly Hot 100 charts for one artist and export the positions.

    Builds the list of chart dates, fetches and parses each chart page for
    the tracked artist, and writes the collected positions to Excel when at
    least one song was found.
    """
    start_date = datetime(2024, 12, 14)  # New range: from December 14, 2024
    end_date = datetime(2025, 6, 21)     # to June 21, 2025
    date_range = generate_date_range(start_date, end_date)

    tracked_artist = "Sabrina Carpenter"

    all_songs = []
    for date in date_range:
        html_data = fetch_data(date)
        # Append exactly one entry per date (empty on a failed fetch) so that
        # all_songs stays aligned with date_range when the two are zipped.
        if html_data:
            all_songs.append(parse_data(html_data, tracked_artist))
        else:
            all_songs.append([])

    # any() rather than the truthiness of the outer list: all_songs holds one
    # (possibly empty) list per date, so it is truthy even when nothing was
    # found and the message below could never be printed.
    if any(all_songs):
        save_to_excel(all_songs, date_range)
    else:
        print("No songs by the artist were found in any chart entry.")

# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

Metadata

Metadata

Assignees

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions