From d4c78af56cc93fd15c3d98681643fd16e0fe7ae3 Mon Sep 17 00:00:00 2001 From: ramses <63118448+ramses-z@users.noreply.github.com> Date: Fri, 9 Feb 2024 21:15:16 -0500 Subject: [PATCH 1/2] fix: implements selenium for css scraping (#36) --- leetcode-api/leetcode.py | 23 +++++++++++++++++++++-- leetcode-api/leetcode_constants.py | 4 ++-- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/leetcode-api/leetcode.py b/leetcode-api/leetcode.py index 35b9326..1be7cd5 100644 --- a/leetcode-api/leetcode.py +++ b/leetcode-api/leetcode.py @@ -1,5 +1,9 @@ import requests import uvicorn +from selenium.webdriver import Chrome, ChromeOptions +from selenium.webdriver.common.by import By +from selenium.webdriver.support.ui import WebDriverWait +from selenium.webdriver.support import expected_conditions as EC from bs4 import BeautifulSoup as bs from fastapi import FastAPI, APIRouter @@ -60,6 +64,18 @@ def leetcodeScrape(username: str): # Get the user's LC URL user.name = username + # Initialize browser options for Selenium + browser_options = ChromeOptions() + + # Enables headless mode for Selenium + browser_options.add_argument("--headless=new") + + # Initialize Chrome web driver using browser options + driver = Chrome(options=browser_options) + + # Opens URL using the web driver + driver.get("https://leetcode.com/" + user.name) + # Get Raw HTML try: r = requests.get("https://leetcode.com/" + user.name) @@ -82,12 +98,15 @@ def leetcodeScrape(username: str): user.rank = int(raw_rank.replace(",", "")) # Get the most recent problem, if any - raw_recent = html_doc.find("span", class_=RECENT_DIV_CLASS).get_text() + raw_recent = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, RECENT_DIV_CLASS))).text # If submitted recently (LC uses the format "23 hours ago" or "1 day ago") user.recent = "hour" in raw_recent if user.recent: - user.recent_problem = html_doc.find("span", class_=RECENT_PROBLEM_DIV_CLASS).get_text() + user.recent_problem = driver.find_element(By.XPATH, RECENT_PROBLEM_DIV_CLASS).text + + # Shutdown web driver + driver.quit() # Prints user data return user diff --git a/leetcode-api/leetcode_constants.py b/leetcode-api/leetcode_constants.py index 4c9e09b..08a6a00 100644 --- a/leetcode-api/leetcode_constants.py +++ b/leetcode-api/leetcode_constants.py @@ -1,6 +1,6 @@ # On LC user page, div class for number of completed easy, medium, and hard DIFF_DIV_CLASS = "mr-[5px] text-base font-medium leading-[20px] text-label-1 dark:text-dark-label-1" RANK_DIV_CLASS = "ttext-label-1 dark:text-dark-label-1 font-medium" -RECENT_DIV_CLASS = "text-label-3 dark:text-dark-label-3 hidden whitespace-nowrap lc-md:inline" -RECENT_PROBLEM_DIV_CLASS = "text-label-1 dark:text-dark-label-1 font-medium line-clamp-1" +RECENT_DIV_CLASS = "//span[@class='text-label-3 dark:text-dark-label-3 lc-md:inline hidden whitespace-nowrap']" +RECENT_PROBLEM_DIV_CLASS = "//span[@class='text-label-1 dark:text-dark-label-1 line-clamp-1 font-medium']" STREAK_DIV_CLASS = "font-medium text-label-2 dark:text-dark-label-2" From 726e3c1de7561c6daf6fc159129087786b1f034e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 10 Feb 2024 02:59:39 +0000 Subject: [PATCH 2/2] ci: auto fixes from pre-commit.com hooks --- leetcode-api/leetcode.py | 8 ++++++-- leetcode-api/leetcode_constants.py | 8 ++++++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/leetcode-api/leetcode.py b/leetcode-api/leetcode.py index 1be7cd5..c00547a 100644 --- a/leetcode-api/leetcode.py +++ b/leetcode-api/leetcode.py @@ -66,7 +66,7 @@ def leetcodeScrape(username: str): # Initialize browser options for Selenium browser_options = ChromeOptions() - + # Enables headless mode for Selenium browser_options.add_argument("--headless=new") @@ -98,7 +98,11 @@ def leetcodeScrape(username: str): user.rank = int(raw_rank.replace(",", "")) # Get the most recent problem, if any - raw_recent = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, RECENT_DIV_CLASS))).text + raw_recent = ( + WebDriverWait(driver, 10) + .until(EC.element_to_be_clickable((By.XPATH, RECENT_DIV_CLASS))) + .text + ) # If submitted recently (LC uses the format "23 hours ago" or "1 day ago") user.recent = "hour" in raw_recent diff --git a/leetcode-api/leetcode_constants.py b/leetcode-api/leetcode_constants.py index 08a6a00..0def239 100644 --- a/leetcode-api/leetcode_constants.py +++ b/leetcode-api/leetcode_constants.py @@ -1,6 +1,10 @@ # On LC user page, div class for number of completed easy, medium, and hard DIFF_DIV_CLASS = "mr-[5px] text-base font-medium leading-[20px] text-label-1 dark:text-dark-label-1" RANK_DIV_CLASS = "ttext-label-1 dark:text-dark-label-1 font-medium" -RECENT_DIV_CLASS = "//span[@class='text-label-3 dark:text-dark-label-3 lc-md:inline hidden whitespace-nowrap']" -RECENT_PROBLEM_DIV_CLASS = "//span[@class='text-label-1 dark:text-dark-label-1 line-clamp-1 font-medium']" +RECENT_DIV_CLASS = ( + "//span[@class='text-label-3 dark:text-dark-label-3 lc-md:inline hidden whitespace-nowrap']" +) +RECENT_PROBLEM_DIV_CLASS = ( + "//span[@class='text-label-1 dark:text-dark-label-1 line-clamp-1 font-medium']" +) STREAK_DIV_CLASS = "font-medium text-label-2 dark:text-dark-label-2"