From a2348079ade44a305fd8fb5d7534ea210c4f7138 Mon Sep 17 00:00:00 2001
From: SoClose <33631880+SoClosee@users.noreply.github.com>
Date: Sun, 1 Mar 2026 06:46:48 +0100
Subject: [PATCH] fix(legacy/LinkedinScrapper.py): replace hardcoded search
 query with environment variables

---
 legacy/LinkedinScrapper.py | 90 ++++++++++++++++----------------------
 1 file changed, 37 insertions(+), 53 deletions(-)

diff --git a/legacy/LinkedinScrapper.py b/legacy/LinkedinScrapper.py
index 958316d..b5b5558 100644
--- a/legacy/LinkedinScrapper.py
+++ b/legacy/LinkedinScrapper.py
@@ -1,10 +1,11 @@
+import os
+
 from selenium import webdriver
 from selenium.webdriver.chrome.service import Service
 from webdriver_manager.chrome import ChromeDriverManager
 from selenium.webdriver.chrome.options import Options
 from selenium.webdriver.support.wait import WebDriverWait
 from selenium.webdriver.support import expected_conditions as EC
-from selenium.webdriver.support import expected_conditions as expect
 from selenium.webdriver.common.by import By
 from bs4 import BeautifulSoup
 import time
@@ -24,23 +25,13 @@
 from os.path import dirname, join
 
 
-
-username = input('Enter your linkedin Email : ')
-password = input('Enter your linkedin Password : ')
-file_name = input('Enter your file name : ')
-
-
-# username = ''
-# password = ''
-# file_name = ''
-
-# search_query = ""
-search_query = input('Enter your search query : ')
+username = os.getenv('LINKEDIN_EMAIL')
+password = os.getenv('LINKEDIN_PASSWORD')
+file_name = os.getenv('FILE_NAME')
+search_query = os.getenv('SEARCH_QUERY') or input('Enter your search query : ')
 search_query = search_query.replace(" ", "%20")
-place_name = input('Enter targed place name : ')
-# place_name = ""
-
+place_name = os.getenv('PLACE_NAME') or input('Enter targed place name : ')
 
 options = webdriver.ChromeOptions()
 options.add_experimental_option('excludeSwitches', ['enable-logging'])
@@ -49,14 +40,12 @@
 driver.maximize_window()
 
-
-
 def loging(): 
     driver.get('https://www.linkedin.com')
-    WebDriverWait(driver, 10).until(expect.visibility_of_element_located((By.XPATH, '//input[@id="session_key"]')))
+    WebDriverWait(driver, 10).until(EC.visibility_of_element_located((By.XPATH, '//input[@id="session_key"]')))
     driver.find_element(By.XPATH, '//input[@id="session_key"]').send_keys(username)
-    driver.find_element(By.XPATH,"//input[@id='session_password']").send_keys(password)
-    driver.find_element(By.XPATH,"//button[contains(text(),'Sign in')]").click()
-
+    driver.find_element(By.XPATH,"//input[@id=\'session_password\']").send_keys(password)
+    driver.find_element(By.XPATH,"//button[contains(text(),\'Sign in\')]").click()
 
 
 def scrap_available_profie():
@@ -67,26 +56,26 @@ def scrap_available_profie():
     count = 0
 
     driver.get('https://www.linkedin.com/search/results/PEOPLE/?keywords='+search_query)
-    WebDriverWait(driver, 10).until(expect.visibility_of_element_located((By.XPATH, '//button[text()="Locations"]')))
+    WebDriverWait(driver, 10).until(EC.visibility_of_element_located((By.XPATH, '//button[text()="Locations"]')))
     driver.find_element(By.XPATH, '//button[text()="Locations"]').click()
-    WebDriverWait(driver, 10).until(expect.visibility_of_element_located((By.XPATH, "//input[@placeholder='Add a location']")))
+    WebDriverWait(driver, 10).until(EC.visibility_of_element_located((By.XPATH, "//input[@placeholder='Add a location']")))
     driver.find_element(By.XPATH, "//input[@placeholder='Add a location']").send_keys(place_name)
-    time.sleep(2)
-    driver.find_element(By.XPATH, "//*[contains(@id, 'basic-result-')]").click()
-    time.sleep(2)
-    driver.find_element(By.XPATH, '//button[text()="Locations"]').click()
+    time.sleep(2)
+    driver.find_element(By.XPATH, "//*[contains(@id, 'basic-result-')]").click()
+    time.sleep(2)
+    driver.find_element(By.XPATH, '//button[text()="Locations"]').click()
     time.sleep(4)
-    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
-    time.sleep(2)
-    # html = driver.page_source
-    # soup = BeautifulSoup(html)
-    # with open("output4.html", "w", encoding = 'utf-8') as file:
-    #     file.write(str(soup.prettify()))
-    # pegination = driver.find_element(By.CSS_SELECTOR, '.artdeco-pagination.artdeco-pagination--has-controls.ember-view.pv5.ph2')
-    # list = driver.find_element(By.CLASS_NAME, 'artdeco-pagination__indicator')
+    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
+    time.sleep(2)
+    # html = driver.page_source
+    # soup = BeautifulSoup(html)
+    # with open("output4.html", "w", encoding = 'utf-8') as file:
+    #     file.write(str(soup.prettify()))
+    # pegination = driver.find_element(By.CSS_SELECTOR, '.artdeco-pagination.artdeco-pagination--has-controls.ember-view.pv5.ph2')
+    # list = driver.find_element(By.CLASS_NAME, 'artdeco-pagination__indicator')
 
     html = driver.page_source
     soup = BeautifulSoup(html, features="html.parser")
@@ -108,10 +97,9 @@ def scrap_available_profie():
             count = count + 1
 
     for designation in search_rslt_tag.find_all('div', {"class":"entity-result__primary-subtitle t-14 t-black t-normal"}):
-        #print (designation.text)
+        #print (designation.text)
         Linked_in_designation.append(designation.text)
-
 
     if(len(total_pages) == 0):
         last_page = 1
     else:
@@ -127,14 +115,14 @@ def scrap_available_profie():
         driver.get(driver.current_url+'&page='+str(x))
 
         time.sleep(4)
-        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
-        time.sleep(2)
-        # html = driver.page_source
-        # soup = BeautifulSoup(html)
-        # with open("output4.html", "w", encoding = 'utf-8') as file:
-        #     file.write(str(soup.prettify()))
-        # pegination = driver.find_element(By.CSS_SELECTOR, '.artdeco-pagination.artdeco-pagination--has-controls.ember-view.pv5.ph2')
-        # list = driver.find_element(By.CLASS_NAME, 'artdeco-pagination__indicator')
+        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
+        time.sleep(2)
+        # html = driver.page_source
+        # soup = BeautifulSoup(html)
+        # with open("output4.html", "w", encoding = 'utf-8') as file:
+        #     file.write(str(soup.prettify()))
+        # pegination = driver.find_element(By.CSS_SELECTOR, '.artdeco-pagination.artdeco-pagination--has-controls.ember-view.pv5.ph2')
+        # list = driver.find_element(By.CLASS_NAME, 'artdeco-pagination__indicator')
 
         html = driver.page_source
         soup = BeautifulSoup(html, features="html.parser")
@@ -156,13 +144,12 @@ def scrap_available_profie():
                 count = count + 1
 
         for designation in search_rslt_tag.find_all('div', {"class":"entity-result__primary-subtitle t-14 t-black t-normal"}):
-            #print (designation.text)
+            #print (designation.text)
             if (count2 % 2) == 0:
                 Linked_in_designation.append(designation.text)
             else:
                 count2 = count2 + 1
-
 
     Linkedin_link = list(dict.fromkeys(Linkedin_link))
     Linked_in_designation = list(dict.fromkeys(Linked_in_designation))
@@ -179,16 +166,13 @@ def scrap_available_profie():
     b = np.array(Linked_in_designation)
 
     df = pd.DataFrame({"Profile Link" : a})
-    df.to_csv(file_name+"_Profile_Link.csv", index=False)
+    df.to_csv(file_name+"_Profile_Link.csv", index=False)
 
     df2 = pd.DataFrame({"Designation" : b})
-    df2.to_csv(file_name+"_Designation.csv", index=False)
-
-
+    df2.to_csv(file_name+"_Designation.csv", index=False)
 
 loging()
 time.sleep(15)
-scrap_available_profie()
-
+scrap_available_profie()
\ No newline at end of file