Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 37 additions & 53 deletions legacy/LinkedinScrapper.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
import os

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support import expected_conditions as expect
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup
import time
Expand All @@ -24,23 +25,13 @@

from os.path import dirname, join


# Imported here rather than at the top of the file so this block is
# self-contained; getpass reads the password without echoing it.
from getpass import getpass


def _env_or_prompt(var_name, prompt, reader=input):
    """Return environment variable *var_name*, or ask the user for it.

    An unset or empty variable falls back to ``reader(prompt)``, so the
    script works both unattended (values supplied through the environment)
    and interactively (values typed at the terminal).
    """
    return os.getenv(var_name) or reader(prompt)


# Each setting is resolved exactly once. Previously the interactive answers
# were overwritten by os.getenv(), which left username/password/file_name as
# None whenever the variable was not set.
username = _env_or_prompt('LINKEDIN_EMAIL', 'Enter your linkedin Email : ')
password = _env_or_prompt(
    'LINKEDIN_PASSWORD', 'Enter your linkedin Password : ', reader=getpass
)
file_name = _env_or_prompt('FILE_NAME', 'Enter your file name : ')

search_query = _env_or_prompt('SEARCH_QUERY', 'Enter your search query : ')
# The query is appended to the search URL as-is, so spaces must be encoded.
search_query = search_query.replace(" ", "%20")

place_name = _env_or_prompt('PLACE_NAME', 'Enter targed place name : ')

# Chrome launch options. The 'enable-logging' switch is excluded from the
# switches Chrome is started with (presumably to keep the console quiet —
# TODO confirm).
options = webdriver.ChromeOptions()
options.add_experimental_option('excludeSwitches', ['enable-logging'])
Expand All @@ -49,14 +40,12 @@
driver.maximize_window()



def loging():
    """Open the LinkedIn home page and submit the sign-in form.

    Uses the module-level ``driver``, ``username`` and ``password``.
    Waits up to 10 seconds for the e-mail field to become visible, fills in
    both credentials once, and clicks the "Sign in" button once.

    Raises:
        selenium.common.exceptions.TimeoutException: the e-mail field did
            not become visible within 10 seconds.
        selenium.common.exceptions.NoSuchElementException: the password
            field or the "Sign in" button is not present on the page.
    """
    driver.get('https://www.linkedin.com')

    # until() returns the located element, so it can be reused directly
    # instead of being looked up a second time.
    email_field = WebDriverWait(driver, 10).until(
        EC.visibility_of_element_located((By.XPATH, '//input[@id="session_key"]'))
    )
    email_field.send_keys(username)

    driver.find_element(By.XPATH, "//input[@id='session_password']").send_keys(password)
    driver.find_element(By.XPATH, "//button[contains(text(),'Sign in')]").click()


def scrap_available_profie():
Expand All @@ -67,26 +56,26 @@ def scrap_available_profie():
count = 0

driver.get('https://www.linkedin.com/search/results/PEOPLE/?keywords='+search_query)
WebDriverWait(driver, 10).until(expect.visibility_of_element_located((By.XPATH, '//button[text()="Locations"]')))
WebDriverWait(driver, 10).until(EC.visibility_of_element_located((By.XPATH, '//button[text()="Locations"]')))

driver.find_element(By.XPATH, '//button[text()="Locations"]').click()

WebDriverWait(driver, 10).until(expect.visibility_of_element_located((By.XPATH, "//input[@placeholder='Add a location']")))
WebDriverWait(driver, 10).until(EC.visibility_of_element_located((By.XPATH, "//input[@placeholder='Add a location']")))
driver.find_element(By.XPATH, "//input[@placeholder='Add a location']").send_keys(place_name)
time.sleep(2)
driver.find_element(By.XPATH, "//*[contains(@id, 'basic-result-')]").click()
time.sleep(2)
driver.find_element(By.XPATH, '//button[text()="Locations"]').click()
time.sleep(2)
driver.find_element(By.XPATH, "//*[contains(@id, 'basic-result-')]").click()
time.sleep(2)
driver.find_element(By.XPATH, '//button[text()="Locations"]').click()

time.sleep(4)
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
time.sleep(2)
# html = driver.page_source
# soup = BeautifulSoup(html)
# with open("output4.html", "w", encoding = 'utf-8') as file:
# file.write(str(soup.prettify()))
# pegination = driver.find_element(By.CSS_SELECTOR, '.artdeco-pagination.artdeco-pagination--has-controls.ember-view.pv5.ph2')
# list = driver.find_element(By.CLASS_NAME, 'artdeco-pagination__indicator')
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
time.sleep(2)
# html = driver.page_source
# soup = BeautifulSoup(html)
# with open("output4.html", "w", encoding = 'utf-8') as file:
# file.write(str(soup.prettify()))
# pegination = driver.find_element(By.CSS_SELECTOR, '.artdeco-pagination.artdeco-pagination--has-controls.ember-view.pv5.ph2')
# list = driver.find_element(By.CLASS_NAME, 'artdeco-pagination__indicator')

html = driver.page_source
soup = BeautifulSoup(html, features="html.parser")
Expand All @@ -108,10 +97,9 @@ def scrap_available_profie():
count = count + 1

for designation in search_rslt_tag.find_all('div', {"class":"entity-result__primary-subtitle t-14 t-black t-normal"}):
#print (designation.text)
#print (designation.text)
Linked_in_designation.append(designation.text)


if(len(total_pages) == 0):
last_page = 1
else:
Expand All @@ -127,14 +115,14 @@ def scrap_available_profie():
driver.get(driver.current_url+'&page='+str(x))

time.sleep(4)
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
time.sleep(2)
# html = driver.page_source
# soup = BeautifulSoup(html)
# with open("output4.html", "w", encoding = 'utf-8') as file:
# file.write(str(soup.prettify()))
# pegination = driver.find_element(By.CSS_SELECTOR, '.artdeco-pagination.artdeco-pagination--has-controls.ember-view.pv5.ph2')
# list = driver.find_element(By.CLASS_NAME, 'artdeco-pagination__indicator')
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
time.sleep(2)
# html = driver.page_source
# soup = BeautifulSoup(html)
# with open("output4.html", "w", encoding = 'utf-8') as file:
# file.write(str(soup.prettify()))
# pegination = driver.find_element(By.CSS_SELECTOR, '.artdeco-pagination.artdeco-pagination--has-controls.ember-view.pv5.ph2')
# list = driver.find_element(By.CLASS_NAME, 'artdeco-pagination__indicator')

html = driver.page_source
soup = BeautifulSoup(html, features="html.parser")
Expand All @@ -156,13 +144,12 @@ def scrap_available_profie():
count = count + 1

for designation in search_rslt_tag.find_all('div', {"class":"entity-result__primary-subtitle t-14 t-black t-normal"}):
#print (designation.text)
#print (designation.text)
if (count2 % 2) == 0:
Linked_in_designation.append(designation.text)
else:
count2 = count2 + 1


Linkedin_link = list(dict.fromkeys(Linkedin_link))
Linked_in_designation = list(dict.fromkeys(Linked_in_designation))

Expand All @@ -179,16 +166,13 @@ def scrap_available_profie():
b = np.array(Linked_in_designation)

df = pd.DataFrame({"Profile Link" : a})
df.to_csv(file_name+"_Profile_Link.csv", index=False)
df.to_csv(file_name+"_Profile_Link.csv", index=False)

df2 = pd.DataFrame({"Designation" : b})
df2.to_csv(file_name+"_Designation.csv", index=False)


df2.to_csv(file_name+"_Designation.csv", index=False)



# Script entry sequence: sign in, pause, then scrape once.
loging()
# Fixed pause after submitting the sign-in form (presumably to let the
# post-login page settle or allow a manual verification step — TODO confirm).
time.sleep(15)
# Called a single time; a second call would repeat the whole search and
# rewrite the same CSV files.
scrap_available_profie()