Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
venv
__pycache__
__pycache__
.env
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
# Project
# Project
testing/sk
1 change: 1 addition & 0 deletions SPARC
Submodule SPARC added at f30fe6
67 changes: 62 additions & 5 deletions app.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,73 @@
from flask import Flask, render_template, request, jsonify
from fpdf import FPDF
from docx import Document
import os

# Flask app configured to serve templates/static assets from the
# "Application" package directory rather than the default locations.
# (The earlier bare `app = Flask(__name__)` was a leftover diff line and
# made the first construction dead — removed.)
app = Flask(
    __name__,
    template_folder=os.path.join("Application", "templates"),
    static_folder=os.path.join("Application", "static"),
)


# In-memory, process-local store for the submitted profile links and the
# running chat transcript. NOTE(review): state is lost on restart and is
# shared across all clients — confirm that is acceptable.
data_store = {
    "linkedin": "",
    "github": "",
    "chat": [],
}

@app.route('/')
def index():
    """Render the single-page chat UI.

    (The diff remnant `def home():` above the renamed `def index():` left
    a def with no body — only the added `index` definition is kept.)
    """
    return render_template('index.html')

@app.route('/submit_links', methods=['POST'])
def submit_links():
    """Store the submitted LinkedIn and GitHub URLs in the in-memory store."""
    for field in ("linkedin", "github"):
        data_store[field] = request.form.get(field)
    return jsonify({"message": "Links submitted successfully!"})

@app.route('/chat', methods=['POST'])
def chat():
    """Record one chat turn and return a canned bot reply.

    Expects a JSON body with a "message" key; appends the user/bot pair to
    ``data_store["chat"]`` so /export can reproduce the transcript.
    (The previous handler body — a placeholder response with its own
    `return` — was a leftover removed diff hunk above the new code and has
    been dropped.)
    """
    user_message = request.json.get("message")
    bot_response = f"You said: {user_message}. Tell me more!"

    data_store["chat"].append({"user": user_message, "bot": bot_response})

    return jsonify({"response": bot_response})

@app.route('/export', methods=['POST'])
def export():
    """Write the stored links and chat transcript to a PDF or Word file.

    Reads ``export_type`` from the form ("pdf" or "word"), saves the file
    in the working directory, and returns a JSON status payload.
    """
    requested = request.form.get("export_type")

    if requested == "pdf":
        pdf = FPDF()
        pdf.add_page()
        pdf.set_font("Arial", size=12)
        # Header: centered title followed by the two profile links.
        pdf.cell(200, 10, txt="Chatbot Interaction Export", ln=True, align='C')
        pdf.cell(200, 10, txt=f"LinkedIn: {data_store['linkedin']}", ln=True)
        pdf.cell(200, 10, txt=f"GitHub: {data_store['github']}", ln=True)

        # One cell per line of the transcript, user turn first.
        for turn in data_store["chat"]:
            pdf.cell(200, 10, txt=f"User: {turn['user']}", ln=True)
            pdf.cell(200, 10, txt=f"Bot: {turn['bot']}", ln=True)

        pdf.output("chat_export.pdf")
        return jsonify({"message": "Exported as PDF successfully!", "file": "chat_export.pdf"})

    if requested == "word":
        doc = Document()
        doc.add_heading("Chatbot Interaction Export", level=1)
        doc.add_paragraph(f"LinkedIn: {data_store['linkedin']}")
        doc.add_paragraph(f"GitHub: {data_store['github']}")

        for turn in data_store["chat"]:
            doc.add_paragraph(f"User: {turn['user']}")
            doc.add_paragraph(f"Bot: {turn['bot']}")

        doc.save("chat_export.docx")
        return jsonify({"message": "Exported as Word successfully!", "file": "chat_export.docx"})

    return jsonify({"message": "Invalid export type!"})

# NOTE(review): debug=True enables the Werkzeug interactive debugger and
# auto-reload — fine for local development, but it must not be enabled in
# production (the debugger allows arbitrary code execution).
if __name__ == '__main__':
    app.run(debug=True)
74 changes: 74 additions & 0 deletions application/app/github.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
import json
import requests
import base64
import json

def save_json(filename, json_data):
    """Serialize *json_data* to ``<filename>.json`` in the working directory.

    The original passed ``indent=True``, which json treats as ``indent=1``
    (True == 1) — almost certainly not the intended pretty-print width.
    Uses a real indent and an explicit UTF-8 encoding.
    """
    with open('{}.json'.format(filename), 'w', encoding='utf-8') as fp:
        json.dump(json_data, fp, indent=4)


def extract_data(DataNeeded, DataFromGithub):
    """Return the subset of *DataFromGithub* whose keys appear in *DataNeeded*."""
    return {key: value for key, value in DataFromGithub.items() if key in DataNeeded}

class User:
    """Fetch public profile and repository data for one GitHub login via the REST API.

    NOTE(review): all requests are unauthenticated — the API rate limit is
    60 requests/hour per IP, and each repo costs one extra README request.
    """

    def __init__(self, Username):
        self.Username = Username
        # GitHub REST v3 user endpoint for this login.
        self.UserURL = 'https://api.github.com/users/{}'.format(self.Username)

    def get_user_stats(self):
        """Collect profile fields plus per-repo README excerpts.

        Side effects: sets ``self.UserData`` and writes ``output_of_User.json``.
        Returns the collected data as a pretty-printed JSON string.
        """
        # timeout= prevents an unresponsive API from hanging the call forever
        # (the original requests.get had no timeout).
        user_data = requests.get(self.UserURL, timeout=10).json()
        data_needed = [
            'name',
            'type',
            'company',
            'blog',
            'location',
            'email',
            'public_repos',
            'followers',
        ]
        self.UserData = extract_data(data_needed, user_data)

        repos_url = 'https://api.github.com/users/{}/repos'.format(self.Username)
        repos_data = requests.get(repos_url, timeout=10).json()
        # On errors (rate limit, unknown user) GitHub returns a dict with a
        # "message" key, not a list; iterating that dict would yield key
        # strings. Treat it as "no repositories".
        if not isinstance(repos_data, list):
            repos_data = []

        repos_list = []
        for repo in repos_data:
            repos_list.append({
                "name": repo.get("name"),
                "readme": self._fetch_readme(repo.get("name")),
            })

        self.UserData["repositories"] = repos_list

        save_json('output_of_User', self.UserData)

        return json.dumps(self.UserData, indent=4)

    def _fetch_readme(self, repo_name):
        """Return up to 5000 chars of the repo's README, or None if unavailable."""
        readme_url = "https://api.github.com/repos/{}/{}/readme".format(self.Username, repo_name)
        readme_response = requests.get(readme_url, timeout=10)
        if readme_response.status_code != 200:
            return None
        readme_json = readme_response.json()
        if "content" not in readme_json or readme_json.get("encoding") != "base64":
            return None
        try:
            decoded_content = base64.b64decode(readme_json["content"]).decode('utf-8')
        except Exception as e:
            # Keep the original best-effort behavior: report, don't raise.
            return "Error decoding README: {}".format(e)
        # Cap the excerpt so one huge README doesn't bloat the export.
        return decoded_content[:5000]

# username_input = 'username'
# user = User(username_input)
# data = user.get_user_stats()
# print(data)
161 changes: 161 additions & 0 deletions application/app/linkedin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
import os
import json
import time
from dotenv import load_dotenv
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException
from selenium.webdriver.chrome.options import Options

# Chrome options for the module-level driver below.
chrome_options = Options()
chrome_options.add_argument("--disable-gpu")
# chrome_options.add_argument("--headless")
chrome_options.add_argument("--no-sandbox")

# NOTE(review): this launches a Chrome instance as a side effect of merely
# importing the module, and it appears redundant — the __main__ block
# rebinds `driver` via init_driver() and this instance is never quit.
# Confirm nothing else relies on it before removing.
driver = webdriver.Chrome(options=chrome_options)

# Credentials are read from a .env file / environment; both are None if
# the variables are unset, which would make linkedin_login fail.
load_dotenv()
LINKEDIN_USERNAME = os.getenv('LINKEDIN_USERNAME')
LINKEDIN_PASSWORD = os.getenv('LINKEDIN_PASSWORD')

def init_driver():
    """Create and return a maximized (non-headless) Chrome WebDriver."""
    opts = webdriver.ChromeOptions()
    # opts.add_argument('--headless')  # kept visible for manual debugging
    opts.add_argument('--no-sandbox')
    opts.add_argument('--disable-dev-shm-usage')
    browser = webdriver.Chrome(options=opts)
    browser.maximize_window()
    return browser

def linkedin_login(driver):
    """Sign in to LinkedIn using credentials from the environment."""
    driver.get("https://www.linkedin.com/login")
    wait = WebDriverWait(driver, 10)
    wait.until(EC.presence_of_element_located((By.ID, "username")))

    username_field = driver.find_element(By.ID, "username")
    username_field.send_keys(LINKEDIN_USERNAME)
    password_field = driver.find_element(By.ID, "password")
    password_field.send_keys(LINKEDIN_PASSWORD)
    driver.find_element(By.XPATH, "//button[@type='submit']").click()

    # The global nav search box only exists after a successful login.
    wait.until(EC.presence_of_element_located((By.ID, "global-nav-search")))

def _list_section(driver, section_id):
    """Return the text of each <li> inside the section *section_id*, or [] if absent."""
    try:
        section = driver.find_element(By.ID, section_id)
        return [item.text for item in section.find_elements(By.CSS_SELECTOR, "li")]
    except NoSuchElementException:
        return []


def scrape_linkedin_profile(driver, profile_url):
    """Scrapes LinkedIn profile data and returns a dictionary.

    Keys: name, company, education, licenses_certifications, projects,
    volunteering, skills, recommendations, honors_awards, posts_captions.
    Missing sections yield None (scalars) or [] (lists) instead of raising.

    The seven identical list-section try/except blocks of the original are
    folded into one data-driven loop over (key, section id) pairs; the dict
    key order is unchanged.
    """
    data = {}
    driver.get(profile_url)
    time.sleep(5)  # crude wait for the dynamic page — TODO: use WebDriverWait

    try:
        name_element = driver.find_element(By.CSS_SELECTOR, "h1.text-heading-xlarge")
        data['name'] = name_element.text
    except NoSuchElementException:
        data['name'] = None

    try:
        company_element = driver.find_element(By.XPATH, "//div[contains(@class, 'pv-entity__secondary-title')]")
        data['company'] = company_element.text.strip()
    except NoSuchElementException:
        data['company'] = None

    # All list-style sections share the same scrape pattern.
    for key, section_id in (
        ('education', 'education-section'),
        ('licenses_certifications', 'licenses-certifications-section'),
        ('projects', 'projects-section'),
        ('volunteering', 'volunteering-section'),
        ('skills', 'skills-section'),
        ('recommendations', 'recommendations-section'),
        ('honors_awards', 'honors-awards-section'),
    ):
        data[key] = _list_section(driver, section_id)

    try:
        posts_tab = driver.find_element(By.XPATH, "//a[contains(@href, '/detail/recent-activity/')]")
        posts_tab.click()
        time.sleep(5)  # Wait for posts to load
        posts = driver.find_elements(By.CSS_SELECTOR, "div.feed-shared-update-v2")
        posts_captions = []
        for post in posts:
            try:
                caption = post.find_element(By.CSS_SELECTOR, "span.break-words").text
                posts_captions.append(caption)
            except NoSuchElementException:
                # Posts without a text caption (e.g. pure reshares) are skipped.
                continue
        data['posts_captions'] = posts_captions
    except NoSuchElementException:
        data['posts_captions'] = []

    return data

def save_data(data, json_filename="linkedin_data.json", txt_filename="linkedin_data.txt"):
    """Persist the scraped profile *data* as pretty-printed JSON plus a plain-text dump."""
    with open(json_filename, "w", encoding="utf-8") as jf:
        json.dump(data, jf, indent=4, ensure_ascii=False)

    # One "key:\n<value>\n\n" paragraph per field, in insertion order.
    paragraphs = [f"{key}:\n{value}\n\n" for key, value in data.items()]
    with open(txt_filename, "w", encoding="utf-8") as tf:
        tf.write("".join(paragraphs))

if __name__ == "__main__":
    # profile_url = input("Enter the LinkedIn profile URL to scrape: ")
    # NOTE(review): a real person's profile URL is hard-coded for testing —
    # remove before sharing/committing, and restore the input() prompt.
    profile_url ="https://www.linkedin.com/in/sanskriti-joshi-408575266/"
    driver = init_driver()
    try:
        linkedin_login(driver)
        time.sleep(3)
        scraped_data = scrape_linkedin_profile(driver, profile_url)
        save_data(scraped_data)
        print("Scraping complete. Data saved in 'linkedin_data.json' and 'linkedin_data.txt'.")
    finally:
        try:
            driver.quit()
        # OSError here covers chromedriver's service process already being
        # gone; it is deliberately ignored so cleanup never masks an error.
        except OSError as e:
            print("Error terminating service process (ignored):", e)

Loading