Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added __pycache__/profileExtractor.cpython-310.pyc
Binary file not shown.
113 changes: 44 additions & 69 deletions main.py
Original file line number Diff line number Diff line change
@@ -1,70 +1,45 @@

import ollama
import os
import time
import profileExtractor

# Clear the console (Windows-only; POSIX systems would need "clear").
os.system("cls")

# System prompt that turns llama3 into a Reddit profile analyzer.
custom_information = "You are a Reddit Profile Analyzer. I will give you the following information about a reddit account: username, total karma, date of creation of the account, list of subreddits the user has posted in, posts of the user, and mod status (if the user is a moderator or not). Your job is to return the following details of the account (based on the info I gave you) organized in bullet points: - Real name of the person (if found or based on assumptions of information in posts, if not, write 'NOT FOUND') - Gender (if found or based on assumptions of information in posts, if not, write 'NOT FOUND') - Occupation (if found or based on assumptions of information in posts, if not, write 'NOT FOUND') - Country (if found or based on assumptions of information in posts, if not, write 'NOT FOUND') - City (if found or based on assumptions of information in posts, if not, write 'NOT FOUND') - Overview description of the person (paragraph/paragraphs) - Psychological analysis (paragraph/paragraphs) (based on assumptions and information in posts) - General assumptions (paragraph/paragraphs) (based on assumptions and information in posts) - Brief conclusion (paragraph/paragraphs) (summary of the profile) If you cannot find some of the information nor have any assumptions based on the content, you will write 'NOT FOUND' in the corresponding not found information. The information should be presented directly without any introductory phrases like 'here is the info' or 'this is what I found'."

# Ollama Modelfile text: base model plus the system prompt above.
modelfile = f"""
from llama3
system {custom_information}
"""

# Start time of model creation
start_time = time.time()

print("\033[34mCreating model...\033[0m")

try:
    ollama.create(model="redditalan", modelfile=modelfile)
except Exception as e:
    print("\033[31mError creating model\033[0m")
    print(e)
    exit()

# End time of model creation
end_time = time.time()

print("\033[34mModel created in", end_time - start_time, "seconds\033[0m")
print("\033[32mModel ready to chat\033[0m")

username = input("\n Enter the username of the Reddit user (without u/): ")

# Extract information from the user
print("\033[34mExtracting information...\033[0m")
userdata = profileExtractor.getUserInfo(username)

print("\033[32mInformation extracted successfully\033[0m")
print("\033[34mFormatting information...\033[0m")
# Format the information extracted
userString = profileExtractor.infoFormatter(userdata[0], userdata[1], userdata[2], userdata[3], userdata[4], userdata[5])
# BUGFIX: removed the stray ")" that was inside the user-visible message.
print("\033[32mInformation formatted successfully\033[0m")

print("\033[34mThe model is thinking...\033[0m")

# Time it takes to run the prompt
start_time2 = time.time()
# Chat with the model
response = ollama.chat(model="redditalan", messages=[{
    'role': 'user',
    'content': userString
}])
print(f"\033[34mModel took {time.time() - start_time2} seconds to answer \033[0m")

print("\033[32mModel has responded\033[0m")

# Print the response.
# BUGFIX: the original nested double quotes inside a double-quoted f-string,
# which is a SyntaxError on Python < 3.12; use single-quoted subscripts.
print(f"\n[BOT] {response['message']['content']}")

# Save the analysis to a per-user text file.
# Ensure the output directory exists first (open() raises FileNotFoundError
# when ./profiles is missing).
os.makedirs("./profiles", exist_ok=True)
with open(f"./profiles/{username}_profile.txt", "w") as file:
    file.write(response["message"]["content"])
from google import genai

# SECURITY NOTE(review): a real-looking Gemini API key is hard-coded and
# committed to the repository. Revoke this key and load it from an
# environment variable or secrets manager instead — never commit secrets.
GEMINI_API_KEY = "AIzaSyAiZuxpcEG6sFLJewy9cEz-7ZwSUf_Ua8M" # TODO(review): move to an env var and rotate this key

def analyze_reddit_profile(file_path):
    """Analyze a scraped Reddit data file with Gemini and save the insights.

    Reads the text at *file_path*, sends it to the ``gemini-2.5-flash``
    model with an extraction instruction, and writes the model's answer to
    ``<file_path stem>_insights.txt`` next to the input file.

    Prints progress and error messages instead of raising; always returns
    ``None``.
    """
    if not os.path.exists(file_path):
        print(f"File '{file_path}' does not exist.")
        return

    # Read the content of the text file
    with open(file_path, "r", encoding="utf-8") as file:
        reddit_data = file.read()

    # Initialize the GenAI client
    client = genai.Client(api_key=GEMINI_API_KEY)

    instruction = (
        "Analyze the following Reddit data and extract the user's name, age, location, "
        "occupation, interests, and any other notable insights:\n\n"
    )

    # Keep the try narrow: only the API call should be reported as a GenAI
    # error (the original also wrapped the local file write, mislabeling
    # disk errors as connection errors).
    try:
        response = client.models.generate_content(
            model="gemini-2.5-flash", contents=instruction + reddit_data
        )
    except Exception as e:
        print(f"Error while connecting to GenAI API: {e}")
        return
    analysis = response.text

    # BUGFIX: str.replace(".txt", ...) substitutes every occurrence in the
    # path and leaves the name unchanged (overwriting the input) when the
    # suffix is absent; splitext handles only the real extension.
    root, _ = os.path.splitext(file_path)
    output_file = root + "_insights.txt"
    with open(output_file, "w", encoding="utf-8") as output:
        output.write("Reddit Profile Insights:\n")
        output.write("-" * 40 + "\n")
        output.write(analysis)

    print(f"Insights saved to {output_file}")

if __name__ == "__main__":
    # Entry point: prompt for a username and analyze its scraped data file.
    target_user = input("Enter the Reddit username: ")
    analyze_reddit_profile(f"./userdata/{target_user}_reddit_data.txt")
7 changes: 7 additions & 0 deletions modelfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Ollama Modelfile: defines the "redditalan" analysis model on top of llama3:8b.
FROM llama3:8b

# System prompt applied to every conversation with this model.
SYSTEM "You are a helpful assistant for analyzing Reddit user data."

# Optional: preload context files into the model image.
# COPY your_data.txt /data/your_data.txt
93 changes: 0 additions & 93 deletions profileExtractor.py

This file was deleted.

15 changes: 15 additions & 0 deletions profiles/Amogh_17_profile.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
Here are the details about the Reddit account:

**Real name**: NOT FOUND
**Gender**: NOT FOUND
**Occupation**: NOT FOUND
**Country**: India (based on comments and posts)
**City**: Pune, Mumbai, Indore, Maharashtra (based on comments and posts)

**Overview description of the person**: The user seems to be an Indian individual who is passionate about various topics such as cricket, food, memes, and his hometown of Pune. He appears to have strong opinions and a sense of humor, often using sarcasm and humor in his comments.

**Psychological analysis**: Based on the comments and posts, it can be inferred that the user has a tendency to express himself strongly, often using hyperbole and exaggeration. He seems to enjoy making jokes and poking fun at others, but also shows empathy towards certain individuals and groups. The user's language is often informal and conversational, suggesting a casual tone.

**General assumptions**: It appears that the user is likely a young adult or an older teenager who has grown up in India. His love for Indian culture, particularly Maharashtrian cuisine and Marathi language, suggests a strong connection to his regional identity. He may also have some biases towards certain groups of people based on his comments.

**Brief conclusion**: The Reddit user, "Amogh_17", is an enthusiastic and opinionated individual who enjoys sharing his thoughts and experiences with others. His posts and comments showcase his love for Indian culture, humor, and strong opinions, but may also reflect some biases and prejudices.
31 changes: 31 additions & 0 deletions profiles/AravRAndG_profile.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
**Reddit Profile Analysis**

Based on the provided information, here are some key insights about the Reddit account:

1. **Username**: The username is not provided, making it difficult to analyze the profile based on this factor.
2. **Post Frequency**: With a total of 1138 comments, it appears that the user is an active contributor to various subreddits.
3. **Comment Quality**: The comments appear to be a mix of humor, frustration, and personal experiences. Some comments may be considered off-topic or irrelevant to the original post, while others show a willingness to engage with other users.
4. **Subreddit Participation**: The user has participated in various subreddits, including but not limited to:
* IndianGaming
* Minecraftbuilds
* AmongUsDiscord
* AmongUsGameCodes
* memes
* cursedcomments
* HistoryMemes
* Minecraftseeds

This suggests that the user has a broad range of interests, including gaming, pop culture, and humor.

5. **Personal Experiences**: The comments reveal personal experiences and struggles related to education, family dynamics, and relationships. These insights may provide valuable information about the user's background and motivations.
6. **Language Proficiency**: The user appears to be proficient in English, as the comments are written primarily in this language. However, there is also mention of Hindi, which suggests that the user may have a native or non-native proficiency in this language.

**Recommendations**

Based on this analysis, I would recommend:

1. **Engage with other users**: The user's willingness to engage with others and share personal experiences could lead to meaningful connections and potentially even friendships.
2. **Focus on specific subreddits**: Given the user's interest in gaming and pop culture, focusing on specific subreddits like r/gaming or r/memes might be more effective in terms of engagement and reputation building.
3. **Develop a unique tone and style**: The user's humor and frustration could be leveraged to create a distinct tone that sets them apart from other users. This would help build a loyal following and make their comments stand out.

By focusing on these areas, the Reddit account can continue to grow and thrive while providing value to other users.
35 changes: 35 additions & 0 deletions profiles/Haramdour_profile.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
**Reddit Profile Analyzer Report**

**Username:** [Not provided]

**Account Type:** [Not provided]

**Age:** The user's age is not specified, but based on their comments and references to current events, it appears they are likely a millennial (born between the early 1980s and the late 1990s).

**Interests:**

1. **Harry Potter**: The user has commented extensively about the Harry Potter series, sharing opinions and insights.
2. **Gaming**: They have mentioned various games, including Baldur's Gate III and Horizon Forbidden West.
3. **Teaching**: Their comments suggest they have experience or interest in teaching, with discussions on lesson planning and classroom dynamics.
4. **Pop culture**: The user has engaged with topics like TV shows, movies, and music (e.g., discussing Chris Brown and R. Kelly).
5. **Social media**: They have expressed opinions on social media platforms, including Twitter, Facebook, Instagram, and TikTok.

**Personality Traits:**

1. **Wit and humor**: The user's comments often display a sense of humor and quick wit.
2. **Passion for discussion**: They engage with others, sharing their thoughts and listening to different perspectives.
3. **Pop culture savvy**: Their knowledge of popular media is evident in many comments.

**Concerns and Fears:**

1. **Age-related anxiety**: The user has expressed concerns about aging and losing one's youthful appearance (e.g., worrying about wrinkles).
2. **Social media fatigue**: They have mentioned deleting social media accounts or feeling overwhelmed by the constant influx of information online.
3. **Fear of missing out (FOMO)**: A few comments hint at feelings of FOMO, particularly in regards to cultural and entertainment trends.

**Life Experiences:**

1. **Marriage and family**: The user has mentioned their wife and children, indicating a stable personal life.
2. **Career or education**: Their experience as a teacher and interest in gaming suggest a background in education or a related field.
3. **Hobbies and interests**: They have shared enthusiasm for hobbies like Lego building and gardening.

**Overall:** This Reddit user appears to be a well-rounded individual with diverse interests, a sense of humor, and a willingness to engage in discussions on various topics.
16 changes: 16 additions & 0 deletions profiles/nthaleph_profile.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
Here is the information you requested:

* Name: nthaleph
* Karma: 425
* Date of creation: 2019-06-29 20:22:46
* Posts:
+ Post 1 (2020-12-19 18:07:14): The Myki browser extension for Chrome has a feature where it will only offer to autofill passwords from the currently selected profile. However, this feature doesn't work on iPhone/iPad using Safari or apps. If I visit a site with multiple logins, it shows all of them no matter what profile I have selected.
+ Post 2 (2020-01-24 15:00:39): [No content]
+ Post 3 (2020-01-07 00:49:02): [No content]
+ Post 4 (2019-08-06 14:09:42): [No content]
+ Post 5 (2019-08-05 12:18:28): [No content]
+ Post 6 (2019-07-30 14:15:56): I'm planning to put in a driveway and retaining wall. One contractor suggested doing the driveway first, while another suggested doing the retaining wall first. Should I do one or the other? What factors should I consider?
+ Post 7 (2019-07-27 01:41:58): [No content]
+ Post 8 (2019-07-22 16:47:53): [No content]
+ Post 9 (2019-07-19 15:56:43): [No content]
+ Post 10 (2019-07-12 14:46:45): **TLDR:** Does EU Customs really calculate the Value of Goods based on the amount a backer pledges, rather than the cost of manufacturing/estimated MSRP?
56 changes: 56 additions & 0 deletions reddit-profile-data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import praw
import os
from prawcore.exceptions import NotFound, Forbidden, TooManyRequests

# Module-level PRAW client used by get_user_data().
# SECURITY NOTE(review): a real-looking client_id/client_secret pair is
# committed here. Rotate these credentials and load them from environment
# variables (e.g. via praw.ini or os.environ) — never commit secrets.
reddit = praw.Reddit(
    client_id="Hrqi5ojWmfv4zwS3tab0fQ",
    client_secret="_K6tn95hLrbiikMVSZJtPK0VuyNFzw",
    user_agent="script:reddit_user_scraper:v1.0 (by u/yourusername)",
)

def get_user_data(username):
    """Download u/*username*'s submissions and comments and save them.

    Writes titles/subreddits of submissions and subreddit/body of comments
    (up to the PRAW listing cap, ~1000 each) to
    ``./userdata/<username>_reddit_data.txt``.

    Missing or suspended users and rate limiting are reported via print;
    nothing is raised to the caller. Returns ``None``.
    """
    try:
        user = reddit.redditor(username)
        _ = user.id  # Raises NotFound if the account does not exist

        os.makedirs("./userdata", exist_ok=True)

        # Fetch each listing exactly once and reuse it. The original
        # iterated the API twice (once to count, once to write), doubling
        # the number of network requests.
        submissions = list(user.submissions.new(limit=None))
        comments = list(user.comments.new(limit=None))
        total_submissions = len(submissions)
        total_comments = len(comments)

        def percent(count, total):
            # Guard against accounts with zero posts/comments: the original
            # raised ZeroDivisionError, masked by the generic handler.
            return (count / total) * 100 if total else 100.0

        with open(f"./userdata/{username}_reddit_data.txt", "w", encoding="utf-8") as file:
            file.write(f"Fetching submissions for u/{username}...\n\n")
            count = 0
            for submission in submissions:
                file.write(f"Title: {submission.title}\n")
                file.write(f"Subreddit: {submission.subreddit}\n")
                file.write("-" * 40 + "\n")
                count += 1
            file.write(f"\nTotal Posts Fetched: {count}/{total_submissions} ({percent(count, total_submissions):.2f}%)\n\n")

            file.write(f"Fetching comments for u/{username}...\n\n")
            count = 0
            for comment in comments:
                file.write(f"Subreddit: {comment.subreddit}\n")
                file.write(f"Comment: {comment.body}\n")
                file.write("-" * 40 + "\n")
                count += 1
            file.write(f"\nTotal Comments Fetched: {count}/{total_comments} ({percent(count, total_comments):.2f}%)\n")

        print(f"Data saved to ./userdata/{username}_reddit_data.txt")

    except NotFound:
        print(f"User '{username}' not found.")
    except Forbidden:
        print(f"Access to user '{username}' is forbidden (maybe suspended).")
    except TooManyRequests:
        print("Rate limit exceeded. Try again later.")
    except Exception as e:
        print(f"Unexpected error: {e}")

if __name__ == "__main__":
    # Script entry point: prompt for an account name and scrape it.
    target = input("Enter Reddit username: ")
    get_user_data(target)
Loading