-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdebug_search.py
More file actions
55 lines (44 loc) · 1.8 KB
/
debug_search.py
File metadata and controls
55 lines (44 loc) · 1.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import sys
import os
from dotenv import load_dotenv
load_dotenv()
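# Add the current working directory to sys.path so the `app` package is importable
# (this is the project root only when the script is launched from there)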
sys.path.append(os.getcwd())
from app.services.search_engine import RedditSearchEngine, SmartCascade, SearchConfig
from app.schemas import SearchResultPost
import praw
def test_extraction():
    """Fetch one live Reddit post and print each stage of its extraction.

    Debug helper that runs three stages and reports everything on stdout:

    1. Ask ``RedditSearchEngine.fetch_posts`` for a single post matching
       "python performance" (the ``["python"]`` argument is presumably the
       list of subreddits to search -- confirm against ``fetch_posts``).
    2. Convert that post with ``SmartCascade._submission_to_dict`` and print
       the extracted fields.
    3. Build a ``SearchResultPost`` from the resulting dict and print either
       the serialized model or the failure message.

    Returns:
        None. Returns early, after printing a notice, when no post is found.
    """
    print("Testing data extraction...")
    engine = RedditSearchEngine()
    # Fetch 1 post from python (likely to have text)
    posts = engine.fetch_posts("python performance", ["python"], SearchConfig(target_posts=1))
    if not posts:
        print("No posts found!")
        return

    post = posts[0]
    print(f"Original Title: {post.title}")
    # NOTE(review): post.permalink is deliberately not printed here; the
    # original author suspected praw loads it lazily -- unconfirmed.

    # _submission_to_dict is an instance method, so a SmartCascade is built
    # with a default SearchConfig purely to get access to it.
    cascade = SmartCascade(SearchConfig())
    data = cascade._submission_to_dict(post)

    # `.get('content', '')` only covers a *missing* key. If the key is present
    # but holds None, len()/slicing would raise TypeError and the script would
    # abort before the validation stage below, so normalise to '' first.
    content = data.get('content') or ''
    created_utc = data.get('created_utc')

    print("\n--- Extracted Data ---")
    print(f"ID: {data.get('id')}")
    print(f"Title: {data.get('title')}")
    print(f"Content Length: {len(content)}")
    print(f"Content Preview: {content[:50]}...")
    print(f"URL: {data.get('url')}")
    print(f"Permalink: {data.get('permalink')}")
    print(f"Created UTC: {created_utc} (Type: {type(created_utc)})")

    print("\n--- Pydantic Validation ---")
    # We need to mock fields that scoring usually adds
    data['score'] = 100.0
    data['tags'] = []
    try:
        model = SearchResultPost(**data)
        print("Validation Successful!")
        print(model.model_dump_json(indent=2))
    except Exception as e:
        # Deliberately broad: this is a top-level debug report, and any
        # failure while building or dumping the model should be shown rather
        # than crash the script.
        print(f"Validation Failed: {e}")
# Run the extraction check only when this file is executed directly as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    test_extraction()