69 changes: 68 additions & 1 deletion api/app/main.py
@@ -1,10 +1,14 @@
- from fastapi import Depends, FastAPI
+ from fastapi import Depends, FastAPI, BackgroundTasks, Query
from fastapi.middleware.cors import CORSMiddleware
from typing import Optional, List

from .schemas.events import GetEventRecommendationsRequest, GetEventRecommendationsResponse
from .schemas.tweets import Tweet, TweetList
from .routers import auth
from .services.activity_suggestion_generator import ActivitySuggestionGenerator
from .services.context_aggregator import ContextAggregator
from .utils.keyword_extractor import extract_keywords_from_events
from .utils.scrapers.scrape_tweets import search_tweets_for_event, write_tweets_to_db

app = FastAPI(title="Pear Programming API", version="0.1.0")

@@ -45,11 +49,74 @@ def get_activity_suggestion_generator(
)
async def get_event_recommendations(
request: GetEventRecommendationsRequest,
background_tasks: BackgroundTasks,
generator: ActivitySuggestionGenerator = Depends(get_activity_suggestion_generator),
) -> GetEventRecommendationsResponse:
"""Return activity recommendations tailored to the caller's preferences."""
events = generator.generate_suggestions(
number_events=request.number_events,
response_preferences=request.response_preferences,
)

# Automatically extract keywords from events and scrape tweets in background
# Wrap in try-except to ensure any errors don't affect the main response
if events:
try:
keywords = extract_keywords_from_events(events, max_keywords=30)
if keywords:
# Create a safe wrapper function that handles exceptions internally
def safe_scrape_tweets():
try:
write_tweets_to_db(
limit=20,
activity_keywords=keywords,
location_terms=None # Use default location terms
)
except Exception as scrape_error:
                        # Log at debug level and swallow the error: background
                        # scraping failures must never surface to the client
import logging
logging.getLogger(__name__).debug(
f"Background tweet scraping failed: {scrape_error}"
)

# Trigger tweet scraping in background (non-blocking)
background_tasks.add_task(safe_scrape_tweets)
except Exception as e:
# Log error but don't fail the request
import logging
logging.getLogger(__name__).warning(f"Failed to extract keywords or schedule tweet scraping: {e}")

return GetEventRecommendationsResponse(events=events)
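
Reviewer note: FastAPI's BackgroundTasks runs each queued callable only after the response has been sent, so the wrapped scraping above cannot delay or break the recommendations payload. A minimal, self-contained sketch of the same pattern (the endpoint and task names here are illustrative, not part of this PR):

from fastapi import BackgroundTasks, FastAPI

demo_app = FastAPI()

def record_visit(message: str) -> None:
    # Runs after the response is sent; an exception here never reaches the client
    print(f"background: {message}")

@demo_app.get("/ping")
async def ping(background_tasks: BackgroundTasks) -> dict:
    background_tasks.add_task(record_visit, "pong delivered")
    return {"status": "ok"}  # sent to the client before record_visit executes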


@app.get(
"/tweets",
response_model=TweetList,
response_model_exclude_none=True,
summary="Get tweets for events",
)
async def get_tweets(
limit: int = Query(default=10, ge=1, le=50, description="Number of tweets to return"),
keywords: Optional[str] = Query(default=None, description="Comma-separated keywords to filter tweets"),
event_title: Optional[str] = Query(default=None, description="Event title to search tweets for"),
) -> TweetList:
"""Retrieve tweets from the database or fetch live tweets for a specific event."""
from .utils.scrapers.scrape_tweets import get_tweets as get_tweets_from_db

filter_keywords = [kw.strip() for kw in keywords.split(",") if kw.strip()] if keywords else None

if event_title:
live_tweets = search_tweets_for_event(
event_title,
extra_keywords=filter_keywords,
limit=limit,
)
return TweetList(tweets=[Tweet.model_validate(tweet) for tweet in live_tweets])

tweets = get_tweets_from_db(
limit=limit,
threshold_hours_for_refresh=2,
filter_keywords=filter_keywords,
)

return TweetList(tweets=[Tweet.model_validate(tweet) for tweet in tweets])
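
For reference, the new endpoint can be exercised like this; a sketch assuming the API is served locally on port 8000 and that httpx is available (any HTTP client would do):

import httpx

# Comma-separated keywords are split and trimmed server-side
resp = httpx.get(
    "http://localhost:8000/tweets",
    params={"limit": 5, "keywords": "festival, market"},
)
resp.raise_for_status()
for tweet in resp.json()["tweets"]:
    print(tweet["text"])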
2 changes: 1 addition & 1 deletion api/app/schemas/events.py
@@ -18,7 +18,7 @@ def __eq__(self, other: object) -> bool: # pragma: no cover - tiny helper
class Event(BaseModel):
"""Represents a suggested activity."""

-     location: Location = Field(..., description="Cartesian location coordinates")
+     location: Location = Field(..., description="Location coordinates where x is latitude and y is longitude")
name: str = Field(..., description="Human-readable name for the event")
emoji: str = Field(..., description="Emoji summarizing the event vibe")
event_score: float = Field(
30 changes: 30 additions & 0 deletions api/app/schemas/tweets.py
@@ -0,0 +1,30 @@
"""Pydantic schemas for tweet responses."""

from __future__ import annotations

from datetime import datetime
from typing import List, Optional

from pydantic import BaseModel, ConfigDict, Field


class Tweet(BaseModel):
"""Tweet model for API responses."""

id: int = Field(..., description="Tweet database ID")
text: str = Field(..., description="Tweet text content")
like_count: int = Field(default=0, description="Number of likes")
retweet_count: int = Field(default=0, description="Number of retweets")
created_at: Optional[datetime] = Field(default=None, description="When the tweet was created on X")
scraped_at: datetime = Field(..., description="When the tweet was scraped and stored")
username: Optional[str] = Field(default=None, description="X username associated with the tweet")

    # Pydantic v2 config: enable ORM mode so SQLAlchemy rows validate directly
    model_config = ConfigDict(from_attributes=True)


class TweetList(BaseModel):
"""Response containing a list of tweets."""

tweets: List[Tweet] = Field(default_factory=list, description="List of tweets")
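
With ORM mode enabled, Tweet.model_validate reads values via plain attribute access, which is what the /tweets handler relies on. A minimal sketch (the Row class below is an illustrative stand-in for a SQLAlchemy model, and the import path assumes the api package is importable):

from datetime import datetime, timezone

from api.app.schemas.tweets import Tweet

class Row:  # stand-in for a SQLAlchemy ORM row
    id = 1
    text = "Pop-up market on the Royal Mile this weekend!"
    like_count = 12
    retweet_count = 3
    created_at = None
    scraped_at = datetime.now(timezone.utc)
    username = "edi_events"

tweet = Tweet.model_validate(Row())  # works because ORM mode is enabled
print(tweet.model_dump(exclude_none=True))  # created_at dropped by exclude_none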

43 changes: 40 additions & 3 deletions api/app/services/llm.py
@@ -25,7 +25,12 @@
(
"You transform contextual information into structured event recommendations. "
"Always reply using the EventList schema with fields: name, description, emoji, "
"event_score (0-10), location (x,y coordinates), and optional link."
"event_score (0-10), location (x,y where x is latitude and y is longitude), and optional link.\n\n"
"IMPORTANT: When eventbrite_events are provided in the context, you MUST use the exact "
"latitude and longitude values from those events for the location field. Each event should "
"have unique coordinates based on the actual locations in the provided Eventbrite data. "
"Do not use the same coordinates for all events - extract and use the specific "
"latitude/longitude pairs from each Eventbrite event."
),
),
(
@@ -91,10 +96,42 @@ def _format_context(context: Any) -> str:
if isinstance(context, str):
return context
if isinstance(context, Mapping):
# Format Eventbrite events more prominently with coordinates
formatted_context = dict(context)
if "eventbrite_events" in formatted_context:
eventbrite_events = formatted_context["eventbrite_events"]
if isinstance(eventbrite_events, list) and eventbrite_events:
# Create a readable summary highlighting coordinates
formatted_events_summary = []
formatted_events_summary.append(
"IMPORTANT: Use the exact latitude/longitude coordinates below for event locations:\n"
)
for idx, event in enumerate(eventbrite_events, 1):
if isinstance(event, dict):
event_str = f"{idx}. {event.get('activity_name', 'Event')}"
if "latitude" in event and "longitude" in event:
event_str += (
f"\n COORDINATES: latitude={event['latitude']}, "
f"longitude={event['longitude']} "
f"(use these values as location=[latitude, longitude])"
)
if "location_name" in event:
event_str += f"\n Venue: {event['location_name']}"
if "url" in event:
event_str += f"\n URL: {event['url']}"
formatted_events_summary.append(event_str)

# Add summary at the beginning, then include full JSON
summary_text = "\n".join(formatted_events_summary)
try:
json_str = json.dumps(formatted_context, indent=2)
return f"{summary_text}\n\n--- Full Context JSON ---\n{json_str}"
except TypeError:
return f"{summary_text}\n\n--- Full Context ---\n{str(formatted_context)}"
try:
-         return json.dumps(context, indent=2)
+         return json.dumps(formatted_context, indent=2)
except TypeError:
-         return str(dict(context))
+         return str(formatted_context)
if isinstance(context, Iterable) and not isinstance(context, (bytes, bytearray)):
return "\n".join(str(item) for item in context)
return str(context)
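
To make the new formatting path concrete, a hedged example of the input _format_context now handles specially (the event values below are invented; the keys match the ones the code inspects):

from api.app.services.llm import _format_context  # assumes the api package is importable

context = {
    "eventbrite_events": [
        {
            "activity_name": "Ceilidh Night",
            "latitude": 55.9533,
            "longitude": -3.1883,
            "location_name": "Assembly Roxy",
            "url": "https://example.com/ceilidh",
        }
    ]
}

# Prints the coordinate summary first, then the full payload under
# "--- Full Context JSON ---"
print(_format_context(context))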
157 changes: 157 additions & 0 deletions api/app/utils/keyword_extractor.py
@@ -0,0 +1,157 @@
"""Extract activity keywords from events for tweet searching."""

import re
from typing import List
from collections import Counter

from ..schemas.events import Event

# Common stop words to exclude from keyword extraction
STOP_WORDS = {
"a", "an", "the", "and", "or", "but", "in", "on", "at", "to", "for",
"of", "with", "by", "from", "as", "is", "are", "was", "were", "be",
"been", "being", "have", "has", "had", "do", "does", "did", "will",
"would", "could", "should", "may", "might", "must", "can", "this",
"that", "these", "those", "i", "you", "he", "she", "it", "we", "they",
"what", "which", "who", "whom", "whose", "where", "when", "why", "how",
"all", "each", "every", "both", "few", "more", "most", "other", "some",
"such", "only", "own", "same", "so", "than", "too", "very", "just",
"now", "then", "here", "there", "where", "why", "how", "also", "about",
"into", "through", "during", "before", "after", "above", "below", "up",
"down", "out", "off", "over", "under", "again", "further", "once",
"edinburgh", "join", "local", "bring", "free", "get", "see", "visit"
}

# Activity-related patterns to prioritize
ACTIVITY_PATTERNS = [
r'\b(festival|fest|celebration)\w*\b',
    r'\b(concert|music|gig|performance|show)\w*\b',
r'\b(market|bazaar|fair|stall|vendor)\w*\b',
r'\b(food|cuisine|dining|restaurant|cafe|brunch|dinner|breakfast|lunch)\w*\b',
r'\b(exhibition|museum|gallery|art|display)\w*\b',
r'\b(workshop|class|lesson|course|training)\w*\b',
r'\b(meetup|meeting|gathering|social|event)\w*\b',
r'\b(hike|walk|trail|outdoor|park|nature)\w*\b',
r'\b(tour|guided|walking|exploration)\w*\b',
r'\b(yoga|fitness|exercise|sport|sports)\w*\b',
r'\b(comedy|theater|theatre|play|drama)\w*\b',
    r'\b(craft|artisan|handmade)\w*\b',
r'\b(popup|pop-up|pop up)\w*\b',
r'\b(live|entertainment|venue)\w*\b',
]


def extract_keywords_from_events(events: List[Event], max_keywords: int = 30) -> List[str]:
"""
Extract activity-related keywords from a list of events.

Prioritizes activity-related terms from event names and descriptions.
Returns a list of keywords suitable for X API query building.

Args:
events: List of Event objects to extract keywords from
max_keywords: Maximum number of keywords to return (default: 30)

Returns:
List of keyword strings, prioritized by relevance
"""
if not events:
return []

# Collect all text from events
all_text = []
for event in events:
if event.name:
all_text.append(event.name.lower())
if event.description:
all_text.append(event.description.lower())

combined_text = " ".join(all_text)

    # Collect matches of the activity patterns first; these terms (mostly single
    # words, plus forms like "pop up") receive extra weight during scoring
preserved_phrases = []
for pattern in ACTIVITY_PATTERNS:
matches = re.findall(pattern, combined_text, re.IGNORECASE)
preserved_phrases.extend([m.lower() if isinstance(m, str) else m[0].lower() for m in matches])

    # Tokenize the combined text into single-word tokens
words = re.findall(r'\b\w+\b', combined_text)

# Combine single words and preserved phrases
all_terms = words + preserved_phrases

# Count term frequencies
term_counts = Counter(all_terms)

# Filter and score terms
keywords = []
keyword_scores = {}

for term, count in term_counts.items():
# Skip stop words and very short terms
if term in STOP_WORDS or len(term) < 3:
continue

# Skip pure numbers
if term.isdigit():
continue

# Calculate score: frequency + bonus for activity patterns
score = count
if any(re.search(pattern, term, re.IGNORECASE) for pattern in ACTIVITY_PATTERNS):
score += 5 # Boost activity-related terms

keyword_scores[term] = score
keywords.append(term)

    # Deduplicate, then sort by score (highest first)
keywords_sorted = sorted(set(keywords), key=lambda k: keyword_scores.get(k, 0), reverse=True)

# Handle multi-word terms from preserved phrases separately
# Extract common 2-word phrases from text
two_word_phrases = []
for text in all_text:
words_list = text.split()
for i in range(len(words_list) - 1):
phrase = f"{words_list[i]} {words_list[i+1]}"
# Only keep phrases that seem activity-related
if any(re.search(pattern, phrase, re.IGNORECASE) for pattern in ACTIVITY_PATTERNS):
two_word_phrases.append(phrase.lower())

two_word_counts = Counter(two_word_phrases)
    # Counter.most_common already orders by count, and two-word phrases can
    # never collide with the single-word STOP_WORDS set
    two_word_keywords = [phrase for phrase, _ in two_word_counts.most_common(10)]

# Combine single keywords with multi-word phrases, prioritizing phrases
# Create a set of words from two-word phrases for efficient lookup
two_word_words = set()
for phrase in two_word_keywords:
two_word_words.update(phrase.split())

# Only add single keywords that don't already appear in the two-word phrases
final_keywords = two_word_keywords + [
k for k in keywords_sorted
if k not in two_word_keywords and k not in two_word_words
]

# Limit to max_keywords
return final_keywords[:max_keywords]


def extract_keywords_from_event(event: Event, max_keywords: int = 10) -> List[str]:
"""
Extract keywords from a single event for filtering tweets.

Args:
event: Event object to extract keywords from
max_keywords: Maximum number of keywords to return (default: 10)

Returns:
List of keyword strings relevant to the event
"""
return extract_keywords_from_events([event], max_keywords=max_keywords)
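
A quick usage sketch for the extractor (the Event values are invented, and Location(x=..., y=...) is an assumption based on the schema's x=latitude, y=longitude description):

from api.app.schemas.events import Event, Location
from api.app.utils.keyword_extractor import extract_keywords_from_events

event = Event(
    location=Location(x=55.9533, y=-3.1883),  # assumed field names
    name="Stockbridge Food Festival",
    emoji="🥐",
    event_score=8.5,
    description="A weekend food market with live music and craft stalls.",
)

# Phrases like "food festival" and "live music" should rank ahead of single words
print(extract_keywords_from_events([event], max_keywords=5))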
