# app.py — Mindy: mindfulness chatbot with personalized YouTube video recommendations.
# (GitHub page chrome and line-number gutter from the scrape removed.)
import streamlit as st
import os
from dotenv import load_dotenv
from googleapiclient.discovery import build
import openai
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import re
# Load environment variables
# Reads OPENAI_API_KEY and YOUTUBE_API_KEY from a local .env file; either may
# come back as None if the .env entry is missing — TODO confirm both are set.
load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")
YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")
# Initialize YouTube API
# Module-level client shared by fetch_youtube_videos() below.
youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
# Streamlit App
st.title("Mindy")
st.subheader("Discover mindfulness techniques and personalized video recommendations for relaxation.")
st.write("Enter your query to get recommendations.")
# Categories for user guidance
# Keys populate the category selectbox; the example-technique lists are shown
# to the user as guidance only (the values are not read elsewhere in this file).
categories = {
"Mindfulness and Meditation": ["mindfulness", "guided meditation", "focus meditation"],
"Breathing Exercises" : ["deep breathing techniques", "4-7-8 breathing", "box breathing"],
"Somatic Practices": ["yoga for relaxation", "tai chi", "progressive muscle relaxation"],
}
# Generate GPT-4 Recommendations
def generate_gpt_recommendations(query, selected_category):
    """Ask GPT-4 for mindfulness/relaxation advice tailored to the query.

    Args:
        query: Free-text user question.
        selected_category: One of the category names, or "All" for no restriction.

    Returns:
        Markdown-formatted recommendations, or an "Error generating
        recommendations: ..." string if the API call fails (the caller renders
        whatever comes back, so errors are surfaced inline rather than raised).
    """
    try:
        # Dynamically adjust the prompt based on the selected category.
        if selected_category != "All":
            category_instruction = (
                f"Focus your recommendations specifically on the category: **{selected_category}**. "
                "Only suggest techniques and advice that are directly relevant to this category. "
                "If the query falls outside the scope of this category, state: 'The provided context does not contain this information.'"
            )
        else:
            category_instruction = (
                "Provide general recommendations not limited to any specific category. "
                "Cover a diverse range of actionable techniques related to mindfulness, relaxation, and self-help."
            )
        # NOTE: the response-format template below deliberately numbers the
        # techniques "1." and "2." — extract_techniques_and_keywords() parses
        # numbered, bold-named lines out of the reply.
        # Fixed here: the original template listed two different items both as
        # "1." and had a stray "]" after "apply it".
        gpt_prompt = f"""
## SYSTEM ROLE
You are an expert chatbot designed to provide actionable, insightful, and personalized advice on **Mindfulness**, **Relaxation**, and **Self-Help Techniques**.
If a query is unrelated to these topics, politely inform the user and avoid generating recommendations.
Based on the user's query, provide **specific techniques** the user can apply, along with actionable advice.
## USER QUESTION
The user has asked:
"{query}"
## CATEGORY CONTEXT
{category_instruction}
## GUIDELINES
1. **Accuracy**:
- Provide actionable techniques tailored to the query
- Prioritize recommendations from CATEGORY CONTEXT and base your suggestions on the user's query.
- If the answer cannot be found, explicitly state: "The provided context does not contain this information."
- Use actionable language to recommend techniques for relaxation and mindfulness.
2. **Actionable Techniques**: Suggest **specific techniques** the user can apply (e.g., guided breathing, mindfulness meditation, progressive muscle relaxation). Explain:
- How the technique works.
- How it helps address the query.
- Steps to apply it.
3. **Clarity**:
- Use simple, professional, and user-friendly language.
- Ensure the response is well-structured and formatted in Markdown for readability.
4. **Category Relevance**:
- If a category is provided, ensure all suggestions directly address that category.
- For example:
- For "Stress and Anxiety Relief", focus on techniques like deep breathing, mindfulness exercises, and calming practices.
- For "Sleep and Rest", suggest techniques like guided sleep meditations, sleep hygiene tips, and relaxing yoga poses.
5. **Response Format**:
- Include at least two techniques.
- Use the following structure:
'''
# [Custom Title Based on the Query]
Provide a meaningful title based on the user’s query and CATEGORY CONTEXT (e.g., "How to Relax and Sleep Better").
## Recommendations
1. **[Technique Name]**: [Detailed explanation of the technique, why it works, and how to apply it — open with one sentence of actionable, insightful advice].
2. **[Another Technique Name]**: [Detailed explanation].
## Note
Focus on actionable advice. Avoid vague suggestions.
'''
"""
        response = openai.chat.completions.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": "You are an expert assistant."},
                {"role": "user", "content": gpt_prompt},
            ],
            temperature=0.7,
            max_tokens=1500,
        )
        return response.choices[0].message.content
    except Exception as e:
        # Returned (not raised) so Streamlit renders the failure inline.
        return f"Error generating recommendations: {e}"
# Extract techniques from GPT recommendations
def extract_techniques_and_keywords(gpt_recommendations):
    """Pull technique names and search keywords out of the GPT markdown reply.

    Expects the format requested by generate_gpt_recommendations(): an
    ``# <title>`` heading followed by numbered items whose technique name is
    wrapped in ``**...**``.

    Args:
        gpt_recommendations: Markdown text returned by GPT.

    Returns:
        A list of ``{"technique": str, "keywords": [str, ...]}`` dicts; the
        keywords combine words (>3 chars, lowercased) from the technique name
        and the title. Empty list on failure.
    """
    try:
        techniques_with_keywords = []
        lines = gpt_recommendations.splitlines()
        # Step 1: extract keywords from the first "# " title heading.
        title_keywords = []
        for line in lines:
            if line.startswith("# "):
                title = line[2:].strip()
                # Keep only words long enough to be meaningful search terms.
                title_keywords = [
                    word.lower() for word in title.split() if len(word) > 3
                ]
                break  # Only the first title matters.
        # Step 2: collect every numbered item, not just "1."/"2." — the model
        # may legitimately return three or more techniques.
        for line in lines:
            if not re.match(r"\d+\.", line):
                continue
            parts = line.split("**")
            if len(parts) < 2:
                # Numbered line without a bold technique name. The original
                # code raised IndexError here, which the except below turned
                # into an empty result for ALL techniques; now we just skip.
                continue
            technique_name = parts[1]
            technique_keywords = [
                word.lower() for word in technique_name.split() if len(word) > 3
            ]
            techniques_with_keywords.append(
                {"technique": technique_name, "keywords": technique_keywords + title_keywords}
            )
        return techniques_with_keywords
    except Exception as e:
        st.error(f"Error extracting techniques and keywords: {e}")
        return []
# Fetch videos from YouTube API
def fetch_youtube_videos(query, max_results=10, order="relevance", video_duration=None, video_definition=None):
    """Search the YouTube Data API and return simplified video records.

    Args:
        query: Search string.
        max_results: Maximum number of results to request.
        order: YouTube result ordering (e.g. "relevance").
        video_duration: Optional duration filter passed through to the API.
        video_definition: Optional definition filter passed through to the API.

    Returns:
        A list of dicts with "title", "description", "video_id", and "link";
        empty list on any API error (reported via st.error).
    """
    try:
        search_kwargs = {
            "q": query,
            "part": "snippet",
            "type": "video",
            "maxResults": max_results,
            "order": order,
        }
        # Optional filters are only sent when the caller supplies them.
        if video_duration:
            search_kwargs["videoDuration"] = video_duration
        if video_definition:
            search_kwargs["videoDefinition"] = video_definition
        response = youtube.search().list(**search_kwargs).execute()
        results = []
        for item in response.get("items", []):
            snippet = item["snippet"]
            video_id = item["id"]["videoId"]
            results.append(
                {
                    "title": snippet["title"],
                    "description": snippet["description"],
                    "video_id": video_id,
                    "link": f"https://www.youtube.com/watch?v={video_id}",
                }
            )
        return results
    except Exception as e:
        st.error(f"Error fetching videos: {e}")
        return []
def preprocess_text(text):
    """Normalize text for similarity comparison.

    Lowercases the input and strips every character that is not a letter,
    digit, or whitespace (punctuation, symbols, etc.).
    """
    lowered = text.lower()
    return re.sub(r"[^a-zA-Z0-9\s]", "", lowered)
# Rank videos by similarity to the technique
def rank_videos_by_query(query, videos):
    """Order videos by TF-IDF cosine similarity between query and title+description.

    Side effect: a "similarity_score" key is added to every video dict.

    Args:
        query: Search text to compare against.
        videos: List of video dicts with "title" and "description" keys.

    Returns:
        Videos sorted by descending similarity; the original (unsorted) list
        when every score is zero or when vectorization fails.
    """
    try:
        # Document 0 is the query; documents 1..n are the videos.
        corpus = [preprocess_text(query)]
        for video in videos:
            corpus.append(preprocess_text(video["title"] + " " + video["description"]))
        tfidf = TfidfVectorizer(stop_words="english").fit_transform(corpus)
        scores = cosine_similarity(tfidf[0:1], tfidf[1:]).flatten()
        for video, score in zip(videos, scores):
            video["similarity_score"] = score
        if all(score == 0.0 for score in scores):
            st.warning("No meaningful matches found based on similarity. Displaying original YouTube ranking.")
            return videos  # Keep YouTube's own ordering.
        return sorted(videos, key=lambda v: v["similarity_score"], reverse=True)
    except Exception as e:
        st.error(f"Error ranking videos: {e}")
        return videos
def build_enriched_query(technique_data):
    """Build a YouTube search query from a technique's keyword list.

    Deduplicates keywords while preserving first-occurrence order.  The
    original used ``set()``, whose iteration order varies between interpreter
    runs, so the same recommendation could produce a different search query
    (and different videos) on every rerun; ``dict.fromkeys`` is deterministic.

    Args:
        technique_data: Dict with a "keywords" list (as produced by
            extract_techniques_and_keywords).

    Returns:
        Space-joined, deduplicated keyword string.
    """
    unique_keywords = dict.fromkeys(technique_data["keywords"])
    return " ".join(unique_keywords)
# Streamlit Input and Display
# ---- Streamlit input and display ----
query = st.text_input("Enter your query:")
selected_category = st.selectbox("Select a category (optional):", ["All"] + list(categories.keys()))
tab_chatbot, tab_videos = st.tabs(["Chatbot Recommendations", "Video Recommendations"])

with tab_chatbot:
    st.write("### GPT-4 Recommendations")
    generate_button = st.button("Generate Recommendations")
    # Reset simply re-runs the script from the top, clearing transient widget state.
    if st.button("Reset"):
        # Fixed: st.experimental_rerun() was deprecated and has been removed
        # in current Streamlit releases; st.rerun() is the replacement.
        st.rerun()
    if generate_button and query:
        with st.spinner("Generating recommendations..."):
            # Step 1: generate GPT-4 recommendations for the query/category.
            gpt_recommendations = generate_gpt_recommendations(query, selected_category)
            st.markdown(gpt_recommendations)
            # Step 2: extract techniques + keywords (including title keywords).
            techniques_with_keywords = extract_techniques_and_keywords(gpt_recommendations)
            # Persist across reruns so the Videos tab can reuse the result.
            st.session_state.techniques_with_keywords = techniques_with_keywords
            st.success("Recommendations generated successfully!")

with tab_videos:
    st.write("### Video Recommendations")
    if "techniques_with_keywords" not in st.session_state or not st.session_state.techniques_with_keywords:
        st.warning("Please generate recommendations in the 'Chatbot Recommendations' tab first.")
    else:
        with st.spinner("Fetching and ranking videos..."):
            for technique_data in st.session_state.techniques_with_keywords:
                technique = technique_data["technique"]
                enriched_query = build_enriched_query(technique_data)
                # Fetch and rank videos using the enriched keyword query.
                videos = fetch_youtube_videos(
                    enriched_query,
                    max_results=10,
                    order="relevance",        # API relevance ranking (NOT view count)
                    video_duration="medium",  # videos between 4 and 20 minutes
                    video_definition="high",  # high-definition videos only
                )
                ranked_videos = rank_videos_by_query(enriched_query, videos)
                # Display the top 3 ranked videos per technique.
                st.subheader(f"Top Videos for: {technique}")
                for video in ranked_videos[:3]:
                    st.markdown(f"""
- **{video['title']}**
{video['description']}
[Watch here]({video['link']})
**Similarity Score**: {video['similarity_score']:.2f}
""")