AdInsight/app_copy.py at main · PCN-LAB/AdInsight · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
import re
import time
import json
from typing import Dict, List, Optional, Union, Any
import pandas as pd

class AdAnalysisChatbot:
    """
    A chatbot for analyzing advertising data using a local LLM through Ollama.
    Processes gaze tracking data and provides insights for ad optimization.
    """

    def __init__(self, model_name: str = "gemma2:2b"):
        """
        Initialize the chatbot with the specified LLM model.

        Args:
            model_name: The name of the Ollama model to use
        """
        self.model_name = model_name
        self.conversation_history = []
        self.ad_context = {}
        self.gaze_data = {}
        self.has_context = False

        # System prompt template that will be used for all interactions
        self.system_prompt_template = """
        You are an advertising analytics expert specializing in eye-tracking and emotional response data.
        Your task is to analyze gaze, emotion and demographic data and provide actionable insights for ad optimization.

        CONTEXT:
        Company: {company}
        Target Audience: {target_audience}
        Ad Goals: {ad_goals}

        DATA:
        {data}

        INSTRUCTIONS:
        - Analyze the data in the context of the company, target audience, and ad goals
        - Identify strengths and weaknesses of the ad based on gaze emotion and demographic patterns
        - Suggest specific, actionable improvements
        - Relate gaze data to specific objects of the ad
        - Provide insights based on established advertising principles
        - Consider audience segmentation and engagement patterns

        Be concise but thorough in your analysis. Focus on actionable insights.
        The user may ask follow-up questions about specific objects or hypothetical scenarios.
        """

    def load_gaze_data(self, file_content: str) -> bool:
        """
        Load and parse gaze data from a formatted report file or JSON.

        Args:
            file_content: Content of the gaze data file (text or JSON)

        Returns:
            bool: True if data was loaded successfully, False otherwise
        """
        try:
            print(f"DEBUG load_gaze_data: Received file_content type: {type(file_content)}")
            print(f"DEBUG load_gaze_data: File content length: {len(file_content) if file_content else 'None'}")

            if not file_content or len(file_content) < 10:
                print("DEBUG load_gaze_data: File content is empty or too short")
                return False

            # Print first 100 chars of data for debugging
            print(f"DEBUG load_gaze_data: First 100 chars: {file_content[:100]}")

            # First, try parsing as JSON
            try:
                print("DEBUG load_gaze_data: Attempting to parse as JSON")
                json_data = json.loads(file_content)
                print(f"DEBUG load_gaze_data: Successfully parsed as JSON with keys: {list(json_data.keys())}")
                # If we got here, it's valid JSON
                result = self._parse_json_report(json_data)
                print(f"DEBUG load_gaze_data: JSON parsing result: {result}")
                return result
            except json.JSONDecodeError as e:
                # Not JSON, try the legacy text format
                print(f"DEBUG load_gaze_data: Not valid JSON, error: {str(e)}")
                print("DEBUG load_gaze_data: Falling back to text parsing")
                result = self._parse_text_report(file_content)
                print(f"DEBUG load_gaze_data: Text parsing result: {result}")
                return result

        except Exception as e:
            print(f"DEBUG load_gaze_data: Error loading gaze data: {str(e)}")
            import traceback
            traceback.print_exc()
            return False

    def _parse_json_report(self, json_data: Dict) -> bool:
        """
        Parse the JSON report format from metrics_generator.py.

        Args:
            json_data: The JSON data from the report

        Returns:
            bool: True if successful
        """
        try:
            data = {
                "overall_stats": {},
                "demographics": {"gender_distribution": {}, "age_groups": {}},
                "objects": {},
                "emotion_distribution": {},
                "viewer_patterns": {},
                "conclusion": {}
            }

            # Extract metadata and overall statistics
            if "report_metadata" in json_data:
                metadata = json_data["report_metadata"]
                data["overall_stats"]["Generated on"] = metadata.get("generated_on", "Unknown")
                data["overall_stats"]["Video ID"] = metadata.get("video_id", "Unknown")
                data["overall_stats"]["Number of viewers analyzed"] = metadata.get("num_viewers_analyzed", 0)

            if "overall_statistics" in json_data:
                stats = json_data["overall_statistics"]
                data["overall_stats"]["Total frames"] = stats.get("total_frames", 0)
                data["overall_stats"]["Frames with objects"] = stats.get("frames_with_objects", 0)
                data["overall_stats"]["Frames without objects"] = stats.get("frames_without_objects", 0)
                data["overall_stats"]["Frames with objects percentage"] = stats.get("frames_with_objects_percentage", 0)
                data["overall_stats"]["Frames without objects percentage"] = stats.get("frames_without_objects_percentage", 0)

            # Extract demographics
            if "viewer_demographics" in json_data:
                demographics = json_data["viewer_demographics"]

                # Gender distribution
                if "gender_distribution" in demographics:
                    for gender, percentage in demographics["gender_distribution"].items():
                        # Estimate count based on number of viewers
                        viewers = json_data.get("report_metadata", {}).get("num_viewers_analyzed", 1)
                        count = round((percentage / 100) * viewers) if viewers > 0 else 1
                        data["demographics"]["gender_distribution"][gender] = {
                            "count": count,
                            "percentage": percentage
                        }

                # Age groups
                if "age_groups" in demographics:
                    for age_group, percentage in demographics["age_groups"].items():
                        # Estimate count based on number of viewers
                        viewers = json_data.get("report_metadata", {}).get("num_viewers_analyzed", 1)
                        count = round((percentage / 100) * viewers) if viewers > 0 else 1
                        data["demographics"]["age_groups"][age_group] = {
                            "count": count,
                            "percentage": percentage
                        }

            # Extract object attention analysis
            if "object_attention_analysis" in json_data:
                objects_data = json_data["object_attention_analysis"]
                for obj_name, obj_info in objects_data.items():
                    data["objects"][obj_name] = {
                        "metrics": {},
                        "emotions": {}
                    }

                    # Extract object metrics
                    metrics = data["objects"][obj_name]["metrics"]

                    # For multi-viewer data
                    if "viewed_by_count" in obj_info:
                        metrics["Viewed by count"] = obj_info.get("viewed_by_count", 0)
                        metrics["Viewed by percentage"] = obj_info.get("viewed_by_percentage", 0)
                        metrics["Attention percentage"] = obj_info.get("avg_attention_percentage", 0)

                        if "attention_range" in obj_info:
                            metrics["Min attention"] = obj_info["attention_range"].get("min", 0)
                            metrics["Max attention"] = obj_info["attention_range"].get("max", 0)

                        metrics["Attention variance"] = obj_info.get("attention_variance", 0)

                        # Extract dominant emotion
                        if "dominant_emotion" in obj_info:
                            emotion_name = obj_info["dominant_emotion"].get("name", "unknown")
                            emotion_time = obj_info["dominant_emotion"].get("time", 0)

                            if emotion_name != "unknown":
                                data["objects"][obj_name]["emotions"][emotion_name] = {
                                    "time": emotion_time,
                                    "percentage": 100  # Single emotion, so 100%
                                }

                    # For single viewer data
                    else:
                        metrics["Total gaze time"] = obj_info.get("total_gaze_time", 0)
                        metrics["Direct gaze time"] = obj_info.get("direct_gaze_time", 0)
                        metrics["Proximity gaze time"] = obj_info.get("proximity_gaze_time", 0)
                        metrics["Attention percentage"] = obj_info.get("attention_percentage", 0)

                        # Extract emotion data for this object
                        if "emotions" in obj_info:
                            for emotion, time_or_percentage in obj_info["emotions"].items():
                                # Handle both cases: direct value or dict with time/percentage
                                if isinstance(time_or_percentage, dict):
                                    data["objects"][obj_name]["emotions"][emotion] = time_or_percentage
                                else:
                                    # Convert simple value to the expected format
                                    data["objects"][obj_name]["emotions"][emotion] = {
                                        "time": time_or_percentage,
                                        "percentage": 0  # Will calculate percentages later
                                    }

            # Calculate emotion percentages if not provided
            for obj_name, obj_data in data["objects"].items():
                emotions = obj_data["emotions"]
                total_emotion_time = sum(e.get("time", 0) for e in emotions.values())

                if total_emotion_time > 0:
                    for emotion, emotion_data in emotions.items():
                        if "percentage" not in emotion_data or emotion_data["percentage"] == 0:
                            emotion_data["percentage"] = (emotion_data.get("time", 0) / total_emotion_time) * 100

            # Extract emotion distribution
            if "emotion_distribution" in json_data:
                data["emotion_distribution"] = json_data["emotion_distribution"]

            # Extract gaze patterns (viewer data)
            if "gaze_patterns" in json_data:
                for i, viewer in enumerate(json_data["gaze_patterns"]):
                    viewer_id = viewer.get("viewer_id", f"Viewer {i+1}")
                    data["viewer_patterns"][viewer_id] = {
                        "demographics": viewer.get("demographics", {}),
                        "metrics": {
                            "Total view time": viewer.get("total_view_time", 0),
                            "Time on objects": viewer.get("time_on_objects", 0),
                            "Time off objects": viewer.get("time_off_objects", 0),
                            "On objects percentage": viewer.get("on_objects_percentage", 0),
                            "Off objects percentage": viewer.get("off_objects_percentage", 0),
                        },
                        "objects": {}
                    }

                    # Extract object attention for this viewer
                    if "object_attention" in viewer:
                        for obj_name, obj_data in viewer["object_attention"].items():
                            data["viewer_patterns"][viewer_id]["objects"][obj_name] = {
                                "percentage": obj_data.get("attention_percentage", 0),
                                "time": obj_data.get("total_gaze_time", 0)
                            }

            # Extract conclusions
            if "conclusions" in json_data:
                conclusion_data = json_data["conclusions"]

                # Extract most/least engaging objects
                if "most_engaging_object" in conclusion_data:
                    obj_data = conclusion_data["most_engaging_object"]
                    data["conclusion"]["most_engaging_object"] = (
                        f"{obj_data.get('name', 'Unknown')} "
                        f"({obj_data.get('attention_percentage', 0):.2f}%)"
                    )

                if "least_engaging_object" in conclusion_data:
                    obj_data = conclusion_data["least_engaging_object"]
                    data["conclusion"]["least_engaging_object"] = (
                        f"{obj_data.get('name', 'Unknown')} "
                        f"({obj_data.get('attention_percentage', 0):.2f}%)"
                    )

                # Extract dominant emotion
                if "dominant_emotion" in conclusion_data:
                    data["conclusion"]["dominant_emotion"] = conclusion_data["dominant_emotion"]

                # Extract recommendations
                if "recommendations" in conclusion_data:
                    for i, recommendation in enumerate(conclusion_data["recommendations"]):
                        data["conclusion"][f"recommendation_{i+1}"] = recommendation

                # Extract high variance objects
                if "high_variance_objects" in conclusion_data:
                    variance_objects = conclusion_data["high_variance_objects"]
                    if isinstance(variance_objects, list) and variance_objects:
                        data["conclusion"]["high_variance_objects"] = ", ".join(variance_objects)

            self.gaze_data = data
            return True

        except Exception as e:
            print(f"Error parsing JSON data: {str(e)}")
            import traceback
            traceback.print_exc()
            return False

    def _parse_text_report(self, file_content: str) -> bool:
        """
        Legacy method to parse the text report format.

        Args:
            file_content: Content of the gaze data file

        Returns:
            bool: True if successful
        """
        try:
            data = {
                "overall_stats": {},
                "demographics": {"gender_distribution": {}, "age_groups": {}},
                "objects": {},
                "emotion_distribution": {},
                "viewer_patterns": {},
                "conclusion": {}
            }

            current_section = None
            current_object = None
            current_viewer = None

            for line in file_content.split('\n'):
                line = line.strip()

                # Skip empty lines or separator lines
                if not line or line.startswith('====') or line.startswith('----'):
                    continue

                # Identify the current section
                if "GAZE AND OBJECT DETECTION ANALYSIS REPORT" in line:
                    continue
                elif "OVERALL VIDEO STATISTICS" in line:
                    current_section = "overall_stats"
                    current_object = None
                    current_viewer = None
                    continue
                elif "VIEWER DEMOGRAPHICS" in line:
                    current_section = "demographics"
                    current_object = None
                    current_viewer = None
                    continue
                elif "OBJECT ATTENTION ANALYSIS" in line:
                    current_section = "objects"
                    current_object = None
                    current_viewer = None
                    continue
                elif "OVERALL EMOTION DISTRIBUTION" in line:
                    current_section = "emotion_distribution"
                    current_object = None
                    current_viewer = None
                    continue
                elif "DETAILED GAZE PATTERN ANALYSIS" in line:
                    current_section = "viewer_patterns"
                    current_object = None
                    current_viewer = None
                    continue
                elif "CONCLUSION AND RECOMMENDATIONS" in line:
                    current_section = "conclusion"
                    current_object = None
                    current_viewer = None
                    continue
                elif "END OF REPORT" in line:
                    break

                # Process content based on current section
                if current_section == "overall_stats":
                    if ":" in line:
                        key, value = line.split(":", 1)
                        data["overall_stats"][key.strip()] = value.strip()

                elif current_section == "demographics":
                    if "Gender Distribution:" in line:
                        continue  # Skip header line
                    elif "Age Group Distribution:" in line:
                        continue  # Skip header line
                    elif line.startswith("  - ") and ":" in line:
                        parts = line[4:].split(":", 1)
                        if len(parts) == 2:
                            key, value_str = parts[0].strip(), parts[1].strip()
                            # Extract the count and percentage
                            match = re.search(r"(\d+)\s+\((.+?)%\)", value_str)
                            if match:
                                count = int(match.group(1))
                                percentage = float(match.group(2))

                                # Determine if this is gender or age group
                                if "Age Group Distribution" not in file_content.split('\n')[-10:]:
                                    # We're still in the gender section
                                    data["demographics"]["gender_distribution"][key] = {
                                        "count": count,
                                        "percentage": percentage
                                    }
                                else:
                                    # We've moved to age groups
                                    data["demographics"]["age_groups"][key] = {
                                        "count": count,
                                        "percentage": percentage
                                    }

                elif current_section == "objects":
                    if line.startswith("Object:"):
                        current_object = line.split(":", 1)[1].strip()
                        data["objects"][current_object] = {
                            "metrics": {},
                            "emotions": {}
                        }
                    elif current_object and line.startswith("  - "):
                        # This is either a metric line or emotions header
                        content = line[4:].strip()

                        if "Emotions during gaze:" in content:
                            continue  # Skip the emotions header
                        elif ":" in content:
                            key, value_str = content.split(":", 1)
                            key = key.strip()
                            value_str = value_str.strip()

                            # Try to extract numeric value
                            try:
                                if "%" in value_str:
                                    value = float(value_str.replace("%", "").strip())
                                else:
                                    value = float(re.search(r"([\d.]+)", value_str).group(1))
                            except (ValueError, AttributeError):
                                value = value_str

                            data["objects"][current_object]["metrics"][key] = value

                    elif current_object and line.startswith("    - ") and ":" in line:
                        # This is an emotion line under current object
                        content = line[6:].strip()
                        emotion, time_info = content.split(":", 1)
                        emotion = emotion.strip()
                        time_info = time_info.strip()

                        # Extract time and percentage
                        time_match = re.search(r"([\d.]+)\s+seconds", time_info)
                        time_value = float(time_match.group(1)) if time_match else 0.0

                        perc_match = re.search(r"\(([\d.]+)%\)", time_info)
                        percentage = float(perc_match.group(1)) if perc_match else 0.0

                        data["objects"][current_object]["emotions"][emotion] = {
                            "time": time_value,
                            "percentage": percentage
                        }

                elif current_section == "emotion_distribution":
                    if line.startswith("  - ") and ":" in line:
                        parts = line[4:].split(":", 1)
                        emotion = parts[0].strip()
                        percentage_match = re.search(r"([\d.]+)%", parts[1])
                        if percentage_match:
                            percentage = float(percentage_match.group(1))
                            data["emotion_distribution"][emotion] = percentage

                elif current_section == "viewer_patterns":
                    if re.match(r"^Viewer\s+\d+:$", line):
                        current_viewer = line.rstrip(":")
                        data["viewer_patterns"][current_viewer] = {
                            "demographics": {},
                            "metrics": {},
                            "objects": {}
                        }
                    elif current_viewer and line.startswith("  - "):
                        content = line[4:].strip()

                        if "Object attention breakdown:" in content:
                            continue  # Skip header
                        elif ":" in content:
                            key, value = content.split(":", 1)
                            key = key.strip()
                            value = value.strip()

                            # Determine if this is demographics or metrics
                            if key.lower() in ["gender", "age", "age group"]:
                                data["viewer_patterns"][current_viewer]["demographics"][key.lower()] = value
                            else:
                                # Try to extract numeric value
                                try:
                                    if "%" in value:
                                        match = re.search(r"\(([\d.]+)%\)", value)
                                        if match:
                                            value = float(match.group(1))
                                    else:
                                        match = re.search(r"([\d.]+)\s+seconds", value)
                                        if match:
                                            value = float(match.group(1))
                                except:
                                    pass  # Keep value as string if conversion fails

                                data["viewer_patterns"][current_viewer]["metrics"][key] = value

                    elif current_viewer and line.startswith("    - ") and ":" in line:
                        # This is object breakdown for the viewer
                        content = line[6:].strip()
                        obj_parts = content.split(":", 1)
                        obj_name = obj_parts[0].strip()
                        obj_stats = obj_parts[1].strip()

                        # Extract percentage and time
                        perc_match = re.search(r"([\d.]+)%", obj_stats)
                        time_match = re.search(r"\(([\d.]+)\s+seconds\)", obj_stats)

                        percentage = float(perc_match.group(1)) if perc_match else 0.0
                        time_val = float(time_match.group(1)) if time_match else 0.0

                        data["viewer_patterns"][current_viewer]["objects"][obj_name] = {
                            "percentage": percentage,
                            "time": time_val
                        }

                elif current_section == "conclusion":
                    if "Recommendations:" in line:
                        continue  # Skip header
                    elif line.startswith(str(1)) or line.startswith(str(2)) or line.startswith(str(3)) or line.startswith(str(4)):
                        # This is a recommendation
                        rec_num = re.search(r"^(\d+)\.", line)
                        if rec_num:
                            num = rec_num.group(1)
                            text = line[len(num)+1:].strip()
                            data["conclusion"][f"recommendation_{num}"] = text
                    elif ":" in line:
                        # This is a key-value pair in the conclusion
                        parts = line.split(":", 1)
                        key = parts[0].strip().lower().replace(" ", "_")
                        value = parts[1].strip()
                        data["conclusion"][key] = value

            self.gaze_data = data
            return True

        except Exception as e:
            print(f"Error loading gaze data: {str(e)}")
            return False

    def set_ad_context(self, company: str, target_audience: str, ad_goals: str) -> None:
        """
        Set context information about the advertisement.

        Args:
            company: Company or brand name
            target_audience: Target audience information
            ad_goals: Ad campaign goals
        """
        self.ad_context = {
            "company": company,
            "target_audience": target_audience,
            "ad_goals": ad_goals if ad_goals else "Not specified"
        }

        self.has_context = bool(company and target_audience)

    def format_gaze_data_for_prompt(self) -> str:
        """
        Format the loaded gaze data into a concise, summarized format for the LLM prompt.
        Creates a focused summary instead of including all raw data.

        Returns:
            str: Summarized gaze data for optimal LLM processing
        """
        if not self.gaze_data:
            return "No gaze data available."

        lines = ["GAZE ANALYSIS SUMMARY:"]

        # SECTION 1: Quick overview stats
        video_id = self.gaze_data.get("overall_stats", {}).get("Video ID", "Unknown")
        num_viewers = self.gaze_data.get("overall_stats", {}).get("Number of viewers analyzed", "Unknown")

        lines.append(f"\nVIDEO SUMMARY:")
        lines.append(f"  - Video ID: {video_id}")
        lines.append(f"  - Number of viewers: {num_viewers}")

        # SECTION 2: Top objects by attention (only include top 3-5)
        if "objects" in self.gaze_data and self.gaze_data["objects"]:
            try:
                # Get objects sorted by attention
                sorted_objects = sorted(
                    self.gaze_data["objects"].items(),
                    key=lambda x: x[1].get("metrics", {}).get("Attention percentage", 0)
                        if isinstance(x[1], dict) and isinstance(x[1].get("metrics"), dict) else 0,
                    reverse=True
                )

                # Include only top objects (max 5)
                top_objects = sorted_objects[:min(5, len(sorted_objects))]

                lines.append("\nTOP OBJECTS BY ATTENTION:")
                for i, (obj_name, obj_data) in enumerate(top_objects, 1):
                    if not isinstance(obj_data, dict) or not isinstance(obj_data.get("metrics"), dict):
                        continue

                    attention = obj_data.get("metrics", {}).get("Attention percentage", 0)
                    lines.append(f"  {i}. {obj_name}: {attention:.2f}% attention")

                    # Add dominant emotion for this object if available
                    emotions = obj_data.get("emotions", {})
                    if emotions and isinstance(emotions, dict):
                        top_emotion = max(emotions.items(), key=lambda x: x[1].get("percentage", 0) if isinstance(x[1], dict) else 0, default=(None, None))
                        if top_emotion[0]:
                            lines.append(f"     Dominant emotion: {top_emotion[0]}")
            except Exception as e:
                print(f"DEBUG: Error creating top objects summary: {str(e)}")
                lines.append("  Error summarizing top objects")

        # SECTION 3: Overall emotion distribution - only top 3 emotions
        if "emotion_distribution" in self.gaze_data and isinstance(self.gaze_data["emotion_distribution"], dict):
            emotion_dist = self.gaze_data["emotion_distribution"]
            if emotion_dist:
                try:
                    # Sort emotions by percentage and take top 3
                    sorted_emotions = sorted(
                        emotion_dist.items(),
                        key=lambda x: x[1] if isinstance(x[1], (int, float)) else 0,
                        reverse=True
                    )[:min(3, len(emotion_dist))]

                    lines.append("\nDOMINANT EMOTIONS:")
                    for emotion, percentage in sorted_emotions:
                        if isinstance(percentage, (int, float)):
                            lines.append(f"  - {emotion}: {percentage:.2f}%")
                except Exception as e:
                    print(f"DEBUG: Error creating emotion summary: {str(e)}")
                    lines.append("  Error summarizing emotions")

        # SECTION 4: Gender and age demographics - simplified
        if "demographics" in self.gaze_data and self.gaze_data["demographics"]:
            demographics = self.gaze_data["demographics"]

            # Gender summary - just percentages
            if "gender_distribution" in demographics and demographics["gender_distribution"]:
                lines.append("\nAUDIENCE GENDER:")
                for gender, data in demographics["gender_distribution"].items():
                    if isinstance(data, dict):
                        lines.append(f"  - {gender}: {data.get('percentage', 0):.0f}%")

            # Age summary - just percentages for top 2 age groups
            if "age_groups" in demographics and demographics["age_groups"]:
                try:
                    sorted_ages = sorted(
                        demographics["age_groups"].items(),
                        key=lambda x: x[1].get("percentage", 0) if isinstance(x[1], dict) else 0,
                        reverse=True
                    )[:min(2, len(demographics["age_groups"]))]

                    lines.append("\nPRIMARY AGE GROUPS:")
                    for age_group, data in sorted_ages:
                        if isinstance(data, dict):
                            lines.append(f"  - {age_group}: {data.get('percentage', 0):.0f}%")
                except Exception as e:
                    print(f"DEBUG: Error creating age summary: {str(e)}")

        # SECTION 5: Key conclusions (most engaging, least engaging, dominant emotion)
        if "conclusion" in self.gaze_data and isinstance(self.gaze_data["conclusion"], dict):
            conclusions = self.gaze_data["conclusion"]

            lines.append("\nKEY INSIGHTS:")

            # Most engaging object
            if "most_engaging_object" in conclusions:
                lines.append(f"  - Most engaging: {conclusions['most_engaging_object']}")

            # Least engaging object
            if "least_engaging_object" in conclusions:
                lines.append(f"  - Least engaging: {conclusions['least_engaging_object']}")

            # Dominant emotion
            if "dominant_emotion" in conclusions:
                lines.append(f"  - Dominant emotion: {conclusions['dominant_emotion']}")

            # High variance objects (just names)
            if "high_variance_objects" in conclusions:
                high_var = conclusions["high_variance_objects"]
                if high_var and isinstance(high_var, str):
                    lines.append(f"  - High variance objects: {high_var}")

        # SECTION 6: Top 3 recommendations
        recommendation_items = [(k, v) for k, v in self.gaze_data.get("conclusion", {}).items()
                             if k.startswith("recommendation_") and isinstance(v, str)]

        if recommendation_items:
            lines.append("\nTOP RECOMMENDATIONS:")

            # Sort by recommendation number and limit to 3
            try:
                sorted_recs = sorted(
                    recommendation_items,
                    key=lambda x: int(x[0].split("_")[1]) if len(x[0].split("_")) > 1 and x[0].split("_")[1].isdigit() else 0
                )[:min(3, len(recommendation_items))]

                for i, (_, recommendation) in enumerate(sorted_recs, 1):
                    # Truncate long recommendations
                    if len(recommendation) > 150:
                        recommendation = recommendation[:147] + "..."
                    lines.append(f"  {i}. {recommendation}")
            except Exception as e:
                print(f"DEBUG: Error creating recommendations summary: {str(e)}")
                # Just show first 3 unsorted as fallback
                for i, (_, recommendation) in enumerate(recommendation_items[:3], 1):
                    if len(recommendation) > 150:
                        recommendation = recommendation[:147] + "..."
                    lines.append(f"  {i}. {recommendation}")

        return "\n".join(lines)

    def generate_system_prompt(self, query: str = "") -> str:
        """
        Generate the system prompt based on the current context and data.

        Args:
            query: Optional specific query from the user

        Returns:
            str: Formatted system prompt
        """
        data_str = self.format_gaze_data_for_prompt()

        # Debug output of gaze data
        print(f"DEBUG generate_system_prompt: Data size: {len(data_str)}")
        print(f"DEBUG generate_system_prompt: Data available: {'Yes' if self.gaze_data else 'No'}")
        print(f"DEBUG generate_system_prompt: Has context: {self.has_context}")

        if self.gaze_data:
            # Show summary of available data
            print(f"DEBUG generate_system_prompt: Object count: {len(self.gaze_data.get('objects', {}))}")
            print(f"DEBUG generate_system_prompt: Overall stats keys: {list(self.gaze_data.get('overall_stats', {}).keys())}")

        system_prompt = self.system_prompt_template.format(
            company=self.ad_context.get("company", "Not specified"),
            target_audience=self.ad_context.get("target_audience", "Not specified"),
            ad_goals=self.ad_context.get("ad_goals", "Not specified"),
            data=data_str
        )

        if query:
            system_prompt += f"\n\nUSER QUERY: {query}\n"

            # Add specific instructions based on query type
            if "what if" in query.lower() or "would happen if" in query.lower() or "what about" in query.lower():
                system_prompt += """
                This is a hypothetical scenario question. Consider how the changes would
                likely impact user attention, engagement, and ad effectiveness based on
                advertising principles and the existing data. Keep in mind that the coversation is in a
                chat format, so the response should be conversational and engaging and concise.
                """
            elif any(keyword in query.lower() for keyword in ["tell me more about", "more information on", "details on"]):
                system_prompt += """
                Provide detailed analysis on the specific object mentioned, including its
                importance in the overall ad context, engagement metrics, and potential optimizations.
                Keep in mind that the coversation is in a chat format, so the response should be conversational and engaging and concise.
                """

        print(f"DEBUG generate_system_prompt: Final prompt length: {len(system_prompt)}")
        return system_prompt

    def query_llm(self, prompt: str, system_prompt: str):
        """
        Send a query to the Ollama LLM and get the response.

        Args:
            prompt: The user prompt to send
            system_prompt: The system instructions

        Yields:
            str: Response chunks
        """
        try:
            import ollama

            # Use a tracking mechanism to prevent duplicate content at boundaries
            last_token = ""
            accumulated_text = ""

            for chunk in ollama.chat(
                model=self.model_name,
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": prompt}
                ],
                stream=True
            ):
                content = chunk.get('message', {}).get('content', '')
                if content:
                    # Only yield the content without processing to avoid introducing artifacts
                    # The FastAPI endpoint will handle the proper stream processing
                    yield content

        except Exception as e:
            error_msg = f"Error querying the LLM: {str(e)}"
            yield error_msg

    def get_objects_for_visualization(self) -> pd.DataFrame:
        """
        Convert gaze data to a pandas DataFrame.

        Returns:
            pd.DataFrame: DataFrame with gaze metrics
        """
        if not self.gaze_data:
            return pd.DataFrame()

        data = []
        for obj_name, obj_data in self.gaze_data.items():
            row = {"Object": obj_name, "Class ID": obj_data["class_id"]}
            row.update(obj_data["metrics"])
            data.append(row)

        return pd.DataFrame(data)

def get_available_models():
    """Get available Ollama models"""
    try:
        import ollama
        models_list = ollama.list()
        return [model.model for model in models_list.models]
    except Exception as e:
        # Default fallback models in case of error
        print(f"Error getting models: {e}")
        return ["gemma2:2b", "llama3:8b", "mistral:7b"]