VisualIndexer/main.py at main · IlyasFardaouix/VisualIndexer · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
```python
"""
Photothèque Intelligente - Pipeline principal
Traitement complet des images: ingestion, OCR, tags, embeddings
"""

import os
import sys
import argparse
from datetime import datetime
import traceback
import subprocess

from scripts.ingest import ImageIngestor
from scripts.extract_metadata import MetadataExtractor
from scripts.tag_clip import ClipTagger
from scripts.ocr import OcrProcessor
from scripts.embeddings import EmbeddingManager
from config.settings import IMAGE_DIR, PROCESSED_IMAGE_DIR, EMBEDDING_PATH

def print_banner():
    """Print the application banner, surrounded by blank lines."""
    banner = "\n=== PHOTOTHÈQUE INTELLIGENTE ===\n"
    print(banner)

def print_section(title):
    """Print *title* as a section heading, surrounded by blank lines."""
    heading = f"\n--- {title} ---\n"
    print(heading)

def run_pipeline():
    """
    Run the complete image-processing pipeline.

    Stages, in order:
    1. Ingest the images found in IMAGE_DIR
    2. Extract and save metadata for PROCESSED_IMAGE_DIR
    3. Optical character recognition (OCR)
    4. Automatic tagging
    5. Embedding generation

    Stages 3 to 5 are announced one after another and then carried out
    together in a single pass over the processed images. A failure on one
    image is reported (message truncated to 50 characters) and does not stop
    the run.

    :return: True when the pipeline ran to the end, False when IMAGE_DIR is
        missing or empty, or when an exception escaped one of the stages
    """
    print_banner()

    # Nothing to do unless the source directory exists and is non-empty.
    if not (os.path.exists(IMAGE_DIR) and os.listdir(IMAGE_DIR)):
        print(f"Pas d'images dans: {IMAGE_DIR}")
        return False

    try:
        # STAGE 1: ingestion
        print_section("📥 ÉTAPE 1: Ingestion des images")
        ImageIngestor.ingest_images(IMAGE_DIR)
        ingest_stats = ImageIngestor.get_statistics()
        print("\n✅ Ingestion terminée:")
        print(f"   • Images traitées: {ingest_stats['total_processed']}")
        print(f"   • Doublons trouvés: {ingest_stats['duplicates_found']}")

        # STAGE 2: metadata
        print_section("🔍 ÉTAPE 2: Extraction des métadonnées")
        MetadataExtractor.save_metadata(PROCESSED_IMAGE_DIR)

        # STAGES 3-5 are only announced here; the work itself happens per
        # image in the loop below.
        print_section("📄 ÉTAPE 3: Reconnaissance Optique de Caractères (OCR)")
        print("Extraction du texte des images...\n")

        print_section("🏷️  ÉTAPE 4: Tagging automatique")
        print("Génération des tags...\n")

        print_section("🧠 ÉTAPE 5: Génération des vecteurs")
        print("Création des représentations vectorielles...\n")

        vectors = {}
        image_suffixes = ('.jpg', '.jpeg', '.png', '.webp')
        pending = [
            entry for entry in os.listdir(PROCESSED_IMAGE_DIR)
            if entry.lower().endswith(image_suffixes)
        ]
        total = len(pending)

        for position, name in enumerate(pending, start=1):
            print(f"  [{position}/{total}] Traitement: {name}")

            full_path = os.path.join(PROCESSED_IMAGE_DIR, name)

            try:
                # OCR text, then CLIP tags, merged into one caption that is
                # handed to the embedding generator.
                ocr_text = OcrProcessor.run_ocr(full_path)
                clip_tags = ClipTagger.get_clip_tags(full_path, top_k=5)
                caption = f"{ocr_text} " + ' '.join(clip_tags)

                vector = EmbeddingManager.generate_embedding(caption)
                if not vector:
                    print("      ⚠️  Embedding échoué")
                    continue

                vectors[name] = vector
                print("      ✅ Traité avec succès")

            except Exception as err:
                # Best effort: report the failure and move on to the next image.
                print(f"      ❌ Erreur: {str(err)[:50]}")

        # Persist the embeddings and the OCR results.
        EmbeddingManager.store_embeddings(vectors)
        OcrProcessor.save_ocr_results()

        # FINAL SUMMARY
        print_section("✅ RÉSUMÉ FINAL")
        print("✓ Pipeline complété avec succès!")
        print("\n📊 Statistiques:")
        print(f"   • Images ingérées: {total}")
        print(f"   • Embeddings générés: {len(vectors)}")
        print(f"   • OCR résultats: {len(OcrProcessor.ocr_cache)}")
        print("\n💾 Fichiers de sortie:")
        for static_line in (
            "   • Métadonnées: data/metadata.csv",
            "   • Embeddings: data/embeddings.json",
            "   • OCR: data/ocr_results.json",
            "\n🚀 Prochaine étape:",
            "   Lancez l'interface: streamlit run ui/interface.py",
        ):
            print(static_line)
        finished_at = datetime.now().strftime('%d/%m/%Y %H:%M:%S')
        print(f"\n⏰ Fin: {finished_at}\n")

        return True

    except Exception as fatal:
        # Top-level boundary: report the error with its traceback and signal
        # failure to the caller.
        print(f"\n❌ Erreur fatale: {fatal}")
        traceback.print_exc()
        return False

def run_ui():
    """
    Launch the Streamlit user interface.

    Prints the banner, checks that the ``streamlit`` package can be located by
    the current interpreter, then runs ``ui/interface.py`` (a path relative to
    the current working directory) through ``<python> -m streamlit run`` in a
    child process and waits for that process to exit.

    If Streamlit is not installed, a message is printed and nothing is
    launched. Any other exception raised while locating the package or
    starting the process is printed instead of being propagated.
    """
    # Local import: keeps this function self-contained without touching the
    # module-level import block.
    import importlib.util

    print_banner()
    print("Démarrage de l'interface...\n")

    try:
        # subprocess.run() does not raise ImportError when the child
        # interpreter fails to import a module, so a missing Streamlit has to
        # be detected before the process is started.
        if importlib.util.find_spec('streamlit') is None:
            print("Streamlit n'est pas installé")
            return

        subprocess.run([
            sys.executable, '-m', 'streamlit', 'run',
            'ui/interface.py',
            '--logger.level=warning'
        ])
    except ImportError:
        # Raised by the import machinery itself during the lookup above.
        print("Streamlit n'est pas installé")
    except Exception as e:
        print(f"Erreur: {e}")

def run_ingest_only():
    """
    Run the ingestion stage alone.

    Prints the banner and a section heading, then ingests the images from
    IMAGE_DIR without running any other pipeline stage.
    """
    print_banner()
    print_section("INGESTION UNIQUEMENT")

    source_dir = IMAGE_DIR
    ImageIngestor.ingest_images(source_dir)

def main():
    """
    Command-line entry point.

    Parses ``--mode`` (``pipeline``, ``ui`` or ``ingest``; default
    ``pipeline``) and runs the matching action. In ``pipeline`` mode the
    process exits with status 0 when the pipeline succeeds and 1 otherwise.
    """
    cli = argparse.ArgumentParser(description="Photothèque Intelligente")
    cli.add_argument(
        '--mode',
        choices=['pipeline', 'ui', 'ingest'],
        default='pipeline',
        help="Mode d'exécution",
    )
    mode = cli.parse_args().mode

    if mode == 'ui':
        run_ui()
        return

    if mode == 'ingest':
        run_ingest_only()
        return

    if mode == 'pipeline':
        # Only this mode reports its outcome through the exit status.
        exit_code = 0 if run_pipeline() else 1
        sys.exit(exit_code)

# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
```