From 4baabf289f95beddd584ff26e7acd4e548c3fbed Mon Sep 17 00:00:00 2001 From: TAP Date: Wed, 17 Dec 2025 22:42:41 +0530 Subject: [PATCH 1/7] feedback payload fix --- .env.prod | 119 ++++ api/api.py | 11 + app.py | 4 + database/db_manager.py | 11 + mq/rmq_client.py | 41 +- plag_checker/submissions_checker.py | 7 + requirements.txt | 6 +- start-dev-env.README.md | 698 ++++++++++---------- start-dev-env.sh | 944 ++++++++++++++-------------- 9 files changed, 1009 insertions(+), 832 deletions(-) create mode 100644 .env.prod mode change 100644 => 100755 start-dev-env.sh diff --git a/.env.prod b/.env.prod new file mode 100644 index 0000000..101b48d --- /dev/null +++ b/.env.prod @@ -0,0 +1,119 @@ +# RABBITMQ CONFIGURATION +RABBITMQ_HOST=armadillo.rmq.cloudamqp.com +RABBITMQ_PORT=5672 +RABBITMQ_USER=fzdqidte +RABBITMQ_PASS=0SMrDogBVcWUcu9brWwp2QhET_kArl59 +RABBITMQ_VHOST=fzdqidte +RABBITMQ_MANAGEMENT_PORT=15672 +RABBITMQ_PREFETCH_COUNT=1 + +# Message retry configuration +# Maximum number of retries before sending to DLQ (prevents poison messages) +MAX_RETRIES=3 + +# Queue Names +SUBMISSION_QUEUE=plagiarism_submissions +FEEDBACK_QUEUE=plagiarism_feedback +# Dead Letter Queue (optional - leave empty to disable) +DEAD_LETTER_QUEUE=plagiarism_failed_submissions + +# POSTGRESQL CONFIGURATION +POSTGRES_HOST=db.example.com +POSTGRES_PORT=5432 +POSTGRES_DB=plagiarism_db +POSTGRES_USER=postgres +POSTGRES_PASSWORD=postgres + +# PGADMIN CONFIGURATION (Optional - for development only) +PGADMIN_EMAIL=admin@admin.com +PGADMIN_PASSWORD=admin123 + +# Connection Pool +POSTGRES_POOL_SIZE=10 +POSTGRES_MAX_OVERFLOW=20 + +# PLAGIARISM DETECTION THRESHOLDS +EXACT_DUPLICATE_THRESHOLD=0.95 +NEAR_DUPLICATE_THRESHOLD=0.90 +SEMANTIC_MATCH_THRESHOLD=0.80 + +# ==== PRODUCTION: Uncomment below for 7-day window ==== +RESUBMISSION_WINDOW_DAYS=14 + +# Hash comparison threshold (Hamming distance) +HASH_MATCH_THRESHOLD=10 + +# IMAGE PROCESSING +# Maximum image size in MB +MAX_IMAGE_SIZE_MB=10 + +# Image 
download timeout in seconds +IMAGE_DOWNLOAD_TIMEOUT=30 + +# Image validation thresholds +# Min variance to detect blank images (lower = more strict) +IMAGE_MIN_VARIANCE=5.0 +# Min unique colors required +IMAGE_MIN_UNIQUE_COLORS=10 +# Max ratio of dominant color (higher = more permissive) +IMAGE_MAX_SOLID_COLOR_RATIO=0.95 + +# CLIP Model Configuration +CLIP_MODEL=ViT-L/14 +CLIP_DEVICE=cpu +CLIP_PRETRAINED=laion2B-s32B-b82K + +# Local Model Path (Optional - use pre-downloaded models) +# If set, the system will load the model from this path instead of downloading from HuggingFace +# Example: CLIP_LOCAL_MODEL_PATH=./models/clip/open_clip_pytorch_model.bin +# Download models from: https://huggingface.co/laion/CLIP-ViT-L-14-laion2B-s32B-b82K +CLIP_LOCAL_MODEL_PATH=/app/models/clip/open_clip_pytorch_model.bin + +# Disable SSL verification for HuggingFace downloads (for corporate proxy/self-signed certs) +# Set to "true" only if you encounter SSL certificate errors +DISABLE_SSL_VERIFY=true +PYTHONHTTPSVERIFY=0 + +# VECTOR SEARCH CONFIGURATION +# Use pgvector (PostgreSQL) or FAISS for vector similarity search +USE_PGVECTOR=true + +# FAISS Configuration +FAISS_INDEX_PATH=/app/data/faiss_index.bin +FAISS_METADATA_PATH=/app/data/faiss_metadata.json +FAISS_DIMENSION=768 +FAISS_TOP_K=4 # Number of top candidates to retrieve from FAISS search + +# STORAGE PATHS +# Reference images directory +REFERENCE_IMAGES_DIR=./data/reference_images + +# Temporary storage for downloaded submissions +TEMP_IMAGES_DIR=./data/temp_images + +# Logs directory +#LOGS_DIR=./logs + +# APPLICATION SETTINGS +LOG_LEVEL=INFO + +# Worker concurrency (number of threads) +#WORKER_THREADS=4 + +# Enable performance metrics(ignore) +#ENABLE_METRICS=true + +# DEVELOPMENT SETTINGS +# Set to "development" or "production" +#ENVIRONMENT=development + +#DEBUG=true + + + +# Mock Glific API (for testing without WhatsApp) +#Used to skip WhatsApp delivery in testing mode +MOCK_GLIFIC=true +# ==== TESTING: 2-minute 
resubmission window (comment out for production) ==== +RESUBMISSION_WINDOW_MINUTES=2 + diff --git a/api/api.py b/api/api.py index 0d42d81..e8bc7ff 100644 --- a/api/api.py +++ b/api/api.py @@ -11,6 +11,7 @@ import aio_pika import json import uuid +from dotenv import load_dotenv from dotenv import load_dotenv import os @@ -26,6 +27,8 @@ ) logger = logging.getLogger(__name__) +load_dotenv() + # RabbitMQ Configuration RABBITMQ_HOST = os.getenv("RABBITMQ_HOST", "localhost") RABBITMQ_PORT = os.getenv("RABBITMQ_PORT", "5672") @@ -33,6 +36,14 @@ RABBITMQ_USER = os.getenv("RABBITMQ_USER", "admin") RABBITMQ_PASS = os.getenv("RABBITMQ_PASS", "admin123") +#PRINT THE RABBITMQ CONFIG FOR DEBUGGING +# logger.info("###################") +# logger.info(f"RABBITMQ_HOST={RABBITMQ_HOST}") +# logger.info(f"RABBITMQ_PORT={RABBITMQ_PORT}") +# logger.info(f"RABBITMQ_VHOST={RABBITMQ_VHOST}") +# logger.info(f"RABBITMQ_USER={RABBITMQ_USER}") +# logger.info("###################") + SUBMISSION_QUEUE = os.getenv("SUBMISSION_QUEUE", "plagiarism_submissions") FEEDBACK_QUEUE = os.getenv("FEEDBACK_QUEUE", "plagiarism_feedback") diff --git a/app.py b/app.py index b54e76d..344e037 100644 --- a/app.py +++ b/app.py @@ -30,6 +30,10 @@ def validate_configuration(): ] missing = [var for var in required_env_vars if not os.getenv(var)] + #print the required env vars and their values for debugging + # for var in required_env_vars: + # logger.info("###################") + # logger.info(f"{var}={os.getenv(var)}") if missing: logger.error(f"Missing required environment variables: {missing}") raise ValueError(f"Missing required environment variables: {missing}") diff --git a/database/db_manager.py b/database/db_manager.py index 9263f3d..6db7936 100644 --- a/database/db_manager.py +++ b/database/db_manager.py @@ -67,6 +67,17 @@ async def init_pool(self): db_name = os.getenv("POSTGRES_DB") or os.getenv("DB_NAME") db_host = os.getenv("POSTGRES_HOST") or os.getenv("DB_HOST", "localhost") db_port = 
int(os.getenv("POSTGRES_PORT") or os.getenv("DB_PORT", "5432")) + # db_port = 5435 # TEMP OVERRIDE FOR TESTING + + #print the db connection details for debugging + # logger.info("###################") + # logger.info(f"DB Host: {db_host}") + # logger.info(f"DB Port: {db_port}") + # logger.info(f"DB Name: {db_name}") + # logger.info(f"DB User: {db_user}") + # logger.info(f"DB db_password: {db_password}") + # logger.info("###################") + if not all([db_user, db_password, db_name]): raise ValueError("Missing required database environment variables") diff --git a/mq/rmq_client.py b/mq/rmq_client.py index f99877f..6571c0b 100644 --- a/mq/rmq_client.py +++ b/mq/rmq_client.py @@ -103,17 +103,38 @@ async def connect(self): ) logger.info(f"Dead Letter Queue declared: {self.DEAD_LETTER_QUEUE}") - # Declare main submission queue - self.submission_queue = await self.channel.declare_queue( - self.SUBMISSION_QUEUE, durable=True - ) - logger.info(f"Submission queue declared: {self.SUBMISSION_QUEUE}") + try: + # First try passive declaration to check if queue exists + self.submission_queue = await self.channel.declare_queue( + self.SUBMISSION_QUEUE, + durable=True, + passive=True # Only check, don't create + ) + logger.info(f"Submission queue already exists: {self.SUBMISSION_QUEUE}") + except Exception: + # Queue doesn't exist, create it + self.submission_queue = await self.channel.declare_queue( + self.SUBMISSION_QUEUE, + durable=True + ) + logger.info(f"Submission queue created: {self.SUBMISSION_QUEUE}") + + try: + # First try passive declaration to check if queue exists + self.feedback_queue = await self.channel.declare_queue( + self.FEEDBACK_QUEUE, + durable=True, + passive=True # Only check, don't create + ) + logger.info(f"Feedback queue already exists: {self.FEEDBACK_QUEUE}") + except Exception: + # Queue doesn't exist, create it + self.feedback_queue = await self.channel.declare_queue( + self.FEEDBACK_QUEUE, + durable=True + ) + logger.info(f"Feedback queue 
created: {self.FEEDBACK_QUEUE}") - # Declare feedback queue for publishing results - self.feedback_queue = await self.channel.declare_queue( - self.FEEDBACK_QUEUE, durable=True - ) - logger.info(f"Feedback queue declared: {self.FEEDBACK_QUEUE}") logger.info( f"Connected to RabbitMQ with prefetch_count={self.PREFETCH_COUNT}, all queues declared" diff --git a/plag_checker/submissions_checker.py b/plag_checker/submissions_checker.py index 0787003..4fb46a8 100644 --- a/plag_checker/submissions_checker.py +++ b/plag_checker/submissions_checker.py @@ -119,6 +119,8 @@ async def initialize(self): if self.image_worker is None: raise RuntimeError("ImageWorker failed to initialize") + logger.info("Submission Checker initialized successfully") + await self.start_consumer() async def process_submission(self, submission): @@ -282,6 +284,11 @@ async def process_submission(self, submission): data["similarity_score"] = result_text.get("similarity_score") data["is_plagiarized"] = result_text.get("is_plagiarized") data["match_type"] = result_text.get("match_type") + data["assignment_id"] = data.pop("assign_id") + data["is_ai_generated"] = result_text.get("is_ai_generated", False) + data["ai_detection_source"] = result_text.get("ai_detection_source", "") + data["ai_confidence"] = result_text.get("ai_confidence", 0.0) + data["plagiarism_source"] = result_text.get("plagiarism_source", "") publish_data = {k: v for k, v in data.items() if k != "db_record_id"} diff --git a/requirements.txt b/requirements.txt index c2db3ae..37d4505 100644 --- a/requirements.txt +++ b/requirements.txt @@ -28,4 +28,8 @@ numpy==2.3.4 # Utilities tqdm==4.67.1 -podman-compose==1.5.0 \ No newline at end of file +podman-compose==1.5.0 + +# Web Framework - FastAPI +fastapi==0.115.0 +uvicorn[standard]==0.34.0 \ No newline at end of file diff --git a/start-dev-env.README.md b/start-dev-env.README.md index c49ad6d..a13e591 100644 --- a/start-dev-env.README.md +++ b/start-dev-env.README.md @@ -1,349 +1,349 @@ -# Local 
Development Setup Scripts - Quick Guide - -## Overview -Automated scripts to set up local development environment for MentorMe Plagiarism Detection System. - ---- - -## Files -- `start-dev-env.sh` - Bash script (Linux/macOS/Git Bash) -- `start-dev-env.ps1` - PowerShell script (Windows) - ---- - -## Quick Start - -### **Option 1: Complete Setup (Recommended)** -Everything in one command - infrastructure + Python environment + dependencies: - -```bash -# Linux/macOS/Git Bash -chmod +x start-dev-env.sh -./start-dev-env.sh --full-setup - -# Windows PowerShell -.\start-dev-env.ps1 --full-setup -``` - -**What it does:** -- Creates PostgreSQL container (port 5432) -- Creates RabbitMQ container (ports 5672, 15672) -- Initializes database schema -- Creates `.env` configuration file -- Creates Python virtual environment -- Installs all dependencies (~5-10 minutes) -- Downloads CLIP model from HuggingFace (~3.5GB) -- Verifies setup - -**Time:** ~10-15 minutes (first run) - ---- - -### **Option 2: Infrastructure Only** (Default) -Just containers + database, manual Python setup: - -```bash -# Linux/macOS/Git Bash -./start-dev-env.sh - -# Windows PowerShell -.\start-dev-env.ps1 -``` - -**What it does:** -- Creates PostgreSQL + RabbitMQ containers -- Initializes database -- Creates `.env` file - -**Then manually:** -```bash -python -m venv venv -source venv/bin/activate # Linux/macOS -# OR -.\venv\Scripts\Activate.ps1 # Windows - -pip install -r requirements.txt -``` - ---- - -### **Option 3: With Wheelhouse** (Offline Installs) -Pre-compile dependencies for faster/offline installs: - -```bash -# Linux/macOS/Git Bash -./start-dev-env.sh --build-wheelhouse - -# Then install offline: -pip install --no-index --find-links=wheelhouse -r requirements.txt -``` - ---- - -## Available Flags - -| Flag | Description | -|------|-------------| -| `--full-setup` | Complete setup: infrastructure + Python + dependencies | -| `--build-wheelhouse` | Build wheelhouse for offline dependency 
installation | -| (no flags) | Infrastructure only (containers + database) | - ---- - -## What Gets Created - -### **Infrastructure** -| Service | Port | Access | Credentials | -|---------|------|--------|-------------| -| PostgreSQL | 5432 | localhost:5432 | postgres/postgres | -| RabbitMQ (AMQP) | 5672 | localhost:5672 | admin/admin123 | -| RabbitMQ (Management UI) | 15672 | http://localhost:15672 | guest/guest | - -### **Files** -- `.env` - Environment configuration (from `.env.example`) -- `venv/` - Python virtual environment (if `--full-setup`) -- `data/` - Data directories (reference_images, temp_images) -- `models/` - Model cache directory -- `logs/` - Application logs directory - -### **Database** -- Database: `plagiarism_db` -- Tables: `submissions`, `reference_images`, `feedback_logs` -- Extension: pgvector -- Indexes: B-tree, HNSW vector indexes - ---- - -## After Setup - -### **Start the Application** - -**Terminal 1 - Worker:** -```bash -source venv/bin/activate # Linux/macOS -# OR -.\venv\Scripts\Activate.ps1 # Windows - -python app.py -``` - -**Terminal 2 - API Server:** -```bash -source venv/bin/activate - -uvicorn api:app --reload --host 0.0.0.0 --port 8000 -``` - -**Access API:** http://localhost:8000/docs - ---- - -## Prerequisites - -### **Required** -- **Podman** (or Docker) - Container runtime -- **Python 3.10+** - Application runtime -- **8GB+ RAM** - For CLIP model -- **10GB+ disk** - For dependencies and models - -### **Optional** -- **CUDA GPU** - For faster CLIP inference (10x speedup) -- **curl** - For health checks - ---- - -## Troubleshooting - -### **"Podman not found"** -```bash -# Install Podman: https://podman.io/getting-started/installation -``` - -### **"Python not found"** -```bash -# Install Python 3.10+: https://www.python.org/downloads/ -``` - -### **"Port already in use"** -```bash -# Stop existing containers -podman stop mentorme-postgres mentorme-rabbitmq -podman rm mentorme-postgres mentorme-rabbitmq - -# Or change ports 
in script (POSTGRES_PORT, RABBITMQ_PORT) -``` - -### **"Database connection failed"** -```bash -# Check PostgreSQL is running -podman ps | grep mentorme-postgres - -# Check logs -podman logs mentorme-postgres - -# Restart container -podman restart mentorme-postgres -``` - -### **"RabbitMQ not ready"** -```bash -# Check RabbitMQ is running -podman ps | grep mentorme-rabbitmq - -# Access management UI -open http://localhost:15672 # guest/guest - -# Restart container -podman restart mentorme-rabbitmq -``` - ---- - -## Container Management - -### **View Logs** -```bash -podman logs mentorme-postgres -podman logs mentorme-rabbitmq -podman logs -f mentorme-postgres # Follow mode -``` - -### **Stop Containers** -```bash -podman stop mentorme-postgres mentorme-rabbitmq -``` - -### **Remove Containers** -```bash -podman rm mentorme-postgres mentorme-rabbitmq -``` - -### **Restart Containers** -```bash -podman restart mentorme-postgres mentorme-rabbitmq -``` - -### **Check Running Containers** -```bash -podman ps -``` - ---- - -## Configuration Override - -### **Edit `.env` After Creation** -Script creates `.env` from `.env.example` with localhost overrides. You can modify: - -```bash -# Example: Use different CLIP model -CLIP_MODEL=ViT-B/32 # Smaller, faster model (512D) - -# Example: Enable GPU -CLIP_DEVICE=cuda - -# Example: Enable pgvector instead of FAISS -USE_PGVECTOR=true -``` - -### **Environment Variables Priority** -1. System environment variables (highest) -2. `.env` file -3. 
`config.py` defaults (lowest) - ---- - -## What This Script Does NOT Do - - **Does not start the application** - You must run `python app.py` and `uvicorn api:app` - **Does not seed reference images** - Use `./seeding/seed-data.sh` or `python seeding/seed_ref_images.py` - **Does not expose port 8000** - Only exposed when API is running - **Does not use Docker Compose** - Uses Podman containers directly - ---- - -## Comparison: Script vs Docker Compose - -| Feature | This Script | Docker Compose | -|---------|-------------|----------------| -| **Tool** | Podman | Docker | -| **Python App** | Runs on host | Runs in container | -| **Development** | Faster (direct edits) | Requires rebuild | -| **Debugging** | Native debugger | Remote debugging | -| **Production** | Not recommended | Best practice | -| **Dependencies** | Installed on host | Isolated in container | - ---- - -## Examples - -### **First-Time Setup** -```bash -# Complete automated setup -./start-dev-env.sh --full-setup - -# Start worker -source venv/bin/activate -python app.py - -# In another terminal, start API -source venv/bin/activate -uvicorn api:app --host 0.0.0.0 --port 8000 -``` - -### **Daily Development** -```bash -# Containers already exist, just start them -podman start mentorme-postgres mentorme-rabbitmq - -# Activate venv and run -source venv/bin/activate -python app.py -``` - -### **Clean Restart** -```bash -# Stop and remove everything -podman stop mentorme-postgres mentorme-rabbitmq -podman rm mentorme-postgres mentorme-rabbitmq - -# Run script again -./start-dev-env.sh --full-setup -``` - ---- - -## Next Steps After Setup - -1. **(Optional) Seed reference images:** - ```bash - ./seeding/seed-data.sh --ref-images - # Or directly: python seeding/seed_ref_images.py --directory data/reference_images - ``` - -2. **Test the system:** - ```bash - python tests/simulation_e2e.py --vm-ip localhost \ - --image https://example.com/test.jpg \ - --student-id ST001 --assign-id A001 - ``` - -3. 
**Access API documentation:** - - OpenAPI: http://localhost:8000/docs - - ReDoc: http://localhost:8000/redoc - -4. **Monitor queues:** - - RabbitMQ UI: http://localhost:15672 - ---- - -## Support - -- **Documentation:** See `DOCUMENTATION.md` for complete system documentation -- **Issues:** Check logs in `logs/` directory -- **Database:** Connect with any PostgreSQL client to `localhost:5432` - ---- - -**Last Updated:** November 6, 2025 -**Version:** 1.0.0 +# Local Development Setup Scripts - Quick Guide + +## Overview +Automated scripts to set up local development environment for MentorMe Plagiarism Detection System. + +--- + +## Files +- `start-dev-env.sh` - Bash script (Linux/macOS/Git Bash) +- `start-dev-env.ps1` - PowerShell script (Windows) + +--- + +## Quick Start + +### **Option 1: Complete Setup (Recommended)** +Everything in one command - infrastructure + Python environment + dependencies: + +```bash +# Linux/macOS/Git Bash +chmod +x start-dev-env.sh +./start-dev-env.sh --full-setup + +# Windows PowerShell +.\start-dev-env.ps1 --full-setup +``` + +**What it does:** +- Creates PostgreSQL container (port 5432) +- Creates RabbitMQ container (ports 5672, 15672) +- Initializes database schema +- Creates `.env` configuration file +- Creates Python virtual environment +- Installs all dependencies (~5-10 minutes) +- Downloads CLIP model from HuggingFace (~3.5GB) +- Verifies setup + +**Time:** ~10-15 minutes (first run) + +--- + +### **Option 2: Infrastructure Only** (Default) +Just containers + database, manual Python setup: + +```bash +# Linux/macOS/Git Bash +./start-dev-env.sh + +# Windows PowerShell +.\start-dev-env.ps1 +``` + +**What it does:** +- Creates PostgreSQL + RabbitMQ containers +- Initializes database +- Creates `.env` file + +**Then manually:** +```bash +python -m venv venv +source venv/bin/activate # Linux/macOS +# OR +.\venv\Scripts\Activate.ps1 # Windows + +pip install -r requirements.txt +``` + +--- + +### **Option 3: With Wheelhouse** (Offline 
Installs) +Pre-compile dependencies for faster/offline installs: + +```bash +# Linux/macOS/Git Bash +./start-dev-env.sh --build-wheelhouse + +# Then install offline: +pip install --no-index --find-links=wheelhouse -r requirements.txt +``` + +--- + +## Available Flags + +| Flag | Description | +|------|-------------| +| `--full-setup` | Complete setup: infrastructure + Python + dependencies | +| `--build-wheelhouse` | Build wheelhouse for offline dependency installation | +| (no flags) | Infrastructure only (containers + database) | + +--- + +## What Gets Created + +### **Infrastructure** +| Service | Port | Access | Credentials | +|---------|------|--------|-------------| +| PostgreSQL | 5432 | localhost:5432 | postgres/postgres | +| RabbitMQ (AMQP) | 5672 | localhost:5672 | admin/admin123 | +| RabbitMQ (Management UI) | 15672 | http://localhost:15672 | guest/guest | + +### **Files** +- `.env` - Environment configuration (from `.env.example`) +- `venv/` - Python virtual environment (if `--full-setup`) +- `data/` - Data directories (reference_images, temp_images) +- `models/` - Model cache directory +- `logs/` - Application logs directory + +### **Database** +- Database: `plagiarism_db` +- Tables: `submissions`, `reference_images`, `feedback_logs` +- Extension: pgvector +- Indexes: B-tree, HNSW vector indexes + +--- + +## After Setup + +### **Start the Application** + +**Terminal 1 - Worker:** +```bash +source venv/bin/activate # Linux/macOS +# OR +.\venv\Scripts\Activate.ps1 # Windows + +python app.py +``` + +**Terminal 2 - API Server:** +```bash +source venv/bin/activate + +uvicorn api:app --reload --host 0.0.0.0 --port 8000 +``` + +**Access API:** http://localhost:8000/docs + +--- + +## Prerequisites + +### **Required** +- **Podman** (or Docker) - Container runtime +- **Python 3.10+** - Application runtime +- **8GB+ RAM** - For CLIP model +- **10GB+ disk** - For dependencies and models + +### **Optional** +- **CUDA GPU** - For faster CLIP inference (10x speedup) 
+- **curl** - For health checks + +--- + +## Troubleshooting + +### **"Podman not found"** +```bash +# Install Podman: https://podman.io/getting-started/installation +``` + +### **"Python not found"** +```bash +# Install Python 3.10+: https://www.python.org/downloads/ +``` + +### **"Port already in use"** +```bash +# Stop existing containers +podman stop mentorme-postgres mentorme-rabbitmq +podman rm mentorme-postgres mentorme-rabbitmq + +# Or change ports in script (POSTGRES_PORT, RABBITMQ_PORT) +``` + +### **"Database connection failed"** +```bash +# Check PostgreSQL is running +podman ps | grep mentorme-postgres + +# Check logs +podman logs mentorme-postgres + +# Restart container +podman restart mentorme-postgres +``` + +### **"RabbitMQ not ready"** +```bash +# Check RabbitMQ is running +podman ps | grep mentorme-rabbitmq + +# Access management UI +open http://localhost:15672 # guest/guest + +# Restart container +podman restart mentorme-rabbitmq +``` + +--- + +## Container Management + +### **View Logs** +```bash +podman logs mentorme-postgres +podman logs mentorme-rabbitmq +podman logs -f mentorme-postgres # Follow mode +``` + +### **Stop Containers** +```bash +podman stop mentorme-postgres mentorme-rabbitmq +``` + +### **Remove Containers** +```bash +podman rm mentorme-postgres mentorme-rabbitmq +``` + +### **Restart Containers** +```bash +podman restart mentorme-postgres mentorme-rabbitmq +``` + +### **Check Running Containers** +```bash +podman ps +``` + +--- + +## Configuration Override + +### **Edit `.env` After Creation** +Script creates `.env` from `.env.example` with localhost overrides. You can modify: + +```bash +# Example: Use different CLIP model +CLIP_MODEL=ViT-B/32 # Smaller, faster model (512D) + +# Example: Enable GPU +CLIP_DEVICE=cuda + +# Example: Enable pgvector instead of FAISS +USE_PGVECTOR=true +``` + +### **Environment Variables Priority** +1. System environment variables (highest) +2. `.env` file +3. 
`config.py` defaults (lowest) + +--- + +## What This Script Does NOT Do + + **Does not start the application** - You must run `python app.py` and `uvicorn api:app` + **Does not seed reference images** - Use `./seeding/seed-data.sh` or `python seeding/seed_ref_images.py` + **Does not expose port 8000** - Only exposed when API is running + **Does not use Docker Compose** - Uses Podman containers directly + +--- + +## Comparison: Script vs Docker Compose + +| Feature | This Script | Docker Compose | +|---------|-------------|----------------| +| **Tool** | Podman | Docker | +| **Python App** | Runs on host | Runs in container | +| **Development** | Faster (direct edits) | Requires rebuild | +| **Debugging** | Native debugger | Remote debugging | +| **Production** | Not recommended | Best practice | +| **Dependencies** | Installed on host | Isolated in container | + +--- + +## Examples + +### **First-Time Setup** +```bash +# Complete automated setup +./start-dev-env.sh --full-setup + +# Start worker +source venv/bin/activate +python app.py + +# In another terminal, start API +source venv/bin/activate +uvicorn api:app --host 0.0.0.0 --port 8000 +``` + +### **Daily Development** +```bash +# Containers already exist, just start them +podman start mentorme-postgres mentorme-rabbitmq + +# Activate venv and run +source venv/bin/activate +python app.py +``` + +### **Clean Restart** +```bash +# Stop and remove everything +podman stop mentorme-postgres mentorme-rabbitmq +podman rm mentorme-postgres mentorme-rabbitmq + +# Run script again +./start-dev-env.sh --full-setup +``` + +--- + +## Next Steps After Setup + +1. **(Optional) Seed reference images:** + ```bash + ./seeding/seed-data.sh --ref-images + # Or directly: python seeding/seed_ref_images.py --directory data/reference_images + ``` + +2. **Test the system:** + ```bash + python tests/simulation_e2e.py --vm-ip localhost \ + --image https://example.com/test.jpg \ + --student-id ST001 --assign-id A001 + ``` + +3. 
**Access API documentation:** + - OpenAPI: http://localhost:8000/docs + - ReDoc: http://localhost:8000/redoc + +4. **Monitor queues:** + - RabbitMQ UI: http://localhost:15672 + +--- + +## Support + +- **Documentation:** See `DOCUMENTATION.md` for complete system documentation +- **Issues:** Check logs in `logs/` directory +- **Database:** Connect with any PostgreSQL client to `localhost:5432` + +--- + +**Last Updated:** November 6, 2025 +**Version:** 1.0.0 diff --git a/start-dev-env.sh b/start-dev-env.sh old mode 100644 new mode 100755 index 298f205..15a9729 --- a/start-dev-env.sh +++ b/start-dev-env.sh @@ -1,472 +1,472 @@ -#!/usr/bin/env bash -# Local Development Startup Script -# Starts PostgreSQL and RabbitMQ containers using docker-compose - -set -e -FULL_SETUP=0 -START_API=0 -COMPOSE_FILE="docker-compose-dev.yml" - -while [[ "$#" -gt 0 ]]; do - case "$1" in - --full-setup) FULL_SETUP=1; shift ;; - --with-api) START_API=1; shift ;; - --prod) COMPOSE_FILE="docker-compose-prod.yml"; shift ;; - --dev) COMPOSE_FILE="docker-compose-dev.yml"; shift ;; - *) break ;; - esac -done - -# Detect platform -OS_TYPE="unknown" -UNAME_OUT=$(uname -s 2>/dev/null || true) -case "${UNAME_OUT}" in - MINGW*|MSYS*|CYGWIN*) OS_TYPE="windows-msys" ;; - Darwin) OS_TYPE="macos" ;; - Linux) OS_TYPE="linux" ;; - *) OS_TYPE="unix" ;; -esac - -# Detect Python executable -PY="" -if [ "${OS_TYPE}" = "windows-msys" ]; then - PY_CANDIDATES=("py" "python3" "python" "python.exe") -else - PY_CANDIDATES=("python3" "python" "py" "python.exe") -fi - -for candidate in "${PY_CANDIDATES[@]}"; do - if [ "$candidate" = "py" ]; then - if command -v py >/dev/null 2>&1; then - if py -3 -c "import sys; sys.stdout.write('Python found!!\n')" 2>/dev/null; then - PY='py -3' - break - fi - fi - continue - fi - - candidate_path=$(command -v "$candidate" 2>/dev/null || true) - if [ -n "$candidate_path" ]; then - case "$candidate_path" in - *WindowsApps*|*windowsapps*) continue ;; - esac - - if "$candidate" -c "import 
sys; sys.stdout.write('ok')" 2>/dev/null; then - PY="$candidate" - break - fi - fi -done - -if [ -z "$PY" ]; then - echo "ERROR: No Python 3.10+ found. Install Python or ensure it's in PATH." >&2 - exit 1 -fi - -# Detect container runtime (Podman or Docker) -CONTAINER_CMD="" -COMPOSE_CMD="" -if command -v podman &> /dev/null; then - CONTAINER_CMD="podman" - if command -v podman-compose &> /dev/null; then - COMPOSE_CMD="podman-compose" - else - echo "ERROR: podman-compose not found. Install it:" >&2 - echo " pip install podman-compose" >&2 - exit 1 - fi -elif command -v docker &> /dev/null; then - CONTAINER_CMD="docker" - if command -v docker-compose &> /dev/null; then - COMPOSE_CMD="docker-compose" - elif docker compose version &> /dev/null; then - COMPOSE_CMD="docker compose" - else - echo "ERROR: docker-compose not found. Install it:" >&2 - echo " https://docs.docker.com/compose/install/" >&2 - exit 1 - fi -else - echo "ERROR: Neither Podman nor Docker found. Install one of them:" >&2 - echo " Podman: https://podman.io/getting-started/installation" >&2 - echo " Docker: https://docs.docker.com/get-docker/" >&2 - exit 1 -fi - -# Colors for output -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -CYAN='\033[0;36m' -GRAY='\033[0;37m' -NC='\033[0m' - -echo -e "${CYAN}==================================================================" -echo -e " MentorMe Plagiarism Checker - Local Development Setup" -echo -e " Container Runtime: ${CONTAINER_CMD}" -echo -e " Compose File: ${COMPOSE_FILE}" -echo -e "==================================================================${NC}" -echo "" - -# Validate compose file exists -if [ ! 
-f "$COMPOSE_FILE" ]; then - echo -e "${RED}ERROR: $COMPOSE_FILE not found${NC}" - exit 1 -fi - -# Load configuration from .env if it exists -if [ -f ".env" ]; then - set -a - source <(grep -v '^#' .env | grep -v '^$' | sed 's/\r$//') - set +a -fi - -# Configuration (with defaults) -POSTGRES_CONTAINER="mentorme-plagiarism-postgres" -RABBITMQ_CONTAINER="mentorme-plagiarism-rabbitmq" -POSTGRES_PORT="${POSTGRES_PORT:-5432}" -RABBITMQ_PORT="${RABBITMQ_PORT:-5672}" -RABBITMQ_MGMT_PORT="${RABBITMQ_MANAGEMENT_PORT:-15672}" -POSTGRES_PASSWORD="${POSTGRES_PASSWORD:-postgres}" -POSTGRES_DB="${POSTGRES_DB:-plagiarism_db}" -POSTGRES_USER="${POSTGRES_USER:-postgres}" -RABBITMQ_USER="${RABBITMQ_USER:-admin}" -RABBITMQ_PASS="${RABBITMQ_PASS:-admin123}" -CLIP_MODEL_URL="${CLIP_MODEL_URL:-https://huggingface.co/laion/CLIP-ViT-L-14-laion2B-s32B-b82K/resolve/main/open_clip_pytorch_model.bin}" - -stop_existing_containers() { - local containers_exist=false - - # Check if compose stack is running - if $COMPOSE_CMD -f $COMPOSE_FILE ps 2>/dev/null | grep -q "Up\|running"; then - containers_exist=true - fi - - if [ "$containers_exist" = true ]; then - echo -e "${YELLOW}Existing containers found:${NC}" - $COMPOSE_CMD -f $COMPOSE_FILE ps - echo "" - echo -e "${YELLOW}This will stop and remove existing containers.${NC}" - read -p "Continue? (y/N): " -n 1 -r - echo - - if [[ ! 
$REPLY =~ ^[Yy]$ ]]; then - echo -e "${RED}Aborted by user${NC}" - # exit 0 - else - echo -e "${CYAN}Stopping existing containers...${NC}" - $COMPOSE_CMD -f $COMPOSE_FILE down - fi - fi - - echo -e "${GREEN}[OK] Ready to start containers${NC}" -} - -start_containers() { - echo -e "${CYAN}Starting containers with $COMPOSE_FILE...${NC}" - - # Determine which services to start - #local services="postgres rabbitmq pgadmin plagiarism-checker" - local services="postgres rabbitmq plagiarism-checker" - - if [ "$START_API" -eq 1 ]; then - services="$services api" - echo -e "${CYAN}Including API service${NC}" - - # Force rebuild API container to ensure it uses Dockerfile.api (not Dockerfile) - echo -e "${YELLOW}Rebuilding API container with Dockerfile.api...${NC}" - $COMPOSE_CMD -f $COMPOSE_FILE build --no-cache api - - if [ $? -ne 0 ]; then - echo -e "${RED}ERROR: Failed to build API container${NC}" - exit 1 - fi - echo -e "${GREEN}[OK] API container rebuilt${NC}" - fi - - $COMPOSE_CMD -f $COMPOSE_FILE up -d $services - - if [ $? 
-ne 0 ]; then - echo -e "${RED}ERROR: Failed to start containers${NC}" - exit 1 - fi - - echo -e "${GREEN}[OK] Containers started${NC}" -} - -wait_for_postgres() { - echo -e "${YELLOW}Waiting for PostgreSQL...${NC}" - - local max_attempts=30 - local attempt=0 - - while [ $attempt -lt $max_attempts ]; do - attempt=$((attempt + 1)) - - if $CONTAINER_CMD exec $POSTGRES_CONTAINER pg_isready -U $POSTGRES_USER &>/dev/null; then - echo -e "${GREEN}[OK] PostgreSQL ready${NC}" - return 0 - fi - - sleep 2 - done - - echo -e "${RED}ERROR: PostgreSQL timeout${NC}" - exit 1 -} - -wait_for_rabbitmq() { - echo -e "${YELLOW}Waiting for RabbitMQ...${NC}" - - local max_attempts=30 - local attempt=0 - - while [ $attempt -lt $max_attempts ]; do - attempt=$((attempt + 1)) - - if curl -s -f http://localhost:$RABBITMQ_MGMT_PORT &>/dev/null; then - echo -e "${GREEN}[OK] RabbitMQ ready${NC}" - return 0 - fi - - sleep 2 - done - - echo -e "${RED}ERROR: RabbitMQ timeout${NC}" - exit 1 -} - -wait_for_api() { - if [ "$START_API" -ne 1 ]; then - return 0 - fi - - echo -e "${YELLOW}Waiting for API service...${NC}" - - local max_attempts=30 - local attempt=0 - - while [ $attempt -lt $max_attempts ]; do - attempt=$((attempt + 1)) - - if curl -s -f http://localhost:8000/health &>/dev/null; then - echo -e "${GREEN}[OK] API service ready${NC}" - return 0 - fi - - sleep 2 - done - - echo -e "${RED}ERROR: API service timeout${NC}" - echo -e "${YELLOW}Checking API container logs:${NC}" - $COMPOSE_CMD -f $COMPOSE_FILE logs --tail=50 api - exit 1 -} - -initialize_database() { - if [ ! 
-f "database/init.sql" ]; then - echo -e "${YELLOW}WARNING: database/init.sql not found${NC}" - return - fi - - echo -e "${CYAN}Initializing database...${NC}" - - # Run init.sql - if $CONTAINER_CMD exec -i $POSTGRES_CONTAINER psql -U $POSTGRES_USER -d $POSTGRES_DB < database/init.sql 2>/dev/null; then - echo -e "${GREEN}[OK] Database schema initialized${NC}" - else - echo -e "${GRAY}Database schema already exists${NC}" - fi - - # Create migrations tracking table if it doesn't exist - $CONTAINER_CMD exec $POSTGRES_CONTAINER psql -U $POSTGRES_USER -d $POSTGRES_DB -c " - CREATE TABLE IF NOT EXISTS schema_migrations ( - id SERIAL PRIMARY KEY, - migration_name VARCHAR(255) UNIQUE NOT NULL, - applied_at TIMESTAMP DEFAULT NOW() - ); - " 2>/dev/null - - # Run migration scripts - if [ -d "database/migrations" ]; then - local migration_count=0 - for migration_file in database/migrations/*.sql; do - if [ -f "$migration_file" ]; then - local migration_name=$(basename "$migration_file") - - # Check if migration already applied - local already_applied=$($CONTAINER_CMD exec $POSTGRES_CONTAINER psql -U $POSTGRES_USER -d $POSTGRES_DB -t -c " - SELECT COUNT(*) FROM schema_migrations WHERE migration_name = '$migration_name'; - " 2>/dev/null | tr -d '[:space:]') - - if [ "$already_applied" = "0" ]; then - echo -e "${CYAN}Applying migration: $migration_name${NC}" - - if $CONTAINER_CMD exec -i $POSTGRES_CONTAINER psql -U $POSTGRES_USER -d $POSTGRES_DB < "$migration_file" 2>/dev/null; then - # Record migration as applied - $CONTAINER_CMD exec $POSTGRES_CONTAINER psql -U $POSTGRES_USER -d $POSTGRES_DB -c " - INSERT INTO schema_migrations (migration_name) VALUES ('$migration_name'); - " 2>/dev/null - echo -e "${GREEN}[OK] Applied: $migration_name${NC}" - migration_count=$((migration_count + 1)) - else - echo -e "${YELLOW}WARNING: Failed to apply $migration_name${NC}" - fi - fi - fi - done - - if [ $migration_count -eq 0 ]; then - echo -e "${GRAY}All migrations already applied${NC}" - else 
- echo -e "${GREEN}[OK] Applied $migration_count migration(s)${NC}" - fi - fi -} - -create_env_file() { - if [ -f ".env" ]; then - echo -e "${GREEN}[OK] .env exists${NC}" - return - fi - - if [ ! -f ".env.example" ]; then - echo -e "${RED}ERROR: .env.example not found${NC}" - exit 1 - fi - - echo -e "${CYAN}Creating .env from template...${NC}" - cp .env.example .env - - # Keep service names for docker-compose (containers communicate via service names) - # No transformation needed - .env.example already has correct service names - - echo -e "${GREEN}[OK] .env created${NC}" -} - -show_summary() { - echo -e "\n${CYAN}==================================================================" - echo -e " Environment Ready!" - echo -e "==================================================================${NC}" - echo "" - echo -e "${GREEN}✓ Services Running:${NC}" - echo -e " PostgreSQL: localhost:$POSTGRES_PORT (with pgvector)" - echo -e " RabbitMQ: localhost:$RABBITMQ_PORT" - echo -e " RabbitMQ UI: http://localhost:$RABBITMQ_MGMT_PORT ($RABBITMQ_USER/$RABBITMQ_PASS)" - - if [ "$START_API" -eq 1 ]; then - echo -e " API: http://localhost:8000" - echo -e " API Docs: http://localhost:8000/docs" - fi - - echo "" - echo -e "${CYAN}💡 Quick Start:${NC}" - echo -e " ${YELLOW}./start-dev-env.sh --full-setup${NC} ${GRAY}# Development mode (default)${NC}" - echo -e " ${YELLOW}./start-dev-env.sh --with-api${NC} ${GRAY}# Include API container${NC}" - echo -e " ${YELLOW}./start-dev-env.sh --prod${NC} ${GRAY}# Production mode${NC}" - echo "" - echo -e "${CYAN}📋 Manual Setup:${NC}" - echo -e " 1. ${YELLOW}${PY} -m venv venv${NC}" - - case "${OS_TYPE}" in - windows-msys) echo -e " ${YELLOW}source venv/Scripts/activate${NC}" ;; - *) echo -e " ${YELLOW}source venv/bin/activate${NC}" ;; - esac - - echo -e " 2. ${YELLOW}${PY} -m pip install -r requirements.txt${NC}" - echo -e " 3. 
${YELLOW}${PY} app.py${NC}" - echo "" - echo -e "${CYAN}🔧 Container Commands:${NC}" - echo -e " ${GRAY}Logs: $COMPOSE_CMD -f $COMPOSE_FILE logs -f${NC}" - echo -e " ${GRAY}Stop: $COMPOSE_CMD -f $COMPOSE_FILE stop${NC}" - echo -e " ${GRAY}Remove: $COMPOSE_CMD -f $COMPOSE_FILE down${NC}" - echo "" - echo -e "${CYAN}==================================================================${NC}" -} - -main() { - create_env_file - stop_existing_containers - start_containers - wait_for_postgres - wait_for_rabbitmq - wait_for_api - initialize_database - show_summary - echo -e "\n${GRAY}Containers running in background. Press Ctrl+C to exit this script.${NC}" -} - -setup_python_environment() { - echo -e "\n${CYAN}==================================================================" - echo -e " Full Setup: Python Environment" - echo -e "==================================================================${NC}" - - if [ -d "venv" ]; then - echo -e "${GRAY}Virtual environment exists${NC}" - else - echo -e "${CYAN}Creating virtual environment...${NC}" - $PY -m venv venv - echo -e "${GREEN}[OK] venv created${NC}" - fi - - echo -e "${CYAN}Activating virtual environment...${NC}" - if [ "${OS_TYPE}" = "windows-msys" ]; then - source venv/Scripts/activate - else - source venv/bin/activate - fi - - echo -e "${CYAN}Installing dependencies (5-10 minutes)...${NC}" - python -m pip install --upgrade pip setuptools wheel - python -m pip install -r requirements.txt - echo -e "${GREEN}[OK] Dependencies installed${NC}" - - echo -e "${CYAN}Creating directories...${NC}" - mkdir -p data/reference_images data/models/clip logs - echo -e "${GREEN}[OK] Directories created${NC}" - - # Download CLIP model using curl - echo -e "${CYAN}Checking CLIP model...${NC}" - if [ ! 
-f "data/models/clip/open_clip_pytorch_model.bin" ]; then - echo -e "${YELLOW}Downloading CLIP model (this may take a while)...${NC}" - curl -L -o data/models/clip/open_clip_pytorch_model.bin \ - ${CLIP_MODEL_URL} || { - echo -e "${YELLOW}WARNING: CLIP model download failed, will download on first run${NC}" - } - - if [ -f "data/models/clip/open_clip_pytorch_model.bin" ]; then - echo -e "${GREEN}[OK] CLIP model downloaded${NC}" - fi - else - echo -e "${GRAY}CLIP model already exists${NC}" - fi - - echo -e "${CYAN}Verifying environment...${NC}" - python -c " -import open_clip, asyncpg, aio_pika, PIL, imagehash -print('✓ All imports successful') -" || { - echo -e "${RED}ERROR: Environment verification failed${NC}" - exit 1 - } - - echo -e "\n${CYAN}==================================================================" - echo -e " Setup Complete!" - echo -e "==================================================================${NC}" - echo -e "\n${GREEN}✓ Next Steps:${NC}" - echo -e "\n${CYAN}Terminal 1 - Worker:${NC}" - [ "${OS_TYPE}" = "windows-msys" ] && echo -e " ${YELLOW}source venv/Scripts/activate${NC}" || echo -e " ${YELLOW}source venv/bin/activate${NC}" - echo -e " ${YELLOW}python app.py${NC}" - echo -e "\n${CYAN}Terminal 2 - API:${NC}" - [ "${OS_TYPE}" = "windows-msys" ] && echo -e " ${YELLOW}source venv/Scripts/activate${NC}" || echo -e " ${YELLOW}source venv/bin/activate${NC}" - echo -e " ${YELLOW}cd api && uvicorn api:app --reload --host 0.0.0.0 --port 8000${NC}" - echo -e "\n${CYAN}API Docs:${NC} ${YELLOW}http://localhost:8000/docs${NC}" - echo "" -} - -main - -if [ "$FULL_SETUP" -eq 1 ]; then - setup_python_environment -fi +#!/usr/bin/env bash +# Local Development Startup Script +# Starts PostgreSQL and RabbitMQ containers using docker-compose + +set -e +FULL_SETUP=0 +START_API=0 +COMPOSE_FILE="docker-compose-dev.yml" + +while [[ "$#" -gt 0 ]]; do + case "$1" in + --full-setup) FULL_SETUP=1; shift ;; + --with-api) START_API=1; shift ;; + --prod) 
COMPOSE_FILE="docker-compose-prod.yml"; shift ;; + --dev) COMPOSE_FILE="docker-compose-dev.yml"; shift ;; + *) break ;; + esac +done + +# Detect platform +OS_TYPE="unknown" +UNAME_OUT=$(uname -s 2>/dev/null || true) +case "${UNAME_OUT}" in + MINGW*|MSYS*|CYGWIN*) OS_TYPE="windows-msys" ;; + Darwin) OS_TYPE="macos" ;; + Linux) OS_TYPE="linux" ;; + *) OS_TYPE="unix" ;; +esac + +# Detect Python executable +PY="" +if [ "${OS_TYPE}" = "windows-msys" ]; then + PY_CANDIDATES=("py" "python3" "python" "python.exe") +else + PY_CANDIDATES=("python3" "python" "py" "python.exe") +fi + +for candidate in "${PY_CANDIDATES[@]}"; do + if [ "$candidate" = "py" ]; then + if command -v py >/dev/null 2>&1; then + if py -3 -c "import sys; sys.stdout.write('Python found!!\n')" 2>/dev/null; then + PY='py -3' + break + fi + fi + continue + fi + + candidate_path=$(command -v "$candidate" 2>/dev/null || true) + if [ -n "$candidate_path" ]; then + case "$candidate_path" in + *WindowsApps*|*windowsapps*) continue ;; + esac + + if "$candidate" -c "import sys; sys.stdout.write('ok')" 2>/dev/null; then + PY="$candidate" + break + fi + fi +done + +if [ -z "$PY" ]; then + echo "ERROR: No Python 3.10+ found. Install Python or ensure it's in PATH." >&2 + exit 1 +fi + +# Detect container runtime (Podman or Docker) +CONTAINER_CMD="" +COMPOSE_CMD="" +if command -v podman &> /dev/null; then + CONTAINER_CMD="podman" + if command -v podman-compose &> /dev/null; then + COMPOSE_CMD="podman-compose" + else + echo "ERROR: podman-compose not found. Install it:" >&2 + echo " pip install podman-compose" >&2 + exit 1 + fi +elif command -v docker &> /dev/null; then + CONTAINER_CMD="docker" + if command -v docker-compose &> /dev/null; then + COMPOSE_CMD="docker-compose" + elif docker compose version &> /dev/null; then + COMPOSE_CMD="docker compose" + else + echo "ERROR: docker-compose not found. 
Install it:" >&2 + echo " https://docs.docker.com/compose/install/" >&2 + exit 1 + fi +else + echo "ERROR: Neither Podman nor Docker found. Install one of them:" >&2 + echo " Podman: https://podman.io/getting-started/installation" >&2 + echo " Docker: https://docs.docker.com/get-docker/" >&2 + exit 1 +fi + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +CYAN='\033[0;36m' +GRAY='\033[0;37m' +NC='\033[0m' + +echo -e "${CYAN}==================================================================" +echo -e " MentorMe Plagiarism Checker - Local Development Setup" +echo -e " Container Runtime: ${CONTAINER_CMD}" +echo -e " Compose File: ${COMPOSE_FILE}" +echo -e "==================================================================${NC}" +echo "" + +# Validate compose file exists +if [ ! -f "$COMPOSE_FILE" ]; then + echo -e "${RED}ERROR: $COMPOSE_FILE not found${NC}" + exit 1 +fi + +# Load configuration from .env if it exists +if [ -f ".env" ]; then + set -a + source <(grep -v '^#' .env | grep -v '^$' | sed 's/\r$//') + set +a +fi + +# Configuration (with defaults) +POSTGRES_CONTAINER="mentorme-plagiarism-postgres" +RABBITMQ_CONTAINER="mentorme-plagiarism-rabbitmq" +POSTGRES_PORT="${POSTGRES_PORT:-5432}" +RABBITMQ_PORT="${RABBITMQ_PORT:-5672}" +RABBITMQ_MGMT_PORT="${RABBITMQ_MANAGEMENT_PORT:-15672}" +POSTGRES_PASSWORD="${POSTGRES_PASSWORD:-postgres}" +POSTGRES_DB="${POSTGRES_DB:-plagiarism_db}" +POSTGRES_USER="${POSTGRES_USER:-postgres}" +RABBITMQ_USER="${RABBITMQ_USER:-admin}" +RABBITMQ_PASS="${RABBITMQ_PASS:-admin123}" +CLIP_MODEL_URL="${CLIP_MODEL_URL:-https://huggingface.co/laion/CLIP-ViT-L-14-laion2B-s32B-b82K/resolve/main/open_clip_pytorch_model.bin}" + +stop_existing_containers() { + local containers_exist=false + + # Check if compose stack is running + if $COMPOSE_CMD -f $COMPOSE_FILE ps 2>/dev/null | grep -q "Up\|running"; then + containers_exist=true + fi + + if [ "$containers_exist" = true ]; then + echo -e "${YELLOW}Existing containers 
found:${NC}" + $COMPOSE_CMD -f $COMPOSE_FILE ps + echo "" + echo -e "${YELLOW}This will stop and remove existing containers.${NC}" + read -p "Continue? (y/N): " -n 1 -r + echo + + if [[ ! $REPLY =~ ^[Yy]$ ]]; then + echo -e "${RED}Aborted by user${NC}" + # exit 0 + else + echo -e "${CYAN}Stopping existing containers...${NC}" + $COMPOSE_CMD -f $COMPOSE_FILE down + fi + fi + + echo -e "${GREEN}[OK] Ready to start containers${NC}" +} + +start_containers() { + echo -e "${CYAN}Starting containers with $COMPOSE_FILE...${NC}" + + # Determine which services to start + #local services="postgres rabbitmq pgadmin plagiarism-checker" + local services="postgres rabbitmq plagiarism-checker" + + if [ "$START_API" -eq 1 ]; then + services="$services api" + echo -e "${CYAN}Including API service${NC}" + + # Force rebuild API container to ensure it uses Dockerfile.api (not Dockerfile) + echo -e "${YELLOW}Rebuilding API container with Dockerfile.api...${NC}" + $COMPOSE_CMD -f $COMPOSE_FILE build --no-cache api + + if [ $? -ne 0 ]; then + echo -e "${RED}ERROR: Failed to build API container${NC}" + exit 1 + fi + echo -e "${GREEN}[OK] API container rebuilt${NC}" + fi + + $COMPOSE_CMD -f $COMPOSE_FILE up -d $services + + if [ $? 
-ne 0 ]; then + echo -e "${RED}ERROR: Failed to start containers${NC}" + exit 1 + fi + + echo -e "${GREEN}[OK] Containers started${NC}" +} + +wait_for_postgres() { + echo -e "${YELLOW}Waiting for PostgreSQL...${NC}" + + local max_attempts=30 + local attempt=0 + + while [ $attempt -lt $max_attempts ]; do + attempt=$((attempt + 1)) + + if $CONTAINER_CMD exec $POSTGRES_CONTAINER pg_isready -U $POSTGRES_USER &>/dev/null; then + echo -e "${GREEN}[OK] PostgreSQL ready${NC}" + return 0 + fi + + sleep 2 + done + + echo -e "${RED}ERROR: PostgreSQL timeout${NC}" + exit 1 +} + +wait_for_rabbitmq() { + echo -e "${YELLOW}Waiting for RabbitMQ...${NC}" + + local max_attempts=30 + local attempt=0 + + while [ $attempt -lt $max_attempts ]; do + attempt=$((attempt + 1)) + + if curl -s -f http://localhost:$RABBITMQ_MGMT_PORT &>/dev/null; then + echo -e "${GREEN}[OK] RabbitMQ ready${NC}" + return 0 + fi + + sleep 2 + done + + echo -e "${RED}ERROR: RabbitMQ timeout${NC}" + exit 1 +} + +wait_for_api() { + if [ "$START_API" -ne 1 ]; then + return 0 + fi + + echo -e "${YELLOW}Waiting for API service...${NC}" + + local max_attempts=30 + local attempt=0 + + while [ $attempt -lt $max_attempts ]; do + attempt=$((attempt + 1)) + + if curl -s -f http://localhost:8000/health &>/dev/null; then + echo -e "${GREEN}[OK] API service ready${NC}" + return 0 + fi + + sleep 2 + done + + echo -e "${RED}ERROR: API service timeout${NC}" + echo -e "${YELLOW}Checking API container logs:${NC}" + $COMPOSE_CMD -f $COMPOSE_FILE logs --tail=50 api + exit 1 +} + +initialize_database() { + if [ ! 
-f "database/init.sql" ]; then + echo -e "${YELLOW}WARNING: database/init.sql not found${NC}" + return + fi + + echo -e "${CYAN}Initializing database...${NC}" + + # Run init.sql + if $CONTAINER_CMD exec -i $POSTGRES_CONTAINER psql -U $POSTGRES_USER -d $POSTGRES_DB < database/init.sql 2>/dev/null; then + echo -e "${GREEN}[OK] Database schema initialized${NC}" + else + echo -e "${GRAY}Database schema already exists${NC}" + fi + + # Create migrations tracking table if it doesn't exist + $CONTAINER_CMD exec $POSTGRES_CONTAINER psql -U $POSTGRES_USER -d $POSTGRES_DB -c " + CREATE TABLE IF NOT EXISTS schema_migrations ( + id SERIAL PRIMARY KEY, + migration_name VARCHAR(255) UNIQUE NOT NULL, + applied_at TIMESTAMP DEFAULT NOW() + ); + " 2>/dev/null + + # Run migration scripts + if [ -d "database/migrations" ]; then + local migration_count=0 + for migration_file in database/migrations/*.sql; do + if [ -f "$migration_file" ]; then + local migration_name=$(basename "$migration_file") + + # Check if migration already applied + local already_applied=$($CONTAINER_CMD exec $POSTGRES_CONTAINER psql -U $POSTGRES_USER -d $POSTGRES_DB -t -c " + SELECT COUNT(*) FROM schema_migrations WHERE migration_name = '$migration_name'; + " 2>/dev/null | tr -d '[:space:]') + + if [ "$already_applied" = "0" ]; then + echo -e "${CYAN}Applying migration: $migration_name${NC}" + + if $CONTAINER_CMD exec -i $POSTGRES_CONTAINER psql -U $POSTGRES_USER -d $POSTGRES_DB < "$migration_file" 2>/dev/null; then + # Record migration as applied + $CONTAINER_CMD exec $POSTGRES_CONTAINER psql -U $POSTGRES_USER -d $POSTGRES_DB -c " + INSERT INTO schema_migrations (migration_name) VALUES ('$migration_name'); + " 2>/dev/null + echo -e "${GREEN}[OK] Applied: $migration_name${NC}" + migration_count=$((migration_count + 1)) + else + echo -e "${YELLOW}WARNING: Failed to apply $migration_name${NC}" + fi + fi + fi + done + + if [ $migration_count -eq 0 ]; then + echo -e "${GRAY}All migrations already applied${NC}" + else 
+ echo -e "${GREEN}[OK] Applied $migration_count migration(s)${NC}" + fi + fi +} + +create_env_file() { + if [ -f ".env" ]; then + echo -e "${GREEN}[OK] .env exists${NC}" + return + fi + + if [ ! -f ".env.example" ]; then + echo -e "${RED}ERROR: .env.example not found${NC}" + exit 1 + fi + + echo -e "${CYAN}Creating .env from template...${NC}" + cp .env.example .env + + # Keep service names for docker-compose (containers communicate via service names) + # No transformation needed - .env.example already has correct service names + + echo -e "${GREEN}[OK] .env created${NC}" +} + +show_summary() { + echo -e "\n${CYAN}==================================================================" + echo -e " Environment Ready!" + echo -e "==================================================================${NC}" + echo "" + echo -e "${GREEN}✓ Services Running:${NC}" + echo -e " PostgreSQL: localhost:$POSTGRES_PORT (with pgvector)" + echo -e " RabbitMQ: localhost:$RABBITMQ_PORT" + echo -e " RabbitMQ UI: http://localhost:$RABBITMQ_MGMT_PORT ($RABBITMQ_USER/$RABBITMQ_PASS)" + + if [ "$START_API" -eq 1 ]; then + echo -e " API: http://localhost:8000" + echo -e " API Docs: http://localhost:8000/docs" + fi + + echo "" + echo -e "${CYAN}💡 Quick Start:${NC}" + echo -e " ${YELLOW}./start-dev-env.sh --full-setup${NC} ${GRAY}# Development mode (default)${NC}" + echo -e " ${YELLOW}./start-dev-env.sh --with-api${NC} ${GRAY}# Include API container${NC}" + echo -e " ${YELLOW}./start-dev-env.sh --prod${NC} ${GRAY}# Production mode${NC}" + echo "" + echo -e "${CYAN}📋 Manual Setup:${NC}" + echo -e " 1. ${YELLOW}${PY} -m venv venv${NC}" + + case "${OS_TYPE}" in + windows-msys) echo -e " ${YELLOW}source venv/Scripts/activate${NC}" ;; + *) echo -e " ${YELLOW}source venv/bin/activate${NC}" ;; + esac + + echo -e " 2. ${YELLOW}${PY} -m pip install -r requirements.txt${NC}" + echo -e " 3. 
${YELLOW}${PY} app.py${NC}" + echo "" + echo -e "${CYAN}🔧 Container Commands:${NC}" + echo -e " ${GRAY}Logs: $COMPOSE_CMD -f $COMPOSE_FILE logs -f${NC}" + echo -e " ${GRAY}Stop: $COMPOSE_CMD -f $COMPOSE_FILE stop${NC}" + echo -e " ${GRAY}Remove: $COMPOSE_CMD -f $COMPOSE_FILE down${NC}" + echo "" + echo -e "${CYAN}==================================================================${NC}" +} + +main() { + create_env_file + stop_existing_containers + start_containers + wait_for_postgres + wait_for_rabbitmq + wait_for_api + initialize_database + show_summary + echo -e "\n${GRAY}Containers running in background. Press Ctrl+C to exit this script.${NC}" +} + +setup_python_environment() { + echo -e "\n${CYAN}==================================================================" + echo -e " Full Setup: Python Environment" + echo -e "==================================================================${NC}" + + if [ -d "venv" ]; then + echo -e "${GRAY}Virtual environment exists${NC}" + else + echo -e "${CYAN}Creating virtual environment...${NC}" + $PY -m venv venv + echo -e "${GREEN}[OK] venv created${NC}" + fi + + echo -e "${CYAN}Activating virtual environment...${NC}" + if [ "${OS_TYPE}" = "windows-msys" ]; then + source venv/Scripts/activate + else + source venv/bin/activate + fi + + echo -e "${CYAN}Installing dependencies (5-10 minutes)...${NC}" + python -m pip install --upgrade pip setuptools wheel + python -m pip install -r requirements.txt + echo -e "${GREEN}[OK] Dependencies installed${NC}" + + echo -e "${CYAN}Creating directories...${NC}" + mkdir -p data/reference_images data/models/clip logs + echo -e "${GREEN}[OK] Directories created${NC}" + + # Download CLIP model using curl + echo -e "${CYAN}Checking CLIP model...${NC}" + if [ ! 
-f "data/models/clip/open_clip_pytorch_model.bin" ]; then + echo -e "${YELLOW}Downloading CLIP model (this may take a while)...${NC}" + curl -L -o data/models/clip/open_clip_pytorch_model.bin \ + ${CLIP_MODEL_URL} || { + echo -e "${YELLOW}WARNING: CLIP model download failed, will download on first run${NC}" + } + + if [ -f "data/models/clip/open_clip_pytorch_model.bin" ]; then + echo -e "${GREEN}[OK] CLIP model downloaded${NC}" + fi + else + echo -e "${GRAY}CLIP model already exists${NC}" + fi + + echo -e "${CYAN}Verifying environment...${NC}" + python -c " +import open_clip, asyncpg, aio_pika, PIL, imagehash +print('✓ All imports successful') +" || { + echo -e "${RED}ERROR: Environment verification failed${NC}" + exit 1 + } + + echo -e "\n${CYAN}==================================================================" + echo -e " Setup Complete!" + echo -e "==================================================================${NC}" + echo -e "\n${GREEN}✓ Next Steps:${NC}" + echo -e "\n${CYAN}Terminal 1 - Worker:${NC}" + [ "${OS_TYPE}" = "windows-msys" ] && echo -e " ${YELLOW}source venv/Scripts/activate${NC}" || echo -e " ${YELLOW}source venv/bin/activate${NC}" + echo -e " ${YELLOW}python app.py${NC}" + echo -e "\n${CYAN}Terminal 2 - API:${NC}" + [ "${OS_TYPE}" = "windows-msys" ] && echo -e " ${YELLOW}source venv/Scripts/activate${NC}" || echo -e " ${YELLOW}source venv/bin/activate${NC}" + echo -e " ${YELLOW}cd api && uvicorn api:app --reload --host 0.0.0.0 --port 8000${NC}" + echo -e "\n${CYAN}API Docs:${NC} ${YELLOW}http://localhost:8000/docs${NC}" + echo "" +} + +main + +if [ "$FULL_SETUP" -eq 1 ]; then + setup_python_environment +fi From e87d4fb7c418b29ec4fbffeea5385b307c795bd0 Mon Sep 17 00:00:00 2001 From: Manu Date: Fri, 2 Jan 2026 10:28:09 +0530 Subject: [PATCH 2/7] plg check against reference images --- .gitignore | 2 + config/config.py | 2 +- database/init.sql | 4 +- image_worker/assigment_ref_images.py | 347 +++++++++++++++++++++++++++ image_worker/worker.py | 
87 ++++++- processors/image_processor.py | 3 + 6 files changed, 438 insertions(+), 7 deletions(-) create mode 100644 image_worker/assigment_ref_images.py diff --git a/.gitignore b/.gitignore index 3e1e6d7..897c762 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ +scratch* + # Python __pycache__/ *.py[cod] diff --git a/config/config.py b/config/config.py index a9d9572..63060ff 100644 --- a/config/config.py +++ b/config/config.py @@ -93,7 +93,7 @@ class DetectionConfig(BaseSettings): exact_dup_threshold: float = Field(default=0.95, env="EXACT_DUPLICATE_THRESHOLD") near_dup_threshold: float = Field(default=0.90, env="NEAR_DUPLICATE_THRESHOLD") - semantic_threshold: float = Field(default=0.80, env="SEMANTIC_MATCH_THRESHOLD") + semantic_threshold: float = Field(default=0.70, env="SEMANTIC_MATCH_THRESHOLD") # Hash matching thresholds (Hamming distance, 0-64 bits) hash_threshold: int = Field(default=8, env="HASH_MATCH_THRESHOLD") diff --git a/database/init.sql b/database/init.sql index 0448456..265a61b 100644 --- a/database/init.sql +++ b/database/init.sql @@ -72,12 +72,12 @@ USING hnsw (clip_embedding vector_ip_ops); -- Reference images corpus CREATE TABLE IF NOT EXISTS reference_images ( id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), - reference_id VARCHAR(100) UNIQUE NOT NULL, + reference_id VARCHAR(200) UNIQUE NOT NULL, -- will be used as assignment id when fetching references from assignments image_path TEXT NOT NULL, phash VARCHAR(64) NOT NULL, dhash VARCHAR(64) NOT NULL, ahash VARCHAR(64) NOT NULL, - category VARCHAR(100), + category VARCHAR(200), description TEXT, source VARCHAR(200), faiss_index_position INTEGER, diff --git a/image_worker/assigment_ref_images.py b/image_worker/assigment_ref_images.py new file mode 100644 index 0000000..10617f4 --- /dev/null +++ b/image_worker/assigment_ref_images.py @@ -0,0 +1,347 @@ +import base64 +import io +import os +from PIL import Image +import requests +from typing import List, Dict, Optional +from dotenv import 
load_dotenv +import asyncpg +from datetime import datetime +import logging + +load_dotenv() + +logger = logging.getLogger(__name__) + +# Environment variables +ASSIGNMENT_CACHE_DAYS = int(os.getenv("ASSIGNMENT_CACHE_DAYS", "2")) +ENABLE_CACHE = os.getenv("ENABLE_CACHE", "true").lower() == "true" +PURGE_CACHE = os.getenv("PURGE_CACHE", "false").lower() == "true" + +# Database configuration +DB_CONFIG = { + "host": os.getenv("POSTGRES_HOST", "localhost"), + "port": int(os.getenv("POSTGRES_PORT", 5432)), + "database": os.getenv("POSTGRES_DB", "plagiarism_db"), + "user": os.getenv("POSTGRES_USER", "postgres"), + "password": os.getenv("POSTGRES_PASSWORD", "postgres"), +} + + +async def get_db_connection(): + """Create async database connection.""" + conn_string = ( + f"postgresql://{DB_CONFIG['user']}:{DB_CONFIG['password']}" + f"@{DB_CONFIG['host']}:{DB_CONFIG['port']}/{DB_CONFIG['database']}" + ) + return await asyncpg.connect(conn_string) + + +async def cleanup_cache(): + """Delete cached assignment reference images older than ASSIGNMENT_CACHE_DAYS.""" + try: + conn = await get_db_connection() + try: + # cutoff_date = datetime.utcnow() - timedelta(days=ASSIGNMENT_CACHE_DAYS) + + # Delete old assignment caches (reference_ids starting with "ASSIGN-") + result = await conn.execute( + """ + DELETE FROM reference_images + WHERE reference_id LIKE 'ASSIGN-%' + """ + ) + + deleted_count = int(result.split()[-1]) if result else 0 + if deleted_count > 0: + logger.info(f"Cleaned up {deleted_count} cached assignment reference images") + + finally: + await conn.close() + + except Exception as e: + logger.error(f"Cache cleanup failed: {e}") + + +async def get_cached_assignment(assignment_id: str) -> Optional[List[Dict]]: + """ + Retrieve cached reference images for an assignment from database. 
+ + Args: + assignment_id: Assignment identifier + + Returns: + List of reference image dicts with precomputed hashes (no embeddings), or None if not cached + """ + try: + conn = await get_db_connection() + try: + # Query for cached images with this assignment_id (no clip_embedding in SELECT) + rows = await conn.fetch( + """ + SELECT reference_id, image_path, phash, dhash, ahash, created_at + FROM reference_images + WHERE reference_id LIKE $1 + ORDER BY reference_id + """, + f"ASSIGN-{assignment_id}-%" + ) + + if not rows: + logger.info(f"No cached reference images found for assignment: {assignment_id}") + return None + + # Check if cache is still valid + oldest_created = min(row['created_at'] for row in rows) + age_days = (datetime.utcnow() - oldest_created).days + + if age_days > ASSIGNMENT_CACHE_DAYS: + logger.info(f"Cache expired for assignment {assignment_id} (age: {age_days} days)") + return None + + logger.info(f"Retrieved {len(rows)} cached reference images for assignment: {assignment_id}") + + # Return just the hashes and name (no embeddings) + images = [] + for row in rows: + images.append({ + "name": row['reference_id'] + row['image_path'], # Image name stored in image_path + "phash": row['phash'], + "dhash": row['dhash'], + "ahash": row['ahash'] + }) + + return images + + finally: + await conn.close() + + except Exception as e: + logger.error(f"Failed to retrieve cached assignment: {e}") + return None + + +async def save_to_cache(assignment_id: str, images: List[Dict]): + """ + Save assignment reference images (hashes + embeddings) to database cache. 
+ + Args: + assignment_id: Assignment identifier + images: List of image dicts with "name", "phash", "dhash", "ahash", and optionally "embedding" + """ + try: + conn = await get_db_connection() + try: + cached_count = 0 + + for idx, img_data in enumerate(images): + try: + # Create unique reference_id for this assignment's reference image + reference_id = f"ASSIGN-{assignment_id}-{idx:03d}" + image_name = img_data.get("name", f"ref_{idx}") + + # Check if hashes are precomputed + if "phash" not in img_data or "dhash" not in img_data or "ahash" not in img_data: + logger.warning(f"Hashes not precomputed for image {idx}, skipping cache") + continue + + # Extract embedding if present + embedding = img_data.get("embedding") + + if embedding is not None: + # Convert numpy array to pgvector format + embedding_str = '[' + ','.join(map(str, embedding.tolist())) + ']' + clip_generated = True + else: + embedding_str = None + clip_generated = False + + # Insert into database with embedding + await conn.execute( + """ + INSERT INTO reference_images + (reference_id, image_path, phash, dhash, ahash, category, description, source, + clip_embedding_generated, clip_embedding) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10::vector) + ON CONFLICT (reference_id) DO UPDATE SET + image_path = EXCLUDED.image_path, + phash = EXCLUDED.phash, + dhash = EXCLUDED.dhash, + ahash = EXCLUDED.ahash, + clip_embedding_generated = EXCLUDED.clip_embedding_generated, + clip_embedding = EXCLUDED.clip_embedding, + updated_at = NOW() + """, + reference_id, + image_name, + img_data["phash"], + img_data["dhash"], + img_data["ahash"], + "assignment_cache", + f"Reference image from {image_name}", + f"assignment_{assignment_id}", + clip_generated, + embedding_str + ) + + cached_count += 1 + + except Exception as img_err: + logger.error(f"Failed to cache image {idx} for assignment {assignment_id}: {img_err}") + continue + + logger.info(f"Cached {cached_count}/{len(images)} reference images for assignment: 
{assignment_id}") + + finally: + await conn.close() + + except Exception as e: + logger.error(f"Failed to save to cache: {e}") + + +async def get_reference_images( + assignment_id: str, + clip_handler=None, + hash_handler=None +) -> Optional[List[Dict]]: + """ + Fetch reference images for an assignment from TAP LMS API with caching support. + + Args: + assignment_id: Assignment identifier + clip_handler: CLIPHandler instance from worker (optional) + hash_handler: HashHandler instance from worker (optional) + + Returns: + List of reference image dictionaries with precomputed hashes (no embeddings in return) + """ + try: + # Cleanup old cache if enabled + if PURGE_CACHE: + await cleanup_cache() + + # Check cache first if enabled + if ENABLE_CACHE: + cached_images = await get_cached_assignment(assignment_id) + if cached_images is not None: + logger.info(f"Using cached reference images for assignment: {assignment_id}") + return cached_images + + # Fetch from API + logger.info(f"Fetching reference images from API for assignment: {assignment_id}") + images = await fetch_from_api(assignment_id, clip_handler, hash_handler) + + # Save to cache if enabled (embeddings will be saved to DB but not returned) + if ENABLE_CACHE and images: + await save_to_cache(assignment_id, images) + + # Remove embeddings from return object + if images: + for img in images: + img.pop("embedding", None) + + return images + + except Exception as e: + logger.error(f"Error fetching reference images: {e}") + return None + + +async def fetch_from_api( + assignment_id: str, + clip_handler=None, + hash_handler=None +) -> Optional[List[Dict]]: + """ + Fetch reference images for an assignment from TAP LMS API and compute hashes + embeddings. 
+ + Args: + assignment_id: Assignment identifier + clip_handler: CLIPHandler instance from worker (optional) + hash_handler: HashHandler instance from worker (optional) + + Returns: + List of reference image dictionaries with precomputed hashes and embeddings + """ + api_key = os.getenv("FRAPPE_API_KEY") + api_secret = os.getenv("FRAPPE_API_SECRET") + base_url = os.getenv("FRAPPE_API_BASE_URL") + + if not all([api_key, api_secret, base_url]): + logger.error("Missing API configuration: FRAPPE_API_KEY, FRAPPE_API_SECRET, or FRAPPE_API_BASE_URL") + return None + + if hash_handler is None: + logger.error("No hash_handler provided, cannot compute hashes") + return None + + assignment_context_endpoint = "api/method/tap_lms.imgana.submission.get_assignment_context" + + headers = { + "Content-Type": "application/json", + "Authorization": f"token {api_key}:{api_secret}" + } + + api_url = f"{base_url.rstrip('/')}/{assignment_context_endpoint.lstrip('/')}" + + try: + # Use synchronous requests in async context (consider aiohttp for true async) + response = requests.post( + api_url, + headers=headers, + json={"assignment_id": assignment_id}, + timeout=30 + ) + + response.raise_for_status() + data = response.json() + + reference_images = data.get("message", {}).get("assignment", {}).get("reference_images", []) + + if not reference_images: + logger.warning(f"No reference images found for assignment: {assignment_id}") + return [] + + # Process images: decode, compute hashes and embeddings, then discard PIL objects + processed_images = [] + for image in reference_images: + try: + decoded_bytes = base64.b64decode(image["content"]) + image_obj = Image.open(io.BytesIO(decoded_bytes)) + + # Compute hashes using passed handler + hashes = hash_handler.compute_hashes(image_obj) + + # Generate CLIP embedding if handler provided + embedding = None + if clip_handler is not None: + try: + embedding = clip_handler.generate_embedding(image_obj) + except Exception as embed_err: + 
logger.error(f"Failed to generate embedding for {image.get('name', 'unknown')}: {embed_err}") + + # Close PIL Image - we don't need it anymore + image_obj.close() + + # Store hashes and embedding (embedding will be saved to DB but removed before return) + processed_images.append({ + "name": image.get("name", f"ref_{len(processed_images)}"), + "phash": hashes["phash"], + "dhash": hashes["dhash"], + "ahash": hashes["ahash"], + "embedding": embedding # Temporary, for save_to_cache + }) + + except Exception as img_err: + logger.error(f"Failed to process image {image.get('name', 'unknown')}: {img_err}") + continue + + logger.info(f"Fetched and processed {len(processed_images)} reference images from API for assignment: {assignment_id}") + return processed_images + + except requests.exceptions.RequestException as e: + logger.error(f"API request failed for assignment {assignment_id}: {e}") + return None + except Exception as e: + logger.error(f"Failed to fetch from API: {e}") + return None \ No newline at end of file diff --git a/image_worker/worker.py b/image_worker/worker.py index 0798bef..012d2e9 100644 --- a/image_worker/worker.py +++ b/image_worker/worker.py @@ -11,6 +11,7 @@ from typing import Dict, Optional, Tuple, Any from urllib.parse import urlparse from dotenv import load_dotenv +from image_worker.assigment_ref_images import get_reference_images from config.config import config from database.db_manager import DatabaseManager @@ -291,7 +292,75 @@ async def _async_compare_self(self, hashes, prev): ) return comparison, prev - async def check_hash_match( + async def check_assignment_reference_hash_match( + self, hashes: dict, assignment_id: str + ) -> Tuple[bool, Optional[str], Optional[float], Optional[str]]: + """ + Check if submission matches any reference image via perceptual hash comparison. + + Uses three hash types (pHash, dHash, aHash) for robust duplicate detection. 
+ + Args: + hashes: Dict containing 'phash', 'dhash', 'ahash' hex strings + + Returns: + Tuple of (is_match, reference_id, similarity_score, image_url) + - is_match: True if hash match found + - reference_id: UUID of matched reference (or None) + - similarity_score: 0.0-1.0 similarity score (or None) + - image_url: URL of matched reference image (or None) + + Raises: + Exception: If database query fails + """ + try: + + references = await get_reference_images(assignment_id, self.clip_handler, self.hash_handler) + if not references: + return False, None, None, None + + # for ref_image in references: + # if ref_image["content"] is not None: + # hashes = self.hash_handler.compute_hashes(ref_image["content"]) + # ref_image['phash'] = hashes['phash'] + # ref_image['dhash'] = hashes['dhash'] + # ref_image['ahash'] = hashes['ahash'] + + tasks = [self._async_compare_ref(hashes, ref) for ref in references] + results = await asyncio.gather(*tasks) + + + best_match = None + best_score = 999 + best_comparison = None + for comparison, ref in results: + if comparison["is_match"] and comparison["avg_distance"] < best_score: + best_score = comparison["avg_distance"] + best_match = ref + best_comparison = comparison + + if best_match and best_comparison: + print("#"*70) + print("Best match found:",best_match["name"]) + print("#"*70) + logger.info("Assignment reference match found") + similarity = 1 - (best_score / 64.0) + return ( + True, + str(best_match["name"]), + similarity, + str(best_match["name"]), + ) + else: + logger.info("No assignment reference match found") + return False, None, None, None + + except Exception as e: + logger.error(f"Hash check failed: {e}", exc_info=True) + raise + + + async def check_db_reference_hash_match( self, hashes: dict ) -> Tuple[bool, Optional[str], Optional[float], Optional[str]]: """ @@ -375,6 +444,11 @@ async def check_clip_match( if not results: return None, 0.0, None + + print("#"*70) + for ref_id, sim, meta in results: + print(f" Ref ID: 
{ref_id}, Similarity: {sim:.4f}, Meta: {meta}") + print("#"*70) matches = [ (ref_id, sim, meta) @@ -540,7 +614,8 @@ async def process_submission(self, data: Dict[str, Any]) -> Optional[str]: student_id, self_result.get("first_submission_date_for_image", None), ) - hash_check_result = await self.check_hash_match(hashes) + # hash_check_result = await self.check_db_reference_hash_match(hashes) + hash_check_result = await self.check_assignment_reference_hash_match(hashes,assign_id) ( hash_match, @@ -1020,6 +1095,9 @@ def _create_stock_image_result( "student_id": student_id, "assignment_id": assign_id, "image_url": image_url, + "is_ai_generated": False, + "ai_detection_source": "None", + "ai_confidence": 0.0, "is_plagiarized": True, "similarity_score": 1.0, "match_type": "stock_image", @@ -1290,8 +1368,9 @@ def format_results( ) message["ai_confidence"] = plagiarism_status.get("ai_confidence", 0.0) - payload_preview = json.dumps(message, indent=2)[:2000] - logger.info(f"Result payload preview (2000 chars):\n{payload_preview}...") + # payload_preview = json.dumps(message, indent=2)[:2000] + # logger.info(f"Result payload preview (2000 chars):\n{payload_preview}...") + return json.dumps(message) diff --git a/processors/image_processor.py b/processors/image_processor.py index 653a66c..02f0035 100644 --- a/processors/image_processor.py +++ b/processors/image_processor.py @@ -65,6 +65,9 @@ async def process(self, data: dict) -> dict: ) return {"error": "Invalid JSON response from worker"} + logger.info("Result payload ") + logger.info(json.dumps(result, indent=2)) + logger.info( f"Successfully processed submission: {data.get('submission_id')}" ) From 2b7ca67e60b9a3fd58d60a4f4a61066d8237ff15 Mon Sep 17 00:00:00 2001 From: manua-glitch Date: Fri, 2 Jan 2026 10:32:25 +0530 Subject: [PATCH 3/7] Delete .env.prod --- .env.prod | 119 ------------------------------------------------------ 1 file changed, 119 deletions(-) delete mode 100644 .env.prod diff --git a/.env.prod 
b/.env.prod deleted file mode 100644 index 101b48d..0000000 --- a/.env.prod +++ /dev/null @@ -1,119 +0,0 @@ -# RABBITMQ CONFIGURATION -RABBITMQ_HOST=armadillo.rmq.cloudamqp.com -RABBITMQ_PORT=5672 -RABBITMQ_USER=fzdqidte -RABBITMQ_PASS=0SMrDogBVcWUcu9brWwp2QhET_kArl59 -RABBITMQ_VHOST=fzdqidte -RABBITMQ_MANAGEMENT_PORT=15672 -RABBITMQ_PREFETCH_COUNT=1 - -# Message retry configuration -# Maximum number of retries before sending to DLQ (prevents poison messages) -MAX_RETRIES=3 - -# Queue Names -SUBMISSION_QUEUE=plagiarism_submissions -FEEDBACK_QUEUE=plagiarism_feedback -# Dead Letter Queue (optional - leave empty to disable) -DEAD_LETTER_QUEUE=plagiarism_failed_submissions - -# POSTGRESQL CONFIGURATION -POSTGRES_HOST=db.example.com -POSTGRES_PORT=5432 -POSTGRES_DB=plagiarism_db -POSTGRES_USER=postgres -POSTGRES_PASSWORD=postgres - -# PGADMIN CONFIGURATION (Optional - for development only) -PGADMIN_EMAIL=admin@admin.com -PGADMIN_PASSWORD=admin123 - -# Connection Pool -POSTGRES_POOL_SIZE=10 -POSTGRES_MAX_OVERFLOW=20 - -# PLAGIARISM DETECTION THRESHOLDS -EXACT_DUPLICATE_THRESHOLD=0.95 -NEAR_DUPLICATE_THRESHOLD=0.90 -SEMANTIC_MATCH_THRESHOLD=0.80 - -# ==== PRODUCTION: Uncomment below for 7-day window ==== -RESUBMISSION_WINDOW_DAYS=14 - -# Hash comparison threshold (Hamming distance) -HASH_MATCH_THRESHOLD=10 - -# IMAGE PROCESSING -# Maximum image size in MB -MAX_IMAGE_SIZE_MB=10 - -# Image download timeout in seconds -IMAGE_DOWNLOAD_TIMEOUT=30 - -# Image validation thresholds -# Min variance to detect blank images (lower = more strict) -IMAGE_MIN_VARIANCE=5.0 -# Min unique colors required -IMAGE_MIN_UNIQUE_COLORS=10 -# Max ratio of dominant color (higher = more permissive) -IMAGE_MAX_SOLID_COLOR_RATIO=0.95 - -# CLIP Model Configuration -CLIP_MODEL=ViT-L/14 -CLIP_DEVICE=cpu -CLIP_PRETRAINED=laion2B-s32B-b82K - -# Local Model Path (Optional - use pre-downloaded models) -# If set, the system will load the model from this path instead of downloading from HuggingFace -# 
Example: CLIP_LOCAL_MODEL_PATH=./models/clip/open_clip_pytorch_model.bin -# Download models from: https://huggingface.co/laion/CLIP-ViT-L-14-laion2B-s32B-b82K -CLIP_LOCAL_MODEL_PATH=/app/models/clip/open_clip_pytorch_model.bin - -# Disable SSL verification for HuggingFace downloads (for corporate proxy/self-signed certs) -# Set to "true" only if you encounter SSL certificate errors -DISABLE_SSL_VERIFY=true -PYTHONHTTPSVERIFY=0 - -# VECTOR SEARCH CONFIGURATION -# Use pgvector (PostgreSQL) or FAISS for vector similarity search -USE_PGVECTOR=true - -# FAISS Configuration -FAISS_INDEX_PATH=/app/data/faiss_index.bin -FAISS_METADATA_PATH=/app/data/faiss_metadata.json -FAISS_DIMENSION=768 -FAISS_TOP_K=4 # Number of top candidates to retrieve from FAISS search - -# STORAGE PATHS -# Reference images directory -REFERENCE_IMAGES_DIR=./data/reference_images - -# Temporary storage for downloaded submissions -TEMP_IMAGES_DIR=./data/temp_images - -# Logs directory -#LOGS_DIR=./logs - -# APPLICATION SETTINGS -LOG_LEVEL=INFO - -# Worker concurrency (number of threads) -#WORKER_THREADS=4 - -# Enable performance metrics(ignore) -#ENABLE_METRICS=true - -# DEVELOPMENT SETTINGS -# Set to "development" or "production" -#ENVIRONMENT=development - -#DEBUG=true - - - -# Mock Glific API (for testing without WhatsApp) -#Used to skip WhatsApp delivery in testing mode -MOCK_GLIFIC=true -# ==== TESTING: 2-minute resubmission window (comment out for production) ==== -RESUBMISSION_WINDOW_MINUTES=2 - From 598574cf710e6fca8978318d6d914f99ec6092f5 Mon Sep 17 00:00:00 2001 From: Manu Date: Fri, 2 Jan 2026 11:11:54 +0530 Subject: [PATCH 4/7] podman-compose changes --- docker-compose-prod.yml | 57 ++++- docs/DOCUMENTATION.md | 6 +- requirements.txt | 61 +++++- start-dev-env.ps1 | 4 +- start-dev-env.sh | 4 +- start-prod-env.sh | 445 ++++++++++++++++++++++++++++++++++++++++ 6 files changed, 554 insertions(+), 23 deletions(-) create mode 100755 start-prod-env.sh diff --git a/docker-compose-prod.yml 
b/docker-compose-prod.yml index 89a21cb..3e2e144 100644 --- a/docker-compose-prod.yml +++ b/docker-compose-prod.yml @@ -1,6 +1,42 @@ version: '3.8' services: + # =================================== + # POSTGRESQL - Database + # =================================== + postgres: + image: pgvector/pgvector:pg16 + container_name: plg-postgres + ports: + - "5432:5432" + environment: + POSTGRES_DB: ${POSTGRES_DB} + POSTGRES_USER: ${POSTGRES_USER} + POSTGRES_PASSWORD: ${POSTGRES_PASSWORD} + POSTGRES_INITDB_ARGS: "-E UTF8" + POSTGRES_MAX_CONNECTIONS: 20 + PGDATA: /var/lib/postgresql/data/pgdata + volumes: + - postgres_data:/var/lib/postgresql/data + - ./database/init.sql:/docker-entrypoint-initdb.d/init.sql + healthcheck: + test: ["CMD-SHELL", "pg_isready -U $$POSTGRES_USER -d $$POSTGRES_DB"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 40s + deploy: + resources: + limits: + cpus: '0.5' + memory: 512M + reservations: + cpus: '0.25' + memory: 256M + networks: + - plg-network + restart: unless-stopped + # =================================== # PLAGIARISM CHECKER SERVICE # =================================== @@ -8,32 +44,37 @@ services: build: context: . 
dockerfile: Dockerfile - container_name: mentorme-plagiarism-checker + container_name: plg-checker env_file: - .env volumes: - ./data:/app/data - ./logs:/app/logs depends_on: - rabbitmq: - condition: service_healthy postgres: condition: service_healthy deploy: resources: limits: - cpus: '1.0' - memory: 4G + cpus: '8.0' # Increased from 4.0 - allows up to 8 CPU cores + memory: 8G reservations: - cpus: '0.5' + cpus: '2' # Increased from 1 - guarantees 2 cores minimum memory: 2G restart: unless-stopped networks: - - mentorme-plagiarism-network + - plg-network + +# =================================== +# VOLUMES +# =================================== +volumes: + postgres_data: + driver: local # =================================== # NETWORKS # =================================== networks: - mentorme-plagiarism-network: + plg-network: driver: bridge diff --git a/docs/DOCUMENTATION.md b/docs/DOCUMENTATION.md index 9bae9d4..9747212 100644 --- a/docs/DOCUMENTATION.md +++ b/docs/DOCUMENTATION.md @@ -776,11 +776,11 @@ asyncio.run(test()) #### Build Docker Image ```bash # Standard build (model downloaded on first run) -docker build -t mentorme-plagiarism:latest . +docker build -t plg:latest . # With HuggingFace token for model prefetch during build (optional) # This pre-downloads the CLIP model into the Docker image -docker build -t mentorme-plagiarism:latest \ +docker build -t plg:latest \ --build-arg HUGGINGFACE_HUB_TOKEN=your_token_here . 
# Note: HuggingFace token is optional - public models can be downloaded without authentication @@ -804,7 +804,7 @@ docker-compose down # docker-compose.yml snippet services: worker: - image: mentorme-plagiarism:latest + image: plg:latest environment: - POSTGRES_HOST=postgres - RABBITMQ_HOST=rabbitmq diff --git a/requirements.txt b/requirements.txt index 37d4505..4e590f1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,28 +8,73 @@ pgvector==0.4.1 # HTTP Client aiohttp==3.13.2 +aiohappyeyeballs==2.6.1 +aiosignal==1.4.0 requests==2.32.3 # Configuration Management (Pydantic v2 compatible) -pydantic<2.0.0,>=1.10.0 +pydantic==1.10.24 python-dotenv==1.2.1 # Image Processing pillow==12.0.0 -imagehash==4.3.2 +ImageHash==4.3.2 # Machine Learning - CLIP & FAISS torch==2.9.0 torchvision==0.24.0 -open-clip-torch==3.2.0 +open_clip_torch==3.2.0 faiss-cpu==1.12.0 numpy==2.3.4 +timm==1.0.22 +safetensors==0.7.0 +huggingface_hub==1.2.1 +filelock==3.20.0 +fsspec==2025.12.0 +ftfy==6.3.1 +regex==2025.11.3 +scipy==1.16.3 +mpmath==1.3.0 +sympy==1.14.0 +networkx==3.6 +PyWavelets==1.9.0 + +# Web Framework - FastAPI +fastapi==0.115.0 +uvicorn==0.34.0 +starlette==0.38.6 +anyio==4.12.0 +h11==0.16.0 +httptools==0.7.1 +uvloop==0.22.1 +watchfiles==1.1.1 +websockets==15.0.1 +click==8.3.1 +typer-slim==0.20.0 +shellingham==1.5.4 +httpcore==1.0.9 +httpx==0.28.1 +hf-xet==1.2.0 # Utilities tqdm==4.67.1 - podman-compose==1.5.0 - -# Web Framework - FastAPI -fastapi==0.115.0 -uvicorn[standard]==0.34.0 \ No newline at end of file +annotated-doc==0.0.4 +attrs==25.4.0 +certifi==2025.11.12 +charset-normalizer==3.4.4 +frozenlist==1.8.0 +idna==3.11 +Jinja2==3.1.6 +MarkupSafe==3.0.3 +multidict==6.7.0 +packaging==25.0 +pamqp==3.3.0 +propcache==0.4.1 +PyYAML==6.0.3 +setuptools==80.9.0 +typing_extensions==4.15.0 +urllib3==2.6.0 +wcwidth==0.2.14 +wheel==0.45.1 +yarl==1.22.0 diff --git a/start-dev-env.ps1 b/start-dev-env.ps1 index 2186b0d..d59d24d 100644 --- a/start-dev-env.ps1 +++ b/start-dev-env.ps1 @@ 
-80,8 +80,8 @@ if (Test-Path ".env") { } # Configuration (with defaults from environment or hardcoded) -$POSTGRES_CONTAINER = "mentorme-plagiarism-postgres" -$RABBITMQ_CONTAINER = "mentorme-plagiarism-rabbitmq" +$POSTGRES_CONTAINER = "plg-postgres" +$RABBITMQ_CONTAINER = "plg-rabbitmq" $POSTGRES_PORT = if ($env:POSTGRES_PORT) { $env:POSTGRES_PORT } else { 5432 } $RABBITMQ_PORT = if ($env:RABBITMQ_PORT) { $env:RABBITMQ_PORT } else { 5672 } $RABBITMQ_MGMT_PORT = if ($env:RABBITMQ_MANAGEMENT_PORT) { $env:RABBITMQ_MANAGEMENT_PORT } else { 15672 } diff --git a/start-dev-env.sh b/start-dev-env.sh index 15a9729..8878a03 100755 --- a/start-dev-env.sh +++ b/start-dev-env.sh @@ -123,8 +123,8 @@ if [ -f ".env" ]; then fi # Configuration (with defaults) -POSTGRES_CONTAINER="mentorme-plagiarism-postgres" -RABBITMQ_CONTAINER="mentorme-plagiarism-rabbitmq" +POSTGRES_CONTAINER="plg-postgres" +RABBITMQ_CONTAINER="plg-rabbitmq" POSTGRES_PORT="${POSTGRES_PORT:-5432}" RABBITMQ_PORT="${RABBITMQ_PORT:-5672}" RABBITMQ_MGMT_PORT="${RABBITMQ_MANAGEMENT_PORT:-15672}" diff --git a/start-prod-env.sh b/start-prod-env.sh new file mode 100755 index 0000000..8f92a20 --- /dev/null +++ b/start-prod-env.sh @@ -0,0 +1,445 @@ +#!/usr/bin/env bash +# Local Development Startup Script +# Starts PostgreSQL and RabbitMQ containers using docker-compose + +set -e +FULL_SETUP=0 +START_API=0 +COMPOSE_FILE="docker-compose-prod.yml" + +while [[ "$#" -gt 0 ]]; do + case "$1" in + --full-setup) FULL_SETUP=1; shift ;; + --with-api) START_API=1; shift ;; + --prod) COMPOSE_FILE="docker-compose-prod.yml"; shift ;; + --dev) COMPOSE_FILE="docker-compose-dev.yml"; shift ;; + *) break ;; + esac +done + +# Detect platform +OS_TYPE="unknown" +UNAME_OUT=$(uname -s 2>/dev/null || true) +case "${UNAME_OUT}" in + MINGW*|MSYS*|CYGWIN*) OS_TYPE="windows-msys" ;; + Darwin) OS_TYPE="macos" ;; + Linux) OS_TYPE="linux" ;; + *) OS_TYPE="unix" ;; +esac + +# Detect Python executable +PY="" +if [ "${OS_TYPE}" = "windows-msys" ]; then + 
PY_CANDIDATES=("py" "python3" "python" "python.exe") +else + PY_CANDIDATES=("python3" "python" "py" "python.exe") +fi + +for candidate in "${PY_CANDIDATES[@]}"; do + if [ "$candidate" = "py" ]; then + if command -v py >/dev/null 2>&1; then + if py -3 -c "import sys; sys.stdout.write('Python found!!\n')" 2>/dev/null; then + PY='py -3' + break + fi + fi + continue + fi + + candidate_path=$(command -v "$candidate" 2>/dev/null || true) + if [ -n "$candidate_path" ]; then + case "$candidate_path" in + *WindowsApps*|*windowsapps*) continue ;; + esac + + if "$candidate" -c "import sys; sys.stdout.write('ok')" 2>/dev/null; then + PY="$candidate" + break + fi + fi +done + +if [ -z "$PY" ]; then + echo "ERROR: No Python 3.10+ found. Install Python or ensure it's in PATH." >&2 + exit 1 +fi + +# Detect container runtime (Podman or Docker) +CONTAINER_CMD="" +COMPOSE_CMD="" +if command -v podman &> /dev/null; then + CONTAINER_CMD="podman" + if command -v podman-compose &> /dev/null; then + COMPOSE_CMD="podman-compose" + else + echo "ERROR: podman-compose not found. Install it:" >&2 + echo " pip install podman-compose" >&2 + exit 1 + fi +elif command -v docker &> /dev/null; then + CONTAINER_CMD="docker" + if command -v docker-compose &> /dev/null; then + COMPOSE_CMD="docker-compose" + elif docker compose version &> /dev/null; then + COMPOSE_CMD="docker compose" + else + echo "ERROR: docker-compose not found. Install it:" >&2 + echo " https://docs.docker.com/compose/install/" >&2 + exit 1 + fi +else + echo "ERROR: Neither Podman nor Docker found. 
Install one of them:" >&2 + echo " Podman: https://podman.io/getting-started/installation" >&2 + echo " Docker: https://docs.docker.com/get-docker/" >&2 + exit 1 +fi + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +CYAN='\033[0;36m' +GRAY='\033[0;37m' +NC='\033[0m' + +echo -e "${CYAN}==================================================================" +echo -e " Plagiarism Checker - PROD Development Setup" +echo -e " Container Runtime: ${CONTAINER_CMD}" +echo -e " Compose File: ${COMPOSE_FILE}" +echo -e "==================================================================${NC}" +echo "" + +# Validate compose file exists +if [ ! -f "$COMPOSE_FILE" ]; then + echo -e "${RED}ERROR: $COMPOSE_FILE not found${NC}" + exit 1 +fi + +# Load configuration from .env if it exists +if [ -f ".env" ]; then + set -a + source <(grep -v '^#' .env | grep -v '^$' | sed 's/\r$//') + set +a +fi + +# Configuration (with defaults) +POSTGRES_CONTAINER="plg-postgres" +POSTGRES_PORT="${POSTGRES_PORT:-5432}" +POSTGRES_PASSWORD="${POSTGRES_PASSWORD:-postgres}" +POSTGRES_DB="${POSTGRES_DB:-plagiarism_db}" +POSTGRES_USER="${POSTGRES_USER:-postgres}" +CLIP_MODEL_URL="${CLIP_MODEL_URL:-https://huggingface.co/laion/CLIP-ViT-L-14-laion2B-s32B-b82K/resolve/main/open_clip_pytorch_model.bin}" + +stop_existing_containers() { + local containers_exist=false + + # Check if compose stack is running + if $COMPOSE_CMD -f $COMPOSE_FILE ps 2>/dev/null | grep -q "Up\|running"; then + containers_exist=true + fi + + if [ "$containers_exist" = true ]; then + echo -e "${YELLOW}Existing containers found:${NC}" + $COMPOSE_CMD -f $COMPOSE_FILE ps + echo "" + echo -e "${YELLOW}This will stop and remove existing containers.${NC}" + read -p "Continue? (y/N): " -n 1 -r + echo + + if [[ ! 
$REPLY =~ ^[Yy]$ ]]; then + echo -e "${RED}Aborted by user${NC}" + # exit 0 + else + echo -e "${CYAN}Stopping existing containers...${NC}" + $COMPOSE_CMD -f $COMPOSE_FILE down + fi + fi + + echo -e "${GREEN}[OK] Ready to start containers${NC}" +} + +start_containers() { + echo -e "${CYAN}Starting containers with $COMPOSE_FILE...${NC}" + + # Determine which services to start + #local services="postgres rabbitmq pgadmin plagiarism-checker" + local services="postgres plagiarism-checker" + + if [ "$START_API" -eq 1 ]; then + services="$services api" + echo -e "${CYAN}Including API service${NC}" + + # Force rebuild API container to ensure it uses Dockerfile.api (not Dockerfile) + echo -e "${YELLOW}Rebuilding API container with Dockerfile.api...${NC}" + $COMPOSE_CMD -f $COMPOSE_FILE build --no-cache api + + if [ $? -ne 0 ]; then + echo -e "${RED}ERROR: Failed to build API container${NC}" + exit 1 + fi + echo -e "${GREEN}[OK] API container rebuilt${NC}" + fi + + $COMPOSE_CMD -f $COMPOSE_FILE up -d $services + + if [ $? 
-ne 0 ]; then + echo -e "${RED}ERROR: Failed to start containers${NC}" + exit 1 + fi + + echo -e "${GREEN}[OK] Containers started${NC}" +} + +wait_for_postgres() { + echo -e "${YELLOW}Waiting for PostgreSQL...${NC}" + + local max_attempts=30 + local attempt=0 + + while [ $attempt -lt $max_attempts ]; do + attempt=$((attempt + 1)) + + if $CONTAINER_CMD exec $POSTGRES_CONTAINER pg_isready -U $POSTGRES_USER &>/dev/null; then + echo -e "${GREEN}[OK] PostgreSQL ready${NC}" + return 0 + fi + + sleep 2 + done + + echo -e "${RED}ERROR: PostgreSQL timeout${NC}" + exit 1 +} + + +wait_for_api() { + if [ "$START_API" -ne 1 ]; then + return 0 + fi + + echo -e "${YELLOW}Waiting for API service...${NC}" + + local max_attempts=30 + local attempt=0 + + while [ $attempt -lt $max_attempts ]; do + attempt=$((attempt + 1)) + + if curl -s -f http://localhost:8000/health &>/dev/null; then + echo -e "${GREEN}[OK] API service ready${NC}" + return 0 + fi + + sleep 2 + done + + echo -e "${RED}ERROR: API service timeout${NC}" + echo -e "${YELLOW}Checking API container logs:${NC}" + $COMPOSE_CMD -f $COMPOSE_FILE logs --tail=50 api + exit 1 +} + +initialize_database() { + if [ ! 
-f "database/init.sql" ]; then + echo -e "${YELLOW}WARNING: database/init.sql not found${NC}" + return + fi + + echo -e "${CYAN}Initializing database...${NC}" + + # Run init.sql + if $CONTAINER_CMD exec -i $POSTGRES_CONTAINER psql -U $POSTGRES_USER -d $POSTGRES_DB < database/init.sql 2>/dev/null; then + echo -e "${GREEN}[OK] Database schema initialized${NC}" + else + echo -e "${GRAY}Database schema already exists${NC}" + fi + + # Create migrations tracking table if it doesn't exist + $CONTAINER_CMD exec $POSTGRES_CONTAINER psql -U $POSTGRES_USER -d $POSTGRES_DB -c " + CREATE TABLE IF NOT EXISTS schema_migrations ( + id SERIAL PRIMARY KEY, + migration_name VARCHAR(255) UNIQUE NOT NULL, + applied_at TIMESTAMP DEFAULT NOW() + ); + " 2>/dev/null + + # Run migration scripts + if [ -d "database/migrations" ]; then + local migration_count=0 + for migration_file in database/migrations/*.sql; do + if [ -f "$migration_file" ]; then + local migration_name=$(basename "$migration_file") + + # Check if migration already applied + local already_applied=$($CONTAINER_CMD exec $POSTGRES_CONTAINER psql -U $POSTGRES_USER -d $POSTGRES_DB -t -c " + SELECT COUNT(*) FROM schema_migrations WHERE migration_name = '$migration_name'; + " 2>/dev/null | tr -d '[:space:]') + + if [ "$already_applied" = "0" ]; then + echo -e "${CYAN}Applying migration: $migration_name${NC}" + + if $CONTAINER_CMD exec -i $POSTGRES_CONTAINER psql -U $POSTGRES_USER -d $POSTGRES_DB < "$migration_file" 2>/dev/null; then + # Record migration as applied + $CONTAINER_CMD exec $POSTGRES_CONTAINER psql -U $POSTGRES_USER -d $POSTGRES_DB -c " + INSERT INTO schema_migrations (migration_name) VALUES ('$migration_name'); + " 2>/dev/null + echo -e "${GREEN}[OK] Applied: $migration_name${NC}" + migration_count=$((migration_count + 1)) + else + echo -e "${YELLOW}WARNING: Failed to apply $migration_name${NC}" + fi + fi + fi + done + + if [ $migration_count -eq 0 ]; then + echo -e "${GRAY}All migrations already applied${NC}" + else 
+ echo -e "${GREEN}[OK] Applied $migration_count migration(s)${NC}" + fi + fi +} + +create_env_file() { + if [ -f ".env" ]; then + echo -e "${GREEN}[OK] .env exists${NC}" + return + fi + + if [ ! -f ".env.example" ]; then + echo -e "${RED}ERROR: .env.example not found${NC}" + exit 1 + fi + + echo -e "${CYAN}Creating .env from template...${NC}" + cp .env.example .env + + # Keep service names for docker-compose (containers communicate via service names) + # No transformation needed - .env.example already has correct service names + + echo -e "${GREEN}[OK] .env created${NC}" +} + +show_summary() { + echo -e "\n${CYAN}==================================================================" + echo -e " Environment Ready!" + echo -e "==================================================================${NC}" + echo "" + echo -e "${GREEN}✓ Services Running:${NC}" + echo -e " PostgreSQL: localhost:$POSTGRES_PORT (with pgvector)" + + if [ "$START_API" -eq 1 ]; then + echo -e " API: http://localhost:8000" + echo -e " API Docs: http://localhost:8000/docs" + fi + + echo "" + echo -e "${CYAN}💡 Quick Start:${NC}" + echo -e " ${YELLOW}./start-dev-env.sh --full-setup${NC} ${GRAY}# Development mode (default)${NC}" + echo -e " ${YELLOW}./start-dev-env.sh --with-api${NC} ${GRAY}# Include API container${NC}" + echo -e " ${YELLOW}./start-dev-env.sh --prod${NC} ${GRAY}# Production mode${NC}" + echo "" + echo -e "${CYAN}📋 Manual Setup:${NC}" + echo -e " 1. ${YELLOW}${PY} -m venv venv${NC}" + + case "${OS_TYPE}" in + windows-msys) echo -e " ${YELLOW}source venv/Scripts/activate${NC}" ;; + *) echo -e " ${YELLOW}source venv/bin/activate${NC}" ;; + esac + + echo -e " 2. ${YELLOW}${PY} -m pip install -r requirements.txt${NC}" + echo -e " 3. 
${YELLOW}${PY} app.py${NC}" + echo "" + echo -e "${CYAN}🔧 Container Commands:${NC}" + echo -e " ${GRAY}Logs: $COMPOSE_CMD -f $COMPOSE_FILE logs -f${NC}" + echo -e " ${GRAY}Stop: $COMPOSE_CMD -f $COMPOSE_FILE stop${NC}" + echo -e " ${GRAY}Remove: $COMPOSE_CMD -f $COMPOSE_FILE down${NC}" + echo "" + echo -e "${CYAN}==================================================================${NC}" +} + +main() { + create_env_file + stop_existing_containers + start_containers + wait_for_postgres + wait_for_rabbitmq + wait_for_api + initialize_database + show_summary + echo -e "\n${GRAY}Containers running in background. Press Ctrl+C to exit this script.${NC}" +} + +setup_python_environment() { + echo -e "\n${CYAN}==================================================================" + echo -e " Full Setup: Python Environment" + echo -e "==================================================================${NC}" + + if [ -d "venv" ]; then + echo -e "${GRAY}Virtual environment exists${NC}" + else + echo -e "${CYAN}Creating virtual environment...${NC}" + $PY -m venv venv + echo -e "${GREEN}[OK] venv created${NC}" + fi + + echo -e "${CYAN}Activating virtual environment...${NC}" + if [ "${OS_TYPE}" = "windows-msys" ]; then + source venv/Scripts/activate + else + source venv/bin/activate + fi + + echo -e "${CYAN}Installing dependencies (5-10 minutes)...${NC}" + python -m pip install --upgrade pip setuptools wheel + python -m pip install -r requirements.txt + echo -e "${GREEN}[OK] Dependencies installed${NC}" + + echo -e "${CYAN}Creating directories...${NC}" + mkdir -p data/reference_images data/models/clip logs + echo -e "${GREEN}[OK] Directories created${NC}" + + # Download CLIP model using curl + echo -e "${CYAN}Checking CLIP model...${NC}" + if [ ! 
-f "data/models/clip/open_clip_pytorch_model.bin" ]; then + echo -e "${YELLOW}Downloading CLIP model (this may take a while)...${NC}" + curl -L -o data/models/clip/open_clip_pytorch_model.bin \ + ${CLIP_MODEL_URL} || { + echo -e "${YELLOW}WARNING: CLIP model download failed, will download on first run${NC}" + } + + if [ -f "data/models/clip/open_clip_pytorch_model.bin" ]; then + echo -e "${GREEN}[OK] CLIP model downloaded${NC}" + fi + else + echo -e "${GRAY}CLIP model already exists${NC}" + fi + + echo -e "${CYAN}Verifying environment...${NC}" + python -c " +import open_clip, asyncpg, aio_pika, PIL, imagehash +print('✓ All imports successful') +" || { + echo -e "${RED}ERROR: Environment verification failed${NC}" + exit 1 + } + + echo -e "\n${CYAN}==================================================================" + echo -e " Setup Complete!" + echo -e "==================================================================${NC}" + echo -e "\n${GREEN}✓ Next Steps:${NC}" + echo -e "\n${CYAN}Terminal 1 - Worker:${NC}" + [ "${OS_TYPE}" = "windows-msys" ] && echo -e " ${YELLOW}source venv/Scripts/activate${NC}" || echo -e " ${YELLOW}source venv/bin/activate${NC}" + echo -e " ${YELLOW}python app.py${NC}" + echo -e "\n${CYAN}Terminal 2 - API:${NC}" + [ "${OS_TYPE}" = "windows-msys" ] && echo -e " ${YELLOW}source venv/Scripts/activate${NC}" || echo -e " ${YELLOW}source venv/bin/activate${NC}" + echo -e " ${YELLOW}cd api && uvicorn api:app --reload --host 0.0.0.0 --port 8000${NC}" + echo -e "\n${CYAN}API Docs:${NC} ${YELLOW}http://localhost:8000/docs${NC}" + echo "" +} + +main + +if [ "$FULL_SETUP" -eq 1 ]; then + setup_python_environment +fi From 578cc5dcadc385596227e0316b5e1fad9a2ce3c0 Mon Sep 17 00:00:00 2001 From: Manu Date: Fri, 9 Jan 2026 19:45:45 +0530 Subject: [PATCH 5/7] Reference image --- .gitignore | 1 + Dockerfile | 21 +++---- Dockerfile.api | 20 +++--- database/db_manager.py | 10 +-- docker-compose-prod.yml | 29 +++++++++ image_worker/worker.py | 11 ++-- 
mq/rmq_client.py | 5 ++ scripts/docker-postgres.yml | 53 ++++++++++++++++ scripts/postgres_setup.sh | 118 ++++++++++++++++++++++++++++++++++++ start-prod-env.sh | 5 +- 10 files changed, 236 insertions(+), 37 deletions(-) create mode 100644 scripts/docker-postgres.yml create mode 100644 scripts/postgres_setup.sh diff --git a/.gitignore b/.gitignore index 897c762..cf0db6d 100644 --- a/.gitignore +++ b/.gitignore @@ -16,6 +16,7 @@ build/ # Environment variables .env .env.local +.env.prod # Logs logs/ diff --git a/Dockerfile b/Dockerfile index 5426394..37475e8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -5,6 +5,7 @@ WORKDIR /app # Install build dependencies in a single layer RUN apt-get update && apt-get install -y --no-install-recommends \ build-essential \ + vim \ gcc \ g++ \ git \ @@ -18,10 +19,11 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ # Copy only requirements first for better caching COPY requirements.txt . -# Use pip cache and install in parallel -RUN --mount=type=cache,target=/root/.cache/pip \ - python -m pip install --upgrade pip setuptools wheel && \ - pip install -r requirements.txt --user --no-warn-script-location +# Install to explicit location +RUN python -m pip install --no-cache-dir --prefix=/install --upgrade pip setuptools wheel && \ + pip install --no-cache-dir --no-deps --prefix=/install -r requirements.txt + +RUN pip install --prefix=/install -r requirements.txt --no-cache-dir # ============================================ # Final stage - minimal runtime image @@ -37,18 +39,15 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ && rm -rf /var/lib/apt/lists/* # Copy installed packages from builder -COPY --from=builder /root/.local /root/.local - -ENV PATH=/root/.local/bin:$PATH +COPY --from=builder /install /usr/local # Create necessary directories RUN mkdir -p /app/data /app/logs /root/.cache/clip +RUN ls + # Copy application code (do this last for better caching) COPY . . 
-# Lightweight healthcheck -HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \ - CMD python -c "import sys; sys.exit(0)" || exit 1 -CMD ["python", "app.py"] +CMD ["python", "app.py"] \ No newline at end of file diff --git a/Dockerfile.api b/Dockerfile.api index 01fcc66..d0b5c03 100644 --- a/Dockerfile.api +++ b/Dockerfile.api @@ -1,4 +1,4 @@ -FROM python:3.13-slim +FROM python:3.13-slim as builder WORKDIR /app @@ -7,9 +7,18 @@ RUN apt-get update && apt-get install -y --no-install-recommends curl && rm -rf # Copy requirements and install dependencies COPY api/requirements.txt /app/api/requirements.txt -RUN --mount=type=cache,target=/root/.cache/pip \ - python -m pip install --upgrade pip && \ - pip install -r /app/api/requirements.txt +RUN python -m pip install --prefix=/install -r --upgrade pip && \ + pip install --prefix=/install -r /app/api/requirements.txt --no-cache-dir + +# ============================================ +# Final stage - minimal runtime image +# ============================================ +FROM python:3.13-slim + +WORKDIR /app + +# Copy installed packages from builder +COPY --from=builder /install /usr/local # Copy application code COPY api/ /app/api/ @@ -18,9 +27,6 @@ COPY utils/ /app/utils/ # Expose API port EXPOSE 8000 -# Healthcheck (check if uvicorn is responding) -HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \ - CMD curl -f http://localhost:8000/ || exit 1 # Run the API CMD ["uvicorn", "api.api:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/database/db_manager.py b/database/db_manager.py index 6db7936..d77b70a 100644 --- a/database/db_manager.py +++ b/database/db_manager.py @@ -69,14 +69,6 @@ async def init_pool(self): db_port = int(os.getenv("POSTGRES_PORT") or os.getenv("DB_PORT", "5432")) # db_port = 5435 # TEMP OVERRIDE FOR TESTING - #print the db connection details for debugging - # logger.info("###################") - # logger.info(f"DB Host: {db_host}") - # 
logger.info(f"DB Port: {db_port}") - # logger.info(f"DB Name: {db_name}") - # logger.info(f"DB User: {db_user}") - # logger.info(f"DB db_password: {db_password}") - # logger.info("###################") if not all([db_user, db_password, db_name]): @@ -525,7 +517,7 @@ async def fetch_reference_images_by_id(self, reference_id): image_path = await self._fetch( """ SELECT image_path - FROM reference_images where id = $1; + FROM reference_images where reference_id = $1; """, reference_id, ) diff --git a/docker-compose-prod.yml b/docker-compose-prod.yml index 3e2e144..0f28bd6 100644 --- a/docker-compose-prod.yml +++ b/docker-compose-prod.yml @@ -65,6 +65,35 @@ services: networks: - plg-network + # =================================== + # API SERVICE + # =================================== + api: + build: + context: . + dockerfile: Dockerfile.api + container_name: plg-api + env_file: + - .env + volumes: + - ./data:/app/data + - ./logs:/app/logs + depends_on: + postgres: + condition: service_healthy + deploy: + resources: + limits: + cpus: '8.0' # Increased from 4.0 - allows up to 8 CPU cores + memory: 8G + reservations: + cpus: '2' # Increased from 1 - guarantees 2 cores minimum + memory: 2G + restart: unless-stopped + networks: + - plg-network + + # =================================== # VOLUMES # =================================== diff --git a/image_worker/worker.py b/image_worker/worker.py index 012d2e9..a54c630 100644 --- a/image_worker/worker.py +++ b/image_worker/worker.py @@ -340,9 +340,6 @@ async def check_assignment_reference_hash_match( best_comparison = comparison if best_match and best_comparison: - print("#"*70) - print("Best match found:",best_match["name"]) - print("#"*70) logger.info("Assignment reference match found") similarity = 1 - (best_score / 64.0) return ( @@ -445,10 +442,10 @@ async def check_clip_match( if not results: return None, 0.0, None - print("#"*70) - for ref_id, sim, meta in results: - print(f" Ref ID: {ref_id}, Similarity: {sim:.4f}, 
Meta: {meta}") - print("#"*70) + # print("#"*70) + # for ref_id, sim, meta in results: + # print(f" Ref ID: {ref_id}, Similarity: {sim:.4f}, Meta: {meta}") + # print("#"*70) matches = [ (ref_id, sim, meta) diff --git a/mq/rmq_client.py b/mq/rmq_client.py index 6571c0b..a0f0786 100644 --- a/mq/rmq_client.py +++ b/mq/rmq_client.py @@ -102,6 +102,11 @@ async def connect(self): self.DEAD_LETTER_QUEUE, durable=True ) logger.info(f"Dead Letter Queue declared: {self.DEAD_LETTER_QUEUE}") + self.submission_queue = await self.channel.declare_queue( + self.SUBMISSION_QUEUE, + durable=True + ) + logger.info(f"Submission queue created: {self.SUBMISSION_QUEUE}") try: # First try passive declaration to check if queue exists diff --git a/scripts/docker-postgres.yml b/scripts/docker-postgres.yml new file mode 100644 index 0000000..f6dac63 --- /dev/null +++ b/scripts/docker-postgres.yml @@ -0,0 +1,53 @@ +version: '3.8' + +services: + # =================================== + # POSTGRESQL - Database + # =================================== + postgres: + image: pgvector/pgvector:pg16 + container_name: plg-postgres + ports: + - "5432:5432" + environment: + POSTGRES_DB: plagiarism_db + POSTGRES_USER: postgres + POSTGRES_PASSWORD: postgres + POSTGRES_INITDB_ARGS: "-E UTF8" + POSTGRES_MAX_CONNECTIONS: 20 + PGDATA: /var/lib/postgresql/data/pgdata + volumes: + - postgres_data:/var/lib/postgresql/data + - ./database/init.sql:/docker-entrypoint-initdb.d/init.sql + healthcheck: + test: ["CMD-SHELL", "pg_isready -U $$POSTGRES_USER -d $$POSTGRES_DB"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 40s + deploy: + resources: + limits: + cpus: '0.5' + memory: 512M + reservations: + cpus: '0.25' + memory: 256M + networks: + - plg-network + restart: always + + +# =================================== +# VOLUMES +# =================================== +volumes: + postgres_data: + driver: local + +# =================================== +# NETWORKS +# =================================== +networks: 
+ plg-network: + driver: bridge diff --git a/scripts/postgres_setup.sh b/scripts/postgres_setup.sh new file mode 100644 index 0000000..92396b9 --- /dev/null +++ b/scripts/postgres_setup.sh @@ -0,0 +1,118 @@ +COMPOSE_FILE="docker-postgres.yml" +COMPOSE_CMD="podman-compose"; CONTAINER_CMD="podman" + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +CYAN='\033[0;36m' +GRAY='\033[0;37m' +NC='\033[0m' + + +# Load configuration from .env if it exists +if [ -f ".env" ]; then + set -a + source <(grep -v '^#' .env | grep -v '^$' | sed 's/\r$//') + set +a +fi + +# Configuration (with defaults) +POSTGRES_CONTAINER="plg-postgres" +POSTGRES_PORT="${POSTGRES_PORT:-5432}" +POSTGRES_PASSWORD="${POSTGRES_PASSWORD:-postgres}" +POSTGRES_DB="${POSTGRES_DB:-plagiarism_db}" +POSTGRES_USER="${POSTGRES_USER:-postgres}" + +$COMPOSE_CMD -f $COMPOSE_FILE up -d + +wait_for_postgres() { + echo -e "${YELLOW}Waiting for PostgreSQL...${NC}" + + local max_attempts=30 + local attempt=0 + + while [ $attempt -lt $max_attempts ]; do + attempt=$((attempt + 1)) + + if $CONTAINER_CMD exec $POSTGRES_CONTAINER pg_isready -U $POSTGRES_USER &>/dev/null; then + echo -e "${GREEN}[OK] PostgreSQL ready${NC}" + return 0 + fi + + sleep 2 + done + + echo -e "${RED}ERROR: PostgreSQL timeout${NC}" + exit 1 +} + +initialize_database() { + if [ ! 
-f "database/init.sql" ]; then + echo -e "${YELLOW}WARNING: database/init.sql not found${NC}" + return + fi + + echo -e "${CYAN}Initializing database...${NC}" + + # Run init.sql + if $CONTAINER_CMD exec -i $POSTGRES_CONTAINER psql -U $POSTGRES_USER -d $POSTGRES_DB < database/init.sql 2>/dev/null; then + echo -e "${GREEN}[OK] Database schema initialized${NC}" + else + echo -e "${GRAY}Database schema already exists${NC}" + fi + + # Create migrations tracking table if it doesn't exist + $CONTAINER_CMD exec $POSTGRES_CONTAINER psql -U $POSTGRES_USER -d $POSTGRES_DB -c " + CREATE TABLE IF NOT EXISTS schema_migrations ( + id SERIAL PRIMARY KEY, + migration_name VARCHAR(255) UNIQUE NOT NULL, + applied_at TIMESTAMP DEFAULT NOW() + ); + " 2>/dev/null + + # Run migration scripts + if [ -d "database/migrations" ]; then + local migration_count=0 + for migration_file in database/migrations/*.sql; do + if [ -f "$migration_file" ]; then + local migration_name=$(basename "$migration_file") + + # Check if migration already applied + local already_applied=$($CONTAINER_CMD exec $POSTGRES_CONTAINER psql -U $POSTGRES_USER -d $POSTGRES_DB -t -c " + SELECT COUNT(*) FROM schema_migrations WHERE migration_name = '$migration_name'; + " 2>/dev/null | tr -d '[:space:]') + + if [ "$already_applied" = "0" ]; then + echo -e "${CYAN}Applying migration: $migration_name${NC}" + + if $CONTAINER_CMD exec -i $POSTGRES_CONTAINER psql -U $POSTGRES_USER -d $POSTGRES_DB < "$migration_file" 2>/dev/null; then + # Record migration as applied + $CONTAINER_CMD exec $POSTGRES_CONTAINER psql -U $POSTGRES_USER -d $POSTGRES_DB -c " + INSERT INTO schema_migrations (migration_name) VALUES ('$migration_name'); + " 2>/dev/null + echo -e "${GREEN}[OK] Applied: $migration_name${NC}" + migration_count=$((migration_count + 1)) + else + echo -e "${YELLOW}WARNING: Failed to apply $migration_name${NC}" + fi + fi + fi + done + + if [ $migration_count -eq 0 ]; then + echo -e "${GRAY}All migrations already applied${NC}" + else 
+ echo -e "${GREEN}[OK] Applied $migration_count migration(s)${NC}" + fi + fi +} + +main() { + wait_for_postgres + initialize_database + echo -e "\n${GRAY}Containers running in background. Press Ctrl+C to exit this script.${NC}" +} + +main +echo -e "${YELLOW}PostgreSQL done...${NC}" \ No newline at end of file diff --git a/start-prod-env.sh b/start-prod-env.sh index 8f92a20..b9b8e0c 100755 --- a/start-prod-env.sh +++ b/start-prod-env.sh @@ -4,12 +4,12 @@ set -e FULL_SETUP=0 -START_API=0 +START_API=1 COMPOSE_FILE="docker-compose-prod.yml" while [[ "$#" -gt 0 ]]; do case "$1" in - --full-setup) FULL_SETUP=1; shift ;; + --full-setup) FULL_SETUP=1; shift ;; --with-api) START_API=1; shift ;; --prod) COMPOSE_FILE="docker-compose-prod.yml"; shift ;; --dev) COMPOSE_FILE="docker-compose-dev.yml"; shift ;; @@ -363,7 +363,6 @@ main() { stop_existing_containers start_containers wait_for_postgres - wait_for_rabbitmq wait_for_api initialize_database show_summary From c1406eb8256c37ed0c3a9cd0be1e2a7596ba1266 Mon Sep 17 00:00:00 2001 From: Manu Date: Fri, 9 Jan 2026 19:48:42 +0530 Subject: [PATCH 6/7] Reference Image --- database/db_manager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/database/db_manager.py b/database/db_manager.py index d77b70a..fd7fb8d 100644 --- a/database/db_manager.py +++ b/database/db_manager.py @@ -514,7 +514,7 @@ async def fetch_reference_images_by_id(self, reference_id): raise RuntimeError("Database pool not initialized") try: - image_path = await self._fetch( + image_path = await self._fetchval( """ SELECT image_path FROM reference_images where reference_id = $1; From 825065497cdea6f215024a9d12da20a2a34ec4a5 Mon Sep 17 00:00:00 2001 From: Manu Date: Tue, 17 Feb 2026 22:27:33 +0530 Subject: [PATCH 7/7] video media handling --- image_worker/image_validator.py | 38 +++++++++++++++++++++++++++++++ image_worker/worker.py | 40 +++++++++++++++++++++++++++++++-- mq/rmq_client.py | 9 ++++---- 3 files changed, 80 insertions(+), 7 
deletions(-) diff --git a/image_worker/image_validator.py b/image_worker/image_validator.py index 43a7b25..e4d1666 100644 --- a/image_worker/image_validator.py +++ b/image_worker/image_validator.py @@ -53,6 +53,26 @@ class ImageValidator: "vecteezy", ] + + IMAGE_EXTENSIONS = { + "jpg", + "jpeg", + "png", + "gif", + "webp", + "bmp", + "tiff", + "heic", + } + VIDEO_EXTENSIONS = { + "mp4", + "mov", + "webm", + "mkv", + "avi", + "mpeg", + "mpg", + } def __init__( self, min_variance_threshold: float = 5.0, @@ -71,6 +91,24 @@ def __init__( self.min_unique_colors = min_unique_colors self.max_solid_color_ratio = max_solid_color_ratio + + def detect_media_type(self, submission_url: str) -> str: + """Detect media type based on URL/extension.""" + if not submission_url: + return "image" + + url_without_query = submission_url.split("?", 1)[0].lower() + if "." in url_without_query: + ext = url_without_query.rsplit(".", 1)[-1] + if ext in self.IMAGE_EXTENSIONS: + return "image" + if ext in self.VIDEO_EXTENSIONS: + return "video" + + if "video" in url_without_query: + return "video" + return "image" + def check_stock_image_url(self, image_url: str) -> Tuple[bool, Optional[str]]: """ Check if URL is from a known stock image website. 
diff --git a/image_worker/worker.py b/image_worker/worker.py index a54c630..b5d67b1 100644 --- a/image_worker/worker.py +++ b/image_worker/worker.py @@ -356,7 +356,6 @@ async def check_assignment_reference_hash_match( logger.error(f"Hash check failed: {e}", exc_info=True) raise - async def check_db_reference_hash_match( self, hashes: dict ) -> Tuple[bool, Optional[str], Optional[float], Optional[str]]: @@ -550,10 +549,24 @@ async def process_submission(self, data: Dict[str, Any]) -> Optional[str]: try: extracted = self._validate_input(data) - submission_id, student_id, assign_id, image_url, db_record_id = extracted + submission_id, student_id, assign_id, submission_url, db_record_id = extracted logger.info(f"Processing submission: {submission_id}") + # Check for video URLs before attempting to download + media_type = self.image_validator.detect_media_type(submission_url) + if media_type == "video": + logger.warning( + f"Video URL rejected: submission={submission_id}, url={submission_url}" + ) + video_result = self._create_video_url_result( + submission_id, student_id, assign_id, submission_url + ) + processing_time_ms = int((time.time() - start_time) * 1000) + return json.dumps(video_result) + else: + image_url = submission_url + # Check for stock image URLs before downloading is_stock, stock_site = self.image_validator.check_stock_image_url(image_url) if is_stock and stock_site: @@ -1101,6 +1114,29 @@ def _create_stock_image_result( "plagiarism_source": f"stock_image_{stock_site}", "similar_sources": [{"source": stock_site, "url": image_url}], } + + def _create_video_url_result( + self, + submission_id: str, + student_id: str, + assign_id: str, + submission_url: str + ) -> dict: + """Create video URL detection result dictionary.""" + return { + "submission_id": submission_id, + "student_id": student_id, + "assignment_id": assign_id, + "image_url": submission_url, + "is_ai_generated": False, + "ai_detection_source": "None", + "ai_confidence": 0.0, + 
"is_plagiarized": False, + "similarity_score": 1.0, + "match_type": "original", + "plagiarism_source": None, + "similar_sources": None, + } async def _build_reference_result( self, diff --git a/mq/rmq_client.py b/mq/rmq_client.py index a0f0786..1e4f31a 100644 --- a/mq/rmq_client.py +++ b/mq/rmq_client.py @@ -102,11 +102,7 @@ async def connect(self): self.DEAD_LETTER_QUEUE, durable=True ) logger.info(f"Dead Letter Queue declared: {self.DEAD_LETTER_QUEUE}") - self.submission_queue = await self.channel.declare_queue( - self.SUBMISSION_QUEUE, - durable=True - ) - logger.info(f"Submission queue created: {self.SUBMISSION_QUEUE}") + try: # First try passive declaration to check if queue exists @@ -124,6 +120,8 @@ async def connect(self): ) logger.info(f"Submission queue created: {self.SUBMISSION_QUEUE}") + + try: # First try passive declaration to check if queue exists self.feedback_queue = await self.channel.declare_queue( @@ -165,6 +163,7 @@ async def publish_message(self, message_body): logger.info( f"Published submission {message_body.get('submission_id')} for user {message_body.get('student_id')}" ) + logger.info(f"Published message body: {message_body}") except asyncio.CancelledError as e: logger.warning("publish_message CancelledError") raise Exception("publish_message CancelledError") from e