From 2b7210a655f6e4939713c7796f41cc422ff6f96f Mon Sep 17 00:00:00 2001
From: davidamacey
Date: Tue, 14 Oct 2025 00:58:03 -0400
Subject: [PATCH] feat: Production hardening with infrastructure upgrades and
 enhanced offline deployment
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This release significantly improves production security, reliability, and user
experience with comprehensive infrastructure modernization and air-gapped
deployment enhancements.

## Infrastructure Upgrades
- **PostgreSQL**: 14-alpine → 17.5-alpine (security patches, performance improvements)
- **Redis**: 7-alpine → 8.2.2-alpine3.22 (latest stable release)
- **MinIO**: latest → RELEASE.2025-09-07T16-13-09Z (pinned for reproducibility)
- **Nginx**: alpine → 1.29.2-alpine3.22 (security updates)
- **PyTorch**: Simplified CUDA wheel installation (removed +cu128 suffix for compatibility)

## Security Hardening
- **Non-root frontend container**: Nginx now runs as an unprivileged user on port 8080
- **Enhanced Docker security scanning**:
  - Fixed multi-arch scanning by pulling fresh amd64 images before Trivy/Dockle
  - Added 600s timeout to Dockle for large image scans
  - Improved cache control with NO_CACHE environment variable
- **LLM provider environment variables**: Added missing vLLM and Ollama configuration

## Offline Deployment Improvements
- **Automatic version syncing**: Infrastructure image versions automatically sync from docker-compose.yml to the offline compose file
- **Enhanced model download process**:
  - Fixed PyAnnote model download to use the full WhisperX pipeline (matches backend behavior)
  - Proper root user execution in Docker for cache path compatibility
  - Added sudo handling for Docker-created files
  - Better error handling and progress reporting
- **New standalone model downloader**: `scripts/download-models.sh` for pre-caching models before Docker startup

## User Experience Enhancements
- **Interactive HuggingFace token setup**:
  - Setup script now
    prompts for HF token during installation
  - Automatic model pre-download if token provided (eliminates the 10-30 minute first-use delay)
  - Clear instructions for obtaining a token, with a skip option
  - Model downloads happen before Docker starts, ensuring instant readiness
- **Improved documentation**:
  - Updated README with hardware detection and automatic model caching details
  - Better explanations of token requirements and offline capabilities
  - Clearer setup instructions

## Technical Changes
- **Docker Compose**: All services use pinned versions for reproducibility
- **Build script**: Enhanced cache control, improved security scan workflow
- **Model download**: Complete rewrite to match production backend behavior exactly
- **Setup script**: Downloads model downloader scripts with retry logic and validation

## Breaking Changes
- Frontend container now uses port 8080 instead of 80 (non-privileged port)
- Update any external configurations that reference frontend:80 to frontend:8080

## Migration Notes
For existing installations:
1. Pull latest images: `docker compose pull`
2. Restart services: `./opentranscribe.sh restart`
3.
Monitor logs for any issues: `./opentranscribe.sh logs`

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude
---
 README.md                        |  38 +--
 backend/requirements.txt         |   4 +-
 docker-compose.offline.yml       |  16 +-
 docker-compose.yml               |   8 +-
 frontend/Dockerfile.prod         |  22 +-
 frontend/nginx.conf              |   6 +-
 scripts/build-offline-package.sh | 120 +++++++--
 scripts/docker-build-push.sh     |  27 +-
 scripts/download-models.py       | 107 ++++++--
 scripts/download-models.sh       | 268 +++++++++++++++++++
 scripts/security-scan.sh         |  10 +-
 scripts/test-model-download.sh   |  67 +++++
 setup-opentranscribe.sh          | 425 ++++++++++++++++++++++++-------
 13 files changed, 947 insertions(+), 171 deletions(-)
 create mode 100755 scripts/download-models.sh
 create mode 100755 scripts/test-model-download.sh

diff --git a/README.md b/README.md
index 716391ab..dbd16b42 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
OpenTranscribe Logo - + **AI-Powered Transcription and Media Analysis Platform**
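As a quick aid for the migration noted in this patch's Breaking Changes (frontend:80 → frontend:8080), a short shell check can flag external configs that still reference the old port. This is an illustrative sketch only: the directory and config file below are hypothetical stand-ins for your own reverse-proxy or monitoring configs.

```shell
#!/bin/sh
# Hypothetical external config that still points at the old privileged port.
mkdir -p /tmp/ot-port-check
cat > /tmp/ot-port-check/proxy.conf <<'EOF'
upstream app { server frontend:80; }
EOF

# Match "frontend:80" not followed by another digit, so the already-migrated
# "frontend:8080" is not flagged.
grep -rnE 'frontend:80([^0-9]|$)' /tmp/ot-port-check \
  && echo "update the matches above to frontend:8080"
```

Anything the grep prints should be updated to `frontend:8080` before pulling the new images.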
@@ -165,11 +165,15 @@ curl -fsSL https://raw.githubusercontent.com/davidamacey/OpenTranscribe/master/s ``` Then follow the on-screen instructions. The setup script will: +- Detect your hardware (NVIDIA GPU, Apple Silicon, or CPU) - Download the production Docker Compose file -- Configure environment variables including GPU support (default GPU device ID: 2) -- Help you set up your Hugging Face token (required for speaker diarization) +- Configure environment variables with optimal settings for your hardware +- **Prompt for your HuggingFace token** (required for speaker diarization) +- **Automatically download and cache AI models (~2.5GB)** if token is provided - Set up the management script (`opentranscribe.sh`) +**Note:** The script will prompt you for your HuggingFace token during setup. If you provide it, AI models will be downloaded and cached before Docker starts, ensuring the app is ready to use immediately. If you skip this step, models will download on first use (10-30 minute delay). + Once setup is complete, start OpenTranscribe with: ```bash @@ -189,7 +193,7 @@ Access the web interface at http://localhost:5173 ```bash git clone https://github.com/davidamacey/OpenTranscribe.git cd OpenTranscribe - + # Make utility script executable chmod +x opentr.sh ``` @@ -198,7 +202,7 @@ Access the web interface at http://localhost:5173 ```bash # Copy environment template cp .env.example .env - + # Edit .env file with your settings (optional for development) # Key variables: # - HUGGINGFACE_TOKEN (required for speaker diarization) @@ -209,7 +213,7 @@ Access the web interface at http://localhost:5173 ```bash # Start in development mode (with hot reload) ./opentr.sh start dev - + # Or start in production mode ./opentr.sh start prod ``` @@ -470,7 +474,7 @@ OpenTranscribe offers flexible AI deployment options. 
Choose the approach that b LLM_PROVIDER=openai OPENAI_API_KEY=your_openai_key OPENAI_MODEL_NAME=gpt-4o-mini - + # Start without local LLM ./opentr.sh start dev ``` @@ -480,7 +484,7 @@ OpenTranscribe offers flexible AI deployment options. Choose the approach that b # Configure for vLLM in .env LLM_PROVIDER=vllm VLLM_MODEL_NAME=gpt-oss-20b - + # Start with vLLM service (requires 16GB+ VRAM) docker compose -f docker-compose.yml -f docker-compose.vllm.yml up ``` @@ -490,7 +494,7 @@ OpenTranscribe offers flexible AI deployment options. Choose the approach that b # Configure for Ollama in .env LLM_PROVIDER=ollama OLLAMA_MODEL_NAME=llama3.2:3b-instruct-q4_K_M - + # Edit docker-compose.vllm.yml and uncomment ollama service # Then start with both compose files docker compose -f docker-compose.yml -f docker-compose.vllm.yml up @@ -501,7 +505,7 @@ OpenTranscribe offers flexible AI deployment options. Choose the approach that b # Cloud Providers (configure in .env) LLM_PROVIDER=openai # openai, anthropic, custom (openrouter) OPENAI_API_KEY=your_openai_key # OpenAI GPT models -ANTHROPIC_API_KEY=your_claude_key # Anthropic Claude models +ANTHROPIC_API_KEY=your_claude_key # Anthropic Claude models OPENROUTER_API_KEY=your_or_key # OpenRouter (multi-provider) # Local Providers (requires additional Docker services) @@ -511,7 +515,7 @@ LLM_PROVIDER=ollama # Local Ollama server **๐ŸŽฏ Deployment Scenarios:** - **๐Ÿ’ฐ Cost-Effective**: OpenRouter with Claude Haiku (~$0.25/1M tokens) -- **๐Ÿ”’ Privacy-First**: Local vLLM or Ollama (no data leaves your server) +- **๐Ÿ”’ Privacy-First**: Local vLLM or Ollama (no data leaves your server) - **โšก Performance**: OpenAI GPT-4o-mini (fastest cloud option) - **๐Ÿ“ฑ Small Models**: Even 3B Ollama models can handle hours of content via intelligent sectioning - **๐Ÿšซ No LLM**: Leave `LLM_PROVIDER` empty for transcription-only mode @@ -534,7 +538,7 @@ OpenTranscribe automatically downloads and caches AI models for optimal performa โ”‚ โ”œโ”€โ”€ 
hub/ # WhisperX transcription models (~1.5GB) │ └── transformers/ # PyAnnote transformer models └── torch/ # PyTorch cache - ├── hub/checkpoints/ # Wav2Vec2 alignment model (~360MB) + ├── hub/checkpoints/ # Wav2Vec2 alignment model (~360MB) └── pyannote/ # PyAnnote diarization models (~500MB) ``` @@ -606,7 +610,7 @@ For production use, ensure you: # Generate strong secrets openssl rand -hex 32 # For SECRET_KEY openssl rand -hex 32 # For JWT_SECRET_KEY - + # Set strong database passwords # Configure proper firewall rules # Set up SSL/TLS certificates @@ -616,7 +620,7 @@ For production use, ensure you: ```bash # Use production environment NODE_ENV=production - + # Configure resource limits # Set up monitoring and logging # Configure backup strategies @@ -628,7 +632,7 @@ For production use, ensure you: server { listen 80; server_name your-domain.com; - + location / { proxy_pass http://localhost:5173; proxy_set_header Host $host; @@ -657,7 +661,7 @@ pytest tests/ # Run tests black app/ # Format code flake8 app/ # Lint code -# Frontend development +# Frontend development cd frontend/ npm install npm run dev # Development server @@ -835,4 +839,4 @@ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file **Built with ❤️ using AI assistance and modern open-source technologies.** -*OpenTranscribe demonstrates the power of AI-assisted development while maintaining full local control over your data and processing.* \ No newline at end of file +*OpenTranscribe demonstrates the power of AI-assisted development while maintaining full local control over your data and processing.* diff --git a/backend/requirements.txt b/backend/requirements.txt index d42ec54a..ca0fafc5 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -27,8 +27,8 @@ numpy>=1.25.2 # PyTorch with CUDA 12.8 support (CVE-2025-32434 fixed in 2.6.0+) --extra-index-url https://download.pytorch.org/whl/cu128 -torch==2.8.0+cu128
-torchaudio==2.8.0+cu128 +torch==2.8.0 +torchaudio==2.8.0 # WhisperX latest version with ctranslate2 4.5+ support whisperx==3.7.0 diff --git a/docker-compose.offline.yml b/docker-compose.offline.yml index 11f43a1e..2e03cb32 100644 --- a/docker-compose.offline.yml +++ b/docker-compose.offline.yml @@ -6,7 +6,7 @@ version: '3.8' services: postgres: - image: postgres:14-alpine + image: postgres:17.5-alpine restart: always volumes: - postgres_data:/var/lib/postgresql/data/ @@ -24,7 +24,7 @@ services: retries: 5 minio: - image: minio/minio:latest + image: minio/minio:RELEASE.2025-09-07T16-13-09Z restart: always volumes: - minio_data:/data @@ -42,7 +42,7 @@ services: retries: 5 redis: - image: redis:7-alpine + image: redis:8.2.2-alpine3.22 restart: always ports: - "${REDIS_PORT:-6379}:6379" @@ -133,9 +133,14 @@ services: - MAX_SPEAKERS=${MAX_SPEAKERS:-10} # LLM Configuration - external providers only for offline deployment - LLM_PROVIDER=${LLM_PROVIDER:-} + - VLLM_BASE_URL=${VLLM_BASE_URL:-http://localhost:8012/v1} + - VLLM_API_KEY=${VLLM_API_KEY:-} + - VLLM_MODEL_NAME=${VLLM_MODEL_NAME:-gpt-oss-20b} - OPENAI_API_KEY=${OPENAI_API_KEY:-} - OPENAI_MODEL_NAME=${OPENAI_MODEL_NAME:-gpt-4o-mini} - OPENAI_BASE_URL=${OPENAI_BASE_URL:-} + - OLLAMA_BASE_URL=${OLLAMA_BASE_URL:-http://localhost:11434} + - OLLAMA_MODEL_NAME=${OLLAMA_MODEL_NAME:-llama2:7b-chat} - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-} - ANTHROPIC_MODEL_NAME=${ANTHROPIC_MODEL_NAME:-claude-3-haiku-20240307} - ANTHROPIC_BASE_URL=${ANTHROPIC_BASE_URL:-} @@ -193,9 +198,14 @@ services: - MAX_SPEAKERS=${MAX_SPEAKERS:-10} # LLM Configuration - LLM_PROVIDER=${LLM_PROVIDER:-} + - VLLM_BASE_URL=${VLLM_BASE_URL:-http://localhost:8012/v1} + - VLLM_API_KEY=${VLLM_API_KEY:-} + - VLLM_MODEL_NAME=${VLLM_MODEL_NAME:-gpt-oss-20b} - OPENAI_API_KEY=${OPENAI_API_KEY:-} - OPENAI_MODEL_NAME=${OPENAI_MODEL_NAME:-gpt-4o-mini} - OPENAI_BASE_URL=${OPENAI_BASE_URL:-} + - OLLAMA_BASE_URL=${OLLAMA_BASE_URL:-http://localhost:11434} + - 
OLLAMA_MODEL_NAME=${OLLAMA_MODEL_NAME:-llama2:7b-chat} - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-} - ANTHROPIC_MODEL_NAME=${ANTHROPIC_MODEL_NAME:-claude-3-haiku-20240307} - ANTHROPIC_BASE_URL=${ANTHROPIC_BASE_URL:-} diff --git a/docker-compose.yml b/docker-compose.yml index 829a4eb6..7b93d40b 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -2,7 +2,7 @@ version: '3.8' services: postgres: - image: postgres:14-alpine + image: postgres:17.5-alpine restart: always volumes: - postgres_data:/var/lib/postgresql/data/ @@ -19,7 +19,7 @@ services: retries: 5 minio: - image: minio/minio + image: minio/minio:RELEASE.2025-09-07T16-13-09Z restart: always volumes: - minio_data:/data @@ -37,7 +37,7 @@ services: retries: 5 redis: - image: redis:7-alpine + image: redis:8.2.2-alpine3.22 restart: always ports: - "5177:6379" @@ -241,7 +241,7 @@ services: dockerfile: Dockerfile.prod restart: unless-stopped ports: - - "5173:80" + - "5173:8080" environment: - NODE_ENV=production depends_on: diff --git a/frontend/Dockerfile.prod b/frontend/Dockerfile.prod index 764d3429..fa865aef 100644 --- a/frontend/Dockerfile.prod +++ b/frontend/Dockerfile.prod @@ -29,7 +29,7 @@ RUN chmod -R 755 static/fonts RUN npm run build # Production stage -FROM nginx:alpine +FROM nginx:1.29.2-alpine3.22 # Copy the built files from the build stage COPY --from=build /app/dist /usr/share/nginx/html @@ -37,8 +37,24 @@ COPY --from=build /app/dist /usr/share/nginx/html # Copy the nginx configuration COPY nginx.conf /etc/nginx/conf.d/default.conf -# Expose port 80 -EXPOSE 80 +# Create non-root user and configure permissions for nginx +# Note: nginx user already exists in base image, just configure permissions +RUN chown -R nginx:nginx /usr/share/nginx/html && \ + chown -R nginx:nginx /var/cache/nginx && \ + mkdir -p /var/log/nginx && \ + chown -R nginx:nginx /var/log/nginx && \ + touch /var/run/nginx.pid && \ + chown nginx:nginx /var/run/nginx.pid + +# Switch to non-root user +USER nginx + +# Expose port 8080 
(non-privileged port for non-root user) +EXPOSE 8080 + +# Add healthcheck +HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \ + CMD wget --no-verbose --tries=1 --spider http://localhost:8080/ || exit 1 # Start nginx CMD ["nginx", "-g", "daemon off;"] diff --git a/frontend/nginx.conf b/frontend/nginx.conf index bd829d00..c959b59a 100644 --- a/frontend/nginx.conf +++ b/frontend/nginx.conf @@ -1,12 +1,12 @@ server { - listen 80; + listen 8080; server_name localhost; root /usr/share/nginx/html; index index.html; # Set maximum file upload size to 15GB for large video/audio files client_max_body_size 15G; - + # Increase timeouts for large file uploads client_body_timeout 300s; client_header_timeout 300s; @@ -43,7 +43,7 @@ server { proxy_set_header Connection 'upgrade'; proxy_set_header Host $host; proxy_cache_bypass $http_upgrade; - + # Large file upload support proxy_request_buffering off; proxy_max_temp_file_size 0; diff --git a/scripts/build-offline-package.sh b/scripts/build-offline-package.sh index 5d13a5b7..63ee6828 100755 --- a/scripts/build-offline-package.sh +++ b/scripts/build-offline-package.sh @@ -24,16 +24,6 @@ PACKAGE_NAME="opentranscribe-offline-v${VERSION}" BUILD_DIR="./offline-package-build" PACKAGE_DIR="${BUILD_DIR}/${PACKAGE_NAME}" -# Docker images to save -IMAGES=( - "davidamacey/opentranscribe-backend:latest" - "davidamacey/opentranscribe-frontend:latest" - "postgres:14-alpine" - "redis:7-alpine" - "minio/minio:latest" - "opensearchproject/opensearch:2.5.0" -) - # Required files and directories CONFIG_FILES=( "docker-compose.offline.yml" @@ -143,6 +133,60 @@ preflight_checks() { print_success "Pre-flight checks passed" } +####################### +# IMAGE EXTRACTION +####################### + +extract_infrastructure_images() { + local compose_file="docker-compose.yml" + + if [ ! -f "$compose_file" ]; then + print_error "docker-compose.yml not found!" 
+ exit 1 + fi + + # Extract infrastructure service images (postgres, redis, minio, opensearch) + # These are the same in both dev and production + grep -E "^\s*image:\s*" "$compose_file" | \ + sed -E 's/^\s*image:\s*//; s/\s*$//' | \ + grep -v "^#" | \ + sort -u +} + +extract_docker_images() { + print_header "Extracting Docker Images from Configuration" + + print_info "Source: docker-compose.yml (single source of truth)" + + # Get infrastructure images from main docker-compose.yml + INFRASTRUCTURE_IMAGES=($(extract_infrastructure_images)) + + # Add production application images (these use 'build:' in dev, pre-built images in prod) + APPLICATION_IMAGES=( + "davidamacey/opentranscribe-backend:latest" + "davidamacey/opentranscribe-frontend:latest" + ) + + # Combine all images + IMAGES=("${APPLICATION_IMAGES[@]}" "${INFRASTRUCTURE_IMAGES[@]}") + + if [ ${#IMAGES[@]} -eq 0 ]; then + print_error "No images found!" + exit 1 + fi + + print_success "Found ${#IMAGES[@]} images to package:" + print_info "Application images (pre-built for production):" + for img in "${APPLICATION_IMAGES[@]}"; do + print_info " - $img" + done + print_info "Infrastructure images (from docker-compose.yml):" + for img in "${INFRASTRUCTURE_IMAGES[@]}"; do + print_info " - $img" + done + echo +} + ####################### # SETUP ####################### @@ -247,6 +291,7 @@ download_models() { # Run backend container with model download script print_info "Running model download in Docker container..." 
+ # Run as root (not --user) so /root/.cache paths work correctly docker run --rm \ --gpus all \ -e HUGGINGFACE_TOKEN="${HUGGINGFACE_TOKEN}" \ @@ -257,22 +302,39 @@ download_models() { -v "${temp_model_cache}/huggingface:/root/.cache/huggingface" \ -v "${temp_model_cache}/torch:/root/.cache/torch" \ -v "$(pwd)/scripts/download-models.py:/app/download-models.py" \ + -v "$(pwd)/test_videos:/app/test_videos:ro" \ davidamacey/opentranscribe-backend:latest \ python /app/download-models.py - # Copy models to package + # Copy models to package (use sudo because Docker created files as root) print_info "Copying models to package..." - cp -r "${temp_model_cache}/huggingface"/* "${PACKAGE_DIR}/models/huggingface/" || true - cp -r "${temp_model_cache}/torch"/* "${PACKAGE_DIR}/models/torch/" || true + if [ -d "${temp_model_cache}/huggingface" ] && [ "$(sudo ls -A ${temp_model_cache}/huggingface 2>/dev/null)" ]; then + sudo cp -r "${temp_model_cache}/huggingface"/* "${PACKAGE_DIR}/models/huggingface/" + print_info " Copied HuggingFace models" + else + print_warning "No HuggingFace models found to copy" + fi + + if [ -d "${temp_model_cache}/torch" ] && [ "$(sudo ls -A ${temp_model_cache}/torch 2>/dev/null)" ]; then + sudo cp -r "${temp_model_cache}/torch"/* "${PACKAGE_DIR}/models/torch/" + print_info " Copied PyTorch/PyAnnote models" + else + print_warning "No PyTorch models found to copy" + fi - # Check if model manifest was created - if [ -f "${temp_model_cache}/huggingface/../model_manifest.json" ]; then - cp "${temp_model_cache}/huggingface/../model_manifest.json" "${PACKAGE_DIR}/models/" + # Check if model manifest was created (it's inside the huggingface cache dir) + if [ -f "${temp_model_cache}/huggingface/model_manifest.json" ]; then + sudo cp "${temp_model_cache}/huggingface/model_manifest.json" "${PACKAGE_DIR}/models/" + else + print_warning "Model manifest not found" fi - # Clean up temp directory + # Fix ownership of copied files + sudo chown -R "$(id -u):$(id -g)" 
"${PACKAGE_DIR}/models/" + + # Clean up temp directory (need sudo because Docker created files as root) print_info "Cleaning up temporary files..." - rm -rf "${temp_model_cache}" + sudo rm -rf "${temp_model_cache}" local model_size=$(get_dir_size "${PACKAGE_DIR}/models") print_success "Models downloaded and packaged ($model_size)" @@ -285,9 +347,24 @@ download_models() { copy_configuration() { print_header "Copying Configuration Files" - # Copy docker-compose - print_info "Copying docker-compose.offline.yml..." - cp docker-compose.offline.yml "${PACKAGE_DIR}/config/" + # Sync infrastructure image versions from docker-compose.yml to docker-compose.offline.yml + print_info "Syncing infrastructure image versions to docker-compose.offline.yml..." + + # Create temporary copy of offline compose + local temp_compose="${PACKAGE_DIR}/config/docker-compose.offline.yml" + cp docker-compose.offline.yml "$temp_compose" + + # Extract and sync each infrastructure image version + for img in "${INFRASTRUCTURE_IMAGES[@]}"; do + # Get service name and image (e.g., postgres:17.5-alpine -> postgres and full image) + local service_name=$(echo "$img" | cut -d: -f1 | cut -d/ -f1) + + # Update the image line in offline compose file for this service + # Find the service block and update its image line + sed -i "s|image: ${service_name}[:/][^ ]*|image: ${img}|g" "$temp_compose" + done + + print_success "Infrastructure images synced from docker-compose.yml" # Copy and template .env file print_info "Creating .env template..." 
@@ -424,6 +501,7 @@ main() { # Execute build steps preflight_checks + extract_docker_images setup_directories pull_and_save_images download_models diff --git a/scripts/docker-build-push.sh b/scripts/docker-build-push.sh index 7283dffc..465fdfb2 100755 --- a/scripts/docker-build-push.sh +++ b/scripts/docker-build-push.sh @@ -123,6 +123,7 @@ build_backend() { --file Dockerfile.prod \ --tag "${REPO_BACKEND}:latest" \ --tag "${REPO_BACKEND}:${COMMIT_SHA}" \ + ${CACHE_FLAG} \ --push \ . @@ -131,7 +132,12 @@ build_backend() { print_success "Backend image built and pushed successfully" print_info "Tags: ${REPO_BACKEND}:latest, ${REPO_BACKEND}:${COMMIT_SHA}" - # Run security scan after build + # Remove old local image and pull fresh amd64 image for scanning + print_info "Pulling fresh amd64 image from registry for security scan..." + docker rmi "${REPO_BACKEND}:latest" 2>/dev/null || true + docker pull --platform linux/amd64 "${REPO_BACKEND}:latest" + + # Run security scan on freshly pulled image run_security_scan "backend" } @@ -149,6 +155,7 @@ build_frontend() { --file Dockerfile.prod \ --tag "${REPO_FRONTEND}:latest" \ --tag "${REPO_FRONTEND}:${COMMIT_SHA}" \ + ${CACHE_FLAG} \ --push \ . @@ -157,7 +164,12 @@ build_frontend() { print_success "Frontend image built and pushed successfully" print_info "Tags: ${REPO_FRONTEND}:latest, ${REPO_FRONTEND}:${COMMIT_SHA}" - # Run security scan after build + # Remove old local image and pull fresh amd64 image for scanning + print_info "Pulling fresh amd64 image from registry for security scan..." 
+ docker rmi "${REPO_FRONTEND}:latest" 2>/dev/null || true + docker pull --platform linux/amd64 "${REPO_FRONTEND}:latest" + + # Run security scan on freshly pulled image run_security_scan "frontend" } @@ -178,6 +190,7 @@ Options: Environment Variables: DOCKERHUB_USERNAME Docker Hub username (default: davidamacey) PLATFORMS Target platforms (default: linux/amd64,linux/arm64) + NO_CACHE Build without cache (default: false) SKIP_SECURITY_SCAN Skip security scanning (default: false) FAIL_ON_SECURITY_ISSUES Fail build if security issues found (default: false) FAIL_ON_CRITICAL Fail scan if CRITICAL vulnerabilities found (default: false) @@ -187,6 +200,9 @@ Examples: $0 backend # Build and push only backend $0 auto # Auto-detect and build changed components + # Build without cache (fresh build) + NO_CACHE=true $0 frontend + # Build only for current platform (faster) PLATFORMS=linux/amd64 $0 backend @@ -217,6 +233,13 @@ main() { print_info "Branch: ${BRANCH}" print_info "" + # Cache control - set NO_CACHE=true to force rebuild without cache + CACHE_FLAG="" + if [ "${NO_CACHE}" = "true" ]; then + CACHE_FLAG="--no-cache" + print_info "Building without cache (NO_CACHE=true)" + fi + # Check prerequisites check_docker check_docker_login diff --git a/scripts/download-models.py b/scripts/download-models.py index 828c5499..5745af14 100755 --- a/scripts/download-models.py +++ b/scripts/download-models.py @@ -76,35 +76,107 @@ def download_whisperx_models(): return {"whisperx": {"status": "failed", "error": str(e)}} def download_pyannote_models(): - """Download PyAnnote models""" + """Download PyAnnote models by running full WhisperX pipeline (same as backend)""" print_header("Downloading PyAnnote Models") try: - from pyannote.audio import Pipeline + import whisperx + import torch hf_token = os.environ.get("HUGGINGFACE_TOKEN") if not hf_token: print_error("HUGGINGFACE_TOKEN not set!") return {"pyannote": {"status": "failed", "error": "No HuggingFace token"}} - model_name = 
os.environ.get("DIARIZATION_MODEL", "pyannote/speaker-diarization-3.1") + # Use default paths (same as backend) - let WhisperX/PyAnnote handle caching + print_info("Using WhisperX full pipeline (same as backend) to download all models") + print_info("Models will be cached to default locations (managed by WhisperX/PyAnnote)") - print_info(f"Model: {model_name}") - print_info("Loading PyAnnote pipeline (this will download if needed)...") + # Detect device + device = "cuda" if torch.cuda.is_available() else "cpu" + compute_type = "float16" if device == "cuda" else "float32" + print_info(f"Using device: {device}") - pipeline = Pipeline.from_pretrained( - model_name, - use_auth_token=hf_token + # Get test video path + test_audio_path = "/app/test_videos/The Race to Develop Warp Drive and AI Passing the Turing Test.mp4" + + if not os.path.exists(test_audio_path): + raise FileNotFoundError(f"Test video not found: {test_audio_path}") + + print_info(f"Test video: {test_audio_path}") + + # Use WhisperX full pipeline (same as backend) + print_info("Step 1/4: Loading audio with WhisperX...") + audio = whisperx.load_audio(test_audio_path) + + # Limit to first 60 seconds + sample_rate = 16000 + max_samples = sample_rate * 60 + if len(audio) > max_samples: + audio = audio[:max_samples] + print_success(" Audio loaded") + + # Step 2: Transcribe (same as backend) + print_info("Step 2/4: Running WhisperX transcription...") + model = whisperx.load_model( + os.environ.get("WHISPER_MODEL", "base"), + device=device, + compute_type=compute_type + ) + result = model.transcribe(audio, batch_size=16 if device == "cuda" else 1) + print_success(" Transcription completed") + del model + torch.cuda.empty_cache() if device == "cuda" else None + + # Step 3: Align (same as backend - downloads wav2vec2) + print_info("Step 3/4: Aligning transcription (downloads wav2vec2)...") + model_a, metadata = whisperx.load_align_model( + language_code="en", + device=device + ) + result = whisperx.align( + 
result["segments"], + model_a, + metadata, + audio, + device=device + ) + print_success(" Alignment completed") + del model_a + torch.cuda.empty_cache() if device == "cuda" else None + + # Step 4: Diarize (same as backend - downloads PyAnnote models) + print_info("Step 4/4: Running speaker diarization (downloads PyAnnote models)...") + print_info(" This downloads: segmentation-3.0, embedding, wespeaker-voxceleb...") + + diarize_model = whisperx.diarize.DiarizationPipeline( + use_auth_token=hf_token, + device=device + ) + + diarize_segments = diarize_model( + audio, + min_speakers=1, + max_speakers=10 ) - print_success(f"PyAnnote model '{model_name}' downloaded successfully") + print_success(" Diarization completed") + print_success(" All PyAnnote model weights (.bin files) downloaded") + + # Verify models were downloaded to default torch cache + torch_cache = Path.home() / ".cache" / "torch" + if torch_cache.exists(): + model_files = list(torch_cache.rglob("*.bin")) + list(torch_cache.rglob("pytorch_model.bin")) + print_info(f" Verified {len(model_files)} model files in torch cache") # Clean up - del pipeline + del diarize_model + del audio + torch.cuda.empty_cache() if device == "cuda" else None return { "pyannote": { - "model": model_name, + "model": "pyannote/speaker-diarization-3.1", "status": "downloaded" } } @@ -147,9 +219,13 @@ def download_alignment_models(): def get_cache_info(): """Get information about cached models""" + # Use default paths (same as backend) + hf_home = str(Path.home() / ".cache" / "huggingface") + torch_home = str(Path.home() / ".cache" / "torch") + cache_dirs = { - "huggingface": Path.home() / ".cache" / "huggingface", - "torch": Path.home() / ".cache" / "torch" + "huggingface": Path(hf_home), + "torch": Path(torch_home) } info = {} @@ -190,8 +266,9 @@ def create_manifest(download_results): } } - # Write manifest - manifest_path = Path.home() / ".cache" / "model_manifest.json" + # Write manifest to HF_HOME directory (inside the cache 
dir, not parent) + cache_base = os.environ.get("HF_HOME", str(Path.home() / ".cache" / "huggingface")) + manifest_path = Path(cache_base) / "model_manifest.json" manifest_path.parent.mkdir(parents=True, exist_ok=True) with open(manifest_path, 'w') as f: diff --git a/scripts/download-models.sh b/scripts/download-models.sh new file mode 100755 index 00000000..b8923869 --- /dev/null +++ b/scripts/download-models.sh @@ -0,0 +1,268 @@ +#!/bin/bash +set -e + +# OpenTranscribe Model Downloader +# Downloads all required AI models before application startup +# Usage: ./scripts/download-models.sh [model_cache_dir] + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +CYAN='\033[0;36m' +NC='\033[0m' # No Color + +# Configuration +MODEL_CACHE_DIR="${1:-./models}" +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" + +print_header() { + echo -e "\n${CYAN}================================================================${NC}" + echo -e "${CYAN} $1${NC}" + echo -e "${CYAN}================================================================${NC}\n" +} + +print_info() { + echo -e "${BLUE}[INFO]${NC} $1" +} + +print_success() { + echo -e "${GREEN}[SUCCESS]${NC} $1" +} + +print_error() { + echo -e "${RED}[ERROR]${NC} $1" +} + +print_warning() { + echo -e "${YELLOW}[WARNING]${NC} $1" +} + +# Calculate directory size +get_dir_size() { + du -sh "$1" 2>/dev/null | cut -f1 || echo "0" +} + +check_models_exist() { + print_info "Checking for existing models in $MODEL_CACHE_DIR..." 
+ + # Check if models directory exists and has content + if [ -d "$MODEL_CACHE_DIR/huggingface" ] && [ -d "$MODEL_CACHE_DIR/torch" ]; then + local hf_size=$(du -sb "$MODEL_CACHE_DIR/huggingface" 2>/dev/null | cut -f1) + local torch_size=$(du -sb "$MODEL_CACHE_DIR/torch" 2>/dev/null | cut -f1) + + # If both directories have substantial content (>100MB combined), assume models exist + if [ "$((hf_size + torch_size))" -gt 100000000 ]; then + local total_size=$(get_dir_size "$MODEL_CACHE_DIR") + print_success "Found existing models ($total_size)" + echo -e "${YELLOW}Do you want to skip model download and use existing models? (Y/n)${NC}" + read -n 1 -r + echo + if [[ ! $REPLY =~ ^[Nn]$ ]]; then + print_info "Skipping model download - using existing models" + return 0 + else + print_info "Re-downloading models as requested" + fi + fi + fi + + return 1 +} + +check_huggingface_token() { + print_info "Checking for HuggingFace token..." + + # Check environment variable first + if [ -n "$HUGGINGFACE_TOKEN" ]; then + print_success "HuggingFace token found in environment" + return 0 + fi + + # Check .env file + if [ -f "$REPO_ROOT/.env" ]; then + local token=$(grep "^HUGGINGFACE_TOKEN=" "$REPO_ROOT/.env" | cut -d'=' -f2 | tr -d ' ') + if [ -n "$token" ]; then + export HUGGINGFACE_TOKEN="$token" + print_success "HuggingFace token loaded from .env file" + return 0 + fi + fi + + print_error "HUGGINGFACE_TOKEN not found!" + echo "" + echo -e "${YELLOW}A HuggingFace token is required to download speaker diarization models.${NC}" + echo "" + echo "To get your FREE token:" + echo "1. Go to: https://huggingface.co/settings/tokens" + echo "2. Click 'New token'" + echo "3. Give it a name (e.g., 'OpenTranscribe')" + echo "4. Select 'Read' permissions" + echo "5. 
Copy the token" + echo "" + echo "Then either:" + echo " • Export it: export HUGGINGFACE_TOKEN=your_token_here" + echo " • Add it to .env file: HUGGINGFACE_TOKEN=your_token_here" + echo "" + exit 1 +} + +download_models_docker() { + print_header "Downloading AI Models" + + print_info "This will download approximately 2.5GB of AI models:" + print_info " • WhisperX transcription models (~1.5GB)" + print_info " • PyAnnote speaker diarization models (~500MB)" + print_info " • Wav2Vec2 alignment model (~360MB)" + echo "" + print_warning "This may take 10-30 minutes depending on your internet speed..." + echo "" + + # Create model cache directories + mkdir -p "$MODEL_CACHE_DIR/huggingface" + mkdir -p "$MODEL_CACHE_DIR/torch" + + print_info "Starting model download using Docker..." + echo "" + + # Get Whisper model from .env or use default + local whisper_model="large-v2" + if [ -f "$REPO_ROOT/.env" ]; then + local env_model=$(grep "^WHISPER_MODEL=" "$REPO_ROOT/.env" | cut -d'=' -f2 | tr -d ' ') + if [ -n "$env_model" ]; then + whisper_model="$env_model" + fi + fi + + # Determine if GPU is available + local use_gpu="false" + local gpu_args="" + if command -v nvidia-smi &> /dev/null && nvidia-smi &> /dev/null; then + use_gpu="true" + gpu_args="--gpus all" + print_info "GPU detected - using GPU for faster model initialization" + else + print_info "No GPU detected - using CPU (this is fine, just slower)" + fi + + # Run model download in Docker container with progress output + print_info "Downloading models (progress shown below)..."
+    echo ""
+
+    # Run the download with real-time output
+    if docker run --rm \
+        $gpu_args \
+        -e HUGGINGFACE_TOKEN="${HUGGINGFACE_TOKEN}" \
+        -e WHISPER_MODEL="${whisper_model}" \
+        -e USE_GPU="${use_gpu}" \
+        -e COMPUTE_TYPE="${COMPUTE_TYPE:-float16}" \
+        -e DIARIZATION_MODEL="${DIARIZATION_MODEL:-pyannote/speaker-diarization-3.1}" \
+        -v "$(realpath "$MODEL_CACHE_DIR/huggingface"):/root/.cache/huggingface" \
+        -v "$(realpath "$MODEL_CACHE_DIR/torch"):/root/.cache/torch" \
+        -v "$SCRIPT_DIR/download-models.py:/app/download-models.py:ro" \
+        davidamacey/opentranscribe-backend:latest \
+        python /app/download-models.py; then
+
+        echo ""
+        local total_size=$(get_dir_size "$MODEL_CACHE_DIR")
+        print_success "Models downloaded successfully ($total_size)"
+
+        # Create marker file to indicate successful download
+        echo "$(date -u +"%Y-%m-%dT%H:%M:%SZ")" > "$MODEL_CACHE_DIR/.download_complete"
+
+        return 0
+    else
+        echo ""
+        print_error "Model download failed!"
+        print_warning "You can try running this script again, or start the application and models will download on first use."
+        return 1
+    fi
+}
+
+download_models_local() {
+    print_header "Downloading AI Models (Local Python)"
+
+    print_info "Checking for Python and required packages..."
+
+    if ! command -v python3 &> /dev/null; then
+        print_error "Python3 not found. Using Docker method instead..."
+        download_models_docker
+        return $?
+    fi
+
+    # Check for required packages
+    local missing_packages=()
+    python3 -c "import whisperx" 2>/dev/null || missing_packages+=("whisperx")
+    python3 -c "import pyannote.audio" 2>/dev/null || missing_packages+=("pyannote-audio")
+
+    if [ ${#missing_packages[@]} -gt 0 ]; then
+        print_warning "Missing Python packages: ${missing_packages[*]}"
+        print_info "Using Docker method instead (recommended)..."
+        download_models_docker
+        return $?
+    fi
+
+    # Set environment variables for model cache
+    export HF_HOME="$MODEL_CACHE_DIR/huggingface"
+    export TORCH_HOME="$MODEL_CACHE_DIR/torch"
+
+    # Run Python download script
+    print_info "Running model download script..."
+    python3 "$SCRIPT_DIR/download-models.py"
+}
+
+show_summary() {
+    print_header "Model Download Summary"
+
+    local total_size=$(get_dir_size "$MODEL_CACHE_DIR")
+    local hf_size=$(get_dir_size "$MODEL_CACHE_DIR/huggingface")
+    local torch_size=$(get_dir_size "$MODEL_CACHE_DIR/torch")
+
+    echo -e "${GREEN}✅ Model cache ready!${NC}"
+    echo ""
+    echo "Cache location: $MODEL_CACHE_DIR"
+    echo "Total size: $total_size"
+    echo "  • HuggingFace models: $hf_size"
+    echo "  • Torch models: $torch_size"
+    echo ""
+    print_info "Models are cached and will be available immediately when Docker starts"
+    echo ""
+}
+
+#######################
+# MAIN
+#######################
+
+main() {
+    print_header "OpenTranscribe Model Downloader"
+
+    print_info "Model cache directory: $MODEL_CACHE_DIR"
+    echo ""
+
+    # Check if models already exist
+    if check_models_exist; then
+        show_summary
+        exit 0
+    fi
+
+    # Check for HuggingFace token
+    check_huggingface_token
+
+    # Download models using Docker (recommended)
+    if download_models_docker; then
+        show_summary
+        exit 0
+    else
+        print_error "Model download failed"
+        echo ""
+        print_info "Models will be downloaded automatically when you first run the application,"
+        print_info "but this will cause a delay on first use."
+        echo ""
+        exit 1
+    fi
+}
+
+# Run main function
+main
diff --git a/scripts/security-scan.sh b/scripts/security-scan.sh
index cb8c1f96..c70d85ed 100755
--- a/scripts/security-scan.sh
+++ b/scripts/security-scan.sh
@@ -162,20 +162,24 @@ run_dockle() {
     print_header "Running Dockle on ${image}"
 
     local output_file="${OUTPUT_DIR}/${component}-dockle.json"
+    local abs_output_dir=$(cd "${OUTPUT_DIR}" && pwd)
 
-    # Run Dockle via Docker
+    # Run Dockle via Docker with mounted output directory and increased timeout
     if docker run --rm \
         -v /var/run/docker.sock:/var/run/docker.sock \
+        -v "${abs_output_dir}:/output" \
         goodwithtech/dockle:latest \
+        --timeout 600s \
         --format json \
-        --output "${output_file}" \
+        --output "/output/${component}-dockle.json" \
         "${image}"; then
 
         print_success "Dockle scan completed (see ${output_file})"
 
-        # Display summary
+        # Display summary with increased timeout
         docker run --rm \
             -v /var/run/docker.sock:/var/run/docker.sock \
             goodwithtech/dockle:latest \
+            --timeout 600s \
             "${image}"
 
         return 0
diff --git a/scripts/test-model-download.sh b/scripts/test-model-download.sh
new file mode 100755
index 00000000..2bc7671f
--- /dev/null
+++ b/scripts/test-model-download.sh
@@ -0,0 +1,67 @@
+#!/bin/bash
+set -e
+
+# Test Model Download Script
+# Quick test of model download without full offline package build
+
+echo "=== Testing Model Download ==="
+
+# Load .env for HF token
+if [ -f .env ]; then
+    export HUGGINGFACE_TOKEN=$(grep "^HUGGINGFACE_TOKEN=" .env | cut -d'=' -f2)
+fi
+
+# Create test cache directory
+TEST_CACHE="./test-model-cache"
+rm -rf "$TEST_CACHE"
+mkdir -p "$TEST_CACHE/huggingface"
+mkdir -p "$TEST_CACHE/torch"
+
+echo "Test cache directory: $TEST_CACHE"
+echo ""
+
+# Run model download in Docker container (as root for cache access)
+echo "Running model download..."
+docker run --rm \
+    --gpus all \
+    -e HUGGINGFACE_TOKEN="${HUGGINGFACE_TOKEN}" \
+    -e WHISPER_MODEL="base" \
+    -e DIARIZATION_MODEL="pyannote/speaker-diarization-3.1" \
+    -e USE_GPU="true" \
+    -e COMPUTE_TYPE="float16" \
+    -v "${TEST_CACHE}/huggingface:/root/.cache/huggingface" \
+    -v "${TEST_CACHE}/torch:/root/.cache/torch" \
+    -v "$(pwd)/scripts/download-models.py:/app/download-models.py" \
+    -v "$(pwd)/test_videos:/app/test_videos:ro" \
+    davidamacey/opentranscribe-backend:latest \
+    python /app/download-models.py
+
+echo ""
+echo "=== Checking Downloaded Models ==="
+echo ""
+
+echo "HuggingFace cache:"
+du -sh "$TEST_CACHE/huggingface"
+hf_count=$(find "$TEST_CACHE/huggingface" \( -name "*.bin" -o -name "*.safetensors" \) | wc -l)
+echo "$hf_count model files found"
+echo ""
+
+echo "Torch cache:"
+du -sh "$TEST_CACHE/torch"
+torch_count=$(find "$TEST_CACHE/torch" -name "*.bin" | wc -l)
+echo "$torch_count model files found"
+echo ""
+
+echo "PyAnnote models in torch cache:"
+ls -la "$TEST_CACHE/torch/pyannote/" 2>/dev/null || echo "No pyannote directory found!"
+echo ""
+
+echo "PyAnnote subdirectories:"
+find "$TEST_CACHE/torch/pyannote" -maxdepth 1 -type d 2>/dev/null | sort
+echo ""
+
+echo "PyAnnote model weight files:"
+find "$TEST_CACHE/torch/pyannote" -name "pytorch_model.bin" 2>/dev/null
+echo ""
+
+echo "=== Test Complete ==="
diff --git a/setup-opentranscribe.sh b/setup-opentranscribe.sh
index 859fd256..01aecaa1 100755
--- a/setup-opentranscribe.sh
+++ b/setup-opentranscribe.sh
@@ -26,11 +26,11 @@ USE_GPU_RUNTIME="false"
 
 detect_platform() {
     echo -e "${BLUE}🔍 Detecting platform and hardware...${NC}"
-    
+
     # Detect OS and Architecture
     DETECTED_PLATFORM=$(uname -s | tr '[:upper:]' '[:lower:]')
     ARCH=$(uname -m)
-    
+
     case "$DETECTED_PLATFORM" in
         "linux")
             echo "✓ Detected: Linux ($ARCH)"
@@ -47,14 +47,14 @@ detect_platform() {
             echo "⚠️ Unknown platform: $DETECTED_PLATFORM ($ARCH)"
             ;;
     esac
-    
+
     # Detect hardware acceleration
     detect_hardware_acceleration
 }
 
 detect_hardware_acceleration() {
     DETECTED_DEVICE="cpu"  # Default fallback
-    
+
     # Check for NVIDIA GPU (CUDA)
     if command -v nvidia-smi &> /dev/null; then
         if nvidia-smi &> /dev/null; then
@@ -64,15 +64,15 @@ detect_hardware_acceleration() {
             COMPUTE_TYPE="float16"
             BATCH_SIZE="16"
             USE_GPU_RUNTIME="true"
-            
+
             # Get default GPU (first available)
             DEFAULT_GPU=$(nvidia-smi --query-gpu=index --format=csv,noheader,nounits | head -n1)
             GPU_DEVICE_ID=${DEFAULT_GPU:-0}
-            
+
             return
         fi
     fi
-    
+
     # Check for Apple Silicon (MPS)
     if [[ "$DETECTED_PLATFORM" == "macos" ]]; then
        # Check for Apple Silicon
@@ -81,7 +81,7 @@ detect_hardware_acceleration() {
             DETECTED_DEVICE="mps"
             COMPUTE_TYPE="float32"
             BATCH_SIZE="8"
-            
+
             # Check macOS version for MPS support (requires macOS 12.3+)
             macos_version=$(sw_vers -productVersion)
             if [[ $(echo "$macos_version" | cut -d. -f1) -ge 12 ]] && [[ $(echo "$macos_version" | cut -d. -f2) -ge 3 ]]; then
@@ -90,19 +90,19 @@ detect_hardware_acceleration() {
                 echo "⚠️ macOS $macos_version detected, MPS requires 12.3+, falling back to CPU"
                 DETECTED_DEVICE="cpu"
             fi
-            
+
             return
         else
             echo "✓ Intel Mac detected"
         fi
     fi
-    
+
     # CPU fallback
     echo "ℹ️ Using CPU processing (no GPU acceleration detected)"
     DETECTED_DEVICE="cpu"
     COMPUTE_TYPE="int8"
     BATCH_SIZE="4"
-    
+
     # Detect CPU cores for optimization
     if command -v nproc &> /dev/null; then
         CPU_CORES=$(nproc)
@@ -140,10 +140,10 @@ check_gpu_support() {
 
 configure_docker_runtime() {
     echo -e "${BLUE}🐳 Configuring Docker runtime...${NC}"
-    
+
     if [[ "$USE_GPU_RUNTIME" == "true" && "$DETECTED_DEVICE" == "cuda" ]]; then
         echo "🧪 Testing NVIDIA Container Toolkit..."
-        
+
         if check_gpu_support; then
             echo -e "${GREEN}✅ NVIDIA Container Toolkit fully functional${NC}"
             DOCKER_RUNTIME="nvidia"
@@ -179,7 +179,7 @@ fallback_to_cpu() {
 
 check_network_connectivity() {
     echo -e "${BLUE}🌐 Checking network connectivity...${NC}"
-    
+
     # Test GitHub connectivity
     if ! curl -s --connect-timeout 5 --max-time 10 https://raw.githubusercontent.com > /dev/null 2>&1; then
         echo -e "${YELLOW}⚠️ GitHub may not be accessible for downloading files${NC}"
@@ -219,25 +219,25 @@ validate_downloaded_files() {
     fi
 
     echo "✓ init_db.sql validated ($db_size bytes)"
-    
+
     # Validate docker-compose.yml
     if [ ! -f "docker-compose.yml" ]; then
         echo -e "${RED}❌ docker-compose.yml file not found${NC}"
         return 1
     fi
-    
+
     # Check docker-compose syntax
     if ! docker compose -f docker-compose.yml config > /dev/null 2>&1; then
         echo -e "${RED}❌ docker-compose.yml syntax validation failed${NC}"
         return 1
     fi
-    
+
     # Check for essential services
     if ! grep -q "backend:" docker-compose.yml || ! grep -q "frontend:" docker-compose.yml; then
         echo -e "${RED}❌ docker-compose.yml missing essential services${NC}"
         return 1
     fi
-    
+
     echo "✓ docker-compose.yml validated"
     echo "✓ All downloaded files validated successfully"
     return 0
@@ -245,7 +245,7 @@ validate_downloaded_files() {
 
 check_dependencies() {
     echo -e "${BLUE}📋 Checking dependencies...${NC}"
-    
+
     # Check for curl
     if ! command -v curl &> /dev/null; then
         echo -e "${RED}❌ curl is not installed${NC}"
@@ -255,7 +255,7 @@ check_dependencies() {
     else
         echo "✓ curl detected"
     fi
-    
+
     # Check for Docker
     if ! command -v docker &> /dev/null; then
         echo -e "${RED}❌ Docker is not installed${NC}"
@@ -265,7 +265,7 @@ check_dependencies() {
         docker_version=$(docker --version | cut -d' ' -f3 | cut -d',' -f1)
         echo "✓ Docker $docker_version detected"
     fi
-    
+
     # Check for Docker Compose
     if ! docker compose version &> /dev/null; then
         echo -e "${RED}❌ Docker Compose is not installed or not in PATH${NC}"
@@ -275,7 +275,7 @@ check_dependencies() {
         compose_version=$(docker compose version --short)
         echo "✓ Docker Compose $compose_version detected"
     fi
-    
+
     # Check if Docker daemon is running
     if ! docker info &> /dev/null; then
         echo -e "${RED}❌ Docker daemon is not running${NC}"
@@ -284,7 +284,7 @@ check_dependencies() {
     else
         echo "✓ Docker daemon is running"
     fi
-    
+
     # Check network connectivity
     check_network_connectivity
 }
@@ -295,7 +295,7 @@ check_dependencies() {
 
 setup_project_directory() {
     echo -e "${BLUE}📁 Setting up project directory...${NC}"
-    
+
     # Create and enter project directory
     mkdir -p "$PROJECT_DIR"
     cd "$PROJECT_DIR"
@@ -304,7 +304,7 @@ setup_project_directory() {
 
 create_database_files() {
     echo "✓ Downloading database initialization files..."
-    
+
     # Download the official init_db.sql from the repository
     local max_retries=3
     local retry_count=0
@@ -326,14 +326,14 @@ create_database_files() {
         else
             echo "⚠️ Download attempt $((retry_count + 1)) failed"
         fi
-        
+
         retry_count=$((retry_count + 1))
         if [ $retry_count -lt $max_retries ]; then
             echo "⏳ Retrying in 2 seconds..."
             sleep 2
         fi
     done
-    
+
     echo -e "${RED}❌ Failed to download database initialization file after $max_retries attempts${NC}"
     echo "Please check your internet connection and try again."
     echo "Alternative: You can manually download from:"
@@ -343,19 +343,19 @@ create_database_files() {
 
 create_configuration_files() {
     echo -e "${BLUE}📄 Creating configuration files...${NC}"
-    
+
     # Create database initialization files
     create_database_files
-    
+
     # Create comprehensive docker-compose.yml directly
     create_production_compose
-    
+
     # Validate all downloaded files
     if ! validate_downloaded_files; then
         echo -e "${RED}❌ File validation failed${NC}"
         exit 1
     fi
-    
+
     # Download NVIDIA override file if GPU detected
     if [[ "$USE_GPU_RUNTIME" == "true" && "$DETECTED_DEVICE" == "cuda" ]]; then
         download_nvidia_override
@@ -364,21 +364,24 @@ create_configuration_files() {
 
     # Download opentranscribe.sh management script
     download_management_script
 
+    # Download model downloader scripts
+    download_model_downloader_scripts
+
     # Create .env.example
     create_production_env_example
 }
 
 create_production_compose() {
     echo "✓ Downloading production docker-compose configuration..."
-    
+
     # Download the official production compose file from the repository
     local max_retries=3
     local retry_count=0
     local branch="${OPENTRANSCRIBE_BRANCH:-master}"
 
-    # URL-encode the branch name (replace / with %2F) 
+    # URL-encode the branch name (replace / with %2F)
     local encoded_branch=$(echo "$branch" | sed 's|/|%2F|g')
     local download_url="https://raw.githubusercontent.com/davidamacey/OpenTranscribe/${encoded_branch}/docker-compose.prod.yml"
-    
+
     while [ $retry_count -lt $max_retries ]; do
         if curl -fsSL --connect-timeout 10 --max-time 30 "$download_url" -o docker-compose.yml; then
             # Validate downloaded file
@@ -392,14 +395,14 @@ create_production_compose() {
         else
             echo "⚠️ Download attempt $((retry_count + 1)) failed"
         fi
-        
+
         retry_count=$((retry_count + 1))
         if [ $retry_count -lt $max_retries ]; then
             echo "⏳ Retrying in 2 seconds..."
             sleep 2
         fi
     done
-    
+
     echo -e "${RED}❌ Failed to download docker-compose configuration after $max_retries attempts${NC}"
     echo "Please check your internet connection and try again."
     echo "Alternative: You can manually download from:"
@@ -409,7 +412,7 @@ create_production_compose() {
 
 download_nvidia_override() {
     echo "✓ Downloading NVIDIA GPU override configuration..."
-    
+
     # Download the NVIDIA override file from the repository
     local max_retries=3
     local retry_count=0
@@ -417,7 +420,7 @@ download_nvidia_override() {
     # URL-encode the branch name (replace / with %2F)
     local encoded_branch=$(echo "$branch" | sed 's|/|%2F|g')
     local download_url="https://raw.githubusercontent.com/davidamacey/OpenTranscribe/${encoded_branch}/docker-compose.nvidia.yml"
-    
+
     while [ $retry_count -lt $max_retries ]; do
         if curl -fsSL --connect-timeout 10 --max-time 30 "$download_url" -o docker-compose.nvidia.yml; then
             # Validate downloaded file
@@ -431,14 +434,14 @@ download_nvidia_override() {
         else
             echo "⚠️ Download attempt $((retry_count + 1)) failed"
         fi
-        
+
         retry_count=$((retry_count + 1))
         if [ $retry_count -lt $max_retries ]; then
             echo "⏳ Retrying in 2 seconds..."
             sleep 2
         fi
     done
-    
+
     echo -e "${YELLOW}⚠️ Failed to download NVIDIA override file after $max_retries attempts${NC}"
     echo "GPU acceleration may not work optimally, but CPU processing will still function."
     echo "You can manually download from: $download_url"
@@ -481,9 +484,71 @@ download_management_script() {
     echo "You can manually download from: $download_url"
 }
 
+download_model_downloader_scripts() {
+    echo "✓ Downloading model downloader scripts..."
+
+    # Create scripts directory
+    mkdir -p scripts
+
+    # Download download-models.sh
+    local max_retries=3
+    local retry_count=0
+    local branch="${OPENTRANSCRIBE_BRANCH:-master}"
+    local encoded_branch=$(echo "$branch" | sed 's|/|%2F|g')
+    local download_url="https://raw.githubusercontent.com/davidamacey/OpenTranscribe/${encoded_branch}/scripts/download-models.sh"
+
+    while [ $retry_count -lt $max_retries ]; do
+        if curl -fsSL --connect-timeout 10 --max-time 30 "$download_url" -o scripts/download-models.sh; then
+            if [ -s scripts/download-models.sh ] && grep -q "OpenTranscribe Model Downloader" scripts/download-models.sh; then
+                chmod +x scripts/download-models.sh
+                echo "✓ Downloaded and validated download-models.sh"
+                break
+            else
+                echo "⚠️ Downloaded download-models.sh appears invalid, retrying..."
+                rm -f scripts/download-models.sh
+            fi
+        else
+            echo "⚠️ Download attempt $((retry_count + 1)) failed"
+        fi
+
+        retry_count=$((retry_count + 1))
+        if [ $retry_count -lt $max_retries ]; then
+            echo "⏳ Retrying in 2 seconds..."
+            sleep 2
+        fi
+    done
+
+    # Download download-models.py
+    retry_count=0
+    download_url="https://raw.githubusercontent.com/davidamacey/OpenTranscribe/${encoded_branch}/scripts/download-models.py"
+
+    while [ $retry_count -lt $max_retries ]; do
+        if curl -fsSL --connect-timeout 10 --max-time 30 "$download_url" -o scripts/download-models.py; then
+            if [ -s scripts/download-models.py ] && grep -q "Download all required AI models" scripts/download-models.py; then
+                echo "✓ Downloaded and validated download-models.py"
+                return 0
+            else
+                echo "⚠️ Downloaded download-models.py appears invalid, retrying..."
+                rm -f scripts/download-models.py
+            fi
+        else
+            echo "⚠️ Download attempt $((retry_count + 1)) failed"
+        fi
+
+        retry_count=$((retry_count + 1))
+        if [ $retry_count -lt $max_retries ]; then
+            echo "⏳ Retrying in 2 seconds..."
+            sleep 2
+        fi
+    done
+
+    echo -e "${YELLOW}⚠️ Failed to download model downloader scripts${NC}"
+    echo "Models will be downloaded on first application run instead."
+}
+
 create_production_env_example() {
     echo "✓ Downloading environment configuration template..."
-    
+
     # Download the official .env.example from the repository
     local max_retries=3
     local retry_count=0
@@ -491,7 +556,7 @@ create_production_env_example() {
     # URL-encode the branch name (replace / with %2F)
     local encoded_branch=$(echo "$branch" | sed 's|/|%2F|g')
     local download_url="https://raw.githubusercontent.com/davidamacey/OpenTranscribe/${encoded_branch}/.env.example"
-    
+
     while [ $retry_count -lt $max_retries ]; do
         if curl -fsSL --connect-timeout 10 --max-time 30 "$download_url" -o .env.example; then
             # Validate downloaded file
@@ -505,14 +570,14 @@ create_production_env_example() {
         else
             echo "⚠️ Download attempt $((retry_count + 1)) failed"
         fi
-        
+
         retry_count=$((retry_count + 1))
         if [ $retry_count -lt $max_retries ]; then
             echo "⏳ Retrying in 2 seconds..."
             sleep 2
         fi
     done
-    
+
     echo -e "${RED}❌ Failed to download .env.example file after $max_retries attempts${NC}"
     echo "Please check your internet connection and try again."
     echo "Alternative: You can manually download from:"
@@ -520,14 +585,69 @@ create_production_env_example() {
     exit 1
 }
 
+prompt_huggingface_token() {
+    echo ""
+    echo -e "${YELLOW}🤗 HuggingFace Token Configuration${NC}"
+    echo "================================================="
+    echo -e "${RED}⚠️ IMPORTANT: A HuggingFace token is REQUIRED for speaker diarization!${NC}"
+    echo ""
+    echo "Without this token:"
+    echo "  • Transcription will work normally"
+    echo "  • Speaker diarization (who said what) will NOT work"
+    echo "  • Models cannot be pre-downloaded (will download on first use)"
+    echo ""
+    echo "To get your FREE token:"
+    echo "  1. Visit: https://huggingface.co/settings/tokens"
+    echo "  2. Click 'New token'"
+    echo "  3. Give it a name (e.g., 'OpenTranscribe')"
+    echo "  4. Select 'Read' permissions"
+    echo "  5. Copy the token"
+    echo ""
+
+    # Ask if they want to enter token now
+    read -p "Do you have a HuggingFace token to enter now? (Y/n) " -n 1 -r
+    echo
+    echo
+
+    if [[ $REPLY =~ ^[Nn]$ ]]; then
+        print_warning "Skipping HuggingFace token - you can add it later"
+        echo "To add later:"
+        echo "  1. Edit: $PROJECT_DIR/.env"
+        echo "  2. Set: HUGGINGFACE_TOKEN=your_token_here"
+        echo "  3. Restart: cd $PROJECT_DIR && ./opentranscribe.sh restart"
+        echo ""
+        HUGGINGFACE_TOKEN=""
+        return 0
+    fi
+
+    # Prompt for token
+    echo "Please enter your HuggingFace token:"
+    echo "(Token will be hidden for security)"
+    read -s HUGGINGFACE_TOKEN
+    echo
+
+    # Validate token format (basic check - should start with hf_)
+    if [[ -z "$HUGGINGFACE_TOKEN" ]]; then
+        print_warning "No token entered - you can add it later in .env file"
+        HUGGINGFACE_TOKEN=""
+    elif [[ ! "$HUGGINGFACE_TOKEN" =~ ^hf_ ]]; then
+        print_warning "Token doesn't start with 'hf_' - this may not be valid"
+        echo "Using it anyway, but verify it's correct."
+        echo ""
+    else
+        print_success "HuggingFace token configured!"
+        echo ""
+    fi
+}
+
 configure_environment() {
     echo -e "${BLUE}⚙️ Configuring environment...${NC}"
-    
+
     if [ -f .env ]; then
         echo "ℹ️ Using existing .env file"
         return
     fi
-    
+
     # Generate secure JWT secret
     if command -v openssl &> /dev/null; then
         JWT_SECRET=$(openssl rand -hex 32)
@@ -537,42 +657,23 @@ configure_environment() {
         JWT_SECRET="change_this_in_production_$(date +%s)"
         echo "⚠️ Using basic JWT secret - consider generating a secure one"
     fi
-    
-    # Display HuggingFace token instructions
-    echo ""
-    echo -e "${YELLOW}🤗 HuggingFace Token Configuration${NC}"
-    echo "================================================="
-    echo -e "${RED}⚠️ IMPORTANT: A HuggingFace token is REQUIRED for speaker diarization!${NC}"
-    echo ""
-    echo "Without this token, the application will only do transcription (no speaker identification)."
-    echo ""
-    echo "To get your FREE token:"
-    echo "1. Go to: https://huggingface.co/settings/tokens"
-    echo "2. Click 'New token'"
-    echo "3. Give it a name (e.g., 'OpenTranscribe')"
-    echo "4. Select 'Read' permissions"
-    echo "5. Copy the token"
-    echo "6. Edit the .env file after setup and add: HUGGINGFACE_TOKEN=your_token_here"
-    echo ""
-    echo -e "${YELLOW}💡 You can add your token later by editing the .env file${NC}"
-    echo ""
-    
-    # Set empty token for now - user will add it manually
-    HUGGINGFACE_TOKEN=""
-    
+
+    # Prompt for HuggingFace token
+    prompt_huggingface_token
+
     # Model selection based on hardware
     select_whisper_model
-    
+
     # LLM configuration for AI features
     configure_llm_settings
-    
+
     # Create .env file
     create_env_file
 }
 
 select_whisper_model() {
     echo -e "${YELLOW}🎤 Auto-selecting Whisper Model based on hardware...${NC}"
-    
+
     # Auto-select optimal model based on hardware with GPU memory detection
     case "$DETECTED_DEVICE" in
         "cuda")
@@ -609,7 +710,7 @@ select_whisper_model() {
             echo "✓ CPU processing - selecting base model (fastest for CPU)"
             ;;
     esac
-    
+
     echo "✓ Selected model: $WHISPER_MODEL"
     echo "💡 You can change this later by editing WHISPER_MODEL in the .env file"
    echo "   Available options: tiny, base, small, medium, large-v2"
@@ -634,10 +735,10 @@ configure_llm_settings() {
     echo "• Configure now (recommended for vLLM users)"
     echo "• Skip configuration (you can set up later in .env file)"
     echo ""
-    
+
     read -p "Do you want to configure LLM settings now? (y/N) " -n 1 -r
     echo
-    
+
     if [[ $REPLY =~ ^[Yy]$ ]]; then
         echo ""
         echo "Select your LLM provider:"
@@ -647,11 +748,11 @@ configure_llm_settings() {
         echo "4) Anthropic Claude"
         echo "5) OpenRouter"
         echo "6) Skip (configure manually later)"
-    
+
         read -p "Enter your choice (1-6): " -n 1 -r llm_choice
         echo
         echo
-    
+
         case $llm_choice in
             1)
                 echo "✓ Configuring vLLM (Local server)"
@@ -710,7 +811,7 @@ configure_llm_settings() {
         echo "💡 Edit the LLM_* variables in .env file to enable AI features"
         LLM_PROVIDER="vllm"  # Default
     fi
-    
+
     echo ""
     echo -e "${YELLOW}💡 LLM Configuration Notes:${NC}"
     echo "• AI features require a working LLM endpoint"
@@ -720,22 +821,22 @@ create_env_file() {
     echo "✓ Creating .env file with optimized settings..."
-    
+
     # Copy example and update values
     cp .env.example .env
-    
+
     # Update configuration values
     sed -i.bak "s|JWT_SECRET_KEY=.*|JWT_SECRET_KEY=$JWT_SECRET|g" .env
     sed -i.bak "s|HUGGINGFACE_TOKEN=.*|HUGGINGFACE_TOKEN=$HUGGINGFACE_TOKEN|g" .env
     sed -i.bak "s|WHISPER_MODEL=.*|WHISPER_MODEL=$WHISPER_MODEL|g" .env
     sed -i.bak "s|BATCH_SIZE=.*|BATCH_SIZE=$BATCH_SIZE|g" .env
     sed -i.bak "s|COMPUTE_TYPE=.*|COMPUTE_TYPE=$COMPUTE_TYPE|g" .env
-    
+
     # Update LLM configuration
     if [[ -n "$LLM_PROVIDER" ]]; then
         sed -i.bak "s|LLM_PROVIDER=.*|LLM_PROVIDER=$LLM_PROVIDER|g" .env
     fi
-    
+
     # Provider-specific configurations
     if [[ "$LLM_PROVIDER" == "vllm" && -n "$VLLM_BASE_URL" ]]; then
         sed -i.bak "s|VLLM_BASE_URL=.*|VLLM_BASE_URL=$VLLM_BASE_URL|g" .env
@@ -766,7 +867,7 @@ create_env_file() {
             sed -i.bak "s|# OPENROUTER_MODEL_NAME=.*|OPENROUTER_MODEL_NAME=$OPENROUTER_MODEL_NAME|g" .env
         fi
     fi
-    
+
     # Hardware-specific configurations
     case "$DETECTED_DEVICE" in
         "cuda")
@@ -783,19 +884,146 @@ create_env_file() {
             sed -i.bak "s|TORCH_DEVICE=.*|TORCH_DEVICE=cpu|g" .env
             ;;
     esac
-    
+
+    # Set model cache directory
+    MODEL_CACHE_DIR="${MODEL_CACHE_DIR:-./models}"
+    sed -i.bak "s|MODEL_CACHE_DIR=.*|MODEL_CACHE_DIR=$MODEL_CACHE_DIR|g" .env
+
+    # Add Docker runtime configuration
     echo "" >> .env
     echo "# Hardware Configuration (Auto-detected)" >> .env
     echo "DETECTED_DEVICE=${DETECTED_DEVICE}" >> .env
     echo "USE_NVIDIA_RUNTIME=${USE_GPU_RUNTIME}" >> .env
-    
+
     # Clean up backup file
     rm -f .env.bak
-    
+
     echo "✓ Environment configured for $DETECTED_DEVICE with $COMPUTE_TYPE precision"
 }
 
+#######################
+# MODEL DOWNLOADING
+#######################
+
+download_ai_models() {
+    print_header "AI Model Pre-Download"
+
+    echo "OpenTranscribe requires AI models (~2.5GB) for transcription and speaker diarization."
+    echo ""
+    echo "Configuration summary:"
+    echo "  • Hardware: $DETECTED_DEVICE ($COMPUTE_TYPE precision)"
+    echo "  • Whisper Model: $WHISPER_MODEL"
+    echo "  • HuggingFace Token: $([[ -n "$HUGGINGFACE_TOKEN" ]] && echo "✓ Configured" || echo "✗ Not configured")"
+    echo ""
+
+    # If HuggingFace token not set, offer one more chance to enter it
+    if [ -z "$HUGGINGFACE_TOKEN" ]; then
+        print_warning "HuggingFace token not configured"
+        echo ""
+        echo "Without a token, speaker diarization will not work and models cannot be pre-downloaded."
+        echo ""
+        read -p "Would you like to enter your HuggingFace token now? (y/N) " -n 1 -r
+        echo
+        echo
+
+        if [[ $REPLY =~ ^[Yy]$ ]]; then
+            echo "Please enter your HuggingFace token:"
+            echo "(Token will be hidden for security)"
+            read -s HUGGINGFACE_TOKEN
+            echo
+
+            # Validate and update .env file
+            if [[ -n "$HUGGINGFACE_TOKEN" ]]; then
+                # Update the token in .env file
+                sed -i.bak "s|HUGGINGFACE_TOKEN=.*|HUGGINGFACE_TOKEN=$HUGGINGFACE_TOKEN|g" .env
+                rm -f .env.bak
+
+                if [[ "$HUGGINGFACE_TOKEN" =~ ^hf_ ]]; then
+                    print_success "HuggingFace token configured and saved to .env!"
+                    echo ""
+                else
+                    print_warning "Token doesn't start with 'hf_' - this may not be valid"
+                    echo "Continuing anyway..."
+                    echo ""
+                fi
+            else
+                print_warning "No token entered - skipping model pre-download"
+                echo ""
+                HUGGINGFACE_TOKEN=""
+            fi
+        fi
+    fi
+
+    # If still no token, skip download
+    if [ -z "$HUGGINGFACE_TOKEN" ]; then
+        print_info "Skipping model pre-download"
+        echo ""
+        echo "Models will be downloaded automatically when you first run the application."
+        echo "This will cause a 10-30 minute delay on first use."
+        echo ""
+        echo "To pre-download models later:"
+        echo "  1. Add your HuggingFace token to .env file"
+        echo "  2. Run: cd $PROJECT_DIR && bash scripts/download-models.sh"
+        echo ""
+        read -p "Press Enter to continue setup..." -r
+        echo
+        return 0
+    fi
+
+    # Token is configured - proceed with download
+    echo -e "${YELLOW}Ready to download AI models (~2.5GB)${NC}"
+    echo "This will take 10-30 minutes depending on your internet speed."
+    echo ""
+    read -p "Start model download now? (Y/n) " -n 1 -r
+    echo
+    echo
+
+    if [[ $REPLY =~ ^[Nn]$ ]]; then
+        print_info "Skipping model pre-download"
+        echo "Models will be downloaded automatically when you first run the application."
+        echo ""
+        echo "To download models later, run:"
+        echo "  cd $PROJECT_DIR && bash scripts/download-models.sh"
+        echo ""
+        return 0
+    fi
+
+    # Check if scripts exist
+    if [ ! -f "scripts/download-models.sh" ]; then
+        print_warning "Model download script not found - skipping pre-download"
+        echo "Models will be downloaded automatically when you first run the application."
+        echo ""
+        return 0
+    fi
+
+    print_info "Starting model download..."
+    echo ""
+
+    # Export necessary environment variables
+    export HUGGINGFACE_TOKEN
+    export WHISPER_MODEL
+    export COMPUTE_TYPE
+    export DETECTED_DEVICE
+
+    # Create models directory
+    mkdir -p models
+
+    # Run the download script
+    if bash scripts/download-models.sh models; then
+        echo ""
+        print_success "✨ Models downloaded and cached successfully!"
+        print_info "Docker containers will start with models ready to use"
+        echo ""
+        return 0
+    else
+        echo ""
+        print_warning "Model download failed or was incomplete"
+        echo "Models will be downloaded automatically when you first run the application."
+        echo ""
+        return 1
+    fi
+}
+
 #######################
@@ -804,7 +1032,7 @@ create_env_file() {
 
 validate_setup() {
     echo -e "${BLUE}✅ Validating setup...${NC}"
-    
+
     # Check required files
     local required_files=(".env" "docker-compose.yml" "opentranscribe.sh")
     for file in "${required_files[@]}"; do
@@ -815,7 +1043,7 @@ validate_setup() {
             exit 1
         fi
     done
-    
+
     # Validate Docker Compose
     if docker compose config &> /dev/null; then
         echo "✓ Docker Compose configuration valid"
@@ -823,7 +1051,7 @@ validate_setup() {
         echo -e "${RED}❌ Docker Compose configuration invalid${NC}"
         exit 1
     fi
-    
+
     echo "✓ Setup validation complete"
 }
@@ -837,7 +1065,7 @@ display_summary() {
     echo "  • Compute Type: $COMPUTE_TYPE"
     echo "  • Batch Size: $BATCH_SIZE"
     echo "  • Docker Runtime: ${DOCKER_RUNTIME:-default}"
-    
+
     if [[ "$DETECTED_DEVICE" == "cuda" ]]; then
         echo "  • GPU Device ID: ${GPU_DEVICE_ID:-0}"
         if command -v nvidia-smi &> /dev/null; then
@@ -845,7 +1073,7 @@ display_summary() {
             echo "  • GPU: $GPU_NAME"
         fi
     fi
-    
+
     echo ""
     echo -e "${BLUE}📋 Application Configuration:${NC}"
     echo "  • Whisper Model: $WHISPER_MODEL"
@@ -853,19 +1081,19 @@ display_summary() {
     echo "  • LLM Provider: ${LLM_PROVIDER:-vllm} (for AI summarization)"
     echo "  • Project Directory: $PROJECT_DIR"
     echo ""
-    
+
     echo -e "${YELLOW}🚀 To start OpenTranscribe:${NC}"
     echo "  cd $PROJECT_DIR"
     echo "  ./opentranscribe.sh start"
     echo ""
-    
+
     echo -e "${RED}⚠️ Speaker Diarization Setup Required${NC}"
     echo "To enable speaker identification:"
     echo "1. Get a free token at: https://huggingface.co/settings/tokens"
     echo "2. Edit the .env file and add: HUGGINGFACE_TOKEN=your_token_here"
     echo "3. Restart the application: ./opentranscribe.sh restart"
     echo ""
-    
+
     if [[ -z "$VLLM_BASE_URL" && "$LLM_PROVIDER" == "vllm" ]]; then
         echo -e "${YELLOW}🤖 LLM Setup for AI Features${NC}"
         echo "To enable AI summarization and speaker identification:"
@@ -874,14 +1102,14 @@ display_summary() {
         echo "3. Restart the application: ./opentranscribe.sh restart"
         echo ""
     fi
-    
+
     if [[ "$DETECTED_DEVICE" == "cuda" && "$DOCKER_RUNTIME" != "nvidia" ]]; then
         echo -e "${YELLOW}💡 Note: NVIDIA GPU detected but runtime not configured${NC}"
         echo "If you experience GPU issues, check NVIDIA Container Toolkit installation:"
         echo "https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html"
         echo ""
     fi
-    
+
     echo -e "${GREEN}🌐 Access URLs (after starting):${NC}"
     echo "  • Web Interface: http://localhost:${FRONTEND_PORT:-5173}"
     echo "  • API Documentation: http://localhost:${BACKEND_PORT:-5174}/docs"
@@ -913,9 +1141,10 @@ main() {
     setup_project_directory
     create_configuration_files
     configure_environment
-ia validate_setup
+    download_ai_models
+    validate_setup
     display_summary
 }
 
 # Execute main function
-main
\ No newline at end of file
+main
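Editor's sketch, outside the patch proper: setup-opentranscribe.sh repeats the same curl → validate → retry loop for init_db.sql, docker-compose.yml, the NVIDIA override, the model downloader scripts, and .env.example. A possible consolidation is shown below. The helper name `fetch_with_retries` and its argument order are hypothetical (nothing in the patch defines them), but the curl flags, non-empty-plus-marker validation, and 2-second backoff mirror the inline blocks above.

```shell
#!/bin/sh
# Hypothetical helper consolidating the repeated download-with-retries pattern.
# Args: URL, destination path, marker string the file must contain, max retries.
fetch_with_retries() {
    url="$1"
    dest="$2"
    marker="$3"
    max_retries="${4:-3}"
    retry_count=0
    while [ "$retry_count" -lt "$max_retries" ]; do
        if curl -fsSL --connect-timeout 10 --max-time 30 "$url" -o "$dest"; then
            # Same validation the patch performs inline: non-empty and contains
            # an expected marker string
            if [ -s "$dest" ] && grep -q "$marker" "$dest"; then
                echo "Downloaded and validated $dest"
                return 0
            fi
            echo "Downloaded $dest appears invalid, retrying..."
            rm -f "$dest"
        else
            echo "Download attempt $((retry_count + 1)) failed"
        fi
        retry_count=$((retry_count + 1))
        if [ "$retry_count" -lt "$max_retries" ]; then
            sleep 2
        fi
    done
    return 1
}
```

Each call site then collapses to a single line, e.g. `fetch_with_retries "$download_url" docker-compose.yml "backend:" || exit 1`, keeping the per-file error messages at the caller.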