Merged
38 changes: 21 additions & 17 deletions README.md
@@ -1,6 +1,6 @@
<div align="center">
<img src="assets/logo-banner.png" alt="OpenTranscribe Logo" width="400">

**AI-Powered Transcription and Media Analysis Platform**
</div>

@@ -165,11 +165,15 @@ curl -fsSL https://raw.githubusercontent.com/davidamacey/OpenTranscribe/master/s
```

Then follow the on-screen instructions. The setup script will:
- Detect your hardware (NVIDIA GPU, Apple Silicon, or CPU)
- Download the production Docker Compose file
- Configure environment variables including GPU support (default GPU device ID: 2)
- Help you set up your Hugging Face token (required for speaker diarization)
- Configure environment variables with optimal settings for your hardware
- **Prompt for your HuggingFace token** (required for speaker diarization)
- **Automatically download and cache AI models (~2.5GB)** if token is provided
- Set up the management script (`opentranscribe.sh`)

**Note:** The script will prompt you for your HuggingFace token during setup. If you provide it, AI models will be downloaded and cached before Docker starts, ensuring the app is ready to use immediately. If you skip this step, models will download on first use (10-30 minute delay).

Once setup is complete, start OpenTranscribe with:

```bash
Expand All @@ -189,7 +193,7 @@ Access the web interface at http://localhost:5173
```bash
git clone https://github.com/davidamacey/OpenTranscribe.git
cd OpenTranscribe

# Make utility script executable
chmod +x opentr.sh
```
Expand All @@ -198,7 +202,7 @@ Access the web interface at http://localhost:5173
```bash
# Copy environment template
cp .env.example .env

# Edit .env file with your settings (optional for development)
# Key variables:
# - HUGGINGFACE_TOKEN (required for speaker diarization)
Expand All @@ -209,7 +213,7 @@ Access the web interface at http://localhost:5173
```bash
# Start in development mode (with hot reload)
./opentr.sh start dev

# Or start in production mode
./opentr.sh start prod
```
@@ -470,7 +474,7 @@ OpenTranscribe offers flexible AI deployment options. Choose the approach that b
LLM_PROVIDER=openai
OPENAI_API_KEY=your_openai_key
OPENAI_MODEL_NAME=gpt-4o-mini

# Start without local LLM
./opentr.sh start dev
```
Expand All @@ -480,7 +484,7 @@ OpenTranscribe offers flexible AI deployment options. Choose the approach that b
# Configure for vLLM in .env
LLM_PROVIDER=vllm
VLLM_MODEL_NAME=gpt-oss-20b

# Start with vLLM service (requires 16GB+ VRAM)
docker compose -f docker-compose.yml -f docker-compose.vllm.yml up
```
Expand All @@ -490,7 +494,7 @@ OpenTranscribe offers flexible AI deployment options. Choose the approach that b
# Configure for Ollama in .env
LLM_PROVIDER=ollama
OLLAMA_MODEL_NAME=llama3.2:3b-instruct-q4_K_M

# Edit docker-compose.vllm.yml and uncomment ollama service
# Then start with both compose files
docker compose -f docker-compose.yml -f docker-compose.vllm.yml up
Expand All @@ -501,7 +505,7 @@ OpenTranscribe offers flexible AI deployment options. Choose the approach that b
# Cloud Providers (configure in .env)
LLM_PROVIDER=openai # openai, anthropic, custom (openrouter)
OPENAI_API_KEY=your_openai_key # OpenAI GPT models
ANTHROPIC_API_KEY=your_claude_key # Anthropic Claude models
OPENROUTER_API_KEY=your_or_key # OpenRouter (multi-provider)

# Local Providers (requires additional Docker services)
Expand All @@ -511,7 +515,7 @@ LLM_PROVIDER=ollama # Local Ollama server

**🎯 Deployment Scenarios:**
- **💰 Cost-Effective**: OpenRouter with Claude Haiku (~$0.25/1M tokens)
- **🔒 Privacy-First**: Local vLLM or Ollama (no data leaves your server)
- **⚡ Performance**: OpenAI GPT-4o-mini (fastest cloud option)
- **📱 Small Models**: Even 3B Ollama models can handle hours of content via intelligent sectioning
- **🚫 No LLM**: Leave `LLM_PROVIDER` empty for transcription-only mode
Expand All @@ -534,7 +538,7 @@ OpenTranscribe automatically downloads and caches AI models for optimal performa
│   ├── hub/                 # WhisperX transcription models (~1.5GB)
│   └── transformers/        # PyAnnote transformer models
└── torch/                   # PyTorch cache
    ├── hub/checkpoints/     # Wav2Vec2 alignment model (~360MB)
    └── pyannote/            # PyAnnote diarization models (~500MB)
```

@@ -606,7 +610,7 @@ For production use, ensure you:
# Generate strong secrets
openssl rand -hex 32 # For SECRET_KEY
openssl rand -hex 32 # For JWT_SECRET_KEY

# Set strong database passwords
# Configure proper firewall rules
# Set up SSL/TLS certificates
Expand All @@ -616,7 +620,7 @@ For production use, ensure you:
```bash
# Use production environment
NODE_ENV=production

# Configure resource limits
# Set up monitoring and logging
# Configure backup strategies
Expand All @@ -628,7 +632,7 @@ For production use, ensure you:
server {
    listen 80;
    server_name your-domain.com;

    location / {
        proxy_pass http://localhost:5173;
        proxy_set_header Host $host;
@@ -657,7 +661,7 @@ pytest tests/ # Run tests
black app/ # Format code
flake8 app/ # Lint code

# Frontend development
cd frontend/
npm install
npm run dev # Development server
@@ -835,4 +839,4 @@ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file

**Built with ❤️ using AI assistance and modern open-source technologies.**

*OpenTranscribe demonstrates the power of AI-assisted development while maintaining full local control over your data and processing.*
4 changes: 2 additions & 2 deletions backend/requirements.txt
@@ -27,8 +27,8 @@ numpy>=1.25.2

# PyTorch with CUDA 12.8 support (CVE-2025-32434 fixed in 2.6.0+)
--extra-index-url https://download.pytorch.org/whl/cu128
torch==2.8.0+cu128
torchaudio==2.8.0+cu128
torch==2.8.0
torchaudio==2.8.0

# WhisperX latest version with ctranslate2 4.5+ support
whisperx==3.7.0
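One nuance of the pin change above: dropping the `+cu128` local-version suffix does not lose the CUDA build. Under PEP 440, a pin on a public version matches candidates with any local-version suffix, and the `--extra-index-url` line still points pip at the CUDA wheel index. A quick sketch of that matching rule, assuming the `packaging` library is installed (it ships in most pip-based environments):

```shell
# PEP 440: a public-version pin like "==2.8.0" matches local-version builds
# such as 2.8.0+cu128, so the bare pin plus the cu128 extra index still
# resolves to the CUDA wheel.
python3 - <<'EOF'
from packaging.specifiers import SpecifierSet
from packaging.version import Version

print(Version("2.8.0+cu128") in SpecifierSet("==2.8.0"))  # → True (local suffix ignored)
EOF
```

The practical upside is that the same requirements file also resolves cleanly against plain PyPI (CPU wheels) when the extra index is absent.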
16 changes: 13 additions & 3 deletions docker-compose.offline.yml
@@ -6,7 +6,7 @@ version: '3.8'

services:
  postgres:
    image: postgres:14-alpine
    image: postgres:17.5-alpine
    restart: always
    volumes:
      - postgres_data:/var/lib/postgresql/data/
Expand All @@ -24,7 +24,7 @@ services:
retries: 5

  minio:
    image: minio/minio:latest
    image: minio/minio:RELEASE.2025-09-07T16-13-09Z
    restart: always
    volumes:
      - minio_data:/data
Expand All @@ -42,7 +42,7 @@ services:
retries: 5

  redis:
    image: redis:7-alpine
    image: redis:8.2.2-alpine3.22
    restart: always
    ports:
      - "${REDIS_PORT:-6379}:6379"
@@ -133,9 +133,14 @@ services:
      - MAX_SPEAKERS=${MAX_SPEAKERS:-10}
      # LLM Configuration - external providers only for offline deployment
      - LLM_PROVIDER=${LLM_PROVIDER:-}
      - VLLM_BASE_URL=${VLLM_BASE_URL:-http://localhost:8012/v1}
      - VLLM_API_KEY=${VLLM_API_KEY:-}
      - VLLM_MODEL_NAME=${VLLM_MODEL_NAME:-gpt-oss-20b}
      - OPENAI_API_KEY=${OPENAI_API_KEY:-}
      - OPENAI_MODEL_NAME=${OPENAI_MODEL_NAME:-gpt-4o-mini}
      - OPENAI_BASE_URL=${OPENAI_BASE_URL:-}
      - OLLAMA_BASE_URL=${OLLAMA_BASE_URL:-http://localhost:11434}
      - OLLAMA_MODEL_NAME=${OLLAMA_MODEL_NAME:-llama2:7b-chat}
      - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-}
      - ANTHROPIC_MODEL_NAME=${ANTHROPIC_MODEL_NAME:-claude-3-haiku-20240307}
      - ANTHROPIC_BASE_URL=${ANTHROPIC_BASE_URL:-}
@@ -193,9 +198,14 @@ services:
      - MAX_SPEAKERS=${MAX_SPEAKERS:-10}
      # LLM Configuration
      - LLM_PROVIDER=${LLM_PROVIDER:-}
      - VLLM_BASE_URL=${VLLM_BASE_URL:-http://localhost:8012/v1}
      - VLLM_API_KEY=${VLLM_API_KEY:-}
      - VLLM_MODEL_NAME=${VLLM_MODEL_NAME:-gpt-oss-20b}
      - OPENAI_API_KEY=${OPENAI_API_KEY:-}
      - OPENAI_MODEL_NAME=${OPENAI_MODEL_NAME:-gpt-4o-mini}
      - OPENAI_BASE_URL=${OPENAI_BASE_URL:-}
      - OLLAMA_BASE_URL=${OLLAMA_BASE_URL:-http://localhost:11434}
      - OLLAMA_MODEL_NAME=${OLLAMA_MODEL_NAME:-llama2:7b-chat}
      - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-}
      - ANTHROPIC_MODEL_NAME=${ANTHROPIC_MODEL_NAME:-claude-3-haiku-20240307}
      - ANTHROPIC_BASE_URL=${ANTHROPIC_BASE_URL:-}
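The `${VAR:-default}` entries added above use Compose's shell-style parameter expansion: the default after `:-` applies only when the variable is unset or empty, so a value exported in the host environment (or set in `.env`) always wins. A minimal sketch of the semantics:

```shell
# Unset variable: the fallback after ":-" is substituted,
# mirroring how Compose fills in these defaults.
unset OLLAMA_BASE_URL
echo "${OLLAMA_BASE_URL:-http://localhost:11434}"   # → http://localhost:11434

# Exported variable: the explicit value overrides the default.
export OLLAMA_BASE_URL=http://ollama:11434
echo "${OLLAMA_BASE_URL:-http://localhost:11434}"   # → http://ollama:11434
```

Running `docker compose config` is a handy way to see the fully resolved values before starting the stack.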
8 changes: 4 additions & 4 deletions docker-compose.yml
@@ -2,7 +2,7 @@ version: '3.8'

services:
  postgres:
    image: postgres:14-alpine
    image: postgres:17.5-alpine
    restart: always
    volumes:
      - postgres_data:/var/lib/postgresql/data/
Expand All @@ -19,7 +19,7 @@ services:
retries: 5

  minio:
    image: minio/minio
    image: minio/minio:RELEASE.2025-09-07T16-13-09Z
    restart: always
    volumes:
      - minio_data:/data
Expand All @@ -37,7 +37,7 @@ services:
retries: 5

  redis:
    image: redis:7-alpine
    image: redis:8.2.2-alpine3.22
    restart: always
    ports:
      - "5177:6379"
@@ -241,7 +241,7 @@ services:
    dockerfile: Dockerfile.prod
    restart: unless-stopped
    ports:
      - "5173:80"
      - "5173:8080"
    environment:
      - NODE_ENV=production
    depends_on:
22 changes: 19 additions & 3 deletions frontend/Dockerfile.prod
@@ -29,16 +29,32 @@ RUN chmod -R 755 static/fonts
RUN npm run build

# Production stage
FROM nginx:alpine
FROM nginx:1.29.2-alpine3.22

# Copy the built files from the build stage
COPY --from=build /app/dist /usr/share/nginx/html

# Copy the nginx configuration
COPY nginx.conf /etc/nginx/conf.d/default.conf

# Expose port 80
EXPOSE 80
# Create non-root user and configure permissions for nginx
# Note: nginx user already exists in base image, just configure permissions
RUN chown -R nginx:nginx /usr/share/nginx/html && \
    chown -R nginx:nginx /var/cache/nginx && \
    mkdir -p /var/log/nginx && \
    chown -R nginx:nginx /var/log/nginx && \
    touch /var/run/nginx.pid && \
    chown nginx:nginx /var/run/nginx.pid

# Switch to non-root user
USER nginx

# Expose port 8080 (non-privileged port for non-root user)
EXPOSE 8080

# Add healthcheck
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
    CMD wget --no-verbose --tries=1 --spider http://localhost:8080/ || exit 1

# Start nginx
CMD ["nginx", "-g", "daemon off;"]
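The move to `USER nginx` and port 8080 can be sanity-checked after building; the image tag and build context below are placeholders, not names from the repo, so adjust them to your checkout:

```shell
# Hypothetical smoke test for the hardened image.
docker build -f frontend/Dockerfile.prod -t opentranscribe-frontend ./frontend

# USER nginx in the Dockerfile makes the container's default user non-root;
# that is also why it must listen on 8080 (ports below 1024 need privileges).
docker run --rm opentranscribe-frontend whoami   # should report "nginx", not "root"
docker run --rm opentranscribe-frontend id -u    # a non-zero UID confirms unprivileged
```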
6 changes: 3 additions & 3 deletions frontend/nginx.conf
@@ -1,12 +1,12 @@
server {
    listen 80;
    listen 8080;
    server_name localhost;
    root /usr/share/nginx/html;
    index index.html;

    # Set maximum file upload size to 15GB for large video/audio files
    client_max_body_size 15G;

    # Increase timeouts for large file uploads
    client_body_timeout 300s;
    client_header_timeout 300s;
@@ -43,7 +43,7 @@ server {
    proxy_set_header Connection 'upgrade';
    proxy_set_header Host $host;
    proxy_cache_bypass $http_upgrade;

    # Large file upload support
    proxy_request_buffering off;
    proxy_max_temp_file_size 0;