Skip to content

Commit e6b50b3

Browse files
committed
feat(gpu): add Blackwell/DGX Spark GPU support (PR #154)
1 parent 02053ae commit e6b50b3

8 files changed

Lines changed: 715 additions & 31 deletions

File tree

backend/Dockerfile.blackwell

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
# =============================================================================
# OpenTranscribe Backend - Blackwell / DGX Spark Dockerfile
#
# Specialized image for NVIDIA Blackwell architecture (SM_121 / GB10) GPUs.
# Based on NVIDIA's PyTorch container which bundles the correct CUDA 12.x
# toolkit and ARM64-native torch builds.
#
# Key differences from Dockerfile.prod:
#   - Uses NVIDIA PyTorch base image (nvcr.io/nvidia/pytorch) instead of
#     python:3.13-slim to get ARM64 CUDA support
#   - Applies SM_121 -> SM_90 compatibility patches via scripts/blackwell_patches.py
#   - Pins huggingface_hub to avoid deprecated API calls in NVIDIA stack
#   - Sets CUDA_FORCE_PTX_JIT=1 for forward-compatible PTX execution
#
# Usage:
#   docker build -t opentranscribe-backend-blackwell -f Dockerfile.blackwell .
#   (Then reference in docker-compose.blackwell.yml)
# =============================================================================

FROM nvcr.io/nvidia/pytorch:25.01-py3

LABEL org.opencontainers.image.title="OpenTranscribe Backend (Blackwell)" \
      org.opencontainers.image.description="AI-powered transcription backend for NVIDIA Blackwell/DGX Spark" \
      org.opencontainers.image.vendor="OpenTranscribe" \
      org.opencontainers.image.authors="OpenTranscribe Contributors" \
      org.opencontainers.image.licenses="AGPL-3.0" \
      org.opencontainers.image.source="https://github.com/davidamacey/OpenTranscribe"

WORKDIR /app

# ---------------------------------------------------------------------------
# Blackwell SM_121 compatibility environment variables
# ---------------------------------------------------------------------------
# CUDA_FORCE_PTX_JIT:   Forces PTX JIT compilation for forward compatibility
#                       when the installed CUDA toolkit doesn't know SM_121
#                       natively.
# TORCH_CUDA_ARCH_LIST: Tells PyTorch extensions to compile for SM_90
#                       (Hopper), which is binary-compatible with Blackwell
#                       via PTX fallback.
# NOTE: DEBIAN_FRONTEND is deliberately NOT exported here. Baking it into the
# runtime environment changes apt behavior in debug shells and derived images;
# it is set inline on the apt-get RUN below instead.
ENV CUDA_FORCE_PTX_JIT=1 \
    TORCH_CUDA_ARCH_LIST="9.0" \
    PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONWARNINGS=ignore::UserWarning:pyannote.audio.core.io

# Use bash with pipefail for every subsequent RUN so a failure on the left
# side of a pipe (e.g. `pip freeze | grep` below) aborts the build instead of
# being silently masked by /bin/sh (hadolint DL4006).
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# ---------------------------------------------------------------------------
# System dependencies (runtime only)
# ---------------------------------------------------------------------------
# Packages sorted alphabetically; apt caches purged in the SAME layer so the
# downloaded package lists never persist in the image.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        curl \
        ffmpeg \
        libgomp1 \
        libimage-exiftool-perl \
        libsndfile1 \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# ---------------------------------------------------------------------------
# Create directories for runtime and caches
# ---------------------------------------------------------------------------
# Note: The NVIDIA base image uses 'user' (UID 1000), not 'appuser'.
# Cache volume mounts in docker-compose.blackwell.yml use /home/user/ paths.
# NOTE(review): no USER directive is set, so the container runs as root.
# Confirm whether switching to UID 1000 is compatible with GPU device access
# and the compose volumes' ownership before adding one.
RUN mkdir -p /app/models /app/temp \
    /home/user/.cache/huggingface \
    /home/user/.cache/torch \
    /home/user/.cache/nltk_data \
    /home/user/.cache/sentence-transformers \
    /home/user/.cache/yt-dlp

# ---------------------------------------------------------------------------
# Save NVIDIA-stack torch packages before pip installs can overwrite them
# ---------------------------------------------------------------------------
# The NVIDIA base image bundles a custom torch/torchaudio/torchvision built
# against their CUDA toolkit. pip installs below might pull in PyPI versions
# that lack the NVIDIA-specific patches. We save and restore.
# If grep matches nothing it exits non-zero and (with pipefail) fails the
# build on purpose: that would mean the base image is not what we expect.
RUN pip freeze | grep -iE '^(torch|torchaudio|torchvision)==' > /tmp/nvidia_torch_versions.txt

# ---------------------------------------------------------------------------
# Base Python tools
# ---------------------------------------------------------------------------
# hadolint ignore=DL3013
RUN pip install --no-cache-dir --upgrade pip setuptools wheel

# ---------------------------------------------------------------------------
# Install application dependencies
# ---------------------------------------------------------------------------
# requirements.txt is copied alone (before the full source tree) so this
# expensive layer stays cached until the dependency list itself changes.
COPY requirements.txt .
RUN pip install --no-cache-dir --no-warn-script-location -r requirements.txt

# ---------------------------------------------------------------------------
# Restore NVIDIA torch stack (overwrite any PyPI versions pulled by deps)
# ---------------------------------------------------------------------------
# BUGFIX: the previous form — pip install "$(cat file | tr '\n' ' ')" — quoted
# the command substitution, collapsing every pinned spec into a SINGLE
# argument ("torch==X torchaudio==Y ...") that pip's requirement parser
# rejects. xargs passes each "pkg==version" line as its own argument.
# hadolint ignore=DL3013
RUN xargs --no-run-if-empty --arg-file=/tmp/nvidia_torch_versions.txt \
    pip install --no-cache-dir --force-reinstall

# ---------------------------------------------------------------------------
# Pin huggingface_hub for NVIDIA base image compatibility
# ---------------------------------------------------------------------------
# The NVIDIA PyTorch 25.01 base bundles libraries that still use the
# deprecated `use_auth_token` parameter removed in huggingface_hub>=0.24.
# This pin is Blackwell-specific and intentionally NOT in requirements.txt.
RUN pip install --no-cache-dir "huggingface_hub==0.23.5"

# ---------------------------------------------------------------------------
# Install WhisperX (with --no-deps to preserve NVIDIA torch stack)
# ---------------------------------------------------------------------------
RUN pip install --no-cache-dir --no-deps "whisperx==3.8.1"

# ---------------------------------------------------------------------------
# Install PyAnnote fork with GPU optimizations
# ---------------------------------------------------------------------------
# Custom fork with vectorized chunking, adaptive batch size, TF32, CUDA
# streams, and memory-safe batch indexing.
# NOTE(review): the git ref is a branch, not a commit SHA, so this layer is
# not reproducible — consider pinning to a commit hash.
RUN pip install --no-cache-dir --no-deps \
    "pyannote.audio @ git+https://github.com/davidamacey/pyannote-audio.git@gpu-optimizations"

# ---------------------------------------------------------------------------
# Apply Blackwell SM_121 -> SM_90 compatibility patches
# ---------------------------------------------------------------------------
# Patches torch, torchaudio, and pyannote for SM_121 compatibility.
# See scripts/blackwell_patches.py for details.
COPY scripts/blackwell_patches.py scripts/blackwell_patches.py
RUN python scripts/blackwell_patches.py

# ---------------------------------------------------------------------------
# Set cache environment variables
# ---------------------------------------------------------------------------
# These point at the directories created above, which docker-compose mounts
# as persistent volumes.
ENV HF_HOME=/home/user/.cache/huggingface \
    TORCH_HOME=/home/user/.cache/torch \
    NLTK_DATA=/home/user/.cache/nltk_data \
    SENTENCE_TRANSFORMERS_HOME=/home/user/.cache/sentence-transformers

# ---------------------------------------------------------------------------
# Copy application code
# ---------------------------------------------------------------------------
# Kept last so routine source edits never invalidate the dependency layers.
COPY . .

EXPOSE 8080

HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
    CMD curl -fsS http://localhost:8080/health || exit 1

CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8080"]

backend/app/tasks/utility.py

Lines changed: 105 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,59 @@
1616
logger = logging.getLogger(__name__)
1717

1818

19+
def _safe_float(value: str | None) -> float | None:
20+
"""Parse a numeric string to float, returning None for non-numeric values.
21+
22+
Handles nvidia-smi reporting '[N/A]' or 'N/A' on systems with unified
23+
memory (e.g. NVIDIA DGX Spark / Blackwell GB10).
24+
25+
Args:
26+
value: String to parse, or None.
27+
28+
Returns:
29+
Parsed float, or None if the value is not a valid number.
30+
"""
31+
if value is None:
32+
return None
33+
value = value.strip()
34+
if not value or value.upper() in ("[N/A]", "N/A", "N/A%"):
35+
return None
36+
try:
37+
return float(value)
38+
except (ValueError, TypeError):
39+
return None
40+
41+
42+
def _query_gpu_memory_torch(device_id: int) -> tuple[float, float] | None:
43+
"""Query GPU memory via torch.cuda.mem_get_info() as a fallback.
44+
45+
This is needed for systems with unified CPU+GPU memory (e.g. DGX Spark)
46+
where nvidia-smi reports memory stats as '[N/A]'.
47+
48+
Args:
49+
device_id: CUDA device index.
50+
51+
Returns:
52+
Tuple of (free_bytes, total_bytes), or None if torch is unavailable.
53+
"""
54+
try:
55+
import torch
56+
57+
if not torch.cuda.is_available():
58+
return None
59+
free, total = torch.cuda.mem_get_info(device_id)
60+
return (float(free), float(total))
61+
except Exception as e:
62+
logger.debug(f"torch.cuda.mem_get_info({device_id}) failed: {e}")
63+
return None
64+
65+
1966
def _query_single_gpu(device_id: int, subprocess_mod, format_bytes) -> dict | None:
20-
"""Query nvidia-smi for one GPU device and return a parsed stats dict.
67+
"""Query GPU stats for one device, with unified-memory fallback.
68+
69+
First tries nvidia-smi for full stats. If memory values come back as
70+
'[N/A]' (unified memory systems like DGX Spark), falls back to
71+
torch.cuda.mem_get_info() for memory data.
2172
2273
Args:
2374
device_id: NVIDIA device index to query.
@@ -31,7 +82,8 @@ def _query_single_gpu(device_id: int, subprocess_mod, format_bytes) -> dict | No
3182
result = subprocess_mod.run( # noqa: S603 # nosec B603 B607
3283
[ # noqa: S607
3384
"nvidia-smi",
34-
"--query-gpu=name,memory.used,memory.total,memory.free,utilization.gpu,temperature.gpu",
85+
"--query-gpu=name,memory.used,memory.total,memory.free,"
86+
"utilization.gpu,temperature.gpu",
3587
"--format=csv,noheader,nounits",
3688
f"--id={device_id}",
3789
],
@@ -41,15 +93,56 @@ def _query_single_gpu(device_id: int, subprocess_mod, format_bytes) -> dict | No
4193
)
4294
parts = result.stdout.strip().split(", ")
4395
gpu_name = parts[0]
44-
memory_used_mib = float(parts[1])
45-
memory_total_mib = float(parts[2])
46-
memory_free_mib = float(parts[3])
47-
utilization_percent = int(parts[4]) if len(parts) > 4 else None
48-
temperature_celsius = int(parts[5]) if len(parts) > 5 else None
49-
50-
memory_used = memory_used_mib * 1024 * 1024
51-
memory_total = memory_total_mib * 1024 * 1024
52-
memory_free = memory_free_mib * 1024 * 1024
96+
memory_used_mib = _safe_float(parts[1])
97+
memory_total_mib = _safe_float(parts[2])
98+
memory_free_mib = _safe_float(parts[3])
99+
utilization_percent = (
100+
int(parts[4]) if len(parts) > 4 and _safe_float(parts[4]) is not None else None
101+
)
102+
temperature_celsius = (
103+
int(parts[5]) if len(parts) > 5 and _safe_float(parts[5]) is not None else None
104+
)
105+
106+
memory_source = "nvidia-smi"
107+
108+
# If nvidia-smi returned N/A for memory (unified memory systems),
109+
# fall back to torch.cuda.mem_get_info()
110+
if memory_total_mib is None or memory_used_mib is None:
111+
torch_mem = _query_gpu_memory_torch(device_id)
112+
if torch_mem is not None:
113+
free_bytes, total_bytes = torch_mem
114+
used_bytes = total_bytes - free_bytes
115+
memory_total = total_bytes
116+
memory_used = used_bytes
117+
memory_free = free_bytes
118+
memory_source = "torch.cuda.mem_get_info"
119+
else:
120+
# Both nvidia-smi and torch failed — report what we can
121+
return {
122+
"available": True,
123+
"device_id": device_id,
124+
"name": gpu_name,
125+
"memory_total": "N/A (unified memory)",
126+
"memory_used": "N/A",
127+
"memory_free": "N/A",
128+
"memory_percent": "N/A",
129+
"utilization_percent": f"{utilization_percent}%"
130+
if utilization_percent is not None
131+
else "N/A",
132+
"temperature_celsius": temperature_celsius,
133+
"memory_source": "unavailable",
134+
"memory_note": "Unified memory system — install PyTorch "
135+
"for memory stats via torch.cuda.mem_get_info()",
136+
}
137+
else:
138+
memory_used = memory_used_mib * 1024 * 1024
139+
memory_total = memory_total_mib * 1024 * 1024
140+
memory_free = (
141+
memory_free_mib * 1024 * 1024
142+
if memory_free_mib is not None
143+
else memory_total - memory_used
144+
)
145+
53146
memory_percent = (memory_used / memory_total * 100) if memory_total > 0 else 0
54147

55148
return {
@@ -64,6 +157,7 @@ def _query_single_gpu(device_id: int, subprocess_mod, format_bytes) -> dict | No
64157
if utilization_percent is not None
65158
else "N/A",
66159
"temperature_celsius": temperature_celsius,
160+
"memory_source": memory_source,
67161
}
68162
except Exception as e:
69163
logger.warning(f"nvidia-smi query for device {device_id} failed: {e}")

0 commit comments

Comments
 (0)