Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.10.12'
python-version: '3.12.8'

- name: Install uv
uses: astral-sh/setup-uv@v3
Expand Down
2 changes: 1 addition & 1 deletion .python-version
Original file line number Diff line number Diff line change
@@ -1 +1 @@
3.10.12
3.12.8
1 change: 1 addition & 0 deletions patches.pth
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
import scope.core.patches._startup
33 changes: 21 additions & 12 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ name = "daydream-scope"
version = "0.1.0b2"
description = "A tool for running and customizing real-time, interactive generative AI pipelines and models"
readme = "README.md"
requires-python = ">=3.10.12"
requires-python = ">=3.12"
authors = [
{name = "Yondon Fu", email = "yondon@livepeer.org"},
{name = "Rafal Leszko", email = "rafal@livepeer.org"}
Expand Down Expand Up @@ -33,8 +33,8 @@ dependencies = [
"httpx>=0.28.1",
"twilio>=9.8.0",
"uvicorn>=0.35.0",
"torch==2.8.0",
"torchvision==0.23.0",
"torch==2.9.1",
"torchvision==0.24.1",
"easydict>=1.13",
"diffusers>=0.31.0",
"ftfy>=6.3.1",
Expand All @@ -50,10 +50,10 @@ dependencies = [
"pluggy>=1.5.0",
"click>=8.3.1",
"peft>=0.17.1",
"torchao==0.13.0",
"torchao==0.15.0",
"kernels>=0.10.4",
"triton==3.4.0; sys_platform == 'linux'",
"triton-windows==3.4.0.post21; sys_platform == 'win32'",
"triton==3.5.1; sys_platform == 'linux'",
"triton-windows==3.5.1.post24; sys_platform == 'win32'",
"SpoutGL>=0.1.1; sys_platform == 'win32'",
"PyOpenGL>=3.1.10; sys_platform == 'win32'",
]
Expand All @@ -70,6 +70,11 @@ Issues = "https://github.com/daydreamlive/scope/issues"

[tool.uv]
preview = true
# Override cuDNN version to fix PyTorch 2.9.1 Conv3D bf16 performance regression
# See: https://github.com/pytorch/pytorch/issues/168167
override-dependencies = [
"nvidia-cudnn-cu12>=9.15",
]

[tool.uv.extra-build-dependencies]
flash-attn = [{ requirement = "torch", match-runtime = true, marker = "sys_platform == 'linux' or sys_platform == 'win32'" }]
Expand All @@ -86,15 +91,15 @@ torchvision = [
]
flash-attn = [
# Prebuilt Linux wheels from https://github.com/Dao-AILab/flash-attention
{ url = "https://github.com/Dao-AILab/flash-attention/releases/download/v2.8.3/flash_attn-2.8.3+cu12torch2.8cxx11abiFALSE-cp310-cp310-linux_x86_64.whl", marker = "sys_platform == 'linux'" },
{ url = "https://github.com/Dao-AILab/flash-attention/releases/download/v2.8.3/flash_attn-2.8.3+cu12torch2.9cxx11abiTRUE-cp312-cp312-linux_x86_64.whl", marker = "sys_platform == 'linux'" },
# Prebuilt Windows wheels from https://github.com/kingbri1/flash-attention
{ url = "https://github.com/kingbri1/flash-attention/releases/download/v2.8.3/flash_attn-2.8.3+cu128torch2.8.0cxx11abiFALSE-cp310-cp310-win_amd64.whl", marker = "sys_platform == 'win32'" },
{ url = "https://github.com/kingbri1/flash-attention/releases/download/v2.8.3/flash_attn-2.8.3+cu128torch2.9.0cxx11abiFALSE-cp312-cp312-win_amd64.whl", marker = "sys_platform == 'win32'" },
]
sageattention = [
# Prebuilt Linux wheels from https://github.com/daydreamlive/SageAttention
{ url = "https://github.com/daydreamlive/SageAttention/releases/download/v2.2.0-linux/sageattention-2.2.0-cp310-cp310-linux_x86_64.whl", marker = "sys_platform == 'linux'" },
# Prebuilt Linux wheels from https://huggingface.co/Kijai/PrecompiledWheels
{ url = "https://huggingface.co/Kijai/PrecompiledWheels/resolve/main/sageattention-2.2.0-cp312-cp312-linux_x86_64.whl", marker = "sys_platform == 'linux'" },
# Prebuilt Windows wheels from https://github.com/woct0rdho/SageAttention/releases
{ url = "https://github.com/woct0rdho/SageAttention/releases/download/v2.2.0-windows.post3/sageattention-2.2.0+cu128torch2.8.0.post3-cp39-abi3-win_amd64.whl", marker = "sys_platform == 'win32'" },
{ url = "https://github.com/woct0rdho/SageAttention/releases/download/v2.2.0-windows.post4/sageattention-2.2.0+cu128torch2.9.0andhigher.post4-cp39-abi3-win_amd64.whl", marker = "sys_platform == 'win32'" },
]

[[tool.uv.index]]
Expand All @@ -118,7 +123,7 @@ testpaths = ["tests"]

[tool.ruff]
line-length = 88
target-version = "py310"
target-version = "py312"
exclude = [
"*/vendor/*",
"**/vendor/**",
Expand Down Expand Up @@ -159,6 +164,10 @@ build-backend = "hatchling.build"
[tool.hatch.build.targets.wheel]
packages = ["src/scope"]

[tool.hatch.build.targets.wheel.force-include]
# Include .pth file for automatic cuDNN patching at Python startup
"patches.pth" = "patches.pth"

[tool.hatch.build]
include = [
"frontend/dist/assets/**/*",
Expand Down
1 change: 1 addition & 0 deletions src/scope/core/patches/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Patching utilities for dependencies."""
17 changes: 17 additions & 0 deletions src/scope/core/patches/_startup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
"""
Auto-patch at Python startup. Imported by patches.pth.

This module is imported at every Python startup before any user code.
It silently applies patches needed for the current platform.
"""

import sys

if sys.platform == "win32":
try:
from .cudnn import patch_torch_cudnn

patch_torch_cudnn(silent=True)
except Exception:
# Never crash Python startup - fail silently
pass
138 changes: 138 additions & 0 deletions src/scope/core/patches/cudnn.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
"""
Patch PyTorch's bundled cuDNN with a newer version from nvidia-cudnn-cu12.

On Windows, PyTorch bundles cuDNN in torch/lib and loads it directly by path,
ignoring PATH and os.add_dll_directory(). To use a newer cuDNN version, we
must copy the DLLs from nvidia-cudnn-cu12 to torch/lib.

This fixes the PyTorch 2.9.1 Conv3D bf16 performance regression.
See: https://github.com/pytorch/pytorch/issues/168167

This can be removed when a new PyTorch with the correct cuDNN version is released.
"""

import glob
import importlib.util
import os
import shutil
import sys


def _find_package_path(package_name: str) -> str | None:
"""Find a package's install path WITHOUT importing it.

This is critical for torch - importing it loads cuDNN DLLs which then
can't be overwritten. Using find_spec() locates the package without
executing its __init__.py.

Handles both regular packages (with __init__.py) and namespace packages.
"""
try:
spec = importlib.util.find_spec(package_name)
if spec:
# Regular package: spec.origin points to __init__.py
if spec.origin:
return os.path.dirname(spec.origin)
# Namespace package: use submodule_search_locations
if spec.submodule_search_locations:
locations = list(spec.submodule_search_locations)
if locations:
return locations[0]
except (ImportError, ModuleNotFoundError):
pass
return None


def patch_torch_cudnn(silent: bool = False):
    """Copy cuDNN DLLs from nvidia-cudnn-cu12 to torch/lib on Windows.

    This patches PyTorch to use the newer cuDNN version.
    Idempotent: skips files that are already the correct size.

    IMPORTANT: This function does NOT import torch, so it can safely
    overwrite cuDNN DLLs before they are loaded.

    Args:
        silent: If True, suppress all output (for use at Python startup).
    """

    def say(message: str) -> None:
        # Console output helper; muted when running at interpreter startup.
        if not silent:
            print(message)

    if sys.platform != "win32":
        say("Not on Windows, skipping cuDNN patch")
        return

    # Locate both packages WITHOUT importing them (importing would load and
    # lock the very DLLs we need to overwrite).
    cudnn_root = _find_package_path("nvidia.cudnn")
    if not cudnn_root:
        say("nvidia-cudnn-cu12 package not found")
        return

    torch_root = _find_package_path("torch")
    if not torch_root:
        say("torch package not found")
        return

    source_dir = os.path.join(cudnn_root, "bin")
    target_dir = os.path.join(torch_root, "lib")

    if not os.path.isdir(source_dir):
        say(f"cuDNN source not found: {source_dir}")
        return
    if not os.path.isdir(target_dir):
        say(f"torch lib not found: {target_dir}")
        return

    # Gather the cuDNN DLLs shipped by nvidia-cudnn-cu12.
    dlls = glob.glob(os.path.join(source_dir, "cudnn*.dll"))
    if not dlls:
        say(f"No cuDNN DLLs found in {source_dir}")
        return

    say(f"Patching torch cuDNN: {source_dir} -> {target_dir}")

    for source in dlls:
        name = os.path.basename(source)
        target = os.path.join(target_dir, name)

        # Idempotency check: a destination file of identical size is
        # assumed to be the already-patched copy.
        if os.path.exists(target) and os.path.getsize(source) == os.path.getsize(
            target
        ):
            say(f" {name}: already patched (same size)")
            continue

        say(f" {name}: copying...")
        try:
            if os.path.exists(target):
                # Clear any read-only bit so the copy can overwrite it.
                os.chmod(target, 0o666)
            shutil.copy2(source, target)
        except PermissionError:
            # A running Python process has the DLL loaded; we cannot
            # replace it now. Tell the user how to finish manually.
            say(
                " ERROR: Permission denied. Close any Python/torch processes and retry."
            )
            say(f" Or manually copy: {source} -> {target}")
            continue

    say("Done. Restart Python to use new cuDNN.")


def main():
    """Entry point for manual patching.

    Runs the cuDNN patch with console output enabled so the user can see
    per-DLL progress and any permission errors.
    """
    patch_torch_cudnn(silent=False)


if __name__ == "__main__":
    main()
Loading
Loading