Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.10.12'
python-version: '3.12.8'

- name: Install uv
uses: astral-sh/setup-uv@v3
Expand Down
2 changes: 1 addition & 1 deletion .python-version
Original file line number Diff line number Diff line change
@@ -1 +1 @@
3.10.12
3.12.8
1 change: 1 addition & 0 deletions patches.pth
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
import scope.core.patches._startup
33 changes: 21 additions & 12 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ name = "daydream-scope"
version = "0.1.0b2"
description = "A tool for running and customizing real-time, interactive generative AI pipelines and models"
readme = "README.md"
requires-python = ">=3.10.12"
requires-python = ">=3.12"
authors = [
{name = "Yondon Fu", email = "yondon@livepeer.org"},
{name = "Rafal Leszko", email = "rafal@livepeer.org"}
Expand Down Expand Up @@ -33,8 +33,8 @@ dependencies = [
"httpx>=0.28.1",
"twilio>=9.8.0",
"uvicorn>=0.35.0",
"torch==2.8.0",
"torchvision==0.23.0",
"torch==2.9.1",
"torchvision==0.24.1",
"easydict>=1.13",
"diffusers>=0.31.0",
"ftfy>=6.3.1",
Expand All @@ -50,10 +50,10 @@ dependencies = [
"pluggy>=1.5.0",
"click>=8.3.1",
"peft>=0.17.1",
"torchao==0.13.0",
"torchao==0.15.0",
"kernels>=0.10.4",
"triton==3.4.0; sys_platform == 'linux'",
"triton-windows==3.4.0.post21; sys_platform == 'win32'",
"triton==3.5.1; sys_platform == 'linux'",
"triton-windows==3.5.1.post24; sys_platform == 'win32'",
"SpoutGL>=0.1.1; sys_platform == 'win32'",
"PyOpenGL>=3.1.10; sys_platform == 'win32'",
]
Expand All @@ -70,6 +70,11 @@ Issues = "https://github.com/daydreamlive/scope/issues"

[tool.uv]
preview = true
# Override cuDNN version to fix PyTorch 2.9.1 Conv3D bf16 performance regression
# See: https://github.com/pytorch/pytorch/issues/168167
override-dependencies = [
"nvidia-cudnn-cu12>=9.15",
]

[tool.uv.extra-build-dependencies]
flash-attn = [{ requirement = "torch", match-runtime = true, marker = "sys_platform == 'linux' or sys_platform == 'win32'" }]
Expand All @@ -86,15 +91,15 @@ torchvision = [
]
flash-attn = [
# Prebuilt Linux wheels from https://github.com/Dao-AILab/flash-attention
{ url = "https://github.com/Dao-AILab/flash-attention/releases/download/v2.8.3/flash_attn-2.8.3+cu12torch2.8cxx11abiFALSE-cp310-cp310-linux_x86_64.whl", marker = "sys_platform == 'linux'" },
{ url = "https://github.com/Dao-AILab/flash-attention/releases/download/v2.8.3/flash_attn-2.8.3+cu12torch2.9cxx11abiTRUE-cp312-cp312-linux_x86_64.whl", marker = "sys_platform == 'linux'" },
# Prebuilt Windows wheels from https://github.com/kingbri1/flash-attention
{ url = "https://github.com/kingbri1/flash-attention/releases/download/v2.8.3/flash_attn-2.8.3+cu128torch2.8.0cxx11abiFALSE-cp310-cp310-win_amd64.whl", marker = "sys_platform == 'win32'" },
{ url = "https://github.com/kingbri1/flash-attention/releases/download/v2.8.3/flash_attn-2.8.3+cu128torch2.9.0cxx11abiFALSE-cp312-cp312-win_amd64.whl", marker = "sys_platform == 'win32'" },
]
sageattention = [
# Prebuilt Linux wheels from https://github.com/daydreamlive/SageAttention
{ url = "https://github.com/daydreamlive/SageAttention/releases/download/v2.2.0-linux/sageattention-2.2.0-cp310-cp310-linux_x86_64.whl", marker = "sys_platform == 'linux'" },
# Prebuilt Linux wheels from https://huggingface.co/Kijai/PrecompiledWheels
{ url = "https://huggingface.co/Kijai/PrecompiledWheels/resolve/main/sageattention-2.2.0-cp312-cp312-linux_x86_64.whl", marker = "sys_platform == 'linux'" },
# Prebuilt Windows wheels from https://github.com/woct0rdho/SageAttention/releases
{ url = "https://github.com/woct0rdho/SageAttention/releases/download/v2.2.0-windows.post3/sageattention-2.2.0+cu128torch2.8.0.post3-cp39-abi3-win_amd64.whl", marker = "sys_platform == 'win32'" },
{ url = "https://github.com/woct0rdho/SageAttention/releases/download/v2.2.0-windows.post4/sageattention-2.2.0+cu128torch2.9.0andhigher.post4-cp39-abi3-win_amd64.whl", marker = "sys_platform == 'win32'" },
]

[[tool.uv.index]]
Expand All @@ -118,7 +123,7 @@ testpaths = ["tests"]

[tool.ruff]
line-length = 88
target-version = "py310"
target-version = "py312"
exclude = [
"*/vendor/*",
"**/vendor/**",
Expand Down Expand Up @@ -159,6 +164,10 @@ build-backend = "hatchling.build"
[tool.hatch.build.targets.wheel]
packages = ["src/scope"]

[tool.hatch.build.targets.wheel.force-include]
# Include .pth file for automatic cuDNN patching at Python startup
"patches.pth" = "patches.pth"

[tool.hatch.build]
include = [
"frontend/dist/assets/**/*",
Expand Down
1 change: 1 addition & 0 deletions src/scope/core/patches/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Patching utilities for dependencies."""
17 changes: 17 additions & 0 deletions src/scope/core/patches/_startup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
"""
Auto-patch at Python startup. Imported by patches.pth.

This module is imported at every Python startup before any user code.
It silently applies patches needed for the current platform.
"""

import sys

if sys.platform == "win32":
try:
from .cudnn import patch_torch_cudnn

patch_torch_cudnn(silent=True)
except Exception:
# Never crash Python startup - fail silently
pass
138 changes: 138 additions & 0 deletions src/scope/core/patches/cudnn.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
"""
Patch PyTorch's bundled cuDNN with a newer version from nvidia-cudnn-cu12.

On Windows, PyTorch bundles cuDNN in torch/lib and loads it directly by path,
ignoring PATH and os.add_dll_directory(). To use a newer cuDNN version, we
must copy the DLLs from nvidia-cudnn-cu12 to torch/lib.

This fixes the PyTorch 2.9.1 Conv3D bf16 performance regression.
See: https://github.com/pytorch/pytorch/issues/168167

This can be removed when a new PyTorch with the correct cuDNN version is released.
"""

import glob
import importlib.util
import os
import shutil
import sys


def _find_package_path(package_name: str) -> str | None:
"""Find a package's install path WITHOUT importing it.

This is critical for torch - importing it loads cuDNN DLLs which then
can't be overwritten. Using find_spec() locates the package without
executing its __init__.py.

Handles both regular packages (with __init__.py) and namespace packages.
"""
try:
spec = importlib.util.find_spec(package_name)
if spec:
# Regular package: spec.origin points to __init__.py
if spec.origin:
return os.path.dirname(spec.origin)
# Namespace package: use submodule_search_locations
if spec.submodule_search_locations:
locations = list(spec.submodule_search_locations)
if locations:
return locations[0]
except (ImportError, ModuleNotFoundError):
pass
return None


def patch_torch_cudnn(silent: bool = False):
    """Copy cuDNN DLLs from nvidia-cudnn-cu12 to torch/lib on Windows.

    This patches PyTorch to use the newer cuDNN version.
    Idempotent: skips files that are already the correct size.

    IMPORTANT: This function does NOT import torch, so it can safely
    overwrite cuDNN DLLs before they are loaded.

    Args:
        silent: If True, suppress all output (for use at Python startup).
    """

    def say(message: str) -> None:
        # Console output helper; muted when running at interpreter startup.
        if not silent:
            print(message)

    if sys.platform != "win32":
        say("Not on Windows, skipping cuDNN patch")
        return

    # Locate both packages WITHOUT importing them (importing would load and
    # lock the very DLLs we need to overwrite).
    cudnn_root = _find_package_path("nvidia.cudnn")
    if not cudnn_root:
        say("nvidia-cudnn-cu12 package not found")
        return

    torch_root = _find_package_path("torch")
    if not torch_root:
        say("torch package not found")
        return

    source_dir = os.path.join(cudnn_root, "bin")
    target_dir = os.path.join(torch_root, "lib")

    if not os.path.isdir(source_dir):
        say(f"cuDNN source not found: {source_dir}")
        return
    if not os.path.isdir(target_dir):
        say(f"torch lib not found: {target_dir}")
        return

    # Gather the cuDNN DLLs shipped by nvidia-cudnn-cu12.
    dlls = glob.glob(os.path.join(source_dir, "cudnn*.dll"))
    if not dlls:
        say(f"No cuDNN DLLs found in {source_dir}")
        return

    say(f"Patching torch cuDNN: {source_dir} -> {target_dir}")

    for source in dlls:
        name = os.path.basename(source)
        target = os.path.join(target_dir, name)

        # Idempotency check: a destination file of identical size is
        # assumed to be the already-patched copy.
        if os.path.exists(target) and os.path.getsize(source) == os.path.getsize(
            target
        ):
            say(f" {name}: already patched (same size)")
            continue

        say(f" {name}: copying...")
        try:
            if os.path.exists(target):
                # Clear any read-only bit so the copy can overwrite it.
                os.chmod(target, 0o666)
            shutil.copy2(source, target)
        except PermissionError:
            # A running Python process has the DLL loaded; we cannot
            # replace it now. Tell the user how to finish manually.
            say(
                " ERROR: Permission denied. Close any Python/torch processes and retry."
            )
            say(f" Or manually copy: {source} -> {target}")
            continue

    say("Done. Restart Python to use new cuDNN.")


def main():
    """Entry point for manual patching.

    Runs the cuDNN patch with console output enabled so the user can see
    per-DLL progress and any permission errors.
    """
    patch_torch_cudnn(silent=False)


if __name__ == "__main__":
    main()
Loading
Loading