-
Notifications
You must be signed in to change notification settings - Fork 7
Expand file tree
/
Copy pathsetup.py
More file actions
74 lines (65 loc) · 1.75 KB
/
setup.py
File metadata and controls
74 lines (65 loc) · 1.75 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
"""Packaging configuration for turboquantdc.

Dependency groups are exposed as extras, e.g.::

    pip install turboquantdc[hf,triton]
    pip install turboquantdc[all]      # everything except dev tools
    pip install turboquantdc[dev]      # test tooling
"""
from pathlib import Path

from setuptools import find_packages, setup

# Resolve README.md next to this file (not the CWD) and fall back to an
# empty long_description instead of crashing when it is absent — e.g. in
# stripped sdists or CI checkouts, or when pip invokes setup.py from
# another directory.
_readme = Path(__file__).resolve().parent / "README.md"
long_description = _readme.read_text(encoding="utf-8") if _readme.is_file() else ""

# Core runtime dependencies — always installed.
_base = [
    "torch>=2.0.0",
    "scipy>=1.10.0,<1.15.0",
]
# HuggingFace model loading/runtime.
_hf = [
    "transformers>=4.40.0",
    "accelerate>=0.25.0",
]
# bitsandbytes quantized baselines.
_bnb = [
    "bitsandbytes>=0.43.0",
]
# Custom Triton kernels.
_triton = [
    "triton>=3.0.0",
]
# NOTE(review): "faiss-gpu" is the only unpinned dependency, and the PyPI
# wheels under that name are stale/unmaintained — consider a version floor
# or the CUDA-specific package (e.g. faiss-gpu-cu12); confirm with CI.
_faiss = [
    "faiss-gpu",
]
# Benchmark harness (datasets + plotting).
_benchmark = [
    "datasets",
    "matplotlib",
    "tqdm",
]
# Test tooling; deliberately excluded from the "all" extra.
_dev = [
    "pytest>=7.0.0",
    "pytest-cov>=4.0.0",
]

setup(
    name="turboquantdc",
    version="0.3.0",
    author="TurboQuantDC Contributors",
    description="TurboQuant: 3-bit KV cache compression for LLMs with <0.5% attention quality loss",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/turboquantdc/turboquantdc",
    packages=find_packages(exclude=["tests*", "benchmarks*", "reference*", "docs*", "warroom*"]),
    classifiers=[
        "Development Status :: 3 - Alpha",
        "Intended Audience :: Science/Research",
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 3.10",
        "Programming Language :: Python :: 3.11",
        "Programming Language :: Python :: 3.12",
        "Topic :: Scientific/Engineering :: Artificial Intelligence",
    ],
    license="MIT",
    python_requires=">=3.10",
    install_requires=_base,
    extras_require={
        "base": _base,
        "hf": _hf,
        "bnb": _bnb,
        "triton": _triton,
        "faiss": _faiss,
        "benchmark": _benchmark,
        # Runtime superset; "dev" stays separate on purpose.
        "all": _base + _hf + _bnb + _triton + _faiss + _benchmark,
        "dev": _dev,
    },
    keywords=[
        "llm", "kv-cache", "quantization", "compression",
        "transformer", "attention", "cuda", "pytorch",
    ],
)