Changes from all commits
130 commits
38bcedc
Changed the directory for hardware components
Jul 10, 2024
4676c68
clear original files
Jul 10, 2024
5854c66
Make clean new directory
Jul 10, 2024
f4254ff
Updated some paths to pass existing tests
Jul 10, 2024
58949d2
fixing doc when gpu is available (troubleshooting deepspeed install),
Aaron-Zhao123 Jul 10, 2024
97dcb37
refactor file structure and let mase runner support group/subgroup…
Aaron-Zhao123 Jul 11, 2024
6aea3fe
refactor file structure and let mase runner support group/subgroup…
Aaron-Zhao123 Jul 11, 2024
4017be3
refactor file structure and let mase runner support group/subgroup…
Aaron-Zhao123 Jul 11, 2024
c1dd5cf
fix deps
Aaron-Zhao123 Jul 11, 2024
46be531
reformatting
Aaron-Zhao123 Jul 11, 2024
9299575
change linear smoke to an easy case, notice when bias=True, test benc…
Aaron-Zhao123 Jul 11, 2024
4b44431
fix verilog gen for sw test
Aaron-Zhao123 Jul 11, 2024
5af716a
formatting
Aaron-Zhao123 Jul 11, 2024
3efea49
fix typo...
Aaron-Zhao123 Jul 11, 2024
0a6e5e3
convolution
firemountain154B Jul 12, 2024
820bfe1
fix memory
Aaron-Zhao123 Jul 12, 2024
6c4e841
resolve conflict in justfile
Aaron-Zhao123 Jul 12, 2024
19ea785
formatting
Aaron-Zhao123 Jul 12, 2024
ea0faa9
Added updated register_slice and syntax-correct testbench
Jul 13, 2024
43e782f
Fixed register slice
Jul 13, 2024
722b732
Fixed linting in common. Also removed mac op
Jul 13, 2024
c8f32c4
Added to justfile
Jul 13, 2024
63ef8f9
reforming all hw test
Aaron-Zhao123 Jul 14, 2024
30fae61
fix just conflict
Aaron-Zhao123 Jul 14, 2024
a5662d6
input buffer commented
Aaron-Zhao123 Jul 14, 2024
a794063
lint float ignored for now
Aaron-Zhao123 Jul 14, 2024
0b8ffc8
finish fixed softmax
firemountain154B Jul 17, 2024
d248bb6
merge
firemountain154B Jul 17, 2024
a59d467
add softmax into attention
firemountain154B Jul 23, 2024
210369a
conv timing
firemountain154B Jul 23, 2024
22c7a3b
finish softmax timing
firemountain154B Jul 23, 2024
954cb24
merge
firemountain154B Jul 24, 2024
064b8fc
Merge branch 'main' into cx/IntViT
firemountain154B Jul 25, 2024
c361c75
add mult func into div
firemountain154B Jul 27, 2024
55a0e7a
debug fixed attention
firemountain154B Jul 27, 2024
e2fc9bf
finish testing
firemountain154B Jul 30, 2024
d2b7a47
a small bug on linear
firemountain154B Jul 30, 2024
aa0bfd3
linear layer data match, 1 integer_floor in software, 2 release out q…
firemountain154B Jul 30, 2024
d396f01
add floor arg in softmax, lut_generation
firemountain154B Jul 30, 2024
17bedf8
add floor arg in preprocess_tensor func
firemountain154B Jul 30, 2024
a9fa443
finish attention head floor
firemountain154B Jul 30, 2024
d00e3a1
finish mha without proj
firemountain154B Jul 31, 2024
76a89e8
finish attention
firemountain154B Aug 1, 2024
cfb0d58
finish layernorm
firemountain154B Aug 2, 2024
b891c38
finish fixed small modules on hardware
firemountain154B Aug 13, 2024
417a8b2
finish verilog generating
firemountain154B Aug 13, 2024
8aea4fc
for testing
firemountain154B Aug 15, 2024
20d123e
fixed emit verilog linear and match the output
firemountain154B Aug 15, 2024
7b4c0f4
add integer floor as an independent quantization
firemountain154B Aug 16, 2024
ecbdc1e
finish gelu
firemountain154B Aug 16, 2024
887ada7
finish MLP
firemountain154B Aug 16, 2024
ebccfc4
add layernorm software
firemountain154B Aug 19, 2024
af18053
add to layernorm to functional
firemountain154B Aug 19, 2024
46797f0
support dim1 in layernorm
firemountain154B Aug 19, 2024
1227f2a
finish auto generating Layernorm
firemountain154B Aug 19, 2024
c36b0b1
finish supporting layer_norm_affine
firemountain154B Aug 21, 2024
895150b
bias to has_bias for autogenerating
firemountain154B Aug 21, 2024
4bf2588
finish affine layernorm
firemountain154B Aug 21, 2024
255134e
finish fork add
firemountain154B Aug 25, 2024
737a883
add graph level quantization for vitattention
firemountain154B Aug 26, 2024
cd8921f
ready to pull
firemountain154B Aug 28, 2024
d2970a6
formatting
firemountain154B Aug 28, 2024
cf00095
Merge branch 'main' into cx/IntViT
firemountain154B Aug 28, 2024
cfaeb4e
small bugs
firemountain154B Aug 28, 2024
8d182b3
..
firemountain154B Aug 28, 2024
c048a74
..
firemountain154B Aug 28, 2024
beb9991
finish linear testing
firemountain154B Aug 31, 2024
20eec14
finish block autogenerating
firemountain154B Sep 1, 2024
a3602eb
small bugs on layernorm
firemountain154B Sep 2, 2024
c0b8227
adding transpose weight func (default) and insert fifo pass for fork pass
firemountain154B Sep 3, 2024
987be8d
finish throughput pass
firemountain154B Sep 3, 2024
65dbc53
timing and reuse
firemountain154B Sep 17, 2024
1d8fa01
finished mxint_matmul
firemountain154B Sep 25, 2024
050a27a
modified justfile
firemountain154B Sep 25, 2024
149a1c7
bugs on MXInt
firemountain154B Oct 1, 2024
a5210a8
finished mxint linear
firemountain154B Oct 2, 2024
03aed22
updated just file
firemountain154B Oct 2, 2024
3acaa7e
Merge branch 'main' into cx/MxInt
firemountain154B Oct 2, 2024
597ee09
repack code
firemountain154B Oct 2, 2024
4da5752
optimize block max
firemountain154B Oct 4, 2024
49f8ba9
..
firemountain154B Oct 4, 2024
b6619c3
finish modification
firemountain154B Oct 6, 2024
8128a27
finish linear timing 200 MHz
firemountain154B Oct 6, 2024
2f1bcc4
Merge remote-tracking branch 'origin/cx/MxInt' into cx/IntViT
firemountain154B Oct 6, 2024
814b5b2
finish ..
firemountain154B Oct 6, 2024
461fd5b
finish vit attention
firemountain154B Oct 8, 2024
a0611cb
finish mxint software quantization
firemountain154B Oct 8, 2024
c5800fe
forget something ..
firemountain154B Oct 8, 2024
c9ceee5
Merge branch 'cx/MxInt' into cx/IntViT
firemountain154B Oct 8, 2024
e599f77
finish accumulator large resource utilization
firemountain154B Oct 20, 2024
9f2a767
new cast
firemountain154B Oct 20, 2024
8531033
matmul bug
firemountain154B Oct 20, 2024
028866a
finish mxint linear
firemountain154B Oct 22, 2024
ea2635a
finish mxint again
firemountain154B Oct 22, 2024
8a39ea1
forget something
firemountain154B Oct 22, 2024
68d137d
optimized mxint Linear
firemountain154B Oct 23, 2024
128a573
merge mxint_linear
firemountain154B Oct 23, 2024
867730f
finish mxint_linear
firemountain154B Oct 24, 2024
e94263a
format again
firemountain154B Oct 24, 2024
97e2b81
mxint linear
firemountain154B Oct 24, 2024
d351017
bug on mxint_cast
firemountain154B Oct 25, 2024
1018b3f
gogogo
firemountain154B Oct 25, 2024
6bd0623
partially update fixed_softmax
firemountain154B Oct 27, 2024
ab4d1f6
fixed_softmax
firemountain154B Oct 27, 2024
4a66bce
gogogo
firemountain154B Oct 27, 2024
5a486e0
format generated lut
firemountain154B Oct 27, 2024
e5bb347
debug gelu
firemountain154B Oct 27, 2024
5129c39
softmax
firemountain154B Oct 27, 2024
a1ddb10
finish hardware round sv
firemountain154B Nov 5, 2024
e89ba67
mxint range reduction
firemountain154B Nov 7, 2024
0692a28
finish mxint softmax
firemountain154B Nov 10, 2024
8104dc6
format error
firemountain154B Nov 10, 2024
0877563
Merge branch 'cx/mxint_softmax' into cx/IntViT
firemountain154B Nov 10, 2024
b31e837
mxint_int_vit_head
firemountain154B Nov 12, 2024
a47b8a4
finish logic test of mxint_vit_head
firemountain154B Nov 12, 2024
7336b21
finish block
firemountain154B Nov 13, 2024
c99ecd6
mxint_gelu
firemountain154B Nov 15, 2024
63fd90b
finish mxint layernorm
firemountain154B Nov 20, 2024
3065094
several update
firemountain154B Dec 5, 2024
3f49e8e
update
firemountain154B Dec 10, 2024
1954bda
move to beholder0
firemountain154B Jan 4, 2025
42522df
file for storage
firemountain154B Feb 23, 2025
32f56d0
..
firemountain154B Feb 23, 2025
a0308fe
gogo
firemountain154B Feb 23, 2025
8385dad
new update
firemountain154B Mar 11, 2025
aa58cf4
clean up something
firemountain154B Mar 11, 2025
933da04
finish clean up
firemountain154B Mar 11, 2025
d417bde
save an important code
firemountain154B Mar 11, 2025
abb003f
disable submodules
firemountain154B Apr 22, 2025
caa30aa
..
firemountain154B Apr 22, 2025
13 changes: 13 additions & 0 deletions Makefile
@@ -65,6 +65,19 @@ build-docker:
docker pull $(img); \
fi

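# python13 variants of the build-docker / shell targets: build-docker-python13
# builds the image from Docker/Dockerfile-$(PLATFORM)-python13; shell-python13
# starts a container with the caller's .gitconfig, .ssh and .mase plus the
# current checkout mounted at /workspace.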
build-docker-python13:
docker build --build-arg VHLS_PATH=$(vhls) --build-arg VHLS_VERSION=$(vhls_version) -f Docker/Dockerfile-$(PLATFORM)-python13 --tag mase-ubuntu2204-docker-python13 Docker; \

shell-python13:
docker run -it --shm-size 256m \
--hostname mase-ubuntu2204-docker-python13 \
-w /workspace \
-v /$(USER_PREFIX)/$(shell whoami)/.gitconfig:/root/.gitconfig \
-v /$(USER_PREFIX)/$(shell whoami)/.ssh:/root/.ssh \
-v /$(USER_PREFIX)/$(shell whoami)/.mase:/root/.mase:z \
-v $(shell pwd):/workspace:z \
$(DOCKER_RUN_EXTRA_ARGS) \
$(img) /bin/bash
shell:
docker run -it --shm-size 256m \
--hostname mase-ubuntu2204 \
86 changes: 86 additions & 0 deletions a_cx_mxint_quant/__init__.py
@@ -0,0 +1,86 @@
from .module_level_tranform import vit_module_level_quantize
from .quantizers import mxint_hardware, mxint_quant_block

from .linear import MXIntLinear
from .attention import MXIntAttention
from .module_level_tranform import MXIntLayerNorm, MXIntGELU
from .modules import MXIntPatchEmbed, MXIntAddition
from mase_components import get_module_dependencies
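# Custom-op table: each entry maps a quantized module class to its argument ->
# config mapping, the emission toolchain ("INTERNAL_RTL"), the name of its
# SystemVerilog top-level module, and the RTL dependence files resolved by
# get_module_dependencies.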
VIT_CUSTOM_OPS = {
"modules": {
MXIntPatchEmbed: {
"args": {
"data_in": "data_in",
"q_config": "config",
},
"toolchain": "INTERNAL_RTL",
"module": "mxint_patch_embed",
"dependence_files": get_module_dependencies(
"linear_layers/mxint_operators/mxint_patch_embed"
),
},
MXIntAttention: {
"args": {
"data_in": "data_in",
"dim": "config",
"num_heads": "config",
"qkv_bias": "config",
"qk_norm": None,
"attn_drop": None,
"proj_drop": None,
"norm_layer": None,
"q_config": "config",
},
"toolchain": "INTERNAL_RTL",
"module": "mxint_vit_attention_wrap",
"dependence_files": get_module_dependencies(
"linear_layers/mxint_operators/mxint_vit_attention_wrap"
),
},
MXIntLayerNorm: {
"args": {
"data_in": "data_in",
"q_config": "config",
},
"toolchain": "INTERNAL_RTL",
"module": "mxint_layernorm",
"dependence_files": get_module_dependencies(
"linear_layers/mxint_operators/mxint_layernorm"
),
},
MXIntGELU: {
"args": {
"data_in": "data_in",
"q_config": "config",
},
"toolchain": "INTERNAL_RTL",
"module": "mxint_gelu",
"dependence_files": get_module_dependencies(
"linear_layers/mxint_operators/mxint_gelu"
),
},
MXIntLinear: {
"args": {
"data_in": "data_in",
"q_config": "config",
},
"toolchain": "INTERNAL_RTL",
"module": "mxint_linear",
"dependence_files": get_module_dependencies(
"linear_layers/mxint_operators/mxint_linear"
),
},
MXIntAddition: {
"args": {
"input_0": "data_in",
"input_1": "data_in",
"q_config": "config",
},
"toolchain": "INTERNAL_RTL",
"module": "mxint_addition",
"dependence_files": get_module_dependencies(
"linear_layers/mxint_operators/mxint_addition"
),
},
},
}
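A minimal sketch (not part of the PR) of inspecting the table above; it assumes only that this package and mase_components are importable and that get_module_dependencies returns a list of RTL file paths:

from a_cx_mxint_quant import VIT_CUSTOM_OPS

# For each quantized module class, show the SystemVerilog top-level it maps to
# and how many dependence files were resolved for it.
for module_cls, meta in VIT_CUSTOM_OPS["modules"].items():
    print(f"{module_cls.__name__:18s} -> {meta['module']} "
          f"({len(meta['dependence_files'])} RTL files)")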
192 changes: 192 additions & 0 deletions a_cx_mxint_quant/attention.py
@@ -0,0 +1,192 @@
from functools import partial

import torch
import torch.nn as nn
from torch import Tensor
from torch.nn import functional as F

from .attention_head import _ViTSelfAttentionHeadBase, ViTSelfAttentionHeadInteger

from chop.nn.quantized.modules.linear import (
LinearInteger,
)
from chop.nn.quantized.functional import fixed_softermax
from chop.nn.quantizers import integer_quantizer
from chop.nn.quantized.functional import matmul_integer

from typing import Optional, Tuple, Union

from .linear import MXIntLinear
from .attention_head import MXIntViTAttentionHead

class _ViTAttentionBase(nn.Module):
def __init__(
self,
dim: int,
num_heads: int = 8,
qkv_bias: bool = False,
qk_norm: bool = False,
attn_drop: float = 0.0,
proj_drop: float = 0.0,
) -> None:
super().__init__()
assert dim % num_heads == 0, "dim should be divisible by num_heads"
self.dim = dim
self.num_heads = num_heads
self.head_dim = dim // num_heads
self.query = nn.Linear(dim, dim, bias=qkv_bias)
self.key = nn.Linear(dim, dim, bias=qkv_bias)
self.value = nn.Linear(dim, dim, bias=qkv_bias)
self.self_attention = _ViTSelfAttentionHeadBase(
dim=self.head_dim, num_heads=num_heads, attn_drop=attn_drop
)
self.proj = nn.Linear(dim, dim)
self.proj_drop = nn.Dropout(proj_drop)

def forward(self, x: torch.Tensor) -> torch.Tensor:
B, N, C = x.shape

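        # _tensor_reshape splits the channel dim into heads:
        # (B, N, C) -> (B, num_heads, N, head_dim), so each head attends independently.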
def _tensor_reshape(x):
return x.reshape(B, -1, self.num_heads, self.head_dim).permute(0, 2, 1, 3)

q, k, v = (
_tensor_reshape(self.query(x)),
_tensor_reshape(self.key(x)),
_tensor_reshape(self.value(x)),
)
x = self.self_attention(q, k, v)
x = x.transpose(1, 2).reshape(B, N, C)

x = self.proj(x)
x = self.proj_drop(x)
return x

class ViTAttentionInteger(_ViTAttentionBase):
def __init__(
self,
dim: int,
num_heads: int = 8,
qkv_bias: bool = False,
qk_norm: bool = False,
attn_drop: float = 0.0,
proj_drop: float = 0.0,
norm_layer: nn.Module = nn.LayerNorm,
q_config: dict = None,
floor=True,
) -> None:
super().__init__(dim, num_heads, qkv_bias, qk_norm, attn_drop, proj_drop)
self.q_config = q_config
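        # query/key/value share one fixed-point config: qkv_weight_*/qkv_bias_* for
        # the parameters and qkv_width/qkv_frac_width for their (identical) outputs,
        # which the integer attention head below consumes as query/key/value widths.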
self.query = LinearInteger(
dim,
dim,
bias=qkv_bias,
config={
"data_in_width": q_config["data_in_width"],
"data_in_frac_width": q_config["data_in_frac_width"],
"weight_width": q_config["qkv_weight_width"],
"weight_frac_width": q_config["qkv_weight_frac_width"],
"bias_width": q_config["qkv_bias_width"],
"bias_frac_width": q_config["qkv_bias_frac_width"],
},
out_config={
"data_out_width": q_config["qkv_width"],
"data_out_frac_width": q_config["qkv_frac_width"],
},
floor=floor,
)
self.key = LinearInteger(
dim,
dim,
bias=qkv_bias,
config={
"data_in_width": q_config["data_in_width"],
"data_in_frac_width": q_config["data_in_frac_width"],
"weight_width": q_config["qkv_weight_width"],
"weight_frac_width": q_config["qkv_weight_frac_width"],
"bias_width": q_config["qkv_bias_width"],
"bias_frac_width": q_config["qkv_bias_frac_width"],
},
out_config={
"data_out_width": q_config["qkv_width"],
"data_out_frac_width": q_config["qkv_frac_width"],
},
floor=floor,
)
self.value = LinearInteger(
dim,
dim,
bias=qkv_bias,
config={
"data_in_width": q_config["data_in_width"],
"data_in_frac_width": q_config["data_in_frac_width"],
"weight_width": q_config["qkv_weight_width"],
"weight_frac_width": q_config["qkv_weight_frac_width"],
"bias_width": q_config["qkv_bias_width"],
"bias_frac_width": q_config["qkv_bias_frac_width"],
},
out_config={
"data_out_width": q_config["qkv_width"],
"data_out_frac_width": q_config["qkv_frac_width"],
},
floor=floor,
)
self.self_attention = ViTSelfAttentionHeadInteger(
dim=self.head_dim,
num_heads=num_heads,
attn_drop=attn_drop,
q_config={
"query_width": q_config["qkv_width"],
"query_frac_width": q_config["qkv_frac_width"],
"key_width": q_config["qkv_width"],
"key_frac_width": q_config["qkv_frac_width"],
"value_width": q_config["qkv_width"],
"value_frac_width": q_config["qkv_frac_width"],
"qkmm_out_width": q_config["qkmm_out_width"],
"qkmm_out_frac_width": q_config["qkmm_out_frac_width"],
"softmax_exp_width": q_config["softmax_exp_width"],
"softmax_exp_frac_width": q_config["softmax_exp_frac_width"],
"softmax_out_frac_width": q_config["softmax_out_frac_width"],
"svmm_out_width": q_config["svmm_out_width"],
"svmm_out_frac_width": q_config["svmm_out_frac_width"],
},
floor=floor,
)
self.proj = LinearInteger(
dim,
dim,
config={
"data_in_width": q_config["svmm_out_width"],
"data_in_frac_width": q_config["svmm_out_frac_width"],
"weight_width": q_config["proj_weight_width"],
"weight_frac_width": q_config["proj_weight_frac_width"],
"bias_width": q_config["proj_bias_width"],
"bias_frac_width": q_config["proj_bias_frac_width"],
},
out_config={
"data_out_width": q_config["data_out_width"],
"data_out_frac_width": q_config["data_out_frac_width"],
},
floor=floor,
)

class MXIntAttention(_ViTAttentionBase):
def __init__(
self,
dim: int,
num_heads: int = 8,
qkv_bias: bool = False,
qk_norm: bool = False,
attn_drop: float = 0.0,
proj_drop: float = 0.0,
q_config: dict = None,
) -> None:
super().__init__(dim, num_heads, qkv_bias, qk_norm, attn_drop, proj_drop)
self.q_config = q_config

# Replace attention with MXIntViTAttentionHead
# self.self_attention = MXIntViTAttentionHead(
# dim=self.head_dim,
# num_heads=num_heads,
# attn_drop=attn_drop,
# q_config=q_config
# )
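A minimal usage sketch (not part of the PR) for the integer-quantized attention defined above. It assumes the chop package providing LinearInteger and ViTSelfAttentionHeadInteger is installed; the widths below are placeholder values, but every q_config key is one the ViTAttentionInteger constructor reads:

import torch
from a_cx_mxint_quant.attention import ViTAttentionInteger

q_config = {
    # input and qkv projection
    "data_in_width": 8, "data_in_frac_width": 4,
    "qkv_weight_width": 8, "qkv_weight_frac_width": 6,
    "qkv_bias_width": 8, "qkv_bias_frac_width": 6,
    "qkv_width": 8, "qkv_frac_width": 4,
    # attention head internals
    "qkmm_out_width": 12, "qkmm_out_frac_width": 6,
    "softmax_exp_width": 8, "softmax_exp_frac_width": 6,
    "softmax_out_frac_width": 7,
    "svmm_out_width": 8, "svmm_out_frac_width": 4,
    # output projection
    "proj_weight_width": 8, "proj_weight_frac_width": 6,
    "proj_bias_width": 8, "proj_bias_frac_width": 6,
    "data_out_width": 8, "data_out_frac_width": 4,
}

attn = ViTAttentionInteger(dim=192, num_heads=3, qkv_bias=True, q_config=q_config)
x = torch.randn(1, 197, 192)   # (batch, tokens, embedding dim), DeiT-Tiny-like shapes
y = attn(x)                    # output keeps the same shape: (1, 197, 192)

Note that the constructor indexes the qkv_bias_* and proj_bias_* keys even when qkv_bias=False, so keeping them in q_config is the safe default.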