-
Notifications
You must be signed in to change notification settings - Fork 69
Open
Milestone
Description
Track the issues when adding support for this.
- All experts were concatenated as one
FP8Expert, which caused all weights to materialize immediately at the loading stage. This breaks the previous behavior where weights were kept on disk until quantization. -
FP8Expert failed during its forward pass
File "/home/yiliu7/workspace/auto-round/auto_round/compressors/base.py", line 1668, in _get_block_outputs
tmp_output = self.block_forward(
^^^^^^^^^^^^^^^^^^^
File "/home/yiliu7/workspace/auto-round/auto_round/compressors/utils.py", line 116, in block_forward
output = block(input_ids, *input_tuple, **input_others)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/yiliu7/workspace/transformers/src/transformers/modeling_layers.py", line 94, in __call__
return super().__call__(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/yiliu7/workspace/vllm-dev/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/yiliu7/workspace/vllm-dev/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/yiliu7/workspace/transformers/src/transformers/models/deepseek_v3/modeling_deepseek_v3.py", line 531, in forward
hidden_states = self.mlp(hidden_states)
^^^^^^^^^^^^^^^^^^^^^^^
File "/home/yiliu7/workspace/vllm-dev/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/yiliu7/workspace/vllm-dev/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/yiliu7/workspace/transformers/src/transformers/models/deepseek_v3/modeling_deepseek_v3.py", line 244, in forward
hidden_states = self.experts(hidden_states, topk_indices, topk_weights).view(*orig_shape)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/yiliu7/workspace/vllm-dev/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/yiliu7/workspace/vllm-dev/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/yiliu7/workspace/transformers/src/transformers/integrations/finegrained_fp8.py", line 546, in forward
expert_hit = torch.greater(expert_mask.sum(dim=(-1, -2)), 0).nonzero()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
torch.AcceleratorError: CUDA error: device-side assert triggered