21 changes: 21 additions & 0 deletions configs/hf_model_configs/configs/roberta_base.json
@@ -0,0 +1,21 @@
{
"architectures": [
"RobertaForMaskedLM"
],
"attention_probs_dropout_prob": 0.1,
"bos_token_id": 0,
"eos_token_id": 2,
"hidden_act": "gelu",
"hidden_dropout_prob": 0.1,
"hidden_size": 768,
"initializer_range": 0.02,
"intermediate_size": 3072,
"layer_norm_eps": 1e-05,
"max_position_embeddings": 514,
"model_type": "roberta",
"num_attention_heads": 12,
"num_hidden_layers": 12,
"pad_token_id": 1,
"type_vocab_size": 1,
"vocab_size": 50265
}
21 changes: 21 additions & 0 deletions configs/hf_model_configs/configs/roberta_large.json
@@ -0,0 +1,21 @@
{
"architectures": [
"RobertaForMaskedLM"
],
"attention_probs_dropout_prob": 0.1,
"bos_token_id": 0,
"eos_token_id": 2,
"hidden_act": "gelu",
"hidden_dropout_prob": 0.1,
"hidden_size": 1024,
"initializer_range": 0.02,
"intermediate_size": 4096,
"layer_norm_eps": 1e-05,
"max_position_embeddings": 514,
"model_type": "roberta",
"num_attention_heads": 16,
"num_hidden_layers": 24,
"pad_token_id": 1,
"type_vocab_size": 1,
"vocab_size": 50265
}
21 changes: 21 additions & 0 deletions configs/hf_model_configs/configs/roberta_medium.json
@@ -0,0 +1,21 @@
{
"architectures": [
"RobertaForMaskedLM"
],
"attention_probs_dropout_prob": 0.1,
"bos_token_id": 0,
"eos_token_id": 2,
"hidden_act": "gelu",
"hidden_dropout_prob": 0.1,
"hidden_size": 512,
"initializer_range": 0.02,
"intermediate_size": 2048,
"layer_norm_eps": 1e-05,
"max_position_embeddings": 514,
"model_type": "roberta",
"num_attention_heads": 8,
"num_hidden_layers": 8,
"pad_token_id": 1,
"type_vocab_size": 1,
"vocab_size": 50265
}
21 changes: 21 additions & 0 deletions configs/hf_model_configs/configs/roberta_mini.json
@@ -0,0 +1,21 @@
{
"architectures": [
"RobertaForMaskedLM"
],
"attention_probs_dropout_prob": 0.1,
"bos_token_id": 0,
"eos_token_id": 2,
"hidden_act": "gelu",
"hidden_dropout_prob": 0.1,
"hidden_size": 256,
"initializer_range": 0.02,
"intermediate_size": 1024,
"layer_norm_eps": 1e-05,
"max_position_embeddings": 514,
"model_type": "roberta",
"num_attention_heads": 4,
"num_hidden_layers": 4,
"pad_token_id": 1,
"type_vocab_size": 1,
"vocab_size": 50265
}
21 changes: 21 additions & 0 deletions configs/hf_model_configs/configs/roberta_small.json
@@ -0,0 +1,21 @@
{
"architectures": [
"RobertaForMaskedLM"
],
"attention_probs_dropout_prob": 0.1,
"bos_token_id": 0,
"eos_token_id": 2,
"hidden_act": "gelu",
"hidden_dropout_prob": 0.1,
"hidden_size": 512,
"initializer_range": 0.02,
"intermediate_size": 2048,
"layer_norm_eps": 1e-05,
"max_position_embeddings": 514,
"model_type": "roberta",
"num_attention_heads": 8,
"num_hidden_layers": 4,
"pad_token_id": 1,
"type_vocab_size": 1,
"vocab_size": 50265
}
21 changes: 21 additions & 0 deletions configs/hf_model_configs/configs/roberta_tiny.json
@@ -0,0 +1,21 @@
{
"architectures": [
"RobertaForMaskedLM"
],
"attention_probs_dropout_prob": 0.1,
"bos_token_id": 0,
"eos_token_id": 2,
"hidden_act": "gelu",
"hidden_dropout_prob": 0.1,
"hidden_size": 128,
"initializer_range": 0.02,
"intermediate_size": 512,
"layer_norm_eps": 1e-05,
"max_position_embeddings": 514,
"model_type": "roberta",
"num_attention_heads": 2,
"num_hidden_layers": 2,
"pad_token_id": 1,
"type_vocab_size": 1,
"vocab_size": 50265
}
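
The six configs above differ only in scale (hidden size, depth, attention heads, intermediate size); all tokenizer-related fields are identical. Assuming they are meant to be loaded through Hugging Face transformers, a minimal sketch looks like this; the path and the parameter-count check are purely illustrative:

```python
# Sketch (assumption: the configs are consumed via Hugging Face transformers).
from transformers import RobertaConfig, RobertaForMaskedLM

config = RobertaConfig.from_json_file(
    "configs/hf_model_configs/configs/roberta_tiny.json"
)
model = RobertaForMaskedLM(config)  # randomly initialized; no pretrained weights
print(sum(p.numel() for p in model.parameters()))  # rough size of the tiny variant
```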
116 changes: 116 additions & 0 deletions src/chop/nn/quantizers/SNN/LSQ.py
@@ -16,6 +16,9 @@ def floor_pass(x):
return (y - y_grad).detach() + y_grad


# ========================================================================================================
# SNN quantization from SpikeZIP-TF
# ========================================================================================================
class LSQInteger(nn.Module):
"""
LSQInteger is a PyTorch module for Learned Step Size Quantization (LSQ) with integer levels.
@@ -140,3 +143,116 @@ def forward(self, x):
self.global_step = 0.0

return output


# ========================================================================================================
# SNN quantization from SpikeLM
# ========================================================================================================
class AlphaInit(nn.Parameter):
"""Learnable quantization step size; tracks whether it has been initialized from data."""

def __init__(self, tensor, requires_grad=True):
super(AlphaInit, self).__new__(
nn.Parameter, data=tensor, requires_grad=requires_grad
)
self.initialized = False

def _initialize(self, init_tensor):
assert not self.initialized, "already initialized."
self.data.copy_(init_tensor)
self.initialized = True

def initialize_wrapper(self, tensor, num_bits, symmetric, init_method="default"):
Qp = 2 ** (num_bits - 1) - 1 if symmetric else 2 ** (num_bits) - 1
if Qp == 0:
Qp = 1.0
if init_method == "default":
init_val = (
2 * tensor.abs().mean() / math.sqrt(Qp)
if symmetric
else 4 * tensor.abs().mean() / math.sqrt(Qp)
)
elif init_method == "uniform":
init_val = 1.0 / (2 * Qp + 1) if symmetric else 1.0 / Qp

self._initialize(init_val)


class ElasticBiSpiking(torch.autograd.Function):
"""
Modified from Learned Step-size Quantization.
https://arxiv.org/abs/1902.08153
"""

@staticmethod
def forward(ctx, input, alpha, num_bits, layerwise):
"""
:param input: input to be quantized
:param alpha: the step size
:param num_bits: quantization bits
:param layerwise: if True, quantize layer-wise (per tensor); row-wise quantization is not implemented
:return: quantized output
"""
if not layerwise:
# TODO: row-wise (non-layerwise) quantization is not supported yet
raise NotImplementedError
ctx.num_bits = num_bits
if num_bits == 32:
return input
elif num_bits == 1 or num_bits == 2:
Qn = -1
Qp = 1

eps = torch.tensor(0.00001).float().to(alpha.device)
if alpha.item() == 1.0 and (not alpha.initialized):
alpha.initialize_wrapper(
input, num_bits, symmetric=True, init_method="default"
)
alpha = torch.where(alpha > eps, alpha, eps)
assert alpha > 0, "alpha = {:.6f} becomes non-positive".format(alpha)

grad_scale = (
1.0 / math.sqrt(input.numel())
if not Qp
else 1.0 / math.sqrt(input.numel() * Qp)
)
ctx.save_for_backward(input, alpha)
ctx.other = grad_scale, Qn, Qp
if num_bits == 1:
q_w = input.sign()  # 1-bit: binary quantization
else:
q_w = (input / alpha).round().clamp(Qn, Qp)  # 2-bit: ternary quantization
w_q = q_w * alpha
return w_q

@staticmethod
def backward(ctx, grad_output):
if ctx.num_bits == 32:
return grad_output, None, None, None

input_, alpha = ctx.saved_tensors
grad_scale, Qn, Qp = ctx.other
q_w = input_ / alpha
indicate_small = (q_w < Qn).float()
indicate_big = (q_w > Qp).float()
indicate_middle = (
1.0 - indicate_small - indicate_big
) # this is more cpu-friendly than torch.ones(input_.shape)
if ctx.num_bits == 1:
grad_alpha = (
((input_.sign()) * grad_output * grad_scale).sum().unsqueeze(dim=0)
)
else:
grad_alpha = (
(
(
indicate_small * Qn
+ indicate_big * Qp
+ indicate_middle * (-q_w + q_w.round())
)
* grad_output
* grad_scale
)
.sum()
.unsqueeze(dim=0)
)
grad_input = indicate_middle * grad_output
return grad_input, grad_alpha, None, None
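
Based only on the code above, a minimal sketch of using ElasticBiSpiking with an AlphaInit step size: with num_bits=1 the forward pass returns sign(x) * alpha, alpha is lazily initialized from the first input it sees, and backward routes straight-through-style gradients to both the input and the step size. The shapes and values here are illustrative:

```python
import torch
from chop.nn.quantizers.SNN.LSQ import AlphaInit, ElasticBiSpiking

x = torch.randn(8, 16, requires_grad=True)     # pre-activation / membrane potential
alpha = AlphaInit(torch.tensor(1.0))           # step size, initialized on the first call
y = ElasticBiSpiking.apply(x, alpha, 1, True)  # 1-bit: sign(x) * alpha, layerwise=True

y.sum().backward()                 # straight-through-style gradients
print(y.unique())                  # values are {-alpha, +alpha}
print(alpha.item(), x.grad.shape)  # alpha was re-initialized from the mean of |x|
```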
5 changes: 4 additions & 1 deletion src/chop/nn/snn/modules/__init__.py
@@ -12,7 +12,7 @@

from .conv3d import Conv3d

from .linear import Linear, LinearUnfoldBias
from .linear import Linear, LinearUnfoldBias, LinearElasticBiSpiking

from .pool1d import MaxPool1d, AvgPool1d, AdaptiveAvgPool1d

@@ -62,6 +62,7 @@
from .embedding import EmbeddingZIPTF
from .roberta import (
RobertaSelfAttentionZIPTF,
RobertaSelfAttentionSpikeLM,
)

spiking_basic_module_map = {
@@ -70,6 +71,7 @@
"conv3d": Conv3d,
"linear": Linear,
"linear_unfold_bias": LinearUnfoldBias,
"linear_elastic_bi_spiking": LinearElasticBiSpiking,
"max_pool1d": MaxPool1d,
"avg_pool1d": AvgPool1d,
"adaptive_avg_pool1d": AdaptiveAvgPool1d,
@@ -105,6 +107,7 @@

spiking_roberta_module_map = {
"roberta_self_attention_zip_tf": RobertaSelfAttentionZIPTF,
"roberta_self_attention_spikeLM": RobertaSelfAttentionSpikeLM,
}

spiking_module_map = {
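
The string keys in these maps presumably let a conversion pass pick the spiking replacement for a given layer type; the merged spiking_module_map is truncated in this diff. A hypothetical lookup using only names visible above (the config keys are an assumption, see the linear.py changes below):

```python
# Sketch (assumption: the maps are importable from the package __init__ shown above,
# and LinearElasticBiSpiking only needs "T" and "input_bits" in its config).
from chop.nn.snn.modules import spiking_basic_module_map

cls = spiking_basic_module_map["linear_elastic_bi_spiking"]
layer = cls(in_features=128, out_features=64, config={"T": 4, "input_bits": 1})
```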
65 changes: 65 additions & 0 deletions src/chop/nn/snn/modules/linear.py
@@ -2,6 +2,8 @@
import chop.nn.snn.base as base
import torch

from chop.nn.quantizers.SNN.LSQ import AlphaInit, ElasticBiSpiking


class Linear(nn.Linear, base.StepModule):
def __init__(
@@ -105,3 +107,66 @@ def forward(self, input):
self.first = False

return output


class LinearElasticBiSpiking(nn.Linear):
def __init__(
self,
in_features: int,
out_features: int,
bias: bool = True,
device=None,
dtype=None,
symmetric=True,
config=None,
) -> None:
super().__init__(
in_features,
out_features,
bias,
device,
dtype,
)
# NOTE: dead code from the original implementation (kept for future reference)
# self.weight_bits = config["weight_bits"]
# self.quantize_act = config["quantize_act"]
# self.register_buffer('weight_clip_val', torch.tensor([config["clip_val"]]))

self.T = config["T"]
self.input_bits = config["input_bits"]  # activation bit-width used by the spike quantizer in forward()
self.act_clip_val = nn.ParameterList(
[AlphaInit(torch.tensor(1.0), requires_grad=False) for i in range(self.T)]
)
self.act_quantizer = ElasticBiSpiking

def forward(self, input):
# quantize weight
assert len(self.weight.size()) == 2

weight = self.weight
mem = torch.zeros_like(input[0])  # membrane potential, allocated on the input's device
output = torch.zeros_like(input)  # spike outputs for all T time steps
mem_old = 0
for i in range(self.T):
if i == 0:
mem = input[0]
else:
# v = beta * mem_old * (alpha - spike) + v_reset (which is 0) + input, with beta = 0.25
mem = (
mem_old
* 0.25
* (self.act_clip_val[i - 1].detach() - output[i - 1].detach())
+ input[i]
)

# spike
output[i] = self.act_quantizer.apply(
mem, self.act_clip_val[i], self.input_bits, True
)
mem_old = mem.clone()

out = nn.functional.linear(output, weight)
if self.bias is not None:
out += self.bias.view(1, -1).expand_as(out)

return out
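
Inferred from the loop over self.T, the leading dimension of the input is the time axis: each step's membrane potential is quantized to spikes by ElasticBiSpiking and the resulting spike train goes through one shared linear projection. The shapes and config values below are assumptions for illustration, not part of the diff:

```python
import torch
from chop.nn.snn.modules.linear import LinearElasticBiSpiking

# Assumed config: "T" time steps and a 1-bit (binary) activation quantizer.
layer = LinearElasticBiSpiking(
    in_features=128, out_features=64, config={"T": 4, "input_bits": 1}
)

x = torch.randn(4, 32, 128)  # [T, batch, in_features]; dim 0 must match config["T"]
out = layer(x)               # spikes are quantized per time step, then projected
print(out.shape)             # torch.Size([4, 32, 64])
```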
2 changes: 1 addition & 1 deletion src/chop/nn/snn/modules/roberta/__init__.py
@@ -1 +1 @@
from .attention import RobertaSelfAttentionZIPTF
from .attention import RobertaSelfAttentionZIPTF, RobertaSelfAttentionSpikeLM