diff --git a/models/tabformer_bert.py b/models/tabformer_bert.py
index bc7c0ca..def5573 100644
--- a/models/tabformer_bert.py
+++ b/models/tabformer_bert.py
@@ -2,9 +2,9 @@ from torch import nn
 from torch.nn import CrossEntropyLoss
 
-from transformers.modeling_bert import ACT2FN, BertLayerNorm
-from transformers.modeling_bert import BertForMaskedLM
-from transformers.configuration_bert import BertConfig
+from transformers.models.bert.modeling_bert import ACT2FN
+from transformers.models.bert.modeling_bert import BertForMaskedLM
+from transformers.models.bert.configuration_bert import BertConfig
 
 from models.custom_criterion import CustomAdaptiveLogSoftmax
 
 
@@ -37,7 +37,7 @@ def __init__(self, config):
             self.transform_act_fn = ACT2FN[config.hidden_act]
         else:
             self.transform_act_fn = config.hidden_act
-        self.LayerNorm = BertLayerNorm(config.hidden_size, eps=config.layer_norm_eps)
+        self.LayerNorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
 
     def forward(self, hidden_states):
         hidden_states = self.dense(hidden_states)
@@ -190,4 +190,4 @@ def forward(
 
         sequence_output = outputs[0]  # [bsz * seqlen * hidden]
 
-        return sequence_output
\ No newline at end of file
+        return sequence_output
diff --git a/models/tabformer_gpt2.py b/models/tabformer_gpt2.py
index ef814cb..66665bc 100644
--- a/models/tabformer_gpt2.py
+++ b/models/tabformer_gpt2.py
@@ -1,6 +1,6 @@
 from torch.nn import CrossEntropyLoss
 
-from transformers.modeling_gpt2 import GPT2LMHeadModel
+from transformers.models.gpt2.modeling_gpt2 import GPT2LMHeadModel
 
 
 class TabFormerGPT2LMHeadModel(GPT2LMHeadModel):
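
Context for the patch above: transformers 4.x moved the flat transformers.modeling_bert / transformers.modeling_gpt2 modules into per-model subpackages and dropped the BertLayerNorm alias in favour of torch.nn.LayerNorm, so the patch simply targets the new layout. A minimal sketch (not part of the patch) of a try/except shim that keeps these imports working under either layout:

    # Prefer the transformers >= 4.x per-model layout; fall back to the pre-4.x flat layout.
    try:
        from transformers.models.bert.modeling_bert import ACT2FN, BertForMaskedLM
        from transformers.models.bert.configuration_bert import BertConfig
        from transformers.models.gpt2.modeling_gpt2 import GPT2LMHeadModel
        from torch.nn import LayerNorm as BertLayerNorm  # alias removed in 4.x
    except ImportError:
        from transformers.modeling_bert import ACT2FN, BertForMaskedLM, BertLayerNorm
        from transformers.configuration_bert import BertConfig
        from transformers.modeling_gpt2 import GPT2LMHeadModel

The patch instead pins to the new paths directly, which is simpler when only transformers >= 4.x needs to be supported.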