Skip to content

Commit 9febc5c

Browse files
authored
Merge pull request #3 from jleechung/OLMo
Fix to OLMo 2 normalization
2 parents 9032fe7 + 688a421 commit 9febc5c

1 file changed

Lines changed: 27 additions & 2 deletions

File tree

transformer_lens/loading_from_pretrained.py

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -279,7 +279,14 @@
     "allenai/OLMoE-1B-7B-0924",
     "allenai/OLMoE-1B-7B-0924-SFT",
     "allenai/OLMoE-1B-7B-0924-Instruct",
+    "allenai/OLMo-2-0425-1B",
+    "allenai/OLMo-2-0425-1B-SFT",
+    "allenai/OLMo-2-0425-1B-DPO",
+    "allenai/OLMo-2-0425-1B-Instruct",
     "allenai/OLMo-2-1124-7B",
+    "allenai/OLMo-2-1124-7B-SFT",
+    "allenai/OLMo-2-1124-7B-DPO",
+    "allenai/OLMo-2-1124-7B-Instruct",
 ]
 """Official model names for models on HuggingFace."""
285292

@@ -1616,7 +1623,25 @@ def convert_hf_model_config(model_name: str, **kwargs: Any):
             "positional_embedding_type": "rotary",
             "gated_mlp": True,
         }
-    elif official_model_name == "allenai/OLMo-2-1124-7B":
+    elif official_model_name.startswith("allenai/OLMo-2-0425-1B"):
+        cfg_dict = {
+            "d_model": 2048,
+            "d_head": 128,
+            "n_heads": 16,
+            "d_mlp": 8192,
+            "n_layers": 16,
+            "n_ctx": 4096,
+            "eps": 1e-06,
+            "d_vocab": 100352,
+            "act_fn": "silu",
+            "initializer_range": 0.02,
+            "normalization_type": "RMS",
+            "rotary_base": 500000.0,
+            "attn_types": ["global"] * 16,
+            "positional_embedding_type": "rotary",
+            "gated_mlp": True,
+        }
+    elif official_model_name.startswith("allenai/OLMo-2-1124-7B"):
         cfg_dict = {
             "d_model": 4096,
             "d_head": 128,
@@ -1628,7 +1653,7 @@ def convert_hf_model_config(model_name: str, **kwargs: Any):
             "d_vocab": 100352,
             "act_fn": "silu",
             "initializer_range": 0.02,
-            "normalization_type": "RMSPre",
+            "normalization_type": "RMS",
             "rotary_base": 500000.0,
             "attn_types": ["global"] * 32,
             "positional_embedding_type": "rotary",

0 commit comments

Comments
 (0)