279279 "allenai/OLMoE-1B-7B-0924" ,
280280 "allenai/OLMoE-1B-7B-0924-SFT" ,
281281 "allenai/OLMoE-1B-7B-0924-Instruct" ,
282+ "allenai/OLMo-2-0425-1B" ,
283+ "allenai/OLMo-2-0425-1B-SFT" ,
284+ "allenai/OLMo-2-0425-1B-DPO" ,
285+ "allenai/OLMo-2-0425-1B-Instruct" ,
282286 "allenai/OLMo-2-1124-7B" ,
287+ "allenai/OLMo-2-1124-7B-SFT" ,
288+ "allenai/OLMo-2-1124-7B-DPO" ,
289+ "allenai/OLMo-2-1124-7B-Instruct" ,
283290]
284291"""Official model names for models on HuggingFace."""
285292
@@ -1616,7 +1623,25 @@ def convert_hf_model_config(model_name: str, **kwargs: Any):
16161623 "positional_embedding_type" : "rotary" ,
16171624 "gated_mlp" : True ,
16181625 }
1619- elif official_model_name == "allenai/OLMo-2-1124-7B" :
1626+ elif official_model_name .startswith ("allenai/OLMo-2-0425-1B" ):
1627+ cfg_dict = {
1628+ "d_model" : 2048 ,
1629+ "d_head" : 128 ,
1630+ "n_heads" : 16 ,
1631+ "d_mlp" : 8192 ,
1632+ "n_layers" : 16 ,
1633+ "n_ctx" : 4096 ,
1634+ "eps" : 1e-06 ,
1635+ "d_vocab" : 100352 ,
1636+ "act_fn" : "silu" ,
1637+ "initializer_range" : 0.02 ,
1638+ "normalization_type" : "RMS" ,
1639+ "rotary_base" : 500000.0 ,
1640+ "attn_types" : ["global" ] * 16 ,
1641+ "positional_embedding_type" : "rotary" ,
1642+ "gated_mlp" : True ,
1643+ }
1644+ elif official_model_name .startswith ("allenai/OLMo-2-1124-7B" ):
16201645 cfg_dict = {
16211646 "d_model" : 4096 ,
16221647 "d_head" : 128 ,
@@ -1628,7 +1653,7 @@ def convert_hf_model_config(model_name: str, **kwargs: Any):
16281653 "d_vocab" : 100352 ,
16291654 "act_fn" : "silu" ,
16301655 "initializer_range" : 0.02 ,
1631- "normalization_type" : "RMSPre " ,
1656+ "normalization_type" : "RMS " ,
16321657 "rotary_base" : 500000.0 ,
16331658 "attn_types" : ["global" ] * 32 ,
16341659 "positional_embedding_type" : "rotary" ,
0 commit comments