diff --git a/transformer_lens/model_bridge/supported_architectures/phi3.py b/transformer_lens/model_bridge/supported_architectures/phi3.py index f47f7c65f..f365a2bfb 100644 --- a/transformer_lens/model_bridge/supported_architectures/phi3.py +++ b/transformer_lens/model_bridge/supported_architectures/phi3.py @@ -22,6 +22,7 @@ JointQKVPositionEmbeddingsAttentionBridge, LinearBridge, RMSNormalizationBridge, + RotaryEmbeddingBridge, UnembeddingBridge, ) @@ -101,6 +102,7 @@ def __init__(self, cfg: Any) -> None: # Set up component mapping self.component_mapping = { "embed": EmbeddingBridge(name="model.embed_tokens"), + "rotary_emb": RotaryEmbeddingBridge(name="model.rotary_emb"), "blocks": BlockBridge( name="model.layers", submodules={ @@ -207,6 +209,23 @@ def _split_phi3_qkv( return q_linear, k_linear, v_linear + def setup_component_testing(self, hf_model: Any, bridge_model: Any = None) -> None: + """Set up rotary embedding references for Phi-3 component testing. + + Args: + hf_model: The HuggingFace Phi-3 model instance + bridge_model: The TransformerBridge model (if available) + """ + rotary_emb = hf_model.model.rotary_emb + + if bridge_model is not None and hasattr(bridge_model, "blocks"): + for block in bridge_model.blocks: + if hasattr(block, "attn"): + block.attn.set_rotary_emb(rotary_emb) + + attn_bridge = self.get_generalized_component("blocks.0.attn") + attn_bridge.set_rotary_emb(rotary_emb) + def prepare_loading(self, model_name: str, model_kwargs: dict) -> None: """Patch cached Phi-3 remote code for transformers v5 compatibility.""" uses_remote_code = model_kwargs.get("trust_remote_code", False) diff --git a/transformer_lens/tools/model_registry/data/supported_models.json b/transformer_lens/tools/model_registry/data/supported_models.json index 5dde66ed6..2f67419db 100644 --- a/transformer_lens/tools/model_registry/data/supported_models.json +++ b/transformer_lens/tools/model_registry/data/supported_models.json @@ -42,14 +42,15 @@ "architecture_id": "GPT2LMHeadModel", "model_id": "openai-community/gpt2", "status": 1, - "verified_date": "2026-03-10", + "verified_date": "2026-04-07", "metadata": null, "note": "Full verification completed", "phase1_score": 100.0, "phase2_score": 100.0, "phase3_score": 100.0, "phase4_score": 88.5, - "phase7_score": null + "phase7_score": null, + "phase8_score": null }, { "architecture_id": "Qwen2ForCausalLM", @@ -320,14 +321,15 @@ "architecture_id": "GPT2LMHeadModel", "model_id": "distilbert/distilgpt2", "status": 1, - "verified_date": "2026-03-10", + "verified_date": "2026-04-07", "metadata": null, "note": "Full verification completed", "phase1_score": 100.0, "phase2_score": 100.0, "phase3_score": 100.0, "phase4_score": 81.0, - "phase7_score": null + "phase7_score": null, + "phase8_score": null }, { "architecture_id": "Qwen3ForCausalLM", @@ -805,7 +807,7 @@ "architecture_id": "Phi3ForCausalLM", "model_id": "microsoft/Phi-3-mini-4k-instruct", "status": 1, - "verified_date": "2026-03-30", + "verified_date": "2026-04-07", "metadata": null, "note": "Full verification completed", "phase1_score": 100.0, @@ -2537,13 +2539,13 @@ "architecture_id": "BloomForCausalLM", "model_id": "bigscience/bloom-560m", "status": 1, - "verified_date": "2026-04-02", + "verified_date": "2026-04-07", "metadata": null, "note": "Full verification completed", "phase1_score": 100.0, "phase2_score": 100.0, "phase3_score": 100.0, - "phase4_score": 89.2, + "phase4_score": 75.9, "phase7_score": null, "phase8_score": null }, @@ -73181,13 +73183,15 @@ "architecture_id": "GPTNeoXForCausalLM", "model_id": "EleutherAI/pythia-70m", "status": 1, - "verified_date": "2026-03-17", + "verified_date": "2026-04-07", "metadata": null, - "note": "Core verification completed", + "note": "Full verification completed", "phase1_score": 100.0, - "phase2_score": null, - "phase3_score": null, - "phase4_score": 78.7 + "phase2_score": 100.0, + "phase3_score": 100.0, + "phase4_score": 70.5, + "phase7_score": null, + "phase8_score": null }, { "architecture_id": "MistralForCausalLM", diff --git a/transformer_lens/tools/model_registry/data/verification_history.json b/transformer_lens/tools/model_registry/data/verification_history.json index 11672aee9..08d286a61 100644 --- a/transformer_lens/tools/model_registry/data/verification_history.json +++ b/transformer_lens/tools/model_registry/data/verification_history.json @@ -1,5 +1,5 @@ { - "last_updated": "2026-04-02T16:47:39.135017", + "last_updated": "2026-04-07T18:30:31.307768", "records": [ { "model_id": "Macropodus/macbert4mdcspell_v1", @@ -11100,6 +11100,66 @@ "notes": "Full verification completed with issues: P2=91.7% (failed: generation)", "invalidated": false, "invalidation_reason": null + }, + { + "model_id": "distilbert/distilgpt2", + "architecture_id": "GPT2LMHeadModel", + "verified_date": "2026-04-07", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "distilbert/distilgpt2", + "architecture_id": "GPT2LMHeadModel", + "verified_date": "2026-04-07", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "openai-community/gpt2", + "architecture_id": "GPT2LMHeadModel", + "verified_date": "2026-04-07", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "bigscience/bloom-560m", + "architecture_id": "BloomForCausalLM", + "verified_date": "2026-04-07", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "EleutherAI/pythia-70m", + "architecture_id": "GPTNeoXForCausalLM", + "verified_date": "2026-04-07", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "microsoft/Phi-3-mini-4k-instruct", + "architecture_id": "Phi3ForCausalLM", + "verified_date": "2026-04-07", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null } ] }