Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions transformer_lens/model_bridge/supported_architectures/phi3.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
JointQKVPositionEmbeddingsAttentionBridge,
LinearBridge,
RMSNormalizationBridge,
RotaryEmbeddingBridge,
UnembeddingBridge,
)

Expand Down Expand Up @@ -101,6 +102,7 @@ def __init__(self, cfg: Any) -> None:
# Set up component mapping
self.component_mapping = {
"embed": EmbeddingBridge(name="model.embed_tokens"),
"rotary_emb": RotaryEmbeddingBridge(name="model.rotary_emb"),
"blocks": BlockBridge(
name="model.layers",
submodules={
Expand Down Expand Up @@ -207,6 +209,23 @@ def _split_phi3_qkv(

return q_linear, k_linear, v_linear

def setup_component_testing(self, hf_model: Any, bridge_model: Any = None) -> None:
    """Wire the shared rotary-embedding module into the attention bridges.

    Phi-3 computes rotary position embeddings once in ``model.rotary_emb``;
    component testing needs each attention bridge to hold a reference to
    that shared module.

    Args:
        hf_model: The HuggingFace Phi-3 model instance
        bridge_model: The TransformerBridge model (if available)
    """
    shared_rotary = hf_model.model.rotary_emb

    # Hand the shared module to every block's attention on the bridge
    # model, quietly skipping blocks that expose no ``attn`` attribute.
    if bridge_model is not None and hasattr(bridge_model, "blocks"):
        for layer in bridge_model.blocks:
            if hasattr(layer, "attn"):
                layer.attn.set_rotary_emb(shared_rotary)

    # The architecture's own first-layer attention bridge also needs it.
    first_attn = self.get_generalized_component("blocks.0.attn")
    first_attn.set_rotary_emb(shared_rotary)

def prepare_loading(self, model_name: str, model_kwargs: dict) -> None:
"""Patch cached Phi-3 remote code for transformers v5 compatibility."""
uses_remote_code = model_kwargs.get("trust_remote_code", False)
Expand Down
28 changes: 16 additions & 12 deletions transformer_lens/tools/model_registry/data/supported_models.json
Original file line number Diff line number Diff line change
Expand Up @@ -42,14 +42,15 @@
"architecture_id": "GPT2LMHeadModel",
"model_id": "openai-community/gpt2",
"status": 1,
"verified_date": "2026-03-10",
"verified_date": "2026-04-07",
"metadata": null,
"note": "Full verification completed",
"phase1_score": 100.0,
"phase2_score": 100.0,
"phase3_score": 100.0,
"phase4_score": 88.5,
"phase7_score": null
"phase7_score": null,
"phase8_score": null
},
{
"architecture_id": "Qwen2ForCausalLM",
Expand Down Expand Up @@ -320,14 +321,15 @@
"architecture_id": "GPT2LMHeadModel",
"model_id": "distilbert/distilgpt2",
"status": 1,
"verified_date": "2026-03-10",
"verified_date": "2026-04-07",
"metadata": null,
"note": "Full verification completed",
"phase1_score": 100.0,
"phase2_score": 100.0,
"phase3_score": 100.0,
"phase4_score": 81.0,
"phase7_score": null
"phase7_score": null,
"phase8_score": null
},
{
"architecture_id": "Qwen3ForCausalLM",
Expand Down Expand Up @@ -805,7 +807,7 @@
"architecture_id": "Phi3ForCausalLM",
"model_id": "microsoft/Phi-3-mini-4k-instruct",
"status": 1,
"verified_date": "2026-03-30",
"verified_date": "2026-04-07",
"metadata": null,
"note": "Full verification completed",
"phase1_score": 100.0,
Expand Down Expand Up @@ -2537,13 +2539,13 @@
"architecture_id": "BloomForCausalLM",
"model_id": "bigscience/bloom-560m",
"status": 1,
"verified_date": "2026-04-02",
"verified_date": "2026-04-07",
"metadata": null,
"note": "Full verification completed",
"phase1_score": 100.0,
"phase2_score": 100.0,
"phase3_score": 100.0,
"phase4_score": 89.2,
"phase4_score": 75.9,
"phase7_score": null,
"phase8_score": null
},
Expand Down Expand Up @@ -73181,13 +73183,15 @@
"architecture_id": "GPTNeoXForCausalLM",
"model_id": "EleutherAI/pythia-70m",
"status": 1,
"verified_date": "2026-03-17",
"verified_date": "2026-04-07",
"metadata": null,
"note": "Core verification completed",
"note": "Full verification completed",
"phase1_score": 100.0,
"phase2_score": null,
"phase3_score": null,
"phase4_score": 78.7
"phase2_score": 100.0,
"phase3_score": 100.0,
"phase4_score": 70.5,
"phase7_score": null,
"phase8_score": null
},
{
"architecture_id": "MistralForCausalLM",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"last_updated": "2026-04-02T16:47:39.135017",
"last_updated": "2026-04-07T18:30:31.307768",
"records": [
{
"model_id": "Macropodus/macbert4mdcspell_v1",
Expand Down Expand Up @@ -11100,6 +11100,66 @@
"notes": "Full verification completed with issues: P2=91.7% (failed: generation)",
"invalidated": false,
"invalidation_reason": null
},
{
"model_id": "distilbert/distilgpt2",
"architecture_id": "GPT2LMHeadModel",
"verified_date": "2026-04-07",
"verified_by": "verify_models",
"transformerlens_version": null,
"notes": "Full verification completed",
"invalidated": false,
"invalidation_reason": null
},
{
"model_id": "openai-community/gpt2",
"architecture_id": "GPT2LMHeadModel",
"verified_date": "2026-04-07",
"verified_by": "verify_models",
"transformerlens_version": null,
"notes": "Full verification completed",
"invalidated": false,
"invalidation_reason": null
},
{
"model_id": "bigscience/bloom-560m",
"architecture_id": "BloomForCausalLM",
"verified_date": "2026-04-07",
"verified_by": "verify_models",
"transformerlens_version": null,
"notes": "Full verification completed",
"invalidated": false,
"invalidation_reason": null
},
{
"model_id": "EleutherAI/pythia-70m",
"architecture_id": "GPTNeoXForCausalLM",
"verified_date": "2026-04-07",
"verified_by": "verify_models",
"transformerlens_version": null,
"notes": "Full verification completed",
"invalidated": false,
"invalidation_reason": null
},
{
"model_id": "microsoft/Phi-3-mini-4k-instruct",
"architecture_id": "Phi3ForCausalLM",
"verified_date": "2026-04-07",
"verified_by": "verify_models",
"transformerlens_version": null,
"notes": "Full verification completed",
"invalidated": false,
"invalidation_reason": null
}
]
}
Loading