Skip to content

Commit 9c5a2a8

Browse files
zachgoldfine44 (Zachary Goldfine) and claude
authored
Fix deprecated torch_dtype argument in HuggingFace from_pretrained calls (#1225)
Replace `torch_dtype=dtype` with `dtype=dtype` in all internal calls to HuggingFace's `from_pretrained()` methods in loading_from_pretrained.py. The `torch_dtype` parameter is deprecated in recent versions of the transformers library in favor of `dtype`. The backwards-compatible acceptance of `torch_dtype` from users (lines 2389-2391) is preserved so existing user code continues to work. Fixes #1093 Co-authored-by: Zachary Goldfine <zacharygoldfine@Zacharys-Air.attlocal.net> Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent d1dc12d commit 9c5a2a8

File tree

1 file changed

+8
-8
lines changed

1 file changed

+8
-8
lines changed

transformer_lens/loading_from_pretrained.py

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -2439,15 +2439,15 @@ def get_pretrained_state_dict(
24392439
hf_model = AutoModelForCausalLM.from_pretrained(
24402440
official_model_name,
24412441
revision=f"checkpoint-{cfg.checkpoint_value}",
2442-
torch_dtype=dtype,
2442+
dtype=dtype,
24432443
token=huggingface_token if len(huggingface_token) > 0 else None,
24442444
**kwargs,
24452445
)
24462446
elif official_model_name.startswith("EleutherAI/pythia"):
24472447
hf_model = AutoModelForCausalLM.from_pretrained(
24482448
official_model_name,
24492449
revision=f"step{cfg.checkpoint_value}",
2450-
torch_dtype=dtype,
2450+
dtype=dtype,
24512451
token=huggingface_token,
24522452
**kwargs,
24532453
)
@@ -2460,28 +2460,28 @@ def get_pretrained_state_dict(
24602460
elif "hubert" in official_model_name:
24612461
hf_model = HubertModel.from_pretrained(
24622462
official_model_name,
2463-
torch_dtype=dtype,
2463+
dtype=dtype,
24642464
token=huggingface_token if len(huggingface_token) > 0 else None,
24652465
**kwargs,
24662466
)
24672467
elif "wav2vec2" in official_model_name:
24682468
hf_model = Wav2Vec2Model.from_pretrained(
24692469
official_model_name,
2470-
torch_dtype=dtype,
2470+
dtype=dtype,
24712471
token=huggingface_token if len(huggingface_token) > 0 else None,
24722472
**kwargs,
24732473
)
24742474
elif "bert" in official_model_name:
24752475
hf_model = BertForPreTraining.from_pretrained(
24762476
official_model_name,
2477-
torch_dtype=dtype,
2477+
dtype=dtype,
24782478
token=huggingface_token if len(huggingface_token) > 0 else None,
24792479
**kwargs,
24802480
)
24812481
elif "t5" in official_model_name:
24822482
hf_model = T5ForConditionalGeneration.from_pretrained(
24832483
official_model_name,
2484-
torch_dtype=dtype,
2484+
dtype=dtype,
24852485
token=huggingface_token if len(huggingface_token) > 0 else None,
24862486
**kwargs,
24872487
)
@@ -2491,14 +2491,14 @@ def get_pretrained_state_dict(
24912491

24922492
hf_model = AutoModel.from_pretrained(
24932493
official_model_name,
2494-
torch_dtype=dtype,
2494+
dtype=dtype,
24952495
token=huggingface_token if len(huggingface_token) > 0 else None,
24962496
**kwargs,
24972497
)
24982498
else:
24992499
hf_model = AutoModelForCausalLM.from_pretrained(
25002500
official_model_name,
2501-
torch_dtype=dtype,
2501+
dtype=dtype,
25022502
token=huggingface_token if len(huggingface_token) > 0 else None,
25032503
**kwargs,
25042504
)

0 commit comments

Comments (0)