From f385330550a0e2424a9af301d489cc449eb4c458 Mon Sep 17 00:00:00 2001 From: michaelv2 <1800075+michaelv2@users.noreply.github.com> Date: Wed, 8 Apr 2026 09:10:48 -0400 Subject: [PATCH] Add weights_only=True to all torch.load() calls PyTorch's pickle-based deserialization can execute arbitrary code when loading a crafted .pt file. Adding weights_only=True restricts deserialization to tensor data only, preventing this class of attack. This has been the recommended practice since the option was introduced. Note that CVE-2025-32434 demonstrates weights_only=True alone is insufficient on PyTorch < 2.6; users should also upgrade to PyTorch >= 2.6.0 for full protection. Affected call sites: - dictionary.py: AutoEncoder, GatedAutoEncoder, JumpReluAutoEncoder, AutoEncoderNew from_pretrained() - trainers/top_k.py: AutoEncoderTopK from_pretrained() - trainers/batch_top_k.py: BatchTopKSAE from_pretrained() - trainers/matryoshka_batch_top_k.py: MatryoshkaBatchTopKSAE from_pretrained() - activault_s3_buffer.py: compile() --- dictionary_learning/activault_s3_buffer.py | 2 +- dictionary_learning/dictionary.py | 8 ++++---- dictionary_learning/trainers/batch_top_k.py | 2 +- dictionary_learning/trainers/matryoshka_batch_top_k.py | 2 +- dictionary_learning/trainers/top_k.py | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/dictionary_learning/activault_s3_buffer.py b/dictionary_learning/activault_s3_buffer.py index 1b94a7e..0722b3b 100644 --- a/dictionary_learning/activault_s3_buffer.py +++ b/dictionary_learning/activault_s3_buffer.py @@ -118,7 +118,7 @@ def compile(byte_buffers, shuffle=True, seed=None, return_ids=False): # t = torch.from_numpy(n) # t = torch.frombuffer(combined_bytes, dtype=dtype) # torch.float32 buffer = io.BytesIO(combined_bytes) - t = torch.load(buffer) + t = torch.load(buffer, weights_only=True) if ( isinstance(t, dict) and "states" in t and not return_ids ): # backward compatibility diff --git a/dictionary_learning/dictionary.py b/dictionary_learning/dictionary.py index 238a866..5076cdf 100644 --- a/dictionary_learning/dictionary.py +++
b/dictionary_learning/dictionary.py @@ -129,7 +129,7 @@ def from_pretrained(cls, path, dtype=t.float, device=None, normalize_decoder=Tru """ Load a pretrained autoencoder from a file. """ - state_dict = t.load(path) + state_dict = t.load(path, weights_only=True) dict_size, activation_dim = state_dict["encoder.weight"].shape autoencoder = cls(activation_dim, dict_size) autoencoder.load_state_dict(state_dict) @@ -279,7 +279,7 @@ def from_pretrained(path, device=None): """ Load a pretrained autoencoder from a file. """ - state_dict = t.load(path) + state_dict = t.load(path, weights_only=True) dict_size, activation_dim = state_dict["encoder.weight"].shape autoencoder = GatedAutoEncoder(activation_dim, dict_size) autoencoder.load_state_dict(state_dict) @@ -358,7 +358,7 @@ def from_pretrained( loading function. """ if not load_from_sae_lens: - state_dict = t.load(path) + state_dict = t.load(path, weights_only=True) activation_dim, dict_size = state_dict["W_enc"].shape autoencoder = JumpReluAutoEncoder(activation_dim, dict_size) autoencoder.load_state_dict(state_dict) @@ -429,7 +429,7 @@ def from_pretrained(path, device=None): """ Load a pretrained autoencoder from a file. 
""" - state_dict = t.load(path) + state_dict = t.load(path, weights_only=True) dict_size, activation_dim = state_dict["encoder.weight"].shape autoencoder = AutoEncoderNew(activation_dim, dict_size) autoencoder.load_state_dict(state_dict) diff --git a/dictionary_learning/trainers/batch_top_k.py b/dictionary_learning/trainers/batch_top_k.py index 8cb2ecf..59c1675 100644 --- a/dictionary_learning/trainers/batch_top_k.py +++ b/dictionary_learning/trainers/batch_top_k.py @@ -79,7 +79,7 @@ def scale_biases(self, scale: float): @classmethod def from_pretrained(cls, path, k=None, device=None, **kwargs) -> "BatchTopKSAE": - state_dict = t.load(path) + state_dict = t.load(path, weights_only=True) dict_size, activation_dim = state_dict["encoder.weight"].shape if k is None: k = state_dict["k"].item() diff --git a/dictionary_learning/trainers/matryoshka_batch_top_k.py b/dictionary_learning/trainers/matryoshka_batch_top_k.py index 03c195b..e81d1b6 100644 --- a/dictionary_learning/trainers/matryoshka_batch_top_k.py +++ b/dictionary_learning/trainers/matryoshka_batch_top_k.py @@ -121,7 +121,7 @@ def scale_biases(self, scale: float): def from_pretrained( cls, path, k=None, device=None, **kwargs ) -> "MatryoshkaBatchTopKSAE": - state_dict = t.load(path) + state_dict = t.load(path, weights_only=True) activation_dim, dict_size = state_dict["W_enc"].shape if k is None: k = state_dict["k"].item() diff --git a/dictionary_learning/trainers/top_k.py b/dictionary_learning/trainers/top_k.py index e81259f..712d91d 100644 --- a/dictionary_learning/trainers/top_k.py +++ b/dictionary_learning/trainers/top_k.py @@ -137,7 +137,7 @@ def from_pretrained(path, k: Optional[int] = None, device=None): """ Load a pretrained autoencoder from a file. """ - state_dict = t.load(path) + state_dict = t.load(path, weights_only=True) dict_size, activation_dim = state_dict["encoder.weight"].shape if k is None: