From f2347e336920edcfd91a263b38ec1285ce801259 Mon Sep 17 00:00:00 2001 From: Brendan Long Date: Sat, 28 Mar 2026 13:26:27 -0700 Subject: [PATCH 1/2] Fix apertus test failing on machines with GPU Tensor equality includes the device, so set device="cpu" so weight tensors always match expected, even if there's a GPU they could be created on. Co-Authored-By: Claude Opus 4.6 (1M context) --- tests/unit/pretrained_weight_conversions/test_apertus.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/unit/pretrained_weight_conversions/test_apertus.py b/tests/unit/pretrained_weight_conversions/test_apertus.py index a9fc7f05f..dc3b678f4 100644 --- a/tests/unit/pretrained_weight_conversions/test_apertus.py +++ b/tests/unit/pretrained_weight_conversions/test_apertus.py @@ -27,6 +27,7 @@ def make_cfg(use_qk_norm=True, n_key_value_heads=4): use_qk_norm=use_qk_norm, n_key_value_heads=n_key_value_heads, dtype=torch.float32, + device="cpu", ) @@ -183,7 +184,7 @@ def test_zero_biases_have_correct_device(self): "blocks.0.mlp.b_out", "unembed.b_U", ]: - assert sd[key].device.type == cfg.device.type, f"{key} on wrong device" + assert sd[key].device.type == cfg.device, f"{key} on wrong device" def test_unembed_shapes(self): cfg = make_cfg() From a476e09fada1bd1ad3a0eac69cf7e12067e73174 Mon Sep 17 00:00:00 2001 From: Brendan Long Date: Sat, 28 Mar 2026 13:37:36 -0700 Subject: [PATCH 2/2] Fix test_cuda using nonexistent mlm_tokens fixture The test_cuda function referenced a fixture named mlm_tokens which was never defined, causing a fixture-not-found error. Changed to use the existing tokens fixture which provides the same MLM-style tokenized input. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- tests/acceptance/test_hooked_encoder.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/acceptance/test_hooked_encoder.py b/tests/acceptance/test_hooked_encoder.py index e62d35574..3afa69561 100644 --- a/tests/acceptance/test_hooked_encoder.py +++ b/tests/acceptance/test_hooked_encoder.py @@ -222,6 +222,6 @@ def test_input_list_of_strings_mlm(our_bert, huggingface_bert, tokenizer): @pytest.mark.skipif(not torch.cuda.is_available(), reason="Requires a CUDA device") -def test_cuda(mlm_tokens): +def test_cuda(tokens): model = HookedEncoder.from_pretrained(MODEL_NAME) - model(mlm_tokens) + model(tokens)