From f2347e336920edcfd91a263b38ec1285ce801259 Mon Sep 17 00:00:00 2001 From: Brendan Long Date: Sat, 28 Mar 2026 13:26:27 -0700 Subject: [PATCH 1/2] Fix apertus test failing on machines with GPU Tensor equality includes the device, so set device="cpu" so weight tensors always match expected, even if there's a GPU they could be created on. Co-Authored-By: Claude Opus 4.6 (1M context) --- tests/unit/pretrained_weight_conversions/test_apertus.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/unit/pretrained_weight_conversions/test_apertus.py b/tests/unit/pretrained_weight_conversions/test_apertus.py index a9fc7f05f..dc3b678f4 100644 --- a/tests/unit/pretrained_weight_conversions/test_apertus.py +++ b/tests/unit/pretrained_weight_conversions/test_apertus.py @@ -27,6 +27,7 @@ def make_cfg(use_qk_norm=True, n_key_value_heads=4): use_qk_norm=use_qk_norm, n_key_value_heads=n_key_value_heads, dtype=torch.float32, + device="cpu", ) @@ -183,7 +184,7 @@ def test_zero_biases_have_correct_device(self): "blocks.0.mlp.b_out", "unembed.b_U", ]: - assert sd[key].device.type == cfg.device.type, f"{key} on wrong device" + assert sd[key].device.type == cfg.device, f"{key} on wrong device" def test_unembed_shapes(self): cfg = make_cfg() From a476e09fada1bd1ad3a0eac69cf7e12067e73174 Mon Sep 17 00:00:00 2001 From: Brendan Long Date: Sat, 28 Mar 2026 13:37:36 -0700 Subject: [PATCH 2/2] Fix test_cuda using nonexistent mlm_tokens fixture The test_cuda function referenced a fixture named mlm_tokens which was never defined, causing a fixture-not-found error. Changed to use the existing tokens fixture which provides the same MLM-style tokenized input. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- tests/acceptance/test_hooked_encoder.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/acceptance/test_hooked_encoder.py b/tests/acceptance/test_hooked_encoder.py index e62d35574..3afa69561 100644 --- a/tests/acceptance/test_hooked_encoder.py +++ b/tests/acceptance/test_hooked_encoder.py @@ -222,6 +222,6 @@ def test_input_list_of_strings_mlm(our_bert, huggingface_bert, tokenizer): @pytest.mark.skipif(not torch.cuda.is_available(), reason="Requires a CUDA device") -def test_cuda(mlm_tokens): +def test_cuda(tokens): model = HookedEncoder.from_pretrained(MODEL_NAME) - model(mlm_tokens) + model(tokens)