-
-
Notifications
You must be signed in to change notification settings - Fork 4
Open
Milestone
Description
Summary
Failed on commit: 7736417
System: macOS.
Maybe the tolerance (currently 1e-4) should be made less strict?
def test_attention_no_look_ahead():
"""Test that attention does not look ahead - causal masking is working correctly."""
model, processor, collator = setup_model()
model.eval()
# Test sequences that share prefixes
texts = ["a b c x y z", "a b d m"]
# Force every word to predict a single byte (and EOS)
# "a <eos>, b <eos>, c <eos>, <eos> <pad>" and "a <eos>, b <eos>, d <eos>, <eos> <pad>"
processor.max_word_length = 1
_, outputs = predict_dataset(texts, model, processor, collator)
for text in texts:
print(f"Loss for '{text}':", outputs[text].loss.cpu().numpy())
# Check that the first 4 tokens have identical losses
for i in range(4):
> assert abs(outputs[texts[0]].loss[i] - outputs[texts[1]].loss[i]) < 1e-4, \
f"Loss at position {i} should be identical: {outputs[texts[0]].loss[i]} vs {outputs[texts[1]].loss[i]}"
E AssertionError: Loss at position 1 should be identical: 24.81259536743164 vs 24.813331604003906
E assert tensor(0.0007) < 0.0001
E + where tensor(0.0007) = abs((tensor(24.8126) - tensor(24.8133)))
tests/test_model.py:89: AssertionError
FAILED tests/test_model.py::test_attention_no_look_ahead - AssertionError: Loss at position 1 should be identical: 24.81259536743164 vs 24.813331604003906
Reproduce
pytest
Metadata
Metadata
Assignees
Labels
No labels