From 1255c6f4a3c66a833b8428aa907e79df351497e9 Mon Sep 17 00:00:00 2001
From: Kyle Howells
Date: Tue, 3 Jun 2025 22:18:15 +0100
Subject: [PATCH] Update perplexity.py to handle models without bos_token_id
 in the tokeniser

---
 metrics/perplexity/perplexity.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/metrics/perplexity/perplexity.py b/metrics/perplexity/perplexity.py
index ad307e8ad..557172cdb 100644
--- a/metrics/perplexity/perplexity.py
+++ b/metrics/perplexity/perplexity.py
@@ -166,7 +166,7 @@ def _compute(
             encoded_batch = encoded_texts[start_index:end_index]
             attn_mask = attn_masks[start_index:end_index]
 
-            if add_start_token:
+            if add_start_token and tokenizer.bos_token_id is not None:
                 bos_tokens_tensor = torch.tensor([[tokenizer.bos_token_id]] * encoded_batch.size(dim=0)).to(device)
                 encoded_batch = torch.cat([bos_tokens_tensor, encoded_batch], dim=1)
                 attn_mask = torch.cat(
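
A minimal sketch (not part of the patch) of the failure mode the added check avoids: when a tokenizer has no BOS token, `tokenizer.bos_token_id` is `None`, and building `torch.tensor([[None]] * batch_size)` raises at runtime. `FakeTokenizer` below is a hypothetical stand-in for such a tokenizer; the guarded block mirrors the patched code and simply skips prepending a BOS token in that case.

```python
import torch


class FakeTokenizer:
    # Hypothetical tokenizer with no BOS token defined,
    # standing in for real tokenizers where bos_token_id is None.
    bos_token_id = None


tokenizer = FakeTokenizer()
device = "cpu"
add_start_token = True
encoded_batch = torch.ones((2, 5), dtype=torch.int64)
attn_mask = torch.ones((2, 5), dtype=torch.int64)

# Old condition (`if add_start_token:`) would hit torch.tensor([[None], [None]])
# and fail. The patched condition skips the whole block instead.
if add_start_token and tokenizer.bos_token_id is not None:
    bos_tokens_tensor = torch.tensor([[tokenizer.bos_token_id]] * encoded_batch.size(dim=0)).to(device)
    encoded_batch = torch.cat([bos_tokens_tensor, encoded_batch], dim=1)
    attn_mask = torch.cat(
        [torch.ones(bos_tokens_tensor.size(), dtype=torch.int64).to(device), attn_mask], dim=1
    )

# Shapes are unchanged because no BOS token was prepended.
print(encoded_batch.shape)  # torch.Size([2, 5])
print(attn_mask.shape)      # torch.Size([2, 5])
```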