From ad42a975e845ea6b6839960e2d827829da6f41ea Mon Sep 17 00:00:00 2001
From: SYSTEMS-OPERATOR <155610697+SYSTEMS-OPERATOR@users.noreply.github.com>
Date: Thu, 26 Jun 2025 23:49:08 -0400
Subject: [PATCH] Fix sequencer token casting and correct annotation

---
 model/sequencer.py | 2 +-
 model/tokenizer.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/model/sequencer.py b/model/sequencer.py
index 23d75d2..a70fcb6 100644
--- a/model/sequencer.py
+++ b/model/sequencer.py
@@ -54,7 +54,7 @@ def generate_sequence(self, length: int, start: str=None) -> str:
         for i in trange(length):
             probs, _ = self.model(token_ids, ignore_ids)
             next_id = self.gen_next_token(probs, idx)
-            tokens.append(self.tokenizer.get_byte(str(next_id.item())))
+            tokens.append(self.tokenizer.get_byte(next_id.item()))
             token_ids, ignore_ids, idx = self.update_token_ids(
                 idx, token_ids, next_id
             )
diff --git a/model/tokenizer.py b/model/tokenizer.py
index 9f6cfa9..98a0619 100644
--- a/model/tokenizer.py
+++ b/model/tokenizer.py
@@ -221,7 +221,7 @@ def load(path: str) -> 'BytePairTokenizer':
 
     @staticmethod
     def train_bpe(filepaths: List[str], mincount: int, merges: int) \
-            -> 'BytePairtokenizer':
+            -> 'BytePairTokenizer':
         """
         Create trained byte pair tokenizer
         Args: