From db2ab7077d0b298fb92c912f662c9735e2462cb8 Mon Sep 17 00:00:00 2001 From: SYSTEMS-OPERATOR <155610697+SYSTEMS-OPERATOR@users.noreply.github.com> Date: Fri, 27 Jun 2025 05:21:52 -0400 Subject: [PATCH] Fix parsing of dataset lines --- model/dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/model/dataset.py b/model/dataset.py index 6f60a14..73e4e0c 100644 --- a/model/dataset.py +++ b/model/dataset.py @@ -31,7 +31,7 @@ def __init__(self, datapath: str, window_size: int, vocab_size: int, def __iter__(self): for line_idx in range(len(self.data)): - line_tokens = self.data[line_idx].strip().split(' ') + line_tokens = self.data[line_idx].split() if len(line_tokens) <= self.window_size: continue