Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions openrelik_ai_common/utils/chunker.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,7 @@ def _get_next_chunk(
Returns:
A tuple containing the next chunk (or None if end of file) and the updated offset.
"""
offset = int(offset)
if offset >= len(self.file_content):
return None, offset

Expand All @@ -173,10 +174,10 @@ def _get_next_chunk(
)

# Estimate the end character index based on available tokens
estimated_end_char = min(offset + available_tokens * 4, len(self.file_content))
estimated_end_char = int(min(offset + available_tokens * 4, len(self.file_content)))

# Find a suitable breakpoint for a clean chunk break
breakpoint = self._find_breakpoint(offset, estimated_end_char)
breakpoint = int(self._find_breakpoint(offset, estimated_end_char))

# Extract the chunk and update the offset
chunk = self.file_content[offset:breakpoint]
Expand Down
26 changes: 26 additions & 0 deletions tests/test_get_next_chunk.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# tests/test_get_next_chunk.py

# --- stub out the LLM interface needed by TextFileChunker ---
class DummyLLM:
    """Minimal stand-in for the LLM interface that TextFileChunker consumes.

    Exposes only what the chunker reads: a ``config`` mapping, a fixed
    token budget, and a trivially countable tokenizer.
    """

    def __init__(self):
        # Chunker reads model name / system instructions from this mapping.
        self.config = {"model": "dummy", "system_instructions": ""}

    def count_tokens(self, text):
        # Deliberately naive: one character == one token, so chunk
        # boundaries in tests are easy to reason about.
        return len(text)

    def get_max_input_tokens(self, model_name):
        # Fixed budget keeps chunk sizing deterministic across test runs.
        return 100

# --- now import and test ---
from openrelik_ai_common.utils.chunker import TextFileChunker

def test_get_next_chunk_int_cast():
    """Regression test: _get_next_chunk tolerates a float ``offset``.

    Before the int() casts were added in the chunker, passing
    ``offset=0.0`` raised a TypeError when the offset was used to slice
    ``file_content``. Verifies the returned offset is an int, matches the
    chunk length taken from position 0, and stays within the token budget.
    """
    content = "x" * 200
    llm = DummyLLM()
    chunker = TextFileChunker(prompt="p", file_content=content, llm=llm)

    chunk, new_offset = chunker._get_next_chunk(
        prompt="p",
        prompt_chunk_wrapper="",
        offset=0.0,
    )

    assert isinstance(new_offset, int)
    assert len(chunk) == new_offset
    assert new_offset <= llm.get_max_input_tokens("") * 4