pinecone-io · acatav · Mar 5, 2025 · Mar 5, 2025 · Mar 5, 2025
diff --git a/.github/workflows/CI.yaml b/.github/workflows/CI.yaml
@@ -11,7 +11,7 @@ jobs:
     strategy:
       matrix:
         os: [macos-latest, windows-latest, ubuntu-latest]
-        python-version: [3.8, 3.9, '3.10', 3.11, 3.12]
+        python-version: [3.9, '3.10', 3.11, 3.12]
     defaults:
       run:
         shell: bash

diff --git a/pinecone_text/sparse/bm25_tokenizer.py b/pinecone_text/sparse/bm25_tokenizer.py
@@ -34,9 +34,9 @@ def __init__(
     @staticmethod
     def nltk_setup() -> None:
         try:
-            nltk.data.find("tokenizers/punkt")
+            nltk.data.find("tokenizers/punkt_tab")
         except LookupError:
-            nltk.download("punkt")
+            nltk.download("punkt_tab")
 
         try:
             nltk.data.find("corpora/stopwords")

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,19 +1,19 @@
 [tool.poetry]
 name = "pinecone-text"
-version = "0.9.0"
+version = "0.10.0"
 description = "Text utilities library by Pinecone.io"
 authors = ["Pinecone.io"]
 readme = "README.md"
 packages = [{include = "pinecone_text"}]
 
 [tool.poetry.dependencies]
-python = ">=3.8,<4.0"
+python = ">=3.9,<4.0"
 torch = { version = ">=1.13.1", optional = true }
 transformers = { version = ">=4.26.1", optional = true }
 sentence-transformers = { version = ">=2.0.0", optional = true }
 wget = "^3.2"
 mmh3 = "^4.1.0"
-nltk = "^3.6.5"
+nltk = "^3.9.1"
 openai =  { version = "^1.2.3", optional = true }
 cohere = { version = "^4.37", optional = true }
 numpy = [

diff --git a/tests/unit/test_bm25_tokenizer.py b/tests/unit/test_bm25_tokenizer.py
@@ -152,7 +152,7 @@ def test_nltk_download(self):
             language="english",
         )
 
-        nltk.find("tokenizers/punkt")
+        nltk.find("tokenizers/punkt_tab")
         nltk.find("corpora/stopwords")
 
         assert tokenizer("The quick brown fox jumps over the lazy dog") == [