From 43bec85a4c40e499e9f83f290221f85ddc791d5d Mon Sep 17 00:00:00 2001 From: acatav Date: Wed, 5 Mar 2025 14:18:45 +0200 Subject: [PATCH 1/2] Upgrade NLTK version to ^3.9.1 --- pinecone_text/sparse/bm25_tokenizer.py | 4 ++-- pyproject.toml | 4 ++-- tests/unit/test_bm25_tokenizer.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pinecone_text/sparse/bm25_tokenizer.py b/pinecone_text/sparse/bm25_tokenizer.py index 9513176..298deff 100644 --- a/pinecone_text/sparse/bm25_tokenizer.py +++ b/pinecone_text/sparse/bm25_tokenizer.py @@ -34,9 +34,9 @@ def __init__( @staticmethod def nltk_setup() -> None: try: - nltk.data.find("tokenizers/punkt") + nltk.data.find("tokenizers/punkt_tab") except LookupError: - nltk.download("punkt") + nltk.download("punkt_tab") try: nltk.data.find("corpora/stopwords") diff --git a/pyproject.toml b/pyproject.toml index 97d8bcf..1350e86 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "pinecone-text" -version = "0.9.0" +version = "0.10.0" description = "Text utilities library by Pinecone.io" authors = ["Pinecone.io"] readme = "README.md" @@ -13,7 +13,7 @@ transformers = { version = ">=4.26.1", optional = true } sentence-transformers = { version = ">=2.0.0", optional = true } wget = "^3.2" mmh3 = "^4.1.0" -nltk = "^3.6.5" +nltk = "^3.9.1" openai = { version = "^1.2.3", optional = true } cohere = { version = "^4.37", optional = true } numpy = [ diff --git a/tests/unit/test_bm25_tokenizer.py b/tests/unit/test_bm25_tokenizer.py index 5ed1115..95984d6 100644 --- a/tests/unit/test_bm25_tokenizer.py +++ b/tests/unit/test_bm25_tokenizer.py @@ -152,7 +152,7 @@ def test_nltk_download(self): language="english", ) - nltk.find("tokenizers/punkt") + nltk.find("tokenizers/punkt_tab") nltk.find("corpora/stopwords") assert tokenizer("The quick brown fox jumps over the lazy dog") == [ From 56c5cd090f731c63fe5e12bd27c974027d689d5c Mon Sep 17 00:00:00 2001 From: acatav Date: Wed, 5 Mar 2025 14:24:56 +0200 
Subject: [PATCH 2/2] Stop supporting Python 3.8 --- .github/workflows/CI.yaml | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/CI.yaml b/.github/workflows/CI.yaml index 4a66e07..3a7d87a 100644 --- a/.github/workflows/CI.yaml +++ b/.github/workflows/CI.yaml @@ -11,7 +11,7 @@ jobs: strategy: matrix: os: [macos-latest, windows-latest, ubuntu-latest] - python-version: [3.8, 3.9, '3.10', 3.11, 3.12] + python-version: [3.9, '3.10', 3.11, 3.12] defaults: run: shell: bash diff --git a/pyproject.toml b/pyproject.toml index 1350e86..05d14ef 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ readme = "README.md" packages = [{include = "pinecone_text"}] [tool.poetry.dependencies] -python = ">=3.8,<4.0" +python = ">=3.9,<4.0" torch = { version = ">=1.13.1", optional = true } transformers = { version = ">=4.26.1", optional = true } sentence-transformers = { version = ">=2.0.0", optional = true }