diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml new file mode 100644 index 000000000..7b43fb181 --- /dev/null +++ b/.github/workflows/codeql-analysis.yml @@ -0,0 +1,89 @@ +# For most projects, this workflow file will not need changing; you simply need +# to commit it to your repository. +# +# You may wish to alter this file to override the set of languages analyzed, +# or to provide custom queries or build logic. +# +# ******** NOTE ******** +# We have attempted to detect the languages in your repository. Please check +# the `language` matrix defined below to confirm you have the correct set of +# supported CodeQL languages. +# +name: "CodeQL" + +on: + push: + branches: [ master ] + pull_request: + # The branches below must be a subset of the branches above + branches: [ master ] + schedule: + - cron: '39 9 * * 3' + +jobs: + analyze: + name: Analyze + runs-on: ubuntu-latest + permissions: + actions: read + contents: read + security-events: write + + strategy: + fail-fast: false + matrix: + language: [ 'python' ] + python-version: [ "3.7", "3.8", "3.9"] + # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python' ] + # Learn more: + # https://docs.github.com/en/free-pro-team@latest/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#changing-the-languages-that-are-analyzed + + steps: + - name: Set up Python ${{ matrix.python }} (deadsnakes) + uses: deadsnakes/action@v1.0.0 + if: endsWith(matrix.python-version, '-dev') + with: + python-version: ${{ matrix.python-version }} + + - name: Set up Python ${{ matrix.python-version }} + if: "!endsWith(matrix.python-version, '-dev')" + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + + - name: Checkout repository + uses: actions/checkout@v2 + + # 
Initializes the CodeQL tools for scanning. + - name: Initialize CodeQL + uses: github/codeql-action/init@v2 + with: + languages: ${{ matrix.language }} + # If you wish to specify custom queries, you can do so here or in a config file. + # By default, queries listed here will override any specified in a config file. + # Prefix the list here with "+" to use these queries and those in the config file. + # queries: ./path/to/local/query, your-org/your-repo/queries@main + + # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). + # If this step fails, then you should remove it and run the build manually (see below) + - name: Autobuild + uses: github/codeql-action/autobuild@v2 + + # ℹ️ Command-line programs to run using the OS shell. + # 📚 https://git.io/JvXDl + + # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines + # and modify them (or add more) to build your code if your project + # uses a compiled language + + #- run: | + # make bootstrap + # make release + + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v2 diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 000000000..9565b0abb --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,42 @@ +# This workflow will install Python dependencies, run tests and lint with a single version of Python +# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions + +name: Python application + +on: + push: + branches: + - master + pull_request: + branches: + - master + workflow_dispatch: + + +permissions: + contents: read + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + - name: Set up Python 3.9 + uses: actions/setup-python@v3 + with: + python-version: "3.9" + - name: Work in python env + run: | + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install flake8 pytest + 
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + - name: Lint with flake8 + run: | + # stop the build if there are Python syntax errors or undefined names + flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide + flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics diff --git a/.gitignore b/.gitignore index 0d937453a..6185f4961 100644 --- a/.gitignore +++ b/.gitignore @@ -15,3 +15,5 @@ venv # Database files *.sqlite3 +/.venv/ +.idea/ diff --git a/.travis.yml b/.travis.yml index f694e3a10..d1ca474a5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,7 +2,8 @@ language: python cache: pip python: - - '3.6' + - '3.9' + os: - linux diff --git a/README.md b/README.md index 00c30fffc..29e6d62e3 100644 --- a/README.md +++ b/README.md @@ -8,10 +8,10 @@ known conversations. The language independent design of ChatterBot allows it to be trained to speak any language. 
[![Package Version](https://img.shields.io/pypi/v/chatterbot.svg)](https://pypi.python.org/pypi/chatterbot/) -[![Python 3.6](https://img.shields.io/badge/python-3.6-blue.svg)](https://www.python.org/downloads/release/python-360/) -[![Django 2.0](https://img.shields.io/badge/Django-2.0-blue.svg)](https://docs.djangoproject.com/en/2.1/releases/2.0/) -[![Requirements Status](https://requires.io/github/gunthercox/ChatterBot/requirements.svg?branch=master)](https://requires.io/github/gunthercox/ChatterBot/requirements/?branch=master) -[![Build Status](https://travis-ci.org/gunthercox/ChatterBot.svg?branch=master)](https://travis-ci.org/gunthercox/ChatterBot) +[![Python 3.9](https://img.shields.io/badge/python-3.9-blue.svg)](https://www.python.org/downloads/release/python-390/) +[![Django 4.0](https://img.shields.io/badge/Django-4.0-blue.svg)](https://docs.djangoproject.com/en/4.0/releases/4.0/) +[![Requirements Status](https://requires.io/github/batman202012/ChatterBot/requirements.svg?branch=master)](https://requires.io/github/batman202012/ChatterBot/requirements/?branch=master) +[![Build Status](https://github.com/batman202012/ChatterBot/actions/workflows/codeql-analysis.yml/badge.svg)](https://github.com/batman202012/ChatterBot/actions/workflows/codeql-analysis.yml) [![Documentation Status](https://readthedocs.org/projects/chatterbot/badge/?version=stable)](http://chatterbot.readthedocs.io/en/stable/?badge=stable) [![Coverage Status](https://img.shields.io/coveralls/gunthercox/ChatterBot.svg)](https://coveralls.io/r/gunthercox/ChatterBot) [![Code Climate](https://codeclimate.com/github/gunthercox/ChatterBot/badges/gpa.svg)](https://codeclimate.com/github/gunthercox/ChatterBot) diff --git a/chatterbot/comparisons.py b/chatterbot/comparisons.py index 403d4a9f9..b2213ffd5 100644 --- a/chatterbot/comparisons.py +++ b/chatterbot/comparisons.py @@ -4,7 +4,12 @@ """ from chatterbot.exceptions import OptionalDependencyImportError from difflib import SequenceMatcher - +import 
os +import torch +import cupy +os.environ['CUDA_VISIBLE_DEVICES'] = '0' +os.environ['CUDA_LAUNCH_BLOCKING'] = '1' +os.environ['CUPY_GPU_MEMORY_LIMIT'] = '90%' class Comparator: @@ -66,6 +71,13 @@ def __init__(self, language): super().__init__(language) try: import spacy + from thinc.api import set_gpu_allocator, prefer_gpu + dev0 = cupy.cuda.Device(0) + dev0.use() + handle = dev0.get_cublas_handle() + print(handle) + set_gpu_allocator("pytorch") + prefer_gpu(0) except ImportError: message = ( 'Unable to import "spacy".\n' @@ -73,8 +85,7 @@ def __init__(self, language): 'pip3 install "spacy>=2.1,<2.2"' ) raise OptionalDependencyImportError(message) - - self.nlp = spacy.load(self.language.ISO_639_1) + self.nlp = spacy.load("en_core_web_trf") def compare(self, statement_a, statement_b): """ @@ -85,7 +96,6 @@ def compare(self, statement_a, statement_b): """ document_a = self.nlp(statement_a.text) document_b = self.nlp(statement_b.text) - return document_a.similarity(document_b) @@ -119,6 +129,8 @@ def __init__(self, language): super().__init__(language) try: import spacy + from thinc.api import prefer_gpu + prefer_gpu() except ImportError: message = ( 'Unable to import "spacy".\n' diff --git a/chatterbot/search.py b/chatterbot/search.py index 6c1be0ddb..c8623c332 100644 --- a/chatterbot/search.py +++ b/chatterbot/search.py @@ -46,7 +46,7 @@ def search(self, input_statement, **additional_parameters): input_search_text = input_statement.search_text if not input_statement.search_text: - self.chatbot.logger.warn( + self.chatbot.logger.warning( 'No value for search_text was available on the provided input' ) diff --git a/chatterbot/storage/sql_storage.py b/chatterbot/storage/sql_storage.py index 6c2483027..f60f01a73 100644 --- a/chatterbot/storage/sql_storage.py +++ b/chatterbot/storage/sql_storage.py @@ -19,7 +19,7 @@ class SQLStorageAdapter(StorageAdapter): def __init__(self, **kwargs): super().__init__(**kwargs) - from sqlalchemy import create_engine + from sqlalchemy 
import create_engine, inspect from sqlalchemy.orm import sessionmaker self.database_uri = kwargs.get('database_uri', False) @@ -43,7 +43,7 @@ def set_sqlite_pragma(dbapi_connection, connection_record): dbapi_connection.execute('PRAGMA journal_mode=WAL') dbapi_connection.execute('PRAGMA synchronous=NORMAL') - if not self.engine.dialect.has_table(self.engine, 'Statement'): + if not inspect(self.engine).has_table('Statement'): self.create_database() self.Session = sessionmaker(bind=self.engine, expire_on_commit=True) diff --git a/chatterbot/tagging.py b/chatterbot/tagging.py index ea3c31d26..35bed72f3 100644 --- a/chatterbot/tagging.py +++ b/chatterbot/tagging.py @@ -2,41 +2,60 @@ from chatterbot import languages -class LowercaseTagger(object): - """ - Returns the text in lowercase. - """ +class Tagger: def __init__(self, language=None): self.language = language or languages.ENG + def get_text_index_string(self, text): + return text + + def get_text_index_string_multi(self, texts): + return texts + + +class LowercaseTagger(Tagger): + """ + Returns the text in lowercase. + """ + def get_text_index_string(self, text): return text.lower() -class PosLemmaTagger(object): +class PosLemmaTagger(Tagger): def __init__(self, language=None): + super().__init__(language) import spacy - self.language = language or languages.ENG - self.punctuation_table = str.maketrans(dict.fromkeys(string.punctuation)) - self.nlp = spacy.load(self.language.ISO_639_1.lower()) - - def get_text_index_string(self, text): - """ - Return a string of text containing part-of-speech, lemma pairs. 
- """ - bigram_pairs = [] + self.nlp = spacy.load(self.language.ISO_639_1.lower(), disable=["transformer", "parser", "ner"]) + def punctuation_check(self, text): if len(text) <= 2: text_without_punctuation = text.translate(self.punctuation_table) if len(text_without_punctuation) >= 1: - text = text_without_punctuation + return text_without_punctuation + return text + + def get_text_index_string_multi(self, texts): + new_texts = [self.punctuation_check(text) for text in texts] + + return [self._process_document(doc) for doc in self.nlp.pipe(new_texts)] + def get_text_index_string(self, text): + """ + Return a string of text containing part-of-speech, lemma pairs. + """ + text = self.punctuation_check(text) document = self.nlp(text) + return self._process_document(document) + + def _process_document(self, document): + bigram_pairs = [] + text = document.text if len(text) <= 2: bigram_pairs = [ @@ -52,6 +71,9 @@ def get_text_index_string(self, text): token for token in document if token.is_alpha ] + if len(tokens) > 512: + tokens = tokens[:512] + for index in range(1, len(tokens)): bigram_pairs.append('{}:{}'.format( tokens[index - 1].pos_, diff --git a/chatterbot/trainers.py b/chatterbot/trainers.py index 7411cc838..9dfcc8c57 100644 --- a/chatterbot/trainers.py +++ b/chatterbot/trainers.py @@ -90,6 +90,11 @@ def train(self, conversation): statements_to_create = [] + search_texts = self.chatbot.storage.tagger.get_text_index_string_multi(conversation) + + if len(search_texts) != len(conversation): + print("Uh oh") + for conversation_count, text in enumerate(conversation): if self.show_training_progress: utils.print_progress_bar( @@ -97,7 +102,8 @@ def train(self, conversation): conversation_count + 1, len(conversation) ) - statement_search_text = self.chatbot.storage.tagger.get_text_index_string(text) + # statement_search_text = self.chatbot.storage.tagger.get_text_index_string(text) + statement_search_text = search_texts[conversation_count] statement = 
self.get_preprocessed_statement( Statement( diff --git a/dev-requirements.txt b/dev-requirements.txt deleted file mode 100644 index 86e3de486..000000000 --- a/dev-requirements.txt +++ /dev/null @@ -1,15 +0,0 @@ -coveralls -flake8 -nltk>=3.2,<4.0 -nose -pint>=0.8.1 -pymongo>=3.3,<4.0 -twine -twython -spacy>=2.1,<2.2 -sphinx>=3.0,<3.1 -sphinx_rtd_theme -pyyaml>=5.3,<5.4 -git+git://github.com/gunthercox/chatterbot-corpus@master#egg=chatterbot_corpus -https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.1.0/en_core_web_sm-2.1.0.tar.gz#egg=en_core_web_sm -https://github.com/explosion/spacy-models/releases/download/de_core_news_sm-2.1.0/de_core_news_sm-2.1.0.tar.gz#egg=de_core_news_sm \ No newline at end of file diff --git a/examples/django_app/requirements.txt b/examples/django_app/requirements.txt index 82bf00052..ec12c99d7 100644 --- a/examples/django_app/requirements.txt +++ b/examples/django_app/requirements.txt @@ -1,2 +1,2 @@ -django>=2.2,<2.3 +django>=4.0 # chatterbot>=0.8,<1.1 diff --git a/requirements.txt b/requirements.txt index a12a58026..ff2222692 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,11 @@ -mathparse>=0.1,<0.2 -python-dateutil>=2.8,<2.9 -sqlalchemy>=1.3,<1.4 +mathparse +python-dateutil +sqlalchemy>=1.4.20 pytz +spacy[cuda117]>=3.4.1 +# this requirement for typer should be ditched once spacy stop supporting or red change and take in account installed lib version to install cogs deps +typer<0.5.0 +nvidia-pyindex>=1.0.9 +torch>=1.12.0+cu116 +cupy-cuda117<11.0.0 +thinc==8.1.0 \ No newline at end of file diff --git a/setup.cfg b/setup.cfg index 4ca902a1e..dfdd241d8 100644 --- a/setup.cfg +++ b/setup.cfg @@ -23,7 +23,7 @@ max_line_length = 175 exclude = .eggs, .git, .tox, build, [chatterbot] -version = 1.1.0a7 +version = 1.1.0.dev4 author = Gunther Cox email = gunthercx@gmail.com url = https://github.com/gunthercox/ChatterBot diff --git a/setup.py b/setup.py index 8963e6ea6..5c03f4f65 100644 --- a/setup.py +++ 
b/setup.py @@ -69,7 +69,7 @@ include_package_data=True, install_requires=REQUIREMENTS, dependency_links=DEPENDENCIES, - python_requires='>=3.4, <3.9', + python_requires='>=3.4, <3.10', license='BSD', zip_safe=True, platforms=['any'],