Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ jobs:
command: |
sudo pip install pipenv --upgrade
pipenv install --dev
pipenv run python -m nltk.downloader stopwords
pipenv run python -m nltk.downloader stopwords wordnet

- save_cache: # save dependency cache
key: deps-{{ .Branch }}-{{ checksum "Pipfile.lock" }}
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ Welcome to HQ Trivia Bot! Thanks for contributing. Here are the steps to get sta
* Ensure you have Python 3 installed on your system.
* Install Pipenv `sudo pip install pipenv`
* Create Pipenv virtual environment `pipenv --three install --dev`
* Install NLTK corpora `pipenv run python3 -m nltk.downloader stopwords`
* Install NLTK corpora `pipenv run python3 -m nltk.downloader stopwords wordnet`


### Run HQ Trivia Bot
Expand Down
9 changes: 7 additions & 2 deletions utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
import re
from enum import Enum
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

# Shared lemmatizer instance, built once at import time; get_raw_words uses it
# to lemmatize each word.
WORDNET = WordNetLemmatizer()

class Colours(Enum):
""" console colours """
Expand Down Expand Up @@ -32,8 +34,11 @@ def get_significant_words(question_words):
return list(filter(lambda word: word not in our_stopwords, question_words.split(' ')))




def get_raw_words(data):
    """ Extract raw words from data

    Removes every character that is neither a word character nor a space,
    lowercases the text, replaces each ' and ' found between words with a
    single space, and lemmatizes every remaining word with the module-level
    WORDNET lemmatizer.

    :param data: text to normalise (str)
    :return: the lemmatized words joined by single spaces; an empty string
             when no words remain
    """
    # Lowercase before dropping ' and ' so capitalised forms are removed too.
    data = re.sub(r'[^\w ]', '', data).lower().replace(' and ', ' ')
    # split() without an argument collapses runs of spaces and trims the
    # ends, so no empty tokens are lemmatized or re-joined as double spaces.
    words_list = data.split()
    words = ' '.join([WORDNET.lemmatize(word) for word in words_list])
    return words