From 291ee9a1b8c994ef88b8e850b4552ff8b6068ba5 Mon Sep 17 00:00:00 2001
From: shkr
Date: Wed, 19 Feb 2014 14:51:32 -0500
Subject: [PATCH] ignore stopwords case insensitive

---
 tweetokenize/tokenizer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tweetokenize/tokenizer.py b/tweetokenize/tokenizer.py
index 10e1c3a..fc07108 100644
--- a/tweetokenize/tokenizer.py
+++ b/tweetokenize/tokenizer.py
@@ -263,7 +263,7 @@ def tokenize(self, message):
         message = self.quotes_re.sub(" ", message)
         message = self._replacetokens(self.tokenize_re.findall(message))
         if self.ignorestopwords:
-            message = [word for word in message if word not in self._stopwords]
+            message = [word for word in message if word.lower() not in self._stopwords]
         return message
 
     def emoticons(self, iterable=None, filename=None):