From 3391a8cf0e446dc7a3899dc529350b615dae9abb Mon Sep 17 00:00:00 2001
From: olivier
Date: Tue, 2 Apr 2019 10:17:03 +0200
Subject: [PATCH 1/2] Added feature/endpoint allowing a genuine POS tagger
 response

---
 README.md                             | 108 +++++++++++++++-----
 displacy_service/parse.py             |  97 ++++++++++++++++++
 displacy_service/server.py            |  29 +++++-
 displacy_service_tests/test_server.py | 142 +++++++++++++++++++-------
 4 files changed, 308 insertions(+), 68 deletions(-)

diff --git a/README.md b/README.md
index f341e3f..1deb6a4 100644
--- a/README.md
+++ b/README.md
@@ -227,21 +227,25 @@ curl -s localhost:8000/dep -d '{"text":"Pastafarians are smarter than people wit
 
 ---
 
-### `POST` `/ent/`
+### `POST` `/tag/`
 
 Example request:
 
 ```json
 {
-    "text": "When Sebastian Thrun started working on self-driving cars at Google in 2007, few people outside of the company took him seriously.",
-    "model": "en"
+    "text": "Fed raises interest rates 0.5 percent.",
+    "model": "en",
+    "include_sentences": false,
+    "attr_filter": ["text", "start", "end", "lemma", "pos"]
 }
 ```
 
-| Name    | Type   | Description                                            |
-| ------- | ------ | ------------------------------------------------------ |
-| `text`  | string | text to be parsed                                      |
-| `model` | string | identifier string for a model installed on the server  |
+| Name                | Type    | Description                                            |
+| ------------------- | ------- | ------------------------------------------------------ |
+| `text`              | string  | text to be parsed                                      |
+| `model`             | string  | identifier string for a model installed on the server  |
+| `include_sentences` | boolean | include sentence layer                                 |
+| `attr_filter`       | array   | array of token attributes to include in response       |
 
 Example request using the Python [Requests library](http://docs.python-requests.org/en/master/):
 
@@ -250,9 +254,9 @@ import json
 import requests
 
-url = "http://localhost:8000/ent"
-message_text = "When Sebastian Thrun started working on self-driving cars at Google in 2007, few people outside of the company took him seriously."
+url = "http://localhost:8000/tag"
+message_text = "Fed raises interest rates 0.5 percent."
 headers = {'content-type': 'application/json'}
-d = {'text': message_text, 'model': 'en'}
+d = {'text': message_text, 'model': 'en', 'include_sentences': False, "attr_filter": ['text', 'start', 'end', 'lemma', 'pos']}
 
 response = requests.post(url, data=json.dumps(d), headers=headers)
 r = response.json()
@@ -262,34 +266,84 @@ Example response:
 
 ```json
 [
-  { "end": 20, "start": 5, "type": "PERSON" },
-  { "end": 67, "start": 61, "type": "ORG" },
-  { "end": 75, "start": 71, "type": "DATE" }
+{"start": 0, "end": 3, "text": "Fed", "lemma": "fed", "pos": "PROPN"},
+{"start": 4, "end": 10, "text": "raises", "lemma": "raise", "pos": "VERB"},
+{"start": 11, "end": 19, "text": "interest", "lemma": "interest", "pos": "NOUN"},
+{"start": 20, "end": 25, "text": "rates", "lemma": "rate", "pos": "NOUN"},
+{"start": 26, "end": 29, "text": "0.5", "lemma": "0.5", "pos": "NUM"},
+{"start": 30, "end": 37, "text": "percent", "lemma": "percent", "pos": "NOUN"},
+{"start": 37, "end": 38, "text": ".", "lemma": ".", "pos": "PUNCT"}
 ]
 ```
 
-| Name    | Type    | Description                                |
-| ------- | ------- | ------------------------------------------ |
-| `end`   | integer | character offset the entity ends **after** |
-| `start` | integer | character offset the entity starts **on**  |
-| `type`  | string  | entity type                                |
+| Name             | Type    | Description                                |
+| ---------------- | ------- | ------------------------------------------ |
+| `end`            | integer | character offset the token ends **after**  |
+| `start`          | integer | character offset the token starts **on**   |
+| `text`           | string  | verbatim token text                        |
+| `orth`           | string  | verbatim token text (orthography)          |
+| `lemma`          | string  | base form of the token                     |
+| `pos`            | string  | coarse-grained part-of-speech tag          |
+| `tag`            | string  | fine-grained part-of-speech tag            |
+| `dep`            | string  | syntactic dependency relation              |
+| `ent_type`       | string  | named entity type                          |
+| `ent_iob`        | string  | IOB code of the named entity tag           |
+| `norm`           | string  | normalised form of the token               |
+| `lower`          | string  | lowercase form of the token                |
+| `shape`          | string  | orthographic shape, e.g. `Xxx`, `dd`       |
+| `prefix`         | string  | leading character(s) of the token          |
+| `suffix`         | string  | trailing characters of the token           |
+| `is_alpha`       | boolean | token consists of alphabetic characters    |
+| `is_ascii`       | boolean | token consists of ASCII characters         |
+| `is_digit`       | boolean | token consists of digits                   |
+| `is_lower`       | boolean | token is lowercase                         |
+| `is_upper`       | boolean | token is uppercase                         |
+| `is_title`       | boolean | token is titlecase                         |
+| `is_punct`       | boolean | token is punctuation                       |
+| `is_left_punct`  | boolean | token is left punctuation, e.g. `(`        |
+| `is_right_punct` | boolean | token is right punctuation, e.g. `)`       |
+| `is_space`       | boolean | token consists of whitespace               |
+| `is_bracket`     | boolean | token is a bracket                         |
+| `is_quote`       | boolean | token is a quotation mark                  |
+| `is_currency`    | boolean | token is a currency symbol                 |
+| `like_url`       | boolean | token resembles a URL                      |
+| `like_num`       | boolean | token resembles a number                   |
+| `like_email`     | boolean | token resembles an email address           |
+| `is_oov`         | boolean | token is out of vocabulary                 |
+| `is_stop`        | boolean | token is part of the stop list             |
+| `cluster`        | integer | Brown cluster ID                           |
 
 ```
-curl -s localhost:8000/ent -d '{"text":"Pastafarians are smarter than people with Coca Cola bottles.", "model":"en"}'
+curl -s localhost:8000/tag -d '{"text":"This a test that should split into sentences!
This is the second.", "model":"en", "include_sentences": true, "attr_filter": ["text", "start", "end", "lemma", "pos"]}' ``` ```json [ - { - "end": 12, - "start": 0, - "type": "NORP" - }, - { - "end": 51, - "start": 42, - "type": "ORG" - } +{"text": "This a test that should split into sentences!", + "start": 0, + "end": 45, + "tokens": [ + {"text": "This", "start": 0, "end": 4, "lemma": "this", "pos": "DET"}, + {"text": "a", "start": 5, "end": 6, "lemma": "a", "pos": "DET"}, + {"text": "test", "start": 7, "end": 11, "lemma": "test", "pos": "NOUN"}, + {"text": "that", "start": 12, "end": 16, "lemma": "that", "pos": "ADJ"}, + {"text": "should", "start": 17, "end": 23, "lemma": "should", "pos": "VERB"}, + {"text": "split", "start": 24, "end": 29, "lemma": "split", "pos": "VERB"}, + {"text": "into", "start": 30, "end": 34, "lemma": "into", "pos": "ADP"}, + {"text": "sentences", "start": 35, "end": 44, "lemma": "sentence", "pos": "NOUN"}, + {"text": "!", "start": 44, "end": 45, "lemma": "!", "pos": "PUNCT"} + ]}, +{ + "text": "This is the second.", + "start": 46, + "end": 65, + "tokens": [ + {"text": "This", "start": 46, "end": 50, "lemma": "this", "pos": "DET"}, + {"text": "is", "start": 51, "end": 53, "lemma": "be", "pos": "VERB"}, + {"text": "the", "start": 54, "end": 57, "lemma": "the", "pos": "DET"}, + {"text": "second", "start": 58, "end": 64, "lemma": "second", "pos": "ADJ"}, + {"text": ".", "start": 64, "end": 65, "lemma": ".", "pos": "PUNCT"} + ]} ] ``` diff --git a/displacy_service/parse.py b/displacy_service/parse.py index eab6839..8cfbb44 100644 --- a/displacy_service/parse.py +++ b/displacy_service/parse.py @@ -63,6 +63,103 @@ def to_json(self): } for ent in self.doc.ents ] +class Tokens(object): + def __init__(self, nlp, text, include_sentences, attr_filter): + self.doc = nlp(text) + self.filter = attr_filter + self.inc_sents = include_sentences + + def to_json(self): + if self.inc_sents: + return [ self.sent_to_dict(sent) for sent in self.doc.sents] + else: + return [ self.token_to_dict(tok) for tok in self.doc ] + + def sent_to_dict(self, sent): + all = len(self.filter) == 0 + attrs = { + 'text': sent.text, + 'start': sent.start_char, + 'end': sent.end_char, + "tokens" : [ self.token_to_dict(tok) for tok in sent ] + } + # if all or 'vector' in self.filter: + # attrs['vector'] = sent.vector + return attrs + + def token_to_dict(self, tok): + all = len(self.filter) == 0 + attrs = { + 'start': tok.idx, + 'end': tok.idx + len(tok), + } + if all or 'text' in self.filter: + attrs['text'] = tok.text + if all or 'orth' in self.filter: + attrs['orth'] = tok.orth_ + if all or 'lemma' in self.filter: + attrs['lemma'] = tok.lemma_ + if all or 'pos' in self.filter: + attrs['pos'] = tok.pos_ + if all or 'tag' in self.filter: + attrs['tag'] = tok.tag_ + if all or 'dep' in self.filter: + attrs['dep'] = tok.dep_ + # if all or 'vector' in self.filter: + # attrs['vector'] = tok.vector.tolist() + if all or 'ent_type' in self.filter: + attrs['ent_type'] = tok.ent_type_ + if all or 'ent_iob_' in self.filter: + attrs['ent_iob'] = tok.ent_iob_ + if all or 'norm' in self.filter: + attrs['norm'] = tok.norm_ + if all or 'lower' in self.filter: + attrs['lower'] = tok.lower_ + if all or 'shape' in self.filter: + attrs['shape'] = tok.shape_ + if all or 'prefix' in self.filter: + attrs['prefix'] = tok.prefix_ + if all or 'suffix' in self.filter: + attrs['suffix'] = tok.suffix_ + if all or 'is_alpha' in self.filter: + attrs['is_alpha'] = tok.is_alpha + if all or 'is_ascii' in self.filter: + attrs['is_ascii'] = 
tok.is_ascii + if all or 'is_digit' in self.filter: + attrs['is_digit'] = tok.is_digit + if all or 'is_lower' in self.filter: + attrs['is_lower'] = tok.is_lower + if all or 'is_upper' in self.filter: + attrs['is_upper'] = tok.is_upper + if all or 'is_title' in self.filter: + attrs['is_title'] = tok.is_title + if all or 'is_punct' in self.filter: + attrs['is_punct'] = tok.is_punct + if all or 'is_left_punct' in self.filter: + attrs['is_left_punct'] = tok.is_left_punct + if all or 'is_right_punct' in self.filter: + attrs['is_right_punct'] = tok.is_right_punct + if all or 'is_space' in self.filter: + attrs['is_space'] = tok.is_space + if all or 'is_bracket' in self.filter: + attrs['is_bracket'] = tok.is_bracket + if all or 'is_quote' in self.filter: + attrs['is_quote'] = tok.is_quote + if all or 'is_currency' in self.filter: + attrs['is_currency'] = tok.is_currency + if all or 'like_url' in self.filter: + attrs['like_url'] = tok.like_url + if all or 'like_num' in self.filter: + attrs['like_num'] = tok.like_num + if all or 'like_email' in self.filter: + attrs['like_email'] = tok.like_email + if all or 'is_oov' in self.filter: + attrs['is_oov'] = tok.is_oov + if all or 'is_stop' in self.filter: + attrs['is_stop'] = tok.is_stop + if all or 'cluster' in self.filter: + attrs['cluster'] = tok.cluster + return attrs class Sentences(object): def __init__(self, nlp, text): diff --git a/displacy_service/server.py b/displacy_service/server.py index 6310797..33ef440 100644 --- a/displacy_service/server.py +++ b/displacy_service/server.py @@ -9,8 +9,8 @@ import spacy.about import spacy.util -from .parse import Parse, Entities, Sentences +from .parse import Parse, Entities, Sentences, Tokens MODELS = os.getenv("languages", "").split() @@ -155,6 +155,28 @@ def on_post(self, req, resp): resp.status = falcon.HTTP_500 +class TaggerResource(object): + """Returns tokens.""" + + def on_post(self, req, resp): + req_body = req.stream.read() + json_data = json.loads(req_body.decode('utf8')) + text = json_data.get('text') + model_name = json_data.get('model', 'en') + include_sentences = json_data.get('include_sentences', False) + attr_filter = json_data.get('attr_filter', []) + try: + model = get_model(model_name) + tokens = Tokens(model, text, include_sentences,attr_filter) + resp.body = json.dumps(tokens.to_json(), + indent=2) + resp.content_type = 'application/json' + resp.append_header('Access-Control-Allow-Origin', "*") + resp.status = falcon.HTTP_200 + except Exception as err: + resp.status = falcon.HTTP_500 + + class SentsResources(object): """Returns sentences""" @@ -169,16 +191,17 @@ def on_post(self, req, resp): sentences = Sentences(model, text) resp.body = json.dumps(sentences.to_json(), sort_keys=True, indent=2) - resp.content_type = 'text/string' + resp.content_type = 'application/json' resp.append_header('Access-Control-Allow-Origin', "*") resp.status = falcon.HTTP_200 - except Exception: + except Exception as err: resp.status = falcon.HTTP_500 APP = falcon.API() APP.add_route('/dep', DepResource()) APP.add_route('/ent', EntResource()) +APP.add_route('/tag', TaggerResource()) APP.add_route('/sents', SentsResources()) APP.add_route('/{model_name}/schema', SchemaResource()) APP.add_route('/models', ModelsResource()) diff --git a/displacy_service_tests/test_server.py b/displacy_service_tests/test_server.py index 0e145fb..13edffc 100644 --- a/displacy_service_tests/test_server.py +++ b/displacy_service_tests/test_server.py @@ -5,41 +5,107 @@ class TestAPI(falcon.testing.TestCase): - def __init__(self): - 
self.api = APP - - -def test_deps(): - test_api = TestAPI() - result = test_api.simulate_post( - path='/dep', - body='''{"text": "This is a test.", "model": "en", - "collapse_punctuation": false, - "collapse_phrases": false}''' - ) - result = json.loads(result.text) - words = [w['text'] for w in result['words']] - assert words == ["This", "is", "a", "test", "."] - - -def test_ents(): - test_api = TestAPI() - result = test_api.simulate_post( - path='/ent', - body='''{"text": "What a great company Google is.", - "model": "en"}''') - ents = json.loads(result.text) - assert ents == [ - {"start": 21, "end": 27, "type": "ORG", "text": "Google"}] - - -def test_sents(): - test_api = TestAPI() - sentences = test_api.simulate_post( - path='/sent', - body='''{"text": "This a test that should split into sentences! - This is the second. Is this the third?", "model": "en"}''' - ) - - assert sentences == ['This a test that should split into sentences!', - 'This is the second.', 'Is this the third?'] + def setUp(self): + super(TestAPI, self).setUp() + self.app = APP + + def test_deps(self): + result = self.simulate_post( + path='/dep', + body='''{"text": "This is a test.", "model": "en", + "collapse_punctuation": false, + "collapse_phrases": false}''' + ) + result = json.loads(result.text) + words = [w['text'] for w in result['words']] + assert words == ["This", "is", "a", "test", "."] + + def test_ents(self): + result = self.simulate_post( + path='/ent', + body='''{"text": "What a great company Google is.", + "model": "en"}''') + ents = json.loads(result.text) + assert ents == [ + {"start": 21, "end": 27, "type": "ORG", "text": "Google"}] + + def test_tag_full(self): + toks = self.simulate_post( + path='/tag', + json={ + "text": "Foo", + "model": "en", + }).json + assert toks[0] == {'start': 0, 'end': 3, 'text': 'Foo', 'orth' : 'Foo', 'lemma': 'foo', 'pos': 'PROPN', 'tag': 'NNP', + 'dep': 'ROOT', 'ent_type': '', 'ent_iob': 'O', 'norm': 'foo', + 'lower': 'foo', 'shape': 'Xxx', 'prefix': 'F', 'suffix': 'Foo', 'is_alpha': True, + 'is_ascii': True, 'is_digit': False, 'is_lower': False, 'is_upper': False, 'is_title': True, + 'is_punct': False, 'is_left_punct': False, 'is_right_punct': False, 'is_space': False, + 'is_bracket': False, 'is_quote': False, 'is_currency': False, 'like_url': False, + 'like_num': False, 'like_email': False, 'is_oov': True, 'is_stop': False, 'cluster': 0} + + def test_tag_with_filter(self): + toks = self.simulate_post( + path='/tag', + json={ + "text": "Fed raises interest rates 0.5 percent.", + "model": "en", + "attr_filter": ["text", "start", "end", "lemma", "pos"] + }).json + + assert toks == [{'start': 0, 'end': 3, 'text': 'Fed', 'lemma': 'fed', 'pos': 'PROPN'}, + {'start': 4, 'end': 10, 'text': 'raises', 'lemma': 'raise', 'pos': 'VERB'}, + {'start': 11, 'end': 19, 'text': 'interest', 'lemma': 'interest', 'pos': 'NOUN'}, + {'start': 20, 'end': 25, 'text': 'rates', 'lemma': 'rate', 'pos': 'NOUN'}, + {'start': 26, 'end': 29, 'text': '0.5', 'lemma': '0.5', 'pos': 'NUM'}, + {'start': 30, 'end': 37, 'text': 'percent', 'lemma': 'percent', 'pos': 'NOUN'}, + {'start': 37, 'end': 38, 'text': '.', 'lemma': '.', 'pos': 'PUNCT'}] + + def test_tag_with_sents(self): + sents = self.simulate_post( + path='/tag', + json={ + "text": "This a test that should split into sentences! 
This is the second.", + "model": "en", + "include_sentences": True, + "attr_filter": ["text", "start", "end", "lemma", "pos"] + }).json + assert sents == [ + {'text': 'This a test that should split into sentences!', + 'start': 0, + 'end': 45, + 'tokens': [ + {'text': 'This', 'start': 0, 'end': 4, 'lemma': 'this', 'pos': 'DET'}, + {'text': 'a', 'start': 5, 'end': 6, 'lemma': 'a', 'pos': 'DET'}, + {'text': 'test', 'start': 7, 'end': 11, 'lemma': 'test', 'pos': 'NOUN'}, + {'text': 'that', 'start': 12, 'end': 16, 'lemma': 'that', 'pos': 'ADJ'}, + {'text': 'should', 'start': 17, 'end': 23, 'lemma': 'should', 'pos': 'VERB'}, + {'text': 'split', 'start': 24, 'end': 29, 'lemma': 'split', 'pos': 'VERB'}, + {'text': 'into', 'start': 30, 'end': 34, 'lemma': 'into', 'pos': 'ADP'}, + {'text': 'sentences', 'start': 35, 'end': 44, 'lemma': 'sentence', 'pos': 'NOUN'}, + {'text': '!', 'start': 44, 'end': 45, 'lemma': '!', 'pos': 'PUNCT'} + ]}, + { + 'text': 'This is the second.', + 'start': 46, + 'end': 65, + 'tokens': [ + {'text': 'This', 'start': 46, 'end': 50, 'lemma': 'this', 'pos': 'DET'}, + {'text': 'is', 'start': 51, 'end': 53, 'lemma': 'be', 'pos': 'VERB'}, + {'text': 'the', 'start': 54, 'end': 57, 'lemma': 'the', 'pos': 'DET'}, + {'text': 'second', 'start': 58, 'end': 64, 'lemma': 'second', 'pos': 'ADJ'}, + {'text': '.', 'start': 64, 'end': 65, 'lemma': '.', 'pos': 'PUNCT'} + ]} + ] + + def test_sents(self): + sentences = self.simulate_post( + path='/sents', + json={ + "text": """This a test that should split into sentences! + This is the second. Is this the third?""", + "model": "en" + } + ) + assert sentences.json == ['This a test that should split into sentences!', + 'This is the second.', 'Is this the third?'] From d910cf324bfe9d73277dd7a99e35e7c90b989e62 Mon Sep 17 00:00:00 2001 From: olivier Date: Tue, 2 Apr 2019 11:28:52 +0200 Subject: [PATCH 2/2] Add a new sentence_filter argument --- README.md | 15 +++-- displacy_service/parse.py | 85 ++++++++++++++------------- displacy_service/scripts/app.py | 3 + displacy_service/server.py | 5 +- displacy_service_tests/test_server.py | 5 +- 5 files changed, 63 insertions(+), 50 deletions(-) diff --git a/README.md b/README.md index 1deb6a4..ceaa999 100644 --- a/README.md +++ b/README.md @@ -236,7 +236,7 @@ Example request: "text": "Fed raises interest rates 0.5 percent.", "model": "en" "include_sentences": false, - "attr_filter": ["text", "start", "end", "lemma", "pos"] + "token_filter": ["text", "start", "end", "lemma", "pos"] } ``` @@ -245,7 +245,8 @@ Example request: | `text` | string | text to be parsed | | `model` | string | identifier string for a model installed on the server | | `include_sentences` | boolean | include sentence layer | -| `attr_filter` | array | array of token attributes to include in response | +| `token_filter` | array | array of token attributes to include in response | +| `sentence_filter` | array | array of sentence attributes to include in response | Example request using the Python [Requests library](http://docs.python-requests.org/en/master/): @@ -256,7 +257,7 @@ import requests url = "http://localhost:8000/ent" message_text = "Fed raises interest rates 0.5 percent." 
headers = {'content-type': 'application/json'} -d = {'text': message_text, 'model': 'en', 'include_sentences': False, "attr_filter": ['text', 'start', 'end', 'lemma', 'pos']} +d = {'text': message_text, 'model': 'en', 'include_sentences': False, "token_filter": ['text', 'start', 'end', 'lemma', 'pos']} response = requests.post(url, data=json.dumps(d), headers=headers) r = response.json() @@ -314,7 +315,7 @@ Example response: | `cluster` | string | | ``` -curl -s localhost:8000/tag -d '{"text":"This a test that should split into sentences! This is the second.", "model":"en", "include_sentences": true, "attr_filter": ["text", "start", "end", "lemma", "pos"]}' +curl -s localhost:8000/tag -d '{"text":"This a test that should split into sentences! This is the second.", "model":"en", "include_sentences": true, "token_filter": ["text", "start", "end", "lemma", "pos"], "sentence_filter": ["text", "start", "end", "tokens"]}' ``` ```json @@ -346,6 +347,12 @@ curl -s localhost:8000/tag -d '{"text":"This a test that should split into sente ]} ] ``` +| Name | Type | Description | +| ---------------- | ------- | -------------------------------------------- | +| `end` | integer | character offset the sentence ends **after** | +| `start` | integer | character offset the sentence starts **on** | +| `text` | string | | +| `tokens` | array | | --- diff --git a/displacy_service/parse.py b/displacy_service/parse.py index 8cfbb44..a4760e4 100644 --- a/displacy_service/parse.py +++ b/displacy_service/parse.py @@ -64,9 +64,10 @@ def to_json(self): ] class Tokens(object): - def __init__(self, nlp, text, include_sentences, attr_filter): + def __init__(self, nlp, text, include_sentences, token_filter, sentence_filter): self.doc = nlp(text) - self.filter = attr_filter + self.token_filter = token_filter + self.sentence_filter = sentence_filter self.inc_sents = include_sentences def to_json(self): @@ -76,88 +77,88 @@ def to_json(self): return [ self.token_to_dict(tok) for tok in self.doc ] def sent_to_dict(self, sent): - all = len(self.filter) == 0 + all = len(self.sentence_filter) == 0 attrs = { - 'text': sent.text, 'start': sent.start_char, - 'end': sent.end_char, - "tokens" : [ self.token_to_dict(tok) for tok in sent ] + 'end': sent.end_char } - # if all or 'vector' in self.filter: - # attrs['vector'] = sent.vector + if all or 'text' in self.sentence_filter: + attrs['text'] = sent.text + if all or 'tokens' in self.sentence_filter: + attrs['tokens'] = [ self.token_to_dict(tok) for tok in sent ] return attrs def token_to_dict(self, tok): - all = len(self.filter) == 0 + all = len(self.token_filter) == 0 attrs = { 'start': tok.idx, 'end': tok.idx + len(tok), } - if all or 'text' in self.filter: + if all or 'text' in self.token_filter: attrs['text'] = tok.text - if all or 'orth' in self.filter: + if all or 'orth' in self.token_filter: attrs['orth'] = tok.orth_ - if all or 'lemma' in self.filter: + if all or 'lemma' in self.token_filter: attrs['lemma'] = tok.lemma_ - if all or 'pos' in self.filter: + if all or 'pos' in self.token_filter: attrs['pos'] = tok.pos_ - if all or 'tag' in self.filter: + if all or 'tag' in self.token_filter: attrs['tag'] = tok.tag_ - if all or 'dep' in self.filter: + if all or 'dep' in self.token_filter: attrs['dep'] = tok.dep_ - # if all or 'vector' in self.filter: + # if all or 'vector' in self.token_filter: # attrs['vector'] = tok.vector.tolist() - if all or 'ent_type' in self.filter: + if all or 'ent_type' in self.token_filter: attrs['ent_type'] = tok.ent_type_ - if all or 'ent_iob_' in 
self.filter: + if all or 'ent_iob_' in self.token_filter: attrs['ent_iob'] = tok.ent_iob_ - if all or 'norm' in self.filter: + if all or 'norm' in self.token_filter: attrs['norm'] = tok.norm_ - if all or 'lower' in self.filter: + if all or 'lower' in self.token_filter: attrs['lower'] = tok.lower_ - if all or 'shape' in self.filter: + if all or 'shape' in self.token_filter: attrs['shape'] = tok.shape_ - if all or 'prefix' in self.filter: + if all or 'prefix' in self.token_filter: attrs['prefix'] = tok.prefix_ - if all or 'suffix' in self.filter: + if all or 'suffix' in self.token_filter: attrs['suffix'] = tok.suffix_ - if all or 'is_alpha' in self.filter: + if all or 'is_alpha' in self.token_filter: attrs['is_alpha'] = tok.is_alpha - if all or 'is_ascii' in self.filter: + if all or 'is_ascii' in self.token_filter: attrs['is_ascii'] = tok.is_ascii - if all or 'is_digit' in self.filter: + if all or 'is_digit' in self.token_filter: attrs['is_digit'] = tok.is_digit - if all or 'is_lower' in self.filter: + if all or 'is_lower' in self.token_filter: attrs['is_lower'] = tok.is_lower - if all or 'is_upper' in self.filter: + if all or 'is_upper' in self.token_filter: attrs['is_upper'] = tok.is_upper - if all or 'is_title' in self.filter: + if all or 'is_title' in self.token_filter: attrs['is_title'] = tok.is_title - if all or 'is_punct' in self.filter: + if all or 'is_punct' in self.token_filter: attrs['is_punct'] = tok.is_punct - if all or 'is_left_punct' in self.filter: + if all or 'is_left_punct' in self.token_filter: attrs['is_left_punct'] = tok.is_left_punct - if all or 'is_right_punct' in self.filter: + if all or 'is_right_punct' in self.token_filter: attrs['is_right_punct'] = tok.is_right_punct - if all or 'is_space' in self.filter: + if all or 'is_space' in self.token_filter: attrs['is_space'] = tok.is_space - if all or 'is_bracket' in self.filter: + if all or 'is_bracket' in self.token_filter: attrs['is_bracket'] = tok.is_bracket - if all or 'is_quote' in self.filter: + if all or 'is_quote' in self.token_filter: attrs['is_quote'] = tok.is_quote - if all or 'is_currency' in self.filter: + if all or 'is_currency' in self.token_filter: attrs['is_currency'] = tok.is_currency - if all or 'like_url' in self.filter: + if all or 'like_url' in self.token_filter: attrs['like_url'] = tok.like_url - if all or 'like_num' in self.filter: + if all or 'like_num' in self.token_filter: attrs['like_num'] = tok.like_num - if all or 'like_email' in self.filter: + if all or 'like_email' in self.token_filter: attrs['like_email'] = tok.like_email - if all or 'is_oov' in self.filter: + if all or 'is_oov' in self.token_filter: attrs['is_oov'] = tok.is_oov - if all or 'is_stop' in self.filter: + if all or 'is_stop' in self.token_filter: attrs['is_stop'] = tok.is_stop - if all or 'cluster' in self.filter: + if all or 'cluster' in self.token_filter: attrs['cluster'] = tok.cluster return attrs diff --git a/displacy_service/scripts/app.py b/displacy_service/scripts/app.py index 6d4371d..4a693dd 100644 --- a/displacy_service/scripts/app.py +++ b/displacy_service/scripts/app.py @@ -11,3 +11,6 @@ def run(): print("Loaded all models. 
Starting HTTP server.") httpd = simple_server.make_server('0.0.0.0', 8000, APP) httpd.serve_forever() + +if __name__ == '__main__': + run() \ No newline at end of file diff --git a/displacy_service/server.py b/displacy_service/server.py index 33ef440..f3e1360 100644 --- a/displacy_service/server.py +++ b/displacy_service/server.py @@ -164,10 +164,11 @@ def on_post(self, req, resp): text = json_data.get('text') model_name = json_data.get('model', 'en') include_sentences = json_data.get('include_sentences', False) - attr_filter = json_data.get('attr_filter', []) + token_filter = json_data.get('token_filter', []) + sentence_filter = json_data.get('sentence_filter', []) try: model = get_model(model_name) - tokens = Tokens(model, text, include_sentences,attr_filter) + tokens = Tokens(model, text, include_sentences,token_filter,sentence_filter) resp.body = json.dumps(tokens.to_json(), indent=2) resp.content_type = 'application/json' diff --git a/displacy_service_tests/test_server.py b/displacy_service_tests/test_server.py index 13edffc..3e97997 100644 --- a/displacy_service_tests/test_server.py +++ b/displacy_service_tests/test_server.py @@ -50,7 +50,7 @@ def test_tag_with_filter(self): json={ "text": "Fed raises interest rates 0.5 percent.", "model": "en", - "attr_filter": ["text", "start", "end", "lemma", "pos"] + "token_filter": ["text", "start", "end", "lemma", "pos"] }).json assert toks == [{'start': 0, 'end': 3, 'text': 'Fed', 'lemma': 'fed', 'pos': 'PROPN'}, @@ -68,7 +68,8 @@ def test_tag_with_sents(self): "text": "This a test that should split into sentences! This is the second.", "model": "en", "include_sentences": True, - "attr_filter": ["text", "start", "end", "lemma", "pos"] + "token_filter": ["text", "start", "end", "lemma", "pos"], + "sentence_filter": ["text", "start", "end", "tokens"], }).json assert sents == [ {'text': 'This a test that should split into sentences!',