From 56da34a5e264f3fe9b198f43a976cca43052c967 Mon Sep 17 00:00:00 2001
From: yma-het <yma.het@gmail.com>
Date: Sat, 11 Jul 2015 23:15:45 +0300
Subject: [PATCH 1/3] added decoding layer for HTML-encoded unicode that comes
 from feedparser

---
 util.py | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 64 insertions(+), 1 deletion(-)

diff --git a/util.py b/util.py
index 61ff2a5..9cb12e1 100644
--- a/util.py
+++ b/util.py
@@ -8,6 +8,7 @@
 import urlparse
 import threading
 import feedparser
+from HTMLParser import HTMLParser
 from htmlentitydefs import name2codepoint
 from settings import settings
 
@@ -75,13 +76,75 @@ def abspath(path):
     path = os.path.abspath(path)
     path = 'file:///%s' % path.replace('\\', '/')
     return path
+
+def unescHTMLSpcChr(str):
+    return HTMLParser().unescape(str)
+
+def unescapeRSSObject(fpDict):
+    '''
+    This function tries to find all human readable
+    strings in dict, that has been returned by feedparser
+    and if string is in dict, replaces it with HTML
+    escaped symbols.
+    '''
+
+    if 'author' in fpDict:
+        fpDict['author'] = unescHTMLSpcChr(fpDict['author'])
+
+    if 'author_detail' in fpDict:
+        if 'name' in fpDict['author_detail']:
+            fpDict['author_detail']['name'] = unescHTMLSpcChr(fpDict['author_detail']['name'])
+
+    if 'comments' in fpDict:
+        fpDict['comments'] = unescHTMLSpcChr(fpDict['comments'])
+
+    if 'content' in fpDict:
+        fpDict['content'] = unescHTMLSpcChr(fpDict['content'])
+
+    if 'contributors' in fpDict:
+        fpDict['contributors'] = unescHTMLSpcChr(fpDict['contributors'])
+
+    if 'summary' in fpDict:
+        fpDict['summary'] = unescHTMLSpcChr(fpDict['summary'])
+
+    if 'summary_detail' in fpDict:
+        if 'value' in fpDict['summary_detail']:
+            fpDict['summary_detail']['value'] = unescHTMLSpcChr(fpDict['summary_detail']['value'])
+
+    if 'summary_detail' in fpDict:
+        if 'value' in fpDict['summary_detail']:
+            fpDict['summary_detail']['value'] = unescHTMLSpcChr(fpDict['summary_detail']['value'])
+
+    if 'tags' in fpDict:
+        for index, tag in enumerate(fpDict['tags']):
+            if 'term' in tag:
+                fpDict['tags'][index]['term'] = unescHTMLSpcChr(fpDict['tags'][index]['term'])
+            if 'label' in tag:
+                fpDict['tags'][index]['label'] = unescHTMLSpcChr(fpDict['tags'][index]['label'])
+
+    if 'title' in fpDict:
+        fpDict['title'] = unescHTMLSpcChr(fpDict['title'])
+
+    if 'title_detail' in fpDict:
+        if 'value' in fpDict['title_detail']:
+            fpDict['title_detail']['value'] = unescHTMLSpcChr(fpDict['title_detail']['value'])
+
+    return fpDict
+
+def decodeRSS(rss):
+    for index, record in enumerate(rss):
+       rss[index] = unescapeRSSObject(record)
+    return rss
     
 def parse(url, username=None, password=None, etag=None, modified=None):
     agent = settings.USER_AGENT
     handlers = [get_proxy()]
     if username and password:
         url = insert_credentials(url, username, password)
-    return feedparser.parse(url, etag=etag, modified=modified, agent=agent, handlers=handlers)
+    response = feedparser.parse(url, etag=etag, modified=modified, agent=agent, handlers=handlers)
+    if "entries" in response:
+        response["entries"] = decodeRSS(response["entries"])
+    return response
     
 def is_valid_feed(data):
     entries = get(data, 'entries', [])

From 94da1276ee4cb8d60fb07613c2d315f5c9ac7e42 Mon Sep 17 00:00:00 2001
From: yma-het <yma.het@gmail.com>
Date: Sun, 12 Jul 2015 22:24:39 +0300
Subject: [PATCH 2/3] small code refactoring and fixed incorrect behaviour with
 None-type objects

---
 util.py | 81 +++++++++++++++++++++++++++++----------------------------
 1 file changed, 41 insertions(+), 40 deletions(-)

diff --git a/util.py b/util.py
index 9cb12e1..6621179 100644
--- a/util.py
+++ b/util.py
@@ -9,6 +9,7 @@
 import threading
 import feedparser
 from HTMLParser import HTMLParser
+import operator
 from htmlentitydefs import name2codepoint
 from settings import settings
 
@@ -80,6 +81,22 @@ def abspath(path):
 def unescHTMLSpcChr(str):
     return HTMLParser().unescape(str)
 
+def keyExistsAndNotNull(fpElement, *keys):
+    if len(keys) == 1:
+        if keys[0] in fpElement:
+            if fpElement[keys[0]]:
+                return True
+        return False
+    if  len(keys) == 2:
+        if keys[0] in fpElement:
+            if fpElement[keys[0]]:
+                if keys[1] in fpElement[keys[0]]:
+                    if fpElement[keys[0]][keys[1]]:
+                        return True
+        return False
+    else:
+        raise Exception("Unknown signature of doIfExists() call!")
+
 def unescapeRSSObject(fpDict):
     '''
     This function tries to find all human readable
@@ -87,48 +104,32 @@ def unescapeRSSObject(fpDict):
     and if string is in dict, replaces it with HTML
     escaped symbols.
     '''
+    unescapeToVar = lambda item, key: operator.setitem(item, key, unescHTMLSpcChr(item[key]))
 
-    if 'author' in fpDict:
-        fpDict['author'] = unescHTMLSpcChr(fpDict['author'])
-
-    if 'author_detail' in fpDict:
-        if 'name' in fpDict['author_detail']:
-            fpDict['author_detail']['name'] = unescHTMLSpcChr(fpDict['author_detail']['name'])
-
-    if 'comments' in fpDict:
-        fpDict['comments'] = unescHTMLSpcChr(fpDict['comments'])
-
-    if 'content' in fpDict:
-        fpDict['content'] = unescHTMLSpcChr(fpDict['content'])
-
-    if 'contributors' in fpDict:
-        fpDict['contributors'] = unescHTMLSpcChr(fpDict['contributors'])
-
-    if 'summary' in fpDict:
-        fpDict['summary'] = unescHTMLSpcChr(fpDict['summary'])
-
-    if 'summary_detail' in fpDict:
-        if 'value' in fpDict['summary_detail']:
-            fpDict['summary_detail']['value'] = unescHTMLSpcChr(fpDict['summary_detail']['value'])
-
-    if 'summary_detail' in fpDict:
-        if 'value' in fpDict['summary_detail']:
-            fpDict['summary_detail']['value'] = unescHTMLSpcChr(fpDict['summary_detail']['value'])
-
-    if 'tags' in fpDict:
+    if keyExistsAndNotNull(fpDict, 'author'):
+        unescapeToVar(fpDict, 'author')
+    if keyExistsAndNotNull(fpDict, 'author_detail', 'name'):
+        unescapeToVar(fpDict['author_detail'], 'name')
+    if keyExistsAndNotNull(fpDict, 'comments'):
+        unescapeToVar(fpDict, 'comments')
+    if keyExistsAndNotNull(fpDict, 'content'):
+        unescapeToVar(fpDict, 'content')
+    if keyExistsAndNotNull(fpDict, 'contributors'):
+        unescapeToVar(fpDict, 'contributors')
+    if keyExistsAndNotNull(fpDict, 'summary'):
+        unescapeToVar(fpDict, 'summary')
+    if keyExistsAndNotNull(fpDict, 'summary_detail', 'value'):
+        unescapeToVar(fpDict['summary_detail'], 'value')
+    if keyExistsAndNotNull(fpDict, 'tags'):
         for index, tag in enumerate(fpDict['tags']):
-            if 'term' in tag:
-                fpDict['tags'][index]['term'] = unescHTMLSpcChr(fpDict['tags'][index]['term'])
-            if 'label' in tag:
-                fpDict['tags'][index]['label'] = unescHTMLSpcChr(fpDict['tags'][index]['label'])
-
-    if 'title' in fpDict:
-        fpDict['title'] = unescHTMLSpcChr(fpDict['title'])
-
-    if 'title_detail' in fpDict:
-        if 'value' in fpDict['title_detail']:
-            fpDict['title_detail']['value'] = unescHTMLSpcChr(fpDict['title_detail']['value'])
-
+            if keyExistsAndNotNull(tag, 'term'):
+                unescapeToVar(fpDict['tags'][index], 'term')
+            if keyExistsAndNotNull(tag, 'label'):
+                unescapeToVar(fpDict['tags'][index], 'label')
+    if keyExistsAndNotNull(fpDict, 'title'):
+        unescapeToVar(fpDict, 'title')
+    if keyExistsAndNotNull(fpDict, 'title_detail', 'value'):
+        unescapeToVar(fpDict['title_detail'], 'value')
     return fpDict
 
 def decodeRSS(rss):

From afdde3e997187e25b7b7d55b2db2c6ed66bd2e7f Mon Sep 17 00:00:00 2001
From: yma-het <yma.het@gmail.com>
Date: Fri, 17 Jul 2015 01:07:48 +0300
Subject: [PATCH 3/3] added support for filtering by category

---
 feeds.py   |  2 ++
 filters.py | 17 ++++++++++++++++-
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/feeds.py b/feeds.py
index ef87bb2..2b340e8 100644
--- a/feeds.py
+++ b/feeds.py
@@ -29,6 +29,7 @@ def __init__(self, feed, id):
         self.description = ''
         self.link = ''
         self.author = ''
+        self.categories = []
         self.read = False
     @property
     def time_since(self):
@@ -136,6 +137,7 @@ def poll(self, timestamp, filters):
             item.description = util.format(util.get(entry, 'description', ''), settings.POPUP_BODY_LENGTH)
             item.link = util.get(entry, 'link', '')
             item.author = util.format(util.get(entry, 'author', '')) # TODO: max length
+            item.categories = util.get(entry, 'tags', [])
             if all(filter.filter(item) for filter in filters):
                 result.append(item)
         self.clean_cache(settings.FEED_CACHE_SIZE)
diff --git a/filters.py b/filters.py
index 40da2d4..a13abdc 100644
--- a/filters.py
+++ b/filters.py
@@ -8,6 +8,7 @@
 LINK = 2
 AUTHOR = 4
 CONTENT = 8
+CATEGORY = 16
 
 TYPES = {
     None: INCLUDE,
@@ -21,6 +22,7 @@
     'link:': LINK,
     'author:': AUTHOR,
     'content:': CONTENT,
+    'category:': CATEGORY,
 }
 
 TYPE_STR = {
@@ -34,6 +36,7 @@
     LINK: 'link',
     AUTHOR: 'author',
     CONTENT: 'content',
+    CATEGORY: 'category',
 }
 
 class Rule(object):
@@ -51,6 +54,12 @@ def evaluate(self, item, ignore_case=True, whole_word=True):
             strings.append(item.author)
         if self.qualifier & CONTENT:
             strings.append(item.description)
+        if self.qualifier & CATEGORY:
+            if item.categories:
+                for category_item in item.categories:
+                    if category_item:
+                        if 'term' in category_item:
+                            strings.append(category_item['term'])
         text = '\n'.join(strings)
         word = self.word
         if ignore_case:
@@ -113,6 +122,7 @@ def __str__(self):
     'LINK',
     'AUTHOR',
     'CONTENT',
+    'CATEGORY',
     'WORD',
 ] + reserved.values()
 
@@ -136,7 +146,11 @@ def t_AUTHOR(t):
 def t_CONTENT(t):
     r'content:'
     return t
-    
+
+def t_CATEGORY(t):
+    r'category:'
+    return t
+
 def t_WORD(t):
     r'(\'[^\']+\') | (\"[^\"]+\") | ([^ \n\t\r+\-()\'"]+)'
     t.type = reserved.get(t.value, 'WORD')
@@ -197,6 +211,7 @@ def p_qualifier(t):
                  | LINK 
                  | AUTHOR 
                  | CONTENT
+                 | CATEGORY
                  | empty'''
     t[0] = t[1]