From f6c22d8b8997e310458a6146bb34c25615c1f8d3 Mon Sep 17 00:00:00 2001 From: mbirky Date: Mon, 16 Nov 2015 11:45:58 -0800 Subject: [PATCH 1/2] Adding support for documents with a feed link that needs to be added to the entry URL --- krill/krill.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/krill/krill.py b/krill/krill.py index fed59c2..ed591e6 100755 --- a/krill/krill.py +++ b/krill/krill.py @@ -66,13 +66,18 @@ def get_feed_items(self, xml, url): feed_data = feedparser.parse(xml) # Default to feed URL if no title element is present feed_title = feed_data.feed.get("title", url) + feed_link = feed_data.feed.get("link") + if feed_link is None: + feed_link = '' + if feed_link.endswith("/"): + feed_link = feed_link[:-1] for entry in feed_data.entries: time = datetime.fromtimestamp(calendar.timegm(entry.published_parsed)) \ if "published_parsed" in entry else None title = entry.get("title") text = self._html_to_text(entry.description) if "description" in entry else None - link = entry.get("link") + link = feed_link + entry.get("link") # Some feeds put the text in the title element if text is None and title is not None: From 156051a7dcef31f6c52b66c44287501f82a1d9fe Mon Sep 17 00:00:00 2001 From: mbirky Date: Mon, 16 Nov 2015 17:14:06 -0800 Subject: [PATCH 2/2] Adding checks for whether the full URL is already set in the entry link --- krill/krill.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/krill/krill.py b/krill/krill.py index ed591e6..1dfff6a 100755 --- a/krill/krill.py +++ b/krill/krill.py @@ -66,6 +66,7 @@ def get_feed_items(self, xml, url): feed_data = feedparser.parse(xml) # Default to feed URL if no title element is present feed_title = feed_data.feed.get("title", url) + # Store the feed's URL in case it needs to be prepended to the entry links feed_link = feed_data.feed.get("link") if feed_link is None: feed_link = '' @@ -77,7 +78,13 @@ def get_feed_items(self, xml, url): if "published_parsed" in entry else None title = 
entry.get("title") text = self._html_to_text(entry.description) if "description" in entry else None - link = feed_link + entry.get("link") + link = entry.get("link") + + # If the entry link does not have the full URL, prepend the feed's link + if not re.match("^(http|https)://", link): + if not link.startswith("/"): + link = "/" + link + link = feed_link + link # Some feeds put the text in the title element if text is None and title is not None: