From 94160dc4c77326c1d20d4bf724c0fa1bd28b10ce Mon Sep 17 00:00:00 2001
From: kjwon15
Date: Thu, 29 May 2014 23:25:33 +0900
Subject: [PATCH] Let crawler check url is reachable.

---
 libearth/crawler.py | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/libearth/crawler.py b/libearth/crawler.py
index 534b097..be21470 100644
--- a/libearth/crawler.py
+++ b/libearth/crawler.py
@@ -7,6 +7,7 @@
 import collections
 import logging
 import sys
+from contextlib import closing
 
 try:
     import urllib.request as urllib2
@@ -24,7 +25,24 @@
 from .version import VERSION
 
 
-__all__ = 'CrawlError', 'CrawlResult', 'crawl', 'get_feed'
+__all__ = ('CrawlError', 'CrawlResult', 'check_reachable_url', 'crawl',
+           'get_feed')
+
+
+def check_reachable_url(url):
+    """Return ``True`` if ``url`` can be opened and read, else ``False``.
+
+    Errors raised while opening or reading are swallowed and reported as
+    ``False``; only :exc:`Exception` subclasses are caught, so
+    :exc:`KeyboardInterrupt` and :exc:`SystemExit` still propagate.
+
+    """
+    try:
+        with closing(open_url(url)) as fp:
+            fp.read()
+        return True
+    except Exception:
+        return False
 
 
 def open_url(url):