-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathdiffbot.py
More file actions
36 lines (28 loc) · 1.19 KB
/
diffbot.py
File metadata and controls
36 lines (28 loc) · 1.19 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import urllib2
import json
import keys
def extract_article(url, diffbotAPIkey):
    """Fetch data about ``url`` from the Diffbot article API.

    Args:
        url: Address of the page to extract. Pass it unencoded; it is
            percent-encoded here before being placed in the query string,
            so it may itself contain ``?``, ``&`` or ``#``.
        diffbotAPIkey: Diffbot token, sent as the ``token`` query parameter.

    Returns:
        The response body decoded with ``json.loads``.

    Raises:
        urllib2.URLError: If the request cannot be completed.
        ValueError: If the response body is not valid JSON.
    """
    # Imported locally so this function does not depend on imports the
    # module does not already have at the top of the file.
    from contextlib import closing
    from urllib import urlencode

    # Encode the parameters instead of concatenating them: a raw "&" or "#"
    # inside the target url would otherwise be parsed as part of the
    # Diffbot request and truncate or corrupt the url being analysed.
    query = urlencode([("token", diffbotAPIkey), ("url", url)])
    request = "http://www.diffbot.com/api/article?" + query

    # urllib2 responses are not context managers in Python 2; closing()
    # releases the connection even if read() raises.
    with closing(urllib2.urlopen(request)) as connection:
        payload = connection.read()
    return json.loads(payload)
def is_article(url, diffbotAPIkey):
    """Report whether the Diffbot analyze API classifies ``url`` as an article.

    Args:
        url: Address of the page to classify. Pass it unencoded; it is
            percent-encoded here before being placed in the query string.
        diffbotAPIkey: Diffbot token, sent as the ``token`` query parameter.

    Returns:
        True if the response's ``"type"`` field equals ``"article"``.
        False for any other value, and also when the field is absent.

    Raises:
        urllib2.URLError: If the request cannot be completed.
        ValueError: If the response body is not valid JSON.
    """
    # Imported locally so this function does not depend on imports the
    # module does not already have at the top of the file.
    from contextlib import closing
    from urllib import urlencode

    # Encode the parameters instead of concatenating them: a raw "&" or "#"
    # inside the target url would otherwise be parsed as part of the
    # Diffbot request and truncate or corrupt the url being analysed.
    query = urlencode([("token", diffbotAPIkey), ("url", url)])
    request = "http://www.diffbot.com/api/analyze?" + query

    # urllib2 responses are not context managers in Python 2; closing()
    # releases the connection even if read() raises.
    with closing(urllib2.urlopen(request)) as connection:
        analysis = json.loads(connection.read())

    # .get() instead of indexing: a response without a "type" key should
    # mean "not an article" rather than raise KeyError.
    # NOTE(review): assumes the response is a JSON object (dict) - confirm.
    return analysis.get("type") == "article"
### Example of use
def main():
    """Run the example: extract one hard-coded article and print it as JSON.

    The token is read from ``keys.diffbotAPIkey``. The response is printed
    with sorted keys and a 4-space indent.
    """
    api_token = keys.diffbotAPIkey
    article_url = "http://www.nytimes.com/2013/06/02/health/colonoscopies-explain-why-us-leads-the-world-in-health-expenditures.html"
    article = extract_article(article_url, api_token)
    pretty = json.dumps(article, sort_keys=True, indent=4, separators=(',', ': '))
    # A single parenthesised argument prints exactly what the bare
    # print statement would.
    print(pretty)


# Run the example only when executed as a script, not when imported.
if __name__ == "__main__":
    main()