-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathserver.py
More file actions
130 lines (108 loc) · 4.43 KB
/
server.py
File metadata and controls
130 lines (108 loc) · 4.43 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# -*- coding: utf-8 -*-
"""
Created on Sun Apr 17 17:20:34 2016
@author: Rohan Kulkarni
@email : rohan.kulkarni@columbia.edu
"""
from __future__ import print_function
import sys
from flask import Flask,render_template
from mechanize import Browser
from goose import Goose
from multiprocessing import Pool,cpu_count
import math
import praw
from espncricinfo.summary import Summary
from bs4 import BeautifulSoup
from urllib import urlopen
app = Flask(__name__,static_url_path='/static')
class HackerNews():
    """Wraps a mechanize Browser parked on the Hacker News front page and
    a Goose extractor used to pull article text for each story link."""

    def __init__(self, browser, goose):
        self.browser_obj = browser
        # Ignore robots.txt and present a desktop Firefox user agent so
        # the site serves the regular front page.
        self.browser_obj.set_handle_robots(False)
        self.browser_obj.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]
        self.browser_obj.open('https://news.ycombinator.com/')
        self.text_map = list()
        self.goose = goose

    def set_filters(self, filter_words):
        """Remember a collection of filter words for later use."""
        self.filters = filter_words

    def get_links(self):
        """Collect every link on the current page whose URL starts with http."""
        self.news_links = list(self.browser_obj.links(url_regex="^http{1}"))

    def strip_inlinks(self):
        """Drop the first link and the last two (site chrome, not stories)."""
        self.news_links = self.news_links[1:-2]

    def print_textmap(self):
        """Write every collected entry in text_map to stderr."""
        for entry in self.text_map:
            print(entry, file=sys.stderr)
# Module-level singletons shared by the route handlers below; the
# multiprocessing Pool workers each inherit their own copy of these.
browser = Browser()
goose = Goose()
hn = HackerNews(browser,goose)
def extract_link(link):
    """Follow *link* with the shared browser and extract the article.

    Returns a tuple (url, title, first-500-chars-of-cleaned-text), or
    None when the link cannot be fetched or parsed.  Runs inside a
    multiprocessing Pool worker, which has its own copy of the
    module-level browser/goose objects.
    """
    global hn
    global browser
    global goose
    try:
        browser.follow_link(link)
        url = browser.geturl()
        article = goose.extract(url=url)
        print(url, file=sys.stderr)
        browser.back()
        return (url, article.title, article.cleaned_text[:500])
    except Exception as exc:
        # Broken links / extraction failures are expected for some
        # stories: log and skip rather than kill the worker.  (Was a
        # bare ``except:``, which silently swallowed everything,
        # including KeyboardInterrupt and SystemExit.)
        print('extract_link failed for %r: %s' % (link, exc), file=sys.stderr)
        return None
def extract_reddit_link(x):
    """Build a {'title', 'url', 'text'} dict for one reddit submission,
    truncating the Goose-extracted article body to 500 characters."""
    body = goose.extract(url=x.url).cleaned_text[:500]
    return {'title': x.title,
            'url': x.url,
            'text': body + '...'}
class AppStatus():
    """Tracks which results page the app is currently showing."""

    def __init__(self, page_number):
        # Page the user is currently viewing.
        self.current_page = page_number
@app.route('/')
def loadInitialResults():
    """Front page: scrape HN, extract the top 25 stories in parallel,
    and render them via homepage.html (10 stories per page)."""
    hn.get_links()
    hn.strip_inlinks()
    numProc = cpu_count()*2
    pool = Pool(processes=numProc)
    try:
        initial_res = pool.map(extract_link,hn.news_links[:25])
    finally:
        # The original leaked a fresh pool of worker processes on every
        # request; shut the workers down explicitly.
        pool.close()
        pool.join()
    # extract_link returns None for stories it could not fetch.
    result = [x for x in initial_res if x is not None]
    news = [{'title':x[1],'url':x[0],'text':x[2]+'...'} for x in result]
    obj = dict()
    obj['link_data'] = news
    # Pagination links: pages 2..N at 10 stories per page (page 1 is
    # what this handler renders directly).
    obj['num_pages'] = range(2,int(math.ceil((float(len(result))/10.0)+1)))
    return render_template('homepage.html',returnObj=obj)
@app.route('/reddit_page/')
def loadRedditResults():
    """Reddit page: pull the 15 hottest /r/worldnews posts and extract
    their article text in parallel, then render homepage.html."""
    reddit = praw.Reddit(user_agent='rohan_news_client')
    submissions = reddit.get_subreddit('worldnews').get_hot(limit=15)
    numProc = cpu_count()*2
    pool = Pool(processes=numProc)
    try:
        news = pool.map(extract_reddit_link,submissions)
    finally:
        # Shut the worker processes down instead of leaking a pool per
        # request (the original never closed it).
        pool.close()
        pool.join()
    obj = dict()
    obj['link_data'] = news
    # Pagination links: pages 2..N at 10 items per page.
    obj['num_pages'] = range(2,int(math.ceil((float(len(news))/10.0)+1)))
    return render_template('homepage.html',returnObj=obj)
@app.route('/live_cricket/')
def loadCricketResults():
    """Cricket page: render the espncricinfo summary of every match."""
    summary = Summary()
    obj = dict()
    obj['match_data'] = list(summary.all_matches)
    return render_template('cricketpage.html',returnObj = obj)
@app.route('/live_football/')
def loadFootballResults():
    """Football page: scrape livescores.com and render each score row."""
    # urlopen returns a file-like response; close it instead of leaking
    # the socket (the original never closed it).
    response = urlopen("http://www.livescores.com")
    try:
        football_page = response.read()
    finally:
        response.close()
    football_soup = BeautifulSoup(football_page,'lxml')
    obj = dict()
    # Each 'row-gray' div holds one match line (teams + score).
    match_list = [row.text.strip()
                  for row in football_soup.findAll('div',class_="row-gray")]
    obj['match_data'] = match_list
    return render_template('footballpage.html',returnObj = obj)
if __name__ == '__main__':
    # Development entry point: Flask's built-in server on localhost:8078.
    # NOTE(review): debug=True enables the interactive Werkzeug debugger
    # (arbitrary code execution) -- never use in production.
    app.debug = True
    app.run(host='localhost',port=8078)