-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathprocess_stores.py
More file actions
77 lines (67 loc) · 2.33 KB
/
process_stores.py
File metadata and controls
77 lines (67 loc) · 2.33 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
from __future__ import division
import csv
import re
import os
"""
TODO:
Add the ratios
Sort output by ratios
Make HTML output
Add links to html
Add links to the set of tweets downloaded
Cut people who have fewer than X qualifying tweets
"""
def process_file(filename, database, cutoff_date=None):
    """Count number-bearing tweets in one CSV file and record the totals.

    The first CSV row is treated as a header: its first field becomes the
    key in ``database`` and its second field is stored alongside the counts
    (presumably the account's handle and display name -- confirm against
    whatever produces the CSV files).  Every following row is one tweet
    whose text is read from the third column.

    A tweet "has a number" when the first line of its text contains a run
    of digits (optionally with commas, e.g. ``1,000``) that has a space on
    both sides.  It "has a link" when the text also contains ``http``.

    Args:
        filename: Path of the CSV file to read.
        database: Dict updated in place with
            ``header[0] -> (header[1], total_tweets, tweets_with_numbers,
            tweets_with_numbers_and_links, ratio)``.  ``ratio`` is
            ``tweets_with_numbers_and_links / tweets_with_numbers``, or 0
            when no tweet has a number.
        cutoff_date: Accepted for interface compatibility; currently unused.

    Returns:
        Tuple ``(total_tweets, tweets_with_numbers,
        tweets_with_numbers_and_links)``.  A file without a usable header
        row (for example an empty file) leaves ``database`` untouched.
    """
    # Compiled once per file instead of being looked up for every row.
    # ``match`` anchors at the start and "." does not cross newlines, so
    # only the first line of a multi-line tweet is inspected.
    number_pattern = re.compile(r".* \d+,*\d* .*")

    total_tweets = 0
    tweets_with_numbers = 0
    tweets_with_numbers_and_links = 0

    with open(filename, 'rt') as f:
        reader = csv.reader(f)
        headings = next(reader, None)
        for tweet in reader:
            if len(tweet) < 3:
                # Blank lines and truncated rows carry no tweet text;
                # skip them rather than crash on tweet[2].
                continue
            total_tweets += 1
            text = tweet[2]
            if number_pattern.match(text):
                tweets_with_numbers += 1
                if "http" in text:
                    tweets_with_numbers_and_links += 1

    counts = (total_tweets,
              tweets_with_numbers,
              tweets_with_numbers_and_links)

    # An empty file has no header row, so there is no key to store under.
    # Checking explicitly (instead of swallowing TypeError) keeps unrelated
    # programming errors visible.
    if headings is None or len(headings) < 2:
        return counts

    if tweets_with_numbers == 0:
        ratio = 0
    else:
        ratio = tweets_with_numbers_and_links / tweets_with_numbers

    database[headings[0]] = (
        headings[1],
        total_tweets,
        tweets_with_numbers,
        tweets_with_numbers_and_links,
        ratio)
    return counts
def populate_database(pattern='full/*.csv'):
    """Run ``process_file`` over every CSV file matching ``pattern``.

    Args:
        pattern: Glob pattern selecting the CSV files to process.  Defaults
            to ``'full/*.csv'`` (relative to the current working
            directory), which is what this function always used before the
            parameter existed.

    Returns:
        Dict filled in by ``process_file``, one entry per distinct header
        key.  Empty when no file matches.
    """
    import glob

    database = {}
    # glob returns matches in arbitrary order; sorting makes the result
    # reproducible when two files share the same header key (last wins).
    for csv_path in sorted(glob.glob(pattern)):
        process_file(csv_path, database)
    return database
def produce_html(database):
    """Print one HTML table row per database entry, highest ratio first.

    Args:
        database: Dict mapping a key to a 5-item sequence
            ``(name, total, with_numbers, with_numbers_and_links, ratio)``.
            ``name`` and the key must be strings; ``ratio`` is a fraction
            and is printed as a percentage with two decimals.

    Each row holds, in order: rank (starting at 1), name, key in
    parentheses, the three counts, and the percentage.  Only ``<tr>`` rows
    are written to stdout; the surrounding table markup is not produced
    here.  Returns None.
    """
    # Local import: available in the standard library of both Python 2
    # and Python 3, and keeps this function self-contained.
    from xml.sax.saxutils import escape

    row_template = ("<tr><td>{}</td><td>{} </td><td> ({}) </td><td> {} </td>"
                    "<td> {} </td><td> {} </td><td> {:.2f}% </td></tr>")

    # items() works on both Python 2 and 3; sorted() is stable, so entries
    # with equal ratios keep the dict's iteration order.
    ranked = sorted(database.items(),
                    key=lambda item: item[1][4],
                    reverse=True)

    for rank, (key, value) in enumerate(ranked, 1):
        # Name and key come straight from CSV data, so escape &, < and >
        # before embedding them in markup.  print(...) with a single
        # argument behaves the same as a statement (Python 2) and as a
        # function (Python 3).
        print(row_template.format(rank,
                                  escape(value[0]),
                                  escape(key),
                                  value[1],
                                  value[2],
                                  value[3],
                                  value[4] * 100))
# Script entry point: build the database from the CSV files and print the
# ranked HTML table rows to stdout.
# NOTE(review): this also runs whenever the module is imported; consider
# wrapping it in an ``if __name__ == "__main__":`` guard.
produce_html(populate_database())