forked from neuralinfo/Assignments
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathAssignment3_1_2b.py
More file actions
executable file
·55 lines (41 loc) · 1.1 KB
/
Assignment3_1_2b.py
File metadata and controls
executable file
·55 lines (41 loc) · 1.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# David Paculdo
# W205
# Assignment 3
from boto.s3.connection import S3Connection
from boto.s3.key import Key
import os
import pymongo
import string
import ast
# Copies raw tweet dumps from an S3 bucket into a MongoDB collection.
#
# The script walks a numbered series of S3 objects (..._0.raw, ..._1.raw, ...),
# downloads each one to a local file of the same name, parses every line as a
# Python literal and inserts the result as one document. It stops at the first
# index for which no object exists in the bucket.

#Amazon AWS variables and connection
AWS_KEY=os.environ.get("AWS_ACCESS_KEY")
AWS_SECRET=os.environ.get("AWS_SECRET_KEY")
conn = S3Connection(AWS_KEY, AWS_SECRET)
bucket = conn.get_bucket("w205-assignment-2-dpaculdo")
k=Key(bucket)

filecount=0

#hardcoded rawfile to be transferred; %d is replaced by the running file index
RAW_TEMPLATE="microsoft_OR_mojang_2015-02-07_2015-02-14_%d.raw"
rawfile=RAW_TEMPLATE % filecount
k.key=rawfile

#mongodb variables and connection
db_name="db_restT"
coll="tweets"
# NOTE: this rebinds `conn`; the S3 connection stays reachable through
# `bucket` and `k`, which were created from it above.
conn=pymongo.MongoClient()
db=conn[db_name]
collection=db[coll]

# Collection.insert() is deprecated in PyMongo 3 and removed in PyMongo 4.
# Check the class rather than the instance: Collection.__getattr__ returns a
# sub-collection for unknown attribute names, so an instance hasattr() would
# always succeed.
if hasattr(type(collection), "insert_one"):
    insert_document=collection.insert_one
else:
    insert_document=collection.insert

#Iterates through all raw Twitter data files from S3 and inserts into mongodb
while k.exists():
    try:
        k.get_contents_to_filename(rawfile)
        # `with` closes the file even if parsing or inserting raises.
        with open(rawfile,"r") as my_file:
            for line in my_file:
                # A blank line is not a valid literal; skip it instead of
                # aborting the whole import with a SyntaxError.
                if not line.strip():
                    continue
                # literal_eval only accepts Python literals, so the line is
                # parsed without executing arbitrary code.
                linedict=ast.literal_eval(line)
                insert_document(linedict)
    finally:
        # Remove the downloaded copy on success and on failure alike; the
        # existence check keeps a failed download from masking its own error.
        if os.path.exists(rawfile):
            os.remove(rawfile)

    # Advance to the next numbered object in the series.
    filecount+=1
    rawfile=RAW_TEMPLATE % filecount
    k.key=rawfile