-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathstartup.py
More file actions
47 lines (38 loc) · 1.18 KB
/
startup.py
File metadata and controls
47 lines (38 loc) · 1.18 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
#!/usr/bin/python3
"""Check for the required Python packages, then fetch and unpack the Enron dataset.

Run as a script, this:

1. tries to import nltk, numpy, scipy and sklearn, printing a hint for each
   one that cannot be imported;
2. downloads the Enron mail archive to the parent directory;
3. extracts the archive into the current directory.
"""
import importlib
import tarfile

REQUIRED_MODULES = ("nltk", "numpy", "scipy", "sklearn")
ENRON_URL = "https://www.cs.cmu.edu/~./enron/enron_mail_20150507.tar.gz"
ARCHIVE_PATH = "../enron_mail_20150507.tar.gz"


def check_dependency(module_name):
    """Try to import *module_name* and report whether it worked.

    Prints a "Checking for ..." line, and an install hint if the import
    fails.

    Args:
        module_name: Importable module name, e.g. ``"numpy"``.

    Returns:
        True if the module imported, False otherwise.
    """
    print(f"Checking for {module_name}")
    try:
        importlib.import_module(module_name)
    # `Exception` rather than a bare `except:` so Ctrl-C (KeyboardInterrupt)
    # and SystemExit still propagate, while a package that is installed but
    # fails during import with something other than ImportError is still
    # reported instead of aborting the script.
    except Exception:
        print(f"You should install {module_name} before continuing")
        return False
    return True


def download_file(url, filename, chunk_size=1024 * 1024):
    """Stream *url* to *filename* on disk.

    The response body is written chunk by chunk, so memory use stays bounded
    by ``chunk_size`` and the file on disk grows as the download proceeds.

    Args:
        url: Address to download.
        filename: Path of the file to write (overwritten if it exists).
        chunk_size: Number of bytes requested per chunk.

    Raises:
        ImportError: If ``requests`` is not installed.
        requests.HTTPError: If the server answers with an error status.
    """
    # Imported here, not at module top, so the dependency checks and their
    # messages run before a missing `requests` package can stop the script.
    import requests

    # The timeout bounds connection setup and each wait for data, not the
    # total transfer time, so a slow-but-alive download is not cut off.
    with requests.get(url, stream=True, timeout=60) as response:
        # Fail now instead of saving an HTML error page as the archive.
        response.raise_for_status()
        # Open the target only after the request succeeded, so a failed
        # request does not leave an empty archive behind.
        with open(filename, "wb") as out_file:
            for chunk in response.iter_content(chunk_size=chunk_size):
                out_file.write(chunk)


def extract_archive(archive_path, destination="."):
    """Extract the tar archive at *archive_path* into *destination*.

    Args:
        archive_path: Path to a tar archive (compression is auto-detected
            by ``tarfile.open``).
        destination: Directory to extract into.
    """
    # The context manager closes the archive even if extraction fails.
    with tarfile.open(archive_path) as archive:
        # The archive comes from the network; where this Python provides
        # extraction filters, use "data" to refuse members that would
        # escape the destination directory.
        if hasattr(tarfile, "data_filter"):
            archive.extractall(destination, filter="data")
        else:
            archive.extractall(destination)


def main():
    """Run the dependency checks, then download and unpack the dataset."""
    for module_name in REQUIRED_MODULES:
        check_dependency(module_name)

    print("Downloading the Enron dataset (this may take a while)")
    print("To check on progress, you can cd up one level, then execute <ls -lthr>")
    print("Enron dataset should be last item on the list, along with its current size")
    print("Download will complete at about 1.82 GB")
    download_file(ENRON_URL, ARCHIVE_PATH)
    print("Download Complete!")

    print("Unzipping Enron dataset (This may take a while)")
    extract_archive(ARCHIVE_PATH, ".")
    print("You're ready to go!")


if __name__ == "__main__":
    main()