-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathlab1.py
More file actions
64 lines (46 loc) · 1.7 KB
/
lab1.py
File metadata and controls
64 lines (46 loc) · 1.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#!/usr/bin/env python
import argparse
import re
# Submission metadata printed in the banner by main().
# Replace the string value of ME with your name, and list anyone who helped
# you in COLLABORATORS.
ME = 'Ryan Brand'
COLLABORATORS = ["nobody"]
def process_file(infile):
    """Build a next-word predictor from ``<SEP>``-delimited song records.

    For every line that contains at least three ``<SEP>`` markers, the text
    after the last marker is taken as the title. Bracketed ``[...]`` and
    parenthesised ``(...)`` spans and the characters ``! & . ? ;`` are
    removed, and the result is lower-cased, printed, and stored in a set
    (so a title that appears several times is counted once). Bigram counts
    are then computed over the whitespace-separated words of each unique
    title.

    Args:
        infile: Iterable of text lines, e.g. an open text file.

    Returns:
        A function ``most_common_word(word)`` that returns the word that
        most often follows ``word`` in the cleaned titles, or ``None`` when
        ``word`` never appears with a following word.
    """
    # Local import keeps this function self-contained.
    from collections import Counter, defaultdict

    # Compiled once instead of on every line; the greedy groups mean group 3
    # is whatever follows the last <SEP> on the line.
    record_pattern = re.compile(r'<SEP>(.*)<SEP>(.*)<SEP>(.*)', re.M | re.I)

    titles = set()
    # Loop through each line of the file
    for line in infile:
        match = record_pattern.search(line)
        if match is None:
            # Blank or malformed line: skip it rather than crash on .group().
            continue

        # Clean the title field.
        title = match.group(3)
        title = re.sub(r'\[.*\]', '', title)
        title = re.sub(r'\(.*\)', '', title)
        title = re.sub(r'[!&\.\?;]', '', title)

        # Add the clean, lower-cased title to the set of titles.
        lower_case_clean_title = title.lower()
        print(lower_case_clean_title)
        titles.add(lower_case_clean_title)

    # bigram_count[first][second] == number of unique titles-occurrences in
    # which `second` immediately follows `first`.
    bigram_count = defaultdict(Counter)
    for title in titles:
        words = title.split()
        for first, second in zip(words, words[1:]):
            bigram_count[first][second] += 1

    def most_common_word(word):
        """Return the most frequent follower of ``word``, or ``None``."""
        # Titles were lower-cased, so look the word up the same way.
        followers = bigram_count.get(word.lower())
        if not followers:
            return None
        # Highest count wins; ties are broken alphabetically so the result
        # does not depend on set iteration order.
        best_word, _ = min(
            followers.items(), key=lambda item: (-item[1], item[0])
        )
        return best_word

    return most_common_word
# DON'T WORRY ABOUT CODE BELOW HERE, IT JUST MAKES YOUR LIFE EASIER
def get_file_name():
    """Return the ``file_name`` positional argument from the command line."""
    cli = argparse.ArgumentParser()
    cli.add_argument('file_name')
    parsed = cli.parse_args()
    return parsed.file_name
def main():
    """Print the submission banner, then process the file named on the CLI."""
    print('CSCI 305 Lab 1 submitted by %s' % ME)
    helper_names = ', '.join(COLLABORATORS)
    print(' with help from %s\n\n' % helper_names)

    # The path comes from the single positional command-line argument.
    path = get_file_name()
    with open(path, 'r') as infile:
        process_file(infile)
# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()