Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file not shown.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added homeworks/B17423/homework1/day3-画图/UML.PNG
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
3,974 changes: 3,974 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data1/2016.01.02.TXT

Large diffs are not rendered by default.

3,974 changes: 3,974 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2016.01.02.TXT

Large diffs are not rendered by default.

4,994 changes: 4,994 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2016.01.09.TXT

Large diffs are not rendered by default.

4,952 changes: 4,952 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2016.01.16.TXT

Large diffs are not rendered by default.

5,745 changes: 5,745 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2016.01.23.TXT

Large diffs are not rendered by default.

5,164 changes: 5,164 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2016.01.30.TXT

Large diffs are not rendered by default.

5,763 changes: 5,763 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2016.02.06.TXT

Large diffs are not rendered by default.

5,351 changes: 5,351 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2016.02.13.TXT

Large diffs are not rendered by default.

5,120 changes: 5,120 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2016.02.20.TXT

Large diffs are not rendered by default.

5,731 changes: 5,731 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2016.02.27.TXT

Large diffs are not rendered by default.

4,776 changes: 4,776 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2016.03.05.TXT

Large diffs are not rendered by default.

5,615 changes: 5,615 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2016.03.12.TXT

Large diffs are not rendered by default.

5,186 changes: 5,186 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2016.03.19.TXT

Large diffs are not rendered by default.

5,613 changes: 5,613 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2016.03.26.TXT

Large diffs are not rendered by default.

5,052 changes: 5,052 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2016.04.02.TXT

Large diffs are not rendered by default.

4,994 changes: 4,994 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2016.04.09.TXT

Large diffs are not rendered by default.

4,251 changes: 4,251 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2016.04.16.TXT

Large diffs are not rendered by default.

5,100 changes: 5,100 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2016.04.23.TXT

Large diffs are not rendered by default.

5,176 changes: 5,176 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2016.04.30.TXT

Large diffs are not rendered by default.

5,697 changes: 5,697 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2016.05.07.TXT

Large diffs are not rendered by default.

6,498 changes: 6,498 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2016.05.14.TXT

Large diffs are not rendered by default.

3,680 changes: 3,680 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2016.05.21.TXT

Large diffs are not rendered by default.

5,595 changes: 5,595 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2016.05.28.TXT

Large diffs are not rendered by default.

5,270 changes: 5,270 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2016.06.04.TXT

Large diffs are not rendered by default.

6,814 changes: 6,814 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2016.06.11.TXT

Large diffs are not rendered by default.

5,163 changes: 5,163 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2016.06.18.TXT

Large diffs are not rendered by default.

5,347 changes: 5,347 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2016.06.25.TXT

Large diffs are not rendered by default.

5,356 changes: 5,356 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2016.07.02.TXT

Large diffs are not rendered by default.

3,915 changes: 3,915 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2016.07.09.TXT

Large diffs are not rendered by default.

5,357 changes: 5,357 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2016.07.16.TXT

Large diffs are not rendered by default.

5,428 changes: 5,428 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2016.07.23.TXT

Large diffs are not rendered by default.

4,823 changes: 4,823 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2016.07.30.TXT

Large diffs are not rendered by default.

5,282 changes: 5,282 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2016.08.06.TXT

Large diffs are not rendered by default.

5,214 changes: 5,214 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2016.08.13.TXT

Large diffs are not rendered by default.

5,364 changes: 5,364 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2016.08.20.TXT

Large diffs are not rendered by default.

5,148 changes: 5,148 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2016.08.27.TXT

Large diffs are not rendered by default.

3,446 changes: 3,446 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2016.09.03.TXT

Large diffs are not rendered by default.

4,862 changes: 4,862 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2016.09.10.TXT

Large diffs are not rendered by default.

5,797 changes: 5,797 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2016.09.17.TXT

Large diffs are not rendered by default.

5,080 changes: 5,080 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2016.09.24.TXT

Large diffs are not rendered by default.

3,899 changes: 3,899 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2016.10.01.TXT

Large diffs are not rendered by default.

5,078 changes: 5,078 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2016.10.08.TXT

Large diffs are not rendered by default.

5,068 changes: 5,068 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2016.10.15.TXT

Large diffs are not rendered by default.

6,371 changes: 6,371 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2016.10.22.TXT

Large diffs are not rendered by default.

4,552 changes: 4,552 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2016.10.29.TXT

Large diffs are not rendered by default.

4,396 changes: 4,396 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2016.11.05.TXT

Large diffs are not rendered by default.

6,446 changes: 6,446 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2016.11.12.TXT

Large diffs are not rendered by default.

4,256 changes: 4,256 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2016.11.19.TXT

Large diffs are not rendered by default.

6,221 changes: 6,221 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2016.11.26.TXT

Large diffs are not rendered by default.

4,380 changes: 4,380 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2016.12.03.TXT

Large diffs are not rendered by default.

4,392 changes: 4,392 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2016.12.10.TXT

Large diffs are not rendered by default.

5,116 changes: 5,116 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2016.12.17.TXT

Large diffs are not rendered by default.

5,484 changes: 5,484 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2016.12.24.TXT

Large diffs are not rendered by default.

3,741 changes: 3,741 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2017.01.07.txt

Large diffs are not rendered by default.

4,584 changes: 4,584 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2017.01.14.txt

Large diffs are not rendered by default.

6,350 changes: 6,350 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2017.01.21.txt

Large diffs are not rendered by default.

4,010 changes: 4,010 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2017.01.28.txt

Large diffs are not rendered by default.

3,830 changes: 3,830 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2017.02.04.txt

Large diffs are not rendered by default.

4,081 changes: 4,081 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2017.02.11.txt

Large diffs are not rendered by default.

5,950 changes: 5,950 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2017.02.18.txt

Large diffs are not rendered by default.

10,500 changes: 10,500 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2017.02.25.txt

Large diffs are not rendered by default.

6,432 changes: 6,432 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2017.03.04.txt

Large diffs are not rendered by default.

8,990 changes: 8,990 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2017.03.11.txt

Large diffs are not rendered by default.

11,972 changes: 11,972 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2017.03.18.txt

Large diffs are not rendered by default.

6,078 changes: 6,078 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2017.03.25.txt

Large diffs are not rendered by default.

5,103 changes: 5,103 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2017.04.01.txt

Large diffs are not rendered by default.

6,390 changes: 6,390 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2017.04.08.txt

Large diffs are not rendered by default.

5,693 changes: 5,693 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2017.04.15.txt

Large diffs are not rendered by default.

4,051 changes: 4,051 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2017.04.22.txt

Large diffs are not rendered by default.

5,852 changes: 5,852 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2017.04.29.txt

Large diffs are not rendered by default.

4,441 changes: 4,441 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2017.05.06.txt

Large diffs are not rendered by default.

5,742 changes: 5,742 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2017.05.13.txt

Large diffs are not rendered by default.

6,122 changes: 6,122 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2017.05.20.txt

Large diffs are not rendered by default.

5,375 changes: 5,375 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/data2/2017.05.27.txt

Large diffs are not rendered by default.

140 changes: 140 additions & 0 deletions homeworks/B17423/homework1/day4-vocabulary/homework.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
# -*- coding: utf-8 -*-

import codecs
import os

#1. 读取文件
#['aa', 'aaa-bbb-sds'] => ['aa', 'aaa', 'bbb', 'sds']
def word_split(words):
    """Break hyphenated tokens into their parts.

    Example: ['aa', 'aaa-bbb-sds'] => ['aa', 'aaa', 'bbb', 'sds']

    Tokens without a hyphen pass through unchanged. Consecutive or
    leading/trailing hyphens produce empty strings in the result.

    Args:
        words: Iterable of string tokens.

    Returns:
        A new flat list with every token split on '-'.
    """
    # str.split returns a one-element list when no hyphen is present, so a
    # single expression covers both the plain and the hyphenated case.
    return [piece for token in words for piece in token.split('-')]


def read_file(file_path):
    """Read a UTF-8 text file and return every raw token it contains.

    Each line is stripped of surrounding whitespace and split on single
    spaces; hyphenated tokens are then split further by ``word_split``.
    No cleaning is done here, so the result may contain empty strings
    (for blank lines or runs of consecutive spaces) and punctuation.

    Args:
        file_path: Path of the file to read.

    Returns:
        A flat list of tokens in file order.
    """
    word_list = []
    # The context manager closes the handle even when decoding raises;
    # previously the file was opened and never closed.
    with codecs.open(file_path, 'r', "utf-8") as f:
        for line in f.readlines():
            words = line.strip().split(" ")  # split on spaces
            word_list.extend(word_split(words))  # then split on hyphens
    return word_list

def get_file_from_folder(folder_path):
    """Collect the paths of all files beneath a folder, recursively.

    The result is ordered deterministically: within each directory the
    files are listed in sorted name order, and sub-directories are visited
    in sorted name order. ``os.walk`` on its own yields entries in
    filesystem order, which made downstream results (the order of
    equally frequent words) vary between machines.

    Args:
        folder_path: Root directory to scan.

    Returns:
        A list of file paths, each joined with its containing directory.
        Empty if the folder does not exist or contains no files.
    """
    file_paths = []
    for root, dirs, files in os.walk(folder_path):
        # Sorting in place controls the order in which os.walk descends.
        dirs.sort()
        for name in sorted(files):
            file_paths.append(os.path.join(root, name))
    return file_paths

#读取多文件里的单词
def read_files(file_paths):
    """Read several files and concatenate their tokens.

    Args:
        file_paths: Iterable of paths, each passed to ``read_file``.

    Returns:
        One flat list holding the tokens of every file, in the order the
        paths were given.
    """
    return [token for path in file_paths for token in read_file(path)]


#2.获取格式化之后的单词
def format_word(word):
    """Normalise a token: keep only ASCII letters and '-', then lowercase.

    Args:
        word: Raw token, possibly containing digits or punctuation.

    Returns:
        The cleaned, lower-cased token; an empty string when nothing
        survives the filtering.
    """
    fmt = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-'
    # Filter in a single pass; the previous version re-scanned the whole
    # word with str.replace for every disallowed character.
    return ''.join(char for char in word if char in fmt).lower()

def format_words(words):
    """Clean every token and discard the ones that end up empty.

    Args:
        words: Iterable of raw tokens.

    Returns:
        A list of tokens as returned by ``format_word``, in input order,
        with empty results removed.
    """
    cleaned = (format_word(raw) for raw in words)
    return [token for token in cleaned if token]

#3. 统计单词数目
# {'aa':4, 'bb':1}
def statictcs_words(words):
    """Count occurrences of each word and rank them by frequency.

    Example of the intermediate tally: {'aa': 4, 'bb': 1}

    Args:
        words: Iterable of (already cleaned) words.

    Returns:
        A list of ``(word, count)`` tuples ordered by descending count.
    """
    tally = {}
    for token in words:
        tally[token] = tally.get(token, 0) + 1
    # Rank by count, highest first.
    ranked = list(tally.items())
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    return ranked

# Compute the cumulative frequency (as a percentage) reached at each word
def get_rate(word_list, total_count):
    """Map each entry to the cumulative share of words covered so far.

    Walking ``word_list`` in order, the counts are accumulated and the
    running total is expressed as a percentage of ``total_count``.

    Args:
        word_list: Sequence of ``(word, count)`` tuples.
        total_count: Total number of words, used as the denominator.

    Returns:
        A dict keyed by the ``(word, count)`` tuple whose value is the
        cumulative percentage (0-100) up to and including that entry.
    """
    cumulative = {}
    running_total = 0
    for entry in word_list:
        running_total += entry[1]
        cumulative[entry] = (float(running_total) / total_count) * 100
    return cumulative

#截取单词
def cut_words(word_rate, range):
    """Select the words whose cumulative percentage lies within a band.

    Args:
        word_rate: Dict mapping ``(word, count)`` tuples to a percentage.
        range: Two-element sequence ``[start, end]`` given as fractions;
            both are multiplied by 100 before comparing. Bounds are
            inclusive.

    Returns:
        A list of the words (first tuple element) inside the band.
    """
    lower, upper = range[0] * 100, range[1] * 100
    return [
        entry[0]
        for entry, percent in word_rate.items()
        if lower <= percent <= upper
    ]

#4.输出成csv
def print_to_csv(volcaulay_list, to_file_path, rate):
    """Write ``word,count,percentage`` rows to a CSV file.

    Args:
        volcaulay_list: Sequence of ``(word, count)`` tuples, written in
            the order given.
        to_file_path: Destination path; an existing file is overwritten.
        rate: Dict keyed by the same ``(word, count)`` tuples giving the
            percentage to print with two decimals.

    Raises:
        KeyError: If an entry of ``volcaulay_list`` is missing from ``rate``.
    """
    # The context manager guarantees the file is closed even when a rate
    # lookup or the formatting raises part-way through the loop.
    with open(to_file_path, 'w') as nfile:
        for val in volcaulay_list:
            nfile.write("%s,%s,%0.2f\n" % (val[0], str(val[1]), rate[val]))

def print_to_csv_no_rate(volcaulay_list, to_file_path):
    """Write ``word,count`` rows to a CSV file.

    Args:
        volcaulay_list: Sequence of ``(word, count)`` tuples, written in
            the order given.
        to_file_path: Destination path; an existing file is overwritten.
    """
    # Use a context manager so the handle is released even if a write fails.
    with open(to_file_path, 'w') as nfile:
        for val in volcaulay_list:
            nfile.write("%s,%s\n" % (val[0], str(val[1])))

def main():
    """Run the vocabulary pipeline: read, clean, count, and export.

    Reads every file under ``data1``, normalises the tokens, ranks the
    words by frequency and writes the result to ``output/test.csv``.
    When ``is_rate`` is true the CSV also carries the cumulative
    percentage, and the number of words falling in the 50%-70% band is
    printed.
    """
    is_rate = True  # whether to compute cumulative percentages
    output_path = 'output/test.csv'

    # 1. Read the text
    words = read_files(get_file_from_folder('data1'))
    print ('获取了未格式化的单词 %d 个' % (len(words)))

    # 2. Clean the text
    f_words = format_words(words)
    total_word_count = len(f_words)
    print ('获取了已经格式化的单词 %d 个' %(len(f_words)))

    # 3. Count and rank the words
    word_list = statictcs_words(f_words)

    # Create the output directory first; opening the CSV for writing fails
    # on a fresh checkout where it does not exist yet.
    output_dir = os.path.dirname(output_path)
    if output_dir and not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    if is_rate:
        # Cumulative frequency of each word
        word_rate = get_rate(word_list, total_word_count)

        # Extract the words inside the chosen cumulative band
        start_and_end = [0.5, 0.7]
        partition_words = cut_words(word_rate, start_and_end)
        print(len(partition_words))

        # 4. Write the output file
        print_to_csv(word_list, output_path, word_rate)
    else:
        print_to_csv_no_rate(word_list, output_path)


if __name__ == "__main__":
    main()
Binary file not shown.
Loading