-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathml.py
More file actions
113 lines (78 loc) · 2.21 KB
/
ml.py
File metadata and controls
113 lines (78 loc) · 2.21 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# -*- coding: utf-8 -*-
"""testcreator.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1XnaQQ204P_ElstfoJcAm5XfyvHgHymTE
# Import
"""
import nltk
from nltk.corpus import stopwords
nltk.download('stopwords')
import re
import pprint
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
import random
"""# Input"""
# Sample input paragraph (the notebook's "# Input" cell); presumably fed to
# the blank-generation functions defined below — confirm against the caller.
para= "With a satisfied expression he regarded the field of ripe corn with its flowers, draped in a curtain of rain. But suddenly a strong wind began to blow and along with the rain very large hailstones began to fall. These truly did resemble new silver coins. The boys, exposing themselves to the rain, ran out to collect the frozen pearls."
"""# Functions
"""
def listToString(s):
    """Join an iterable of word tokens into one space-separated string."""
    return " ".join(s)
def remove_punc(desc):
    """Strip punctuation from *desc*, collapse whitespace, and lowercase.

    The original imported nltk's ``RegexpTokenizer(r'\w+')`` and the sibling
    ``listToString`` helper for what is exactly ``re.findall(r'\w+', desc)``
    joined with spaces — ``re`` is already imported at the top of this file,
    so the nltk dependency here was unnecessary.  Behavior is identical:
    runs of non-word characters become single spaces, result is lowercased.
    """
    return " ".join(re.findall(r"\w+", desc)).lower()
def remove_stopwords(desc):
    """Pick one random content word (non-stopword) from *desc*.

    NOTE: despite the name, this does not return the text with stopwords
    removed — it filters the stopwords out and then returns a SINGLE
    randomly chosen word from what remains.  Raises IndexError if every
    word of *desc* is a stopword.
    """
    stop_list = stopwords.words("english")
    kept = [w for w in desc.split(' ') if w not in stop_list]
    candidates = ' '.join(kept).split()
    return random.choice(candidates)
def findandreplace(new, desc):
    """Return *new* with every occurrence of *desc* blanked out.

    Bug fix: ``str.replace`` returns a new string and never mutates its
    receiver; the original discarded the result and returned *new*
    unchanged, so this function was a no-op.
    """
    return new.replace(desc, "_________")
def onesentencefromonepara(para):
    """Remove and return one randomly chosen sentence from the list *para*.

    Mutates *para* in place: the first list element equal to the chosen
    sentence is deleted before returning it.
    """
    chosen = random.choice(para)
    para.remove(chosen)
    return chosen
def implementall(desc):
    """Turn one sentence into a fill-in-the-blank question.

    Normalizes the sentence (punctuation stripped, lowercased), asks
    remove_stopwords to pick one random content word from it, then blanks
    out every occurrence of that word in the ORIGINAL sentence text.
    """
    original = desc
    normalized = remove_punc(desc)
    blank_word = remove_stopwords(normalized)
    return original.replace(blank_word, "_________")
def tokenize(para):
    """Split the paragraph string *para* into a list of sentences."""
    return nltk.tokenize.sent_tokenize(para)
"""# Implementing the functions
"""
def add(para):
    """Build fill-in-the-blank questions from the paragraph string *para*.

    Splits *para* into sentences, then, up to five times, removes one
    random sentence and blanks one random content word in it.  Returns
    the list of blanked sentences.

    Fixes: the original copy-pasted the same four statements five times;
    this loops instead.  The original also raised IndexError when the
    paragraph had fewer than five sentences — we now stop early and
    return however many questions could be built.
    """
    questions = []
    sentences = tokenize(para)
    for _ in range(5):
        if not sentences:
            # Fewer than five sentences in the paragraph: return what we have.
            break
        sentence = onesentencefromonepara(sentences)
        questions.append(implementall(sentence))
    return questions