-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapp.py
More file actions
40 lines (39 loc) · 1.37 KB
/
app.py
File metadata and controls
40 lines (39 loc) · 1.37 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import re

import numpy as np

from main import VectorStore
def _tokenize(text):
    """Split *text* into lowercase word tokens with punctuation removed.

    A plain ``str.split()`` leaves punctuation glued to words ("fruit." or
    "mango,apple"), so the same word ends up as different vocabulary entries
    depending on where it sits in a sentence.
    """
    return re.findall(r"\w+", text.lower())


def _bag_of_words(tokens, index_by_word):
    """Return a term-count vector for *tokens* over the given vocabulary.

    Args:
        tokens: Iterable of word tokens.
        index_by_word: Mapping from vocabulary word to its vector position.

    Returns:
        A NumPy array of length ``len(index_by_word)`` holding the number of
        occurrences of each vocabulary word. Tokens that are not in the
        vocabulary are ignored.
    """
    counts = np.zeros(len(index_by_word))
    for token in tokens:
        position = index_by_word.get(token)
        if position is not None:
            counts[position] += 1
    return counts


vector_store = VectorStore()
sentences = [
    "I eat mangoes.",
    "mango is my favourite fruit.",
    "mango,apple and orange are fruits.",
    "fruits are good for health.",
]

# Tokenization: collect every distinct word that appears in the corpus.
vocabulary = set()
for sentence in sentences:
    vocabulary.update(_tokenize(sentence))

# Assign unique indices to words in vocab. Sorting keeps the index layout
# reproducible; iteration order of a set of strings can differ between
# interpreter runs because string hashing is randomized.
word_to_index = {word: i for i, word in enumerate(sorted(vocabulary))}

# Vectorization: one term-count vector per sentence, keyed by the sentence.
sentence_vectors = {
    sentence: _bag_of_words(_tokenize(sentence), word_to_index)
    for sentence in sentences
}

# Adding vector in vector store
for sentence, vector in sentence_vectors.items():
    vector_store.add_vector(sentence, vector)

# Searching for similarity. The query goes through the same tokenizer and
# vocabulary as the corpus; words outside the vocabulary contribute nothing.
# NOTE(review): a query with no known words yields an all-zero vector --
# confirm how VectorStore.find_similar_vector handles that case.
query_sentence = "Mango is the best fruit."
query_tokens = _tokenize(query_sentence)
query_vector = _bag_of_words(query_tokens, word_to_index)

similar_sentences = vector_store.find_similar_vector(query_vector, num_result=2)
print("Query sentence: ", query_sentence)
print("Similar sentences: ")
for sentence, similarity in similar_sentences:
    print(f"{sentence}: Similarity = {similarity : .4f}")