# llm_app/17_entity_similarity.py at master · behoss/llm_app · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
from functools import lru_cache

import spacy
from sentence_transformers import SentenceTransformer, util

# Load spaCy's NER model once, at import time; extract_entities() reuses this
# shared pipeline object rather than loading it again on every call.
# NOTE(review): presumably the "en_core_web_trf" model package has to be
# installed separately for spacy.load() to succeed — confirm in setup docs.
nlp = spacy.load("en_core_web_trf")


def extract_entities(text):
    """Return the surface text of each named entity found in *text*.

    The text is run through the module-level spaCy pipeline ``nlp``; only
    each entity's ``.text`` is kept (labels and offsets are dropped), in the
    order ``doc.ents`` yields them.
    """
    return [span.text for span in nlp(text).ents]


@lru_cache(maxsize=4)
def _load_sentence_model(model_name):
    """Build the SentenceTransformer for *model_name*; cached so repeat calls reuse it."""
    return SentenceTransformer(model_name)


def compare_entities(entities1, entities2, model_name="all-MiniLM-L6-v2"):
    """Compute pairwise cosine similarity between two lists of entity strings.

    Args:
        entities1: Strings to embed; one row of the result per item.
        entities2: Strings to embed; one column of the result per item.
        model_name: Name of the sentence-transformers model used for the
            embeddings. Defaults to the model this function always used.

    Returns:
        A NumPy array where element ``[i, j]`` is the cosine similarity
        between ``entities1[i]`` and ``entities2[j]``. If either input is
        empty, an empty ``(len(entities1), len(entities2))`` array is
        returned without loading the model.
    """
    # With nothing to compare on one side there are no pairs; return early
    # rather than relying on the encoder / similarity call to accept an
    # empty batch.
    if not entities1 or not entities2:
        import numpy as np

        return np.zeros((len(entities1), len(entities2)), dtype="float32")

    # Constructing the model is the expensive step; reuse the cached instance
    # instead of rebuilding it on every call.
    model = _load_sentence_model(model_name)
    embeddings1 = model.encode(entities1, convert_to_tensor=True)
    embeddings2 = model.encode(entities2, convert_to_tensor=True)

    similarities = util.pytorch_cos_sim(embeddings1, embeddings2)
    # Move to CPU first so the conversion also works when the model ran on GPU.
    return similarities.cpu().numpy()


# Demo input: two differently phrased sentences about the same book.
sentence1 = "My product is the book Prompting Happiness, which explores the pursuit of happiness in the age of AI."
sentence2 = "Prompting Happiness is a book that explores the intersection of AI and human happiness."

# Run entity extraction on each sentence, first sentence first.
entities1, entities2 = [extract_entities(s) for s in (sentence1, sentence2)]

# A similarity matrix needs at least one entity on each side.
if not entities1 or not entities2:
    print("No named entities found in one or both sentences.")
else:
    similarity_matrix = compare_entities(entities1, entities2)
    print("\nEntities 1:", entities1)
    print("Entities 2:", entities2)
    print("Similarity Matrix:\n", similarity_matrix, "\n")