Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,14 @@
# python-knowledge-graph
A Python and spaCy implementation of a basic Knowledge Graph.
See more details here in this blog post: https://programmerbackpack.com/python-nlp-tutorial-information-extraction-and-knowledge-graphs/

## Python library setup

Run the following commands

`pip3 install -r requirements.txt`

`python3 -m spacy download en_core_web_sm`

## Start the program
`python3 knowledgegraph.py`
17 changes: 13 additions & 4 deletions knowledgegraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,26 +3,32 @@
import networkx as nx
import matplotlib.pyplot as plt


def getSentences(text):
    """Split raw text into a list of stripped sentence strings.

    Uses a lightweight English pipeline with only the rule-based
    sentencizer component (no full model load needed for splitting).
    """
    sentencizer_nlp = English()
    sentencizer_nlp.add_pipe('sentencizer')
    doc = sentencizer_nlp(text)
    sentences = []
    for sent in doc.sents:
        sentences.append(str(sent).strip())
    return sentences


def printToken(token):
    """Print a token's text together with its dependency label (debug aid)."""
    print(f"{token.text} -> {token.dep_}")


def appendChunk(original, chunk):
    """Return *original* extended with *chunk*, separated by a single space."""
    return ' '.join((original, chunk))


def isRelationCandidate(token):
    """Return True when the token's dependency label suggests a relation word.

    NOTE: matching is by substring (e.g. "amod" also matches the "mod"-free
    entry "amod" directly), mirroring the construction-candidate check.
    """
    relation_markers = ("ROOT", "adj", "attr", "agent", "amod")
    dep_label = token.dep_
    for marker in relation_markers:
        if marker in dep_label:
            return True
    return False


def isConstructionCandidate(token):
    """Return True when the token's dependency label marks a chunk-building word.

    NOTE: matching is by substring, so e.g. "amod" matches the "mod" entry.
    """
    construction_markers = ("compound", "prep", "conj", "mod")
    dep_label = token.dep_
    for marker in construction_markers:
        if marker in dep_label:
            return True
    return False


def processSubjectObjectPairs(tokens):
subject = ''
object = ''
Expand All @@ -49,13 +55,15 @@ def processSubjectObjectPairs(tokens):
object = appendChunk(objectConstruction, object)
objectConstruction = ''

print (subject.strip(), ",", relation.strip(), ",", object.strip())
print(subject.strip(), ",", relation.strip(), ",", object.strip())
return (subject.strip(), relation.strip(), object.strip())


def processSentence(sentence):
    """Parse one sentence with the module-level `nlp_model` and extract its
    (subject, relation, object) triple via processSubjectObjectPairs."""
    return processSubjectObjectPairs(nlp_model(sentence))


def printGraph(triples):
G = nx.Graph()
for triple in triples:
Expand All @@ -73,6 +81,7 @@ def printGraph(triples):
plt.axis('off')
plt.show()


if __name__ == "__main__":

text = "London is the capital and largest city of England and the United Kingdom. Standing on the River " \
Expand All @@ -91,7 +100,7 @@ def printGraph(triples):
nlp_model = spacy.load('en_core_web_sm')

triples = []
print (text)
print(text)
for sentence in sentences:
triples.append(processSentence(sentence))

Expand Down
3 changes: 3 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
spacy==3.1.0
matplotlib==3.4.2
networkx==2.5.1