diff --git a/README.md b/README.md index a45c796..53bfc5c 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,14 @@ # python-knowledge-graph A Python and scaCy implementation of a basic Knowledge Graph. See more details here in this blog post: https://programmerbackpack.com/python-nlp-tutorial-information-extraction-and-knowledge-graphs/ + +## Python library setup + +Run the following commands + +`pip3 install -r requirements.txt` + +`python3 -m spacy download en_core_web_sm` + +## Start the program +`python3 knowledgegraph.py` \ No newline at end of file diff --git a/knowledgegraph.py b/knowledgegraph.py index c69ee4c..e2d30ca 100644 --- a/knowledgegraph.py +++ b/knowledgegraph.py @@ -3,26 +3,32 @@ import networkx as nx import matplotlib.pyplot as plt + def getSentences(text): nlp = English() - nlp.add_pipe(nlp.create_pipe('sentencizer')) + nlp.add_pipe('sentencizer') document = nlp(text) - return [sent.string.strip() for sent in document.sents] + return [str(sent).strip() for sent in document.sents] + def printToken(token): print(token.text, "->", token.dep_) + def appendChunk(original, chunk): return original + ' ' + chunk + def isRelationCandidate(token): deps = ["ROOT", "adj", "attr", "agent", "amod"] return any(subs in token.dep_ for subs in deps) + def isConstructionCandidate(token): deps = ["compound", "prep", "conj", "mod"] return any(subs in token.dep_ for subs in deps) + def processSubjectObjectPairs(tokens): subject = '' object = '' @@ -49,13 +55,15 @@ def processSubjectObjectPairs(tokens): object = appendChunk(objectConstruction, object) objectConstruction = '' - print (subject.strip(), ",", relation.strip(), ",", object.strip()) + print(subject.strip(), ",", relation.strip(), ",", object.strip()) return (subject.strip(), relation.strip(), object.strip()) + def processSentence(sentence): tokens = nlp_model(sentence) return processSubjectObjectPairs(tokens) + def printGraph(triples): G = nx.Graph() for triple in triples: @@ -73,6 +81,7 @@ def printGraph(triples): plt.axis('off') plt.show() + if __name__ == "__main__": text = "London is the capital and largest city of England and the United Kingdom. Standing on the River " \ @@ -91,7 +100,7 @@ def printGraph(triples): nlp_model = spacy.load('en_core_web_sm') triples = [] - print (text) + print(text) for sentence in sentences: triples.append(processSentence(sentence)) diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..470c5a7 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +spacy==3.1.0 +matplotlib==3.4.2 +networkx==2.5.1 \ No newline at end of file