"""Tokenize a UTF-8 text file into words with NLTK and print the tokens.

Usage:
    python <this file> [path]

When no path is given, ``book_9.txt`` in the current directory is read.
"""

import sys

import nltk
from nltk.tokenize import word_tokenize

# Input file read when no path is supplied on the command line.
DEFAULT_INPUT_PATH = "book_9.txt"

# Tokenizer models requested before tokenizing. Older NLTK releases load
# "punkt"; newer releases load "punkt_tab" instead, so both are requested.
TOKENIZER_RESOURCES = ("punkt", "punkt_tab")


def ensure_tokenizer_data():
    """Download each tokenizer model only if it is not already installed.

    Checking locally first avoids contacting the NLTK download index on
    every run when the data is already present.
    """
    for resource in TOKENIZER_RESOURCES:
        try:
            nltk.data.find(f"tokenizers/{resource}")
        except LookupError:
            # With its default arguments nltk.download reports a failure
            # (for example a resource unknown to this NLTK version) and
            # returns False instead of raising, so a miss here is harmless.
            nltk.download(resource)


def tokenize_file(path=DEFAULT_INPUT_PATH):
    """Return the list of word tokens for the text file at *path*.

    Args:
        path: Location of the text file; it is decoded as UTF-8.

    Returns:
        The list produced by ``nltk.tokenize.word_tokenize`` for the whole
        file content.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the file is not valid UTF-8.
        LookupError: If the tokenizer data is not installed.
    """
    with open(path, "r", encoding="utf-8") as file:
        text = file.read()
    return word_tokenize(text)


def main(argv=None):
    """Fetch tokenizer data if needed, tokenize the input file and print it.

    Args:
        argv: Command-line arguments without the program name. Defaults to
            ``sys.argv[1:]``. The first argument, when present, is the path
            of the file to tokenize.
    """
    args = sys.argv[1:] if argv is None else argv
    path = args[0] if args else DEFAULT_INPUT_PATH

    ensure_tokenizer_data()
    print(tokenize_file(path))


if __name__ == "__main__":
    main()