-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathparse_epub.py
More file actions
26 lines (21 loc) · 742 Bytes
/
parse_epub.py
File metadata and controls
26 lines (21 loc) · 742 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
from ebooklib import epub
from bs4 import BeautifulSoup
def extract_epub_text(file_path):
    """Return the text of every HTML document item in an EPUB file.

    Args:
        file_path: Path of the EPUB file, handed to ``epub.read_epub``.

    Returns:
        A list of strings, one per item that is an ``epub.EpubHtml``
        instance, in the order ``get_items()`` yields them. Each string is
        what ``BeautifulSoup(...).get_text()`` produces for that item's
        content when parsed with the ``'html.parser'`` backend.
    """
    publication = epub.read_epub(file_path)
    # Lazily keep only the HTML items; everything else in the book is skipped.
    html_entries = (
        entry
        for entry in publication.get_items()
        if isinstance(entry, epub.EpubHtml)
    )
    return [
        BeautifulSoup(entry.get_content(), 'html.parser').get_text()
        for entry in html_entries
    ]
if __name__ == "__main__":
    # Script entry point: prompt for an EPUB path, echo each extracted
    # segment to stdout, then save the concatenated text to "book.txt"
    # (a relative path, so it is created in the current working directory).
    epub_path = input("Enter the path to the EPUB file: ")
    segments = extract_epub_text(epub_path)
    for segment in segments:
        print(segment)
    # Join once instead of growing a string with += inside the loop, which
    # can degrade to quadratic time when a book has many segments.
    book = "".join(segments)
    with open("book.txt", 'w', encoding="utf_8") as file:
        file.write(book)