diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..2b0fdfe --- /dev/null +++ b/.env.example @@ -0,0 +1,6 @@ +# DeepSeek API Configuration +# Get your API key from: https://platform.deepseek.com/api_keys + +OPENAI_API_KEY=your_api_key_here +OPENAI_BASE_URL=https://api.deepseek.com +OPENAI_MODEL=deepseek-chat diff --git a/.gitignore b/.gitignore index 9e1d25d..03392c4 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,36 @@ wheels/ # Virtual environments .venv +# IDE settings +.vscode/ +.idea/ + # Custom -*_data/ *.epub + +# Books directory (but keep the folder structure) +books/* +!books/.gitkeep + +# Temp directory for uploads +temp/ + +# AI Features & Data +.env +reader_data.db +test.db + +# Backup files +backups/ +*.db.backup + +# Export files +reader_data_*.json +highlights_*.csv +ai_analyses_*.csv +report_*.txt + +# OS files +.DS_Store +Thumbs.db +desktop.ini diff --git a/README.md b/README.md index 5d868d7..1600a37 100644 --- a/README.md +++ b/README.md @@ -1,27 +1,171 @@ -# reader 3 +# Reader3 - EPUB Reader with AI Analysis -![reader3](reader3.png) +A lightweight, self-hosted EPUB reader with integrated AI analysis capabilities. -A lightweight, self-hosted EPUB reader that lets you read through EPUB books one chapter at a time. This makes it very easy to copy paste the contents of a chapter to an LLM, to read along. Basically - get epub books (e.g. [Project Gutenberg](https://www.gutenberg.org/) has many), open them up in this reader, copy paste text around to your favorite LLM, and read together and along. +## Features -This project was 90% vibe coded just to illustrate how one can very easily [read books together with LLMs](https://x.com/karpathy/status/1990577951671509438). I'm not going to support it in any way, it's provided here as is for other people's inspiration and I don't intend to improve it. Code is ephemeral now and libraries are over, ask your LLM to change it in whatever way you like. +### Reading Experience +- 📚 **Clean Layout** - Three-column design (TOC, Content, AI Panel) +- 📖 **Sticky Navigation** - Top navigation bar stays visible while scrolling +- ⌨️ **Keyboard Shortcuts** - Arrow keys for prev/next chapter, ESC to close panels +- 🔗 **Internal Links** - Footnotes and author comments open in modal popups +- 🎯 **Clickable Covers** - Click book covers to start reading instantly -## Usage +### AI & Annotations +- 🤖 **AI Analysis** - Right-click on text for fact-checking or discussion (DeepSeek) +- ✍️ **Personal Comments** - Add your own notes without AI (no API cost) +- 💾 **Manual Save** - Choose what to save to avoid clutter +- ✨ **Color-Coded Highlights** - Yellow (fact check), Blue (discussion), Green (comments) +- 🏷️ **Smart Tooltips** - Hover over highlights to see type +- 🗑️ **Edit & Delete** - Manage all your highlights and comments +- 🎨 **Markdown Support** - AI responses render with proper formatting + +### Library & Organization +- 📝 **Highlights View** - See all your notes and analyses for each book +- 📤 **Export to Markdown** - Export highlights with AI context warnings +- 🌐 **Web Upload** - Upload EPUB files via click or drag & drop +- 🖼️ **Cover Images** - Automatic cover extraction and display +- 🔍 **Search** - Find books by title or author +- 🗂️ **Organized Storage** - All books in `books/` directory, data in SQLite + +## Quick Start + +### 1. 
Configure API Key + +Edit `.env` file: +```bash +OPENAI_API_KEY=your_deepseek_key +OPENAI_BASE_URL=https://api.deepseek.com +OPENAI_MODEL=deepseek-chat +``` + +Get your key from: https://platform.deepseek.com/api_keys -The project uses [uv](https://docs.astral.sh/uv/). So for example, download [Dracula EPUB3](https://www.gutenberg.org/ebooks/345) to this directory as `dracula.epub`, then: +### 2. Add Books +**Option A: Upload via Web Interface (Recommended)** +1. Start server: `uv run server.py` +2. Open http://127.0.0.1:8123 +3. Click the "+" card OR drag & drop EPUB file +4. Wait for automatic processing + +**Option B: Command Line** ```bash -uv run reader3.py dracula.epub +uv run reader3.py your_book.epub ``` -This creates the directory `dracula_data`, which registers the book to your local library. We can then run the server: +### 3. Start Server ```bash uv run server.py ``` -And visit [localhost:8123](http://localhost:8123/) to see your current Library. You can easily add more books, or delete them from your library by deleting the folder. It's not supposed to be complicated or complex. +### 4. Read and Analyze + +1. Open http://127.0.0.1:8123 +2. Select a book +3. Right-click on text → Choose analysis type +4. Review AI response in side panel +5. Save if important +6. Highlights appear on next visit! + +## Usage + +### AI Analysis +- Select text → Right-click → Choose: + - **📋 Fact Check** - Verify facts and get context + - **💡 Discussion** - Deep analysis and insights + - **💬 Add Comment** - Your personal notes (no AI) +- View response in right panel +- Click "Save" for important insights + +### Highlights +- **Yellow** - Fact checks +- **Blue** - Discussions +- **Green** - Your comments +- Hover to see type, click to view/edit +- All highlights are editable and deletable + +### View & Export Highlights +- Click ⋮ menu on any book → "View Highlights" +- See all your notes and analyses in one page +- Filter by type (Fact Check, Discussion, Comment) +- Export to markdown for AI processing +- Context length warnings for large exports +- Jump directly to any chapter + +### Keyboard Shortcuts +- **← →** - Navigate between chapters +- **ESC** - Close panels and modals +- Works anywhere except when typing in text fields + +## Project Structure + +``` +reader3/ +├── reader3.py # EPUB processor +├── server.py # Web server +├── database.py # SQLite operations +├── ai_service.py # AI integration +├── books/ # All book data here +│ └── book_name_data/ +│ ├── book.pkl +│ └── images/ +├── templates/ # HTML templates +├── reader_data.db # SQLite database +└── .env # API configuration +``` + +## Data Management + +### View Your Highlights +- Click ⋮ menu on any book → "View Highlights" +- See all notes, comments, and analyses in one page +- Filter by type and jump to chapters + +### View Database (Advanced) +```bash +uv run check_database.py +``` + +### Backup +```bash +# Double-click: backup.bat +# Or manually: +copy reader_data.db backups\reader_data_backup.db +``` + +## Tools + +- `check_database.py` - View raw database contents (advanced) +- `backup.bat` - Quick database backup + +## Why DeepSeek? + +- ✅ Cost-effective (¥1/M tokens input, ¥2/M output) +- ✅ Excellent Chinese language support +- ✅ Fast response in China +- ✅ OpenAI-compatible API + +## Troubleshooting + +### API Key Error +1. Check `.env` file exists and has correct key +2. Restart server + +### No Highlights Showing +1. Check browser console (F12) for errors +2. Verify data exists: `uv run check_database.py` +3. 
Hard refresh (Ctrl+Shift+R) + +### Server Won't Start +1. Check if port 8123 is available +2. Verify `.env` configuration ## License -MIT \ No newline at end of file +MIT + +--- + +**Note**: This project is designed to be simple and hackable. Ask your LLM to modify it however you like! diff --git a/TECHNICAL_CHALLENGES.md b/TECHNICAL_CHALLENGES.md new file mode 100644 index 0000000..21b1a86 --- /dev/null +++ b/TECHNICAL_CHALLENGES.md @@ -0,0 +1,158 @@ +# Technical Challenges Solved + +This document outlines the key technical challenges we encountered and solved while building this AI-powered EPUB reader. + +## 1. EPUB Cover Image Extraction + +**Challenge**: Cover images weren't being extracted from EPUB files. Some books had covers marked as `ITEM_COVER` type instead of `ITEM_IMAGE`, causing them to be skipped. + +**Solution**: +- Modified image extraction to handle both `ITEM_COVER` and `ITEM_IMAGE` types +- Implemented multi-method cover detection: check ITEM_COVER type → search by filename pattern → use first large image as fallback +- Added size filtering (>10KB) to avoid using small icons as covers + +**Code**: `reader3.py` lines 190-230 + +## 2. Multi-Paragraph Text Highlighting + +**Challenge**: When users highlighted text spanning multiple paragraphs, the highlight wouldn't display because wrapping `
<p>` tags in a `<span>` creates invalid HTML that browsers reject.
+
+**Solution**:
+- Detect when highlighted text spans block elements
+- Apply the highlight class directly to the paragraph elements instead of wrapping them
+- Use the Range API with whitespace-tolerant regex matching to handle text across multiple elements (a Python sketch of this matching idea follows challenge 7 below)
+- Normalize whitespace in search patterns to handle variations in HTML structure
+
+**Code**: `templates/reader.html` - `applyHighlights()` and `findTextRange()` functions
+
+## 3. FastAPI Route Ordering for Image Serving
+
+**Challenge**: Image URLs like `/read/{book_id}/images/{image_name}` were returning 404 because the catch-all route `/read/{book_id}/{chapter_ref:path}` was matching first.
+
+**Solution**:
+- Moved the specific image route definition before the generic chapter route (see the sketch after challenge 7 below)
+- FastAPI matches routes in registration order, so more specific routes must come first
+- Also fixed path handling to preserve spaces in book folder names (removed an incorrect `os.path.basename()` call)
+
+**Code**: `server.py` - route ordering around lines 125-175
+
+## 4. Reading Progress with Precise Scroll Position
+
+**Challenge**:
+- `scrollTop` was always returning 0 when read directly
+- The `beforeunload` event doesn't fire reliably
+- We needed to track the exact scroll position within chapters, not just chapter numbers
+
+**Solution**:
+- Use a scroll event listener to continuously track a `currentScrollPosition` variable
+- Intercept navigation clicks with `preventDefault()` to ensure the save completes before navigating
+- Add a `pagehide` event handler as a backup for mobile browsers
+- Store both the chapter index and the scroll position in the database
+- Implement a retry mechanism for scroll restoration to handle content loading delays
+
+**Code**: `templates/reader.html` - scroll tracking and the `saveProgress()` function
+
+## 5. Database Schema Migration
+
+**Challenge**: Adding a `scroll_position` column to the existing `reading_progress` table without breaking existing data.
+
+**Solution**:
+- Created a migration script that checks whether the column exists before adding it
+- Used `ALTER TABLE ADD COLUMN` with `DEFAULT 0` for backward compatibility
+- Gracefully handles both new installations and existing databases
+
+**Code**: `migrate_progress.py`
+
+## 6. AI Prompt Engineering for Reading Context
+
+**Challenge**: Generic AI prompts weren't providing useful reading assistance; different reading scenarios need different kinds of help.
+
+**Solution**:
+- Split the feature into two distinct functions:
+  - **解释说明 (Explanation)**: Quick lookups for terms, people, events, concepts
+  - **深入讨论 (Discussion)**: Academic analysis with theoretical frameworks and critical thinking
+- Structured the prompts with clear dimensions (论点解析 argument analysis, 理论视角 theoretical perspectives, 批判思考 critical thinking, 启发问题 guiding questions)
+- Removed the context parameter from fact-check to keep it focused and fast
+
+**Code**: `ai_service.py` - the `fact_check()` and `discuss()` methods
+
+## 7. Dark Mode Implementation
+
+**Challenge**: Implementing comprehensive dark mode across all pages with proper contrast and readability.
+
+**Solution**:
+- Used a CSS class toggle (`body.dark-mode`) instead of media queries, so the user stays in control
+- Defined dark-mode colors for every UI element, including highlights, progress bars, and modals
+- Persisted the theme preference in localStorage
+- Synchronized the theme across all pages (library, reader, highlights)
+- Used `!important` for highlight colors to override inline styles
+
+**Code**: All template files - CSS dark mode sections
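+
+A minimal FastAPI sketch of the route-ordering fix from challenge 3 (illustrative only, with handler bodies elided; the route paths match those described above):
+
+```python
+from fastapi import FastAPI
+
+app = FastAPI()
+
+# Registered first: routes are tried in registration order, so the
+# specific image route must precede the catch-all below.
+@app.get("/read/{book_id}/images/{image_name}")
+async def serve_image(book_id: str, image_name: str):
+    ...
+
+# Registered last: only sees paths the image route did not match.
+@app.get("/read/{book_id}/{chapter_ref:path}")
+async def read_chapter(book_id: str, chapter_ref: str):
+    ...
+```
+
+And a sketch of the whitespace-tolerant matching idea from challenge 2 (and challenge 10 below), transposed from the template's JavaScript into Python for illustration (assumes Python 3.8+, where `re.escape` leaves whitespace unescaped):
+
+```python
+import re
+
+def tolerant_pattern(snippet: str) -> re.Pattern:
+    """Build a regex that matches `snippet` across any whitespace runs."""
+    escaped = re.escape(snippet)  # neutralize regex metacharacters
+    # Collapse each whitespace run into \s+ so newlines and repeated
+    # spaces in the chapter HTML still match.
+    return re.compile(re.sub(r"\s+", r"\\s+", escaped))
+
+chapter_text = "It was a dark\n    and stormy night."
+assert tolerant_pattern("dark and stormy").search(chapter_text)
+```
+
+## 8. 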
TOC Auto-Scroll to Active Item + +**Challenge**: When opening a book mid-way through, the TOC sidebar didn't show the current chapter, requiring manual scrolling. + +**Solution**: +- Calculate active TOC item position using `offsetTop` +- Scroll sidebar to center the active item in viewport +- Execute after DOM load to ensure elements are rendered + +**Code**: `templates/reader.html` - TOC auto-scroll in DOMContentLoaded + +## 9. Book Detection Without Naming Convention + +**Challenge**: Initially required `_data` suffix in folder names, limiting flexibility and creating ugly folder names. + +**Solution**: +- Changed detection from filename pattern matching to presence of `book.pkl` file +- Updated library scanning to check for file existence instead of name patterns +- Maintained backward compatibility with old `_data` folders + +**Code**: `server.py` - `library_view()` function + +## 10. Whitespace-Tolerant Text Matching + +**Challenge**: Saved highlights couldn't be found when text spanned multiple paragraphs due to whitespace differences (newlines, multiple spaces). + +**Solution**: +- Created regex pattern that replaces `\s+` in search text with `\s+` pattern +- Allows flexible matching of any whitespace sequence +- Escapes special regex characters in user text before pattern creation +- Falls back to exact match first for performance + +**Code**: `templates/reader.html` - `findTextRange()` function + +--- + +## Key Technologies Used + +- **FastAPI**: Async web framework with automatic API documentation +- **SQLite**: Lightweight database for highlights and progress +- **ebooklib**: EPUB parsing and extraction +- **BeautifulSoup**: HTML processing and cleaning +- **MathJax**: Mathematical equation rendering +- **Marked.js**: Markdown rendering for AI responses +- **Jinja2**: Server-side templating +- **Vanilla JavaScript**: No framework dependencies for frontend + +## Architecture Decisions + +1. **Server-side rendering** for initial page load (SEO-friendly, fast first paint) +2. **Client-side interactivity** for highlights and AI features (responsive UX) +3. **SQLite for data** (simple, portable, no separate database server) +4. **Pickle for book data** (fast serialization, preserves Python objects) +5. **localStorage for preferences** (theme, font settings persist across sessions) +6. **Event-driven progress saving** (reliable, doesn't interfere with reading) + +## Performance Optimizations + +- **LRU cache** for book loading (avoid repeated disk reads) +- **Lazy AI service initialization** (only load when needed) +- **Async/await** throughout (non-blocking I/O) +- **keepalive flag** on fetch requests (ensures completion on page unload) +- **Debounced scroll tracking** (via event listener, not polling) + +--- + +*This document serves as a reference for understanding the technical depth and problem-solving approaches used in this project.* diff --git a/ai_service.py b/ai_service.py new file mode 100644 index 0000000..4cb6ca2 --- /dev/null +++ b/ai_service.py @@ -0,0 +1,102 @@ +""" +AI service for fact-checking and discussion. +""" +import os +import httpx +from typing import Optional + + +class AIService: + """Handles AI API calls.""" + + def __init__(self, api_key: Optional[str] = None, base_url: Optional[str] = None): + self.api_key = api_key or os.getenv("OPENAI_API_KEY") + self.base_url = base_url or os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1") + self.model = os.getenv("OPENAI_MODEL", "gpt-4o-mini") + + if not self.api_key: + raise ValueError("API key not provided. 
Set OPENAI_API_KEY environment variable.") + + async def fact_check(self, text: str, context: str = "") -> str: + """Quick explanation and fact-checking for unclear content.""" + prompt = f"""请帮我理解以下内容: + +{text} + +请根据内容类型提供相应的解释: + +**如果是专有名词/概念**:给出清晰的定义和解释 +**如果是人物**:介绍其身份、背景和重要性 +**如果是历史事件**:说明事件经过、时间、影响 +**如果是地点**:介绍其地理位置、特点、相关背景 +**如果是数据/事实陈述**:验证准确性,提供来源或背景 + +要求: +- 简洁明了,重点突出 +- 如有错误或争议,明确指出 +- 如果内容不完整或无法判断,说明需要更多上下文""" + + return await self._call_api(prompt) + + async def discuss(self, text: str, context: str = "") -> str: + """Generate insightful and academic discussion about the selected text.""" + prompt = f"""请对以下文本进行深入的学术性分析和讨论: + +{text} + +请从以下几个维度展开分析: + +**1. 核心论点解析** +- 作者的主要观点是什么? +- 论证逻辑和结构如何? +- 使用了哪些论证方法(举例、类比、引用等)? + +**2. 理论与学术视角** +- 这段文本涉及哪些学术领域或理论框架? +- 与哪些经典理论、学派或学者的观点相关? +- 在学术史或思想史上的位置如何? + +**3. 批判性思考** +- 论证是否充分?有无逻辑漏洞? +- 是否存在隐含的假设或前提? +- 可能的反驳观点是什么? + +**4. 启发性问题** +- 这段文本引发了哪些值得深入思考的问题? +- 如何将这些观点应用到其他领域或情境? +- 对当代有什么启示意义? + +要求: +- 保持学术严谨性,但避免过于晦涩 +- 提出具有启发性的问题,引导深入思考 +- 如涉及专业术语,简要解释 +- 鼓励多角度、批判性的思考""" + + return await self._call_api(prompt) + + async def _call_api(self, prompt: str) -> str: + """Make API call to OpenAI-compatible endpoint.""" + async with httpx.AsyncClient(timeout=60.0) as client: + try: + response = await client.post( + f"{self.base_url}/chat/completions", + headers={ + "Authorization": f"Bearer {self.api_key}", + "Content-Type": "application/json" + }, + json={ + "model": self.model, + "messages": [ + {"role": "user", "content": prompt} + ], + "temperature": 0.7 + } + ) + response.raise_for_status() + data = response.json() + return data["choices"][0]["message"]["content"] + + except httpx.HTTPError as e: + return f"API调用失败: {str(e)}" + except Exception as e: + return f"处理失败: {str(e)}" diff --git a/backup.bat b/backup.bat new file mode 100644 index 0000000..4677b45 --- /dev/null +++ b/backup.bat @@ -0,0 +1,28 @@ +@echo off +echo ======================================== +echo 备份 Reader3 数据库 +echo ======================================== +echo. + +REM 创建backups文件夹 +if not exist backups mkdir backups + +REM 生成带时间戳的文件名 +set datetime=%date:~0,4%%date:~5,2%%date:~8,2%_%time:~0,2%%time:~3,2%%time:~6,2% +set datetime=%datetime: =0% + +REM 备份数据库 +copy reader_data.db backups\reader_data_%datetime%.db + +echo. +echo ✓ 备份完成! +echo 文件: backups\reader_data_%datetime%.db +echo. + +REM 显示backups文件夹内容 +echo 现有备份: +dir /b backups\*.db + +echo. 
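+REM Note: the %date%/%time% substring parsing above assumes a
+REM yyyy/mm/dd-style locale format; adjust the offsets if your Windows
+REM locale renders %date% differently (e.g. with a leading weekday).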
+echo ======================================== +pause diff --git a/books/.gitkeep b/books/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/check_book.py b/check_book.py new file mode 100644 index 0000000..5b2a0a0 --- /dev/null +++ b/check_book.py @@ -0,0 +1,12 @@ +import pickle +import sys +from reader3 import Book, BookMetadata, ChapterContent, TOCEntry + +book_path = sys.argv[1] if len(sys.argv) > 1 else 'books/Evicted/book.pkl' + +with open(book_path, 'rb') as f: + book = pickle.load(f) + print(f"Cover image: {book.cover_image}") + print(f"\nFirst few spine items:") + for i, item in enumerate(book.spine[:3]): + print(f" {i}: {item.href}") diff --git a/check_database.py b/check_database.py new file mode 100644 index 0000000..cb60f06 --- /dev/null +++ b/check_database.py @@ -0,0 +1,94 @@ +"""查看数据库内容""" +import sqlite3 +from datetime import datetime + +db_path = "reader_data.db" + +print("=" * 60) +print("数据库内容检查") +print("=" * 60) +print(f"\n数据库位置: {db_path}") +print() + +conn = sqlite3.connect(db_path) +cursor = conn.cursor() + +# 检查highlights表 +print("📚 Highlights (高亮) 表:") +print("-" * 60) +cursor.execute("SELECT COUNT(*) FROM highlights") +count = cursor.fetchone()[0] +print(f"总记录数: {count}") + +if count > 0: + cursor.execute(""" + SELECT id, book_id, chapter_index, + substr(selected_text, 1, 50) as text_preview, + created_at + FROM highlights + ORDER BY created_at DESC + LIMIT 5 + """) + + print("\n最近的5条记录:") + for row in cursor.fetchall(): + print(f"\nID: {row[0]}") + print(f" 书籍: {row[1]}") + print(f" 章节: {row[2]}") + print(f" 文本: {row[3]}...") + print(f" 时间: {row[4]}") + +print("\n" + "=" * 60) + +# 检查ai_analyses表 +print("🤖 AI Analyses (AI分析) 表:") +print("-" * 60) +cursor.execute("SELECT COUNT(*) FROM ai_analyses") +count = cursor.fetchone()[0] +print(f"总记录数: {count}") + +if count > 0: + cursor.execute(""" + SELECT id, highlight_id, analysis_type, + substr(prompt, 1, 50) as prompt_preview, + substr(response, 1, 100) as response_preview, + created_at + FROM ai_analyses + ORDER BY created_at DESC + LIMIT 5 + """) + + print("\n最近的5条记录:") + for row in cursor.fetchall(): + print(f"\nID: {row[0]}") + print(f" 关联高亮ID: {row[1]}") + print(f" 分析类型: {row[2]}") + print(f" 提示: {row[3]}...") + print(f" 响应: {row[4]}...") + print(f" 时间: {row[5]}") + +print("\n" + "=" * 60) + +# 统计信息 +print("📊 统计信息:") +print("-" * 60) + +cursor.execute(""" + SELECT analysis_type, COUNT(*) + FROM ai_analyses + GROUP BY analysis_type +""") +stats = cursor.fetchall() + +if stats: + print("\n按分析类型统计:") + for row in stats: + print(f" {row[0]}: {row[1]} 条") +else: + print(" 暂无数据") + +conn.close() + +print("\n" + "=" * 60) +print("✓ 检查完成") +print("=" * 60) diff --git a/database.py b/database.py new file mode 100644 index 0000000..5f52c21 --- /dev/null +++ b/database.py @@ -0,0 +1,251 @@ +""" +Database models for storing highlights and AI interactions. 
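+
+Tables: highlights, ai_analyses, and reading_progress; all are created
+automatically on first run by Database.init_db().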
+""" +import sqlite3 +import json +from datetime import datetime +from typing import List, Dict, Optional +from dataclasses import dataclass, asdict + + +@dataclass +class Highlight: + """User highlight with position info.""" + id: Optional[int] = None + book_id: str = "" + chapter_index: int = 0 + selected_text: str = "" + context_before: str = "" + context_after: str = "" + created_at: str = "" + + +@dataclass +class AIAnalysis: + """AI analysis result (fact-check or discussion).""" + id: Optional[int] = None + highlight_id: int = 0 + analysis_type: str = "" # 'fact_check' or 'discussion' + prompt: str = "" + response: str = "" + created_at: str = "" + + +class Database: + """Simple SQLite database for storing highlights and AI analyses.""" + + def __init__(self, db_path: str = "reader_data.db"): + self.db_path = db_path + self.init_db() + + def init_db(self): + """Create tables if they don't exist.""" + conn = sqlite3.connect(self.db_path) + cursor = conn.cursor() + + cursor.execute(""" + CREATE TABLE IF NOT EXISTS highlights ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + book_id TEXT NOT NULL, + chapter_index INTEGER NOT NULL, + selected_text TEXT NOT NULL, + context_before TEXT, + context_after TEXT, + created_at TEXT NOT NULL + ) + """) + + cursor.execute(""" + CREATE TABLE IF NOT EXISTS ai_analyses ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + highlight_id INTEGER NOT NULL, + analysis_type TEXT NOT NULL, + prompt TEXT NOT NULL, + response TEXT NOT NULL, + created_at TEXT NOT NULL, + FOREIGN KEY (highlight_id) REFERENCES highlights (id) + ) + """) + + cursor.execute(""" + CREATE TABLE IF NOT EXISTS reading_progress ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + book_id TEXT NOT NULL UNIQUE, + chapter_index INTEGER NOT NULL, + scroll_position INTEGER DEFAULT 0, + last_read_at TEXT NOT NULL + ) + """) + + conn.commit() + conn.close() + + def save_highlight(self, highlight: Highlight) -> int: + """Save a highlight and return its ID.""" + conn = sqlite3.connect(self.db_path) + cursor = conn.cursor() + + cursor.execute(""" + INSERT INTO highlights (book_id, chapter_index, selected_text, + context_before, context_after, created_at) + VALUES (?, ?, ?, ?, ?, ?) + """, ( + highlight.book_id, + highlight.chapter_index, + highlight.selected_text, + highlight.context_before, + highlight.context_after, + highlight.created_at or datetime.now().isoformat() + )) + + highlight_id = cursor.lastrowid + conn.commit() + conn.close() + + return highlight_id + + def save_analysis(self, analysis: AIAnalysis) -> int: + """Save an AI analysis and return its ID.""" + conn = sqlite3.connect(self.db_path) + cursor = conn.cursor() + + cursor.execute(""" + INSERT INTO ai_analyses (highlight_id, analysis_type, prompt, response, created_at) + VALUES (?, ?, ?, ?, ?) + """, ( + analysis.highlight_id, + analysis.analysis_type, + analysis.prompt, + analysis.response, + analysis.created_at or datetime.now().isoformat() + )) + + analysis_id = cursor.lastrowid + conn.commit() + conn.close() + + return analysis_id + + def get_highlights_for_chapter(self, book_id: str, chapter_index: int) -> List[Dict]: + """Get all highlights for a specific chapter.""" + conn = sqlite3.connect(self.db_path) + conn.row_factory = sqlite3.Row + cursor = conn.cursor() + + cursor.execute(""" + SELECT * FROM highlights + WHERE book_id = ? AND chapter_index = ? 
+ ORDER BY created_at DESC + """, (book_id, chapter_index)) + + rows = cursor.fetchall() + conn.close() + + return [dict(row) for row in rows] + + def get_all_highlights_for_book(self, book_id: str) -> List[Dict]: + """Get all highlights for a book (all chapters).""" + conn = sqlite3.connect(self.db_path) + conn.row_factory = sqlite3.Row + cursor = conn.cursor() + + cursor.execute(""" + SELECT * FROM highlights + WHERE book_id = ? + ORDER BY created_at DESC + """, (book_id,)) + + rows = cursor.fetchall() + conn.close() + + return [dict(row) for row in rows] + + def get_analyses_for_highlight(self, highlight_id: int) -> List[Dict]: + """Get all AI analyses for a highlight.""" + conn = sqlite3.connect(self.db_path) + conn.row_factory = sqlite3.Row + cursor = conn.cursor() + + cursor.execute(""" + SELECT * FROM ai_analyses + WHERE highlight_id = ? + ORDER BY created_at DESC + """, (highlight_id,)) + + rows = cursor.fetchall() + conn.close() + + return [dict(row) for row in rows] + + def update_analysis(self, analysis_id: int, response: str): + """Update an existing analysis response (for editing comments).""" + conn = sqlite3.connect(self.db_path) + cursor = conn.cursor() + + cursor.execute(""" + UPDATE ai_analyses + SET response = ? + WHERE id = ? + """, (response, analysis_id)) + + conn.commit() + conn.close() + + def delete_analysis(self, analysis_id: int): + """Delete an analysis and its highlight if no other analyses exist.""" + conn = sqlite3.connect(self.db_path) + cursor = conn.cursor() + + # Get the highlight_id before deleting + cursor.execute("SELECT highlight_id FROM ai_analyses WHERE id = ?", (analysis_id,)) + result = cursor.fetchone() + + if result: + highlight_id = result[0] + + # Delete the analysis + cursor.execute("DELETE FROM ai_analyses WHERE id = ?", (analysis_id,)) + + # Check if there are other analyses for this highlight + cursor.execute("SELECT COUNT(*) FROM ai_analyses WHERE highlight_id = ?", (highlight_id,)) + count = cursor.fetchone()[0] + + # If no other analyses, delete the highlight too + if count == 0: + cursor.execute("DELETE FROM highlights WHERE id = ?", (highlight_id,)) + + conn.commit() + conn.close() + + def save_progress(self, book_id: str, chapter_index: int, scroll_position: int = 0): + """Save or update reading progress for a book.""" + conn = sqlite3.connect(self.db_path) + cursor = conn.cursor() + + cursor.execute(""" + INSERT INTO reading_progress (book_id, chapter_index, scroll_position, last_read_at) + VALUES (?, ?, ?, ?) + ON CONFLICT(book_id) DO UPDATE SET + chapter_index = excluded.chapter_index, + scroll_position = excluded.scroll_position, + last_read_at = excluded.last_read_at + """, (book_id, chapter_index, scroll_position, datetime.now().isoformat())) + + conn.commit() + conn.close() + + def get_progress(self, book_id: str) -> Optional[Dict]: + """Get the last read position for a book.""" + conn = sqlite3.connect(self.db_path) + conn.row_factory = sqlite3.Row + cursor = conn.cursor() + + cursor.execute(""" + SELECT chapter_index, scroll_position FROM reading_progress + WHERE book_id = ? 
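+            -- book_id is UNIQUE, so this query returns at most one row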
+ """, (book_id,)) + + result = cursor.fetchone() + conn.close() + + return dict(result) if result else None diff --git a/list_epub_images.py b/list_epub_images.py new file mode 100644 index 0000000..e744c96 --- /dev/null +++ b/list_epub_images.py @@ -0,0 +1,35 @@ +import sys +import ebooklib +from ebooklib import epub + +if len(sys.argv) < 2: + print("Usage: python list_epub_images.py ") + sys.exit(1) + +epub_file = sys.argv[1] +book = epub.read_epub(epub_file) + +print("All images (ITEM_IMAGE type):") +print("-" * 60) +for item in book.get_items(): + if item.get_type() == ebooklib.ITEM_IMAGE: + print(f" {item.get_name()}") + print(f" Size: {len(item.get_content())} bytes") + +print("\n" + "=" * 60) +print("All items with image extensions:") +print("-" * 60) +for item in book.get_items(): + name = item.get_name().lower() + if name.endswith(('.jpg', '.jpeg', '.png', '.gif', '.webp', '.svg')): + print(f" {item.get_name()}") + print(f" Type: {item.get_type()}") + print(f" Size: {len(item.get_content())} bytes") + +print("\n" + "=" * 60) +print("COVER type items:") +print("-" * 60) +for item in book.get_items(): + if item.get_type() == ebooklib.ITEM_COVER: + print(f" {item.get_name()}") + print(f" Size: {len(item.get_content())} bytes") diff --git a/migrate_progress.py b/migrate_progress.py new file mode 100644 index 0000000..b064200 --- /dev/null +++ b/migrate_progress.py @@ -0,0 +1,34 @@ +""" +Migration script to add scroll_position column to reading_progress table. +""" +import sqlite3 +import os + +db_path = os.getenv("DATABASE_PATH", "reader_data.db") + +conn = sqlite3.connect(db_path) +cursor = conn.cursor() + +try: + # Check if column exists + cursor.execute("PRAGMA table_info(reading_progress)") + columns = [row[1] for row in cursor.fetchall()] + + if 'scroll_position' not in columns: + print("Adding scroll_position column...") + cursor.execute(""" + ALTER TABLE reading_progress + ADD COLUMN scroll_position INTEGER DEFAULT 0 + """) + conn.commit() + print("✓ Migration completed successfully!") + else: + print("✓ Column already exists, no migration needed.") + +except sqlite3.OperationalError as e: + if "no such table" in str(e): + print("Table doesn't exist yet, will be created on first run.") + else: + print(f"Error: {e}") +finally: + conn.close() diff --git a/pyproject.toml b/pyproject.toml index 31e6179..6480fee 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,4 +10,6 @@ dependencies = [ "fastapi>=0.121.2", "jinja2>=3.1.6", "uvicorn>=0.38.0", + "httpx>=0.27.0", + "python-multipart>=0.0.6", ] diff --git a/reader3.py b/reader3.py index d0b9d3f..3b48210 100644 --- a/reader3.py +++ b/reader3.py @@ -64,6 +64,7 @@ class Book: # Meta info source_file: str processed_at: str + cover_image: Optional[str] = None # Cover image filename version: str = "3.0" @@ -187,27 +188,76 @@ def process_epub(epub_path: str, output_dir: str) -> Book: images_dir = os.path.join(output_dir, 'images') os.makedirs(images_dir, exist_ok=True) - # 4. Extract Images & Build Map + # 4. 
Extract Images & Build Map (including cover) print("Extracting images...") image_map = {} # Key: internal_path, Value: local_relative_path + cover_image = None + # Try to find cover image from metadata + cover_item = None + + # Method 1: Check for ITEM_COVER type (most reliable) for item in book.get_items(): - if item.get_type() == ebooklib.ITEM_IMAGE: + if item.get_type() == ebooklib.ITEM_COVER: + cover_item = item + print(f"✓ Found cover (type COVER): {item.get_name()}") + break + + # Method 2: Look for images with 'cover' or 'cvi' in the name + if not cover_item: + for item in book.get_items(): + if item.get_type() in (ebooklib.ITEM_IMAGE, ebooklib.ITEM_COVER): + name_lower = item.get_name().lower() + if 'cover' in name_lower or 'cvi' in name_lower: + cover_item = item + print(f"✓ Found cover (by name): {item.get_name()}") + break + + # Method 3: Use first large image as fallback (skip small icons/logos) + if not cover_item: + for item in book.get_items(): + if item.get_type() in (ebooklib.ITEM_IMAGE, ebooklib.ITEM_COVER): + # Skip very small images (likely icons) + if len(item.get_content()) > 10000: # > 10KB + cover_item = item + print(f"✓ Using first large image as cover: {item.get_name()}") + break + + saved_files = {} # Track saved filenames to detect collisions + + for item in book.get_items(): + # Extract both ITEM_IMAGE and ITEM_COVER types + if item.get_type() in (ebooklib.ITEM_IMAGE, ebooklib.ITEM_COVER): # Normalize filename original_fname = os.path.basename(item.get_name()) # Sanitize filename for OS safe_fname = "".join([c for c in original_fname if c.isalpha() or c.isdigit() or c in '._-']).strip() + # Handle filename collisions by adding a counter + if safe_fname in saved_files: + base, ext = os.path.splitext(safe_fname) + counter = 1 + while f"{base}_{counter}{ext}" in saved_files: + counter += 1 + safe_fname = f"{base}_{counter}{ext}" + print(f"Warning: Filename collision, renamed to {safe_fname}") + # Save to disk local_path = os.path.join(images_dir, safe_fname) with open(local_path, 'wb') as f: f.write(item.get_content()) + + saved_files[safe_fname] = item.get_name() # Map keys: We try both the full internal path and just the basename # to be robust against messy HTML src attributes rel_path = f"images/{safe_fname}" image_map[item.get_name()] = rel_path image_map[original_fname] = rel_path + + # Check if this is the cover image + if cover_item and item.get_name() == cover_item.get_name(): + cover_image = safe_fname # 5. Process TOC print("Parsing Table of Contents...") @@ -277,7 +327,8 @@ def process_epub(epub_path: str, output_dir: str) -> Book: toc=toc_structure, images=image_map, source_file=os.path.basename(epub_path), - processed_at=datetime.now().isoformat() + processed_at=datetime.now().isoformat(), + cover_image=cover_image ) return final_book @@ -292,6 +343,26 @@ def save_to_pickle(book: Book, output_dir: str): # --- CLI --- +def sanitize_folder_name(name: str) -> str: + """ + Sanitize folder name while preserving Unicode characters (including Chinese). + Only removes characters that are invalid for Windows/Unix filesystems. + """ + # Characters not allowed in Windows filenames + invalid_chars = '<>:"/\\|?*' + for char in invalid_chars: + name = name.replace(char, '_') + + # Remove leading/trailing spaces and dots + name = name.strip('. 
') + + # Limit length to avoid path issues (Windows has 260 char limit) + if len(name) > 100: + name = name[:100] + + return name + + if __name__ == "__main__": import sys @@ -301,13 +372,48 @@ def save_to_pickle(book: Book, output_dir: str): epub_file = sys.argv[1] assert os.path.exists(epub_file), "File not found." - out_dir = os.path.splitext(epub_file)[0] + "_data" + + # Create books directory if it doesn't exist + books_dir = "books" + os.makedirs(books_dir, exist_ok=True) + + # First, do a quick metadata extraction to get the real title + print(f"Reading metadata from {epub_file}...") + temp_book = epub.read_epub(epub_file) + temp_metadata = extract_metadata_robust(temp_book) + + # Use the actual book title for folder name (supports Chinese!) + book_title = temp_metadata.title or os.path.splitext(os.path.basename(epub_file))[0] + safe_title = sanitize_folder_name(book_title) + out_dir = os.path.join(books_dir, safe_title) + + # If folder exists, add a number suffix + if os.path.exists(out_dir): + counter = 1 + while os.path.exists(f"{out_dir}_{counter}"): + counter += 1 + out_dir = f"{out_dir}_{counter}" + + print(f"Output directory: {out_dir}") book_obj = process_epub(epub_file, out_dir) save_to_pickle(book_obj, out_dir) - print("\n--- Summary ---") - print(f"Title: {book_obj.metadata.title}") - print(f"Authors: {', '.join(book_obj.metadata.authors)}") - print(f"Physical Files (Spine): {len(book_obj.spine)}") - print(f"TOC Root Items: {len(book_obj.toc)}") - print(f"Images extracted: {len(book_obj.images)}") + + # Use safe printing to avoid Unicode errors on Windows + try: + print("\n--- Summary ---") + print(f"Title: {book_obj.metadata.title}") + print(f"Authors: {', '.join(book_obj.metadata.authors)}") + print(f"Physical Files (Spine): {len(book_obj.spine)}") + print(f"TOC Root Items: {len(book_obj.toc)}") + print(f"Images extracted: {len(book_obj.images)}") + print(f"\nBook data saved to: {out_dir}") + except UnicodeEncodeError: + # Fallback for Windows console encoding issues + print("\n--- Summary ---") + print(f"Title: [Unicode title]") + print(f"Authors: [Unicode authors]") + print(f"Physical Files (Spine): {len(book_obj.spine)}") + print(f"TOC Root Items: {len(book_obj.toc)}") + print(f"Images extracted: {len(book_obj.images)}") + print(f"\nBook data saved to: {out_dir}") diff --git a/server.py b/server.py index 9c870dc..a10953c 100644 --- a/server.py +++ b/server.py @@ -2,19 +2,74 @@ import pickle from functools import lru_cache from typing import Optional +from datetime import datetime +from pathlib import Path -from fastapi import FastAPI, Request, HTTPException -from fastapi.responses import HTMLResponse, FileResponse +from fastapi import FastAPI, Request, HTTPException, UploadFile, File +from fastapi.responses import HTMLResponse, FileResponse, JSONResponse from fastapi.staticfiles import StaticFiles from fastapi.templating import Jinja2Templates +from pydantic import BaseModel +import shutil +import subprocess from reader3 import Book, BookMetadata, ChapterContent, TOCEntry +from database import Database, Highlight, AIAnalysis +from ai_service import AIService + +# Load .env file at startup +def load_env(): + """Load environment variables from .env file.""" + env_path = Path(".env") + if env_path.exists(): + print("Loading .env file...") + with open(env_path) as f: + for line in f: + line = line.strip() + if line and not line.startswith("#") and "=" in line: + key, value = line.split("=", 1) + os.environ[key.strip()] = value.strip() + print(f"✓ Loaded API 
configuration: {os.getenv('OPENAI_BASE_URL', 'Not set')}") + else: + print("⚠ Warning: .env file not found. AI features will not work.") + +load_env() app = FastAPI() templates = Jinja2Templates(directory="templates") +# Initialize database and AI service +db = Database() +ai_service = None # Will be initialized on first use + +def get_ai_service(): + """Lazy initialization of AI service.""" + global ai_service + if ai_service is None: + try: + ai_service = AIService() + except ValueError as e: + print(f"Warning: {e}") + return ai_service + + +# Request models +class HighlightRequest(BaseModel): + book_id: str + chapter_index: int + selected_text: str + context_before: str = "" + context_after: str = "" + + +class AIRequest(BaseModel): + highlight_id: int + analysis_type: str # 'fact_check' or 'discussion' + selected_text: str + context: str = "" + # Where are the book folders located? -BOOKS_DIR = "." +BOOKS_DIR = "books" @lru_cache(maxsize=10) def load_book_cached(folder_name: str) -> Optional[Book]: @@ -39,30 +94,95 @@ async def library_view(request: Request): """Lists all available processed books.""" books = [] - # Scan directory for folders ending in '_data' that have a book.pkl - if os.path.exists(BOOKS_DIR): - for item in os.listdir(BOOKS_DIR): - if item.endswith("_data") and os.path.isdir(item): - # Try to load it to get the title - book = load_book_cached(item) - if book: - books.append({ - "id": item, - "title": book.metadata.title, - "author": ", ".join(book.metadata.authors), - "chapters": len(book.spine) - }) + # Create books directory if it doesn't exist + os.makedirs(BOOKS_DIR, exist_ok=True) + # Scan directory for folders that have a book.pkl + for item in os.listdir(BOOKS_DIR): + item_path = os.path.join(BOOKS_DIR, item) + # Check if it's a directory and has book.pkl + if os.path.isdir(item_path) and os.path.exists(os.path.join(item_path, "book.pkl")): + # Try to load it to get the title + book = load_book_cached(item) + if book: + # Extract folder suffix if it exists (e.g., "_1", "_2") + folder_suffix = None + # Check if there's a number suffix + if item.endswith(tuple(f"_{i}" for i in range(1, 100))): + suffix_num = item.split("_")[-1] + folder_suffix = f"Copy {suffix_num}" + + # Get reading progress + progress_data = db.get_progress(item) + total_chapters = len(book.spine) + progress_percent = 0 + current_chapter = None + if progress_data: + current_chapter = progress_data['chapter_index'] + progress_percent = int((current_chapter + 1) / total_chapters * 100) + + books.append({ + "id": item, + "title": book.metadata.title, + "author": ", ".join(book.metadata.authors), + "chapters": total_chapters, + "folder_suffix": folder_suffix, + "cover": book.cover_image if hasattr(book, 'cover_image') else None, + "progress": current_chapter, + "progress_percent": progress_percent + }) return templates.TemplateResponse("library.html", {"request": request, "books": books}) @app.get("/read/{book_id}", response_class=HTMLResponse) -async def redirect_to_first_chapter(book_id: str): - """Helper to just go to chapter 0.""" - return await read_chapter(book_id=book_id, chapter_index=0) +async def redirect_to_last_position(book_id: str): + """Redirect to last read chapter or chapter 0 if new.""" + from fastapi.responses import RedirectResponse + progress_data = db.get_progress(book_id) + chapter_index = progress_data['chapter_index'] if progress_data else 0 + return RedirectResponse(url=f"/read/{book_id}/{chapter_index}", status_code=302) + +@app.get("/read/{book_id}/images/{image_name}") 
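+# NOTE: this specific route is registered before the catch-all
+# /read/{book_id}/{chapter_ref:path} route below; FastAPI matches routes
+# in registration order, so the more specific route must come first.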
+async def serve_image(book_id: str, image_name: str): + """ + Serves images specifically for a book. + The HTML contains . + The browser resolves this to /read/{book_id}/images/pic.jpg. + """ + # Security check: prevent path traversal + if ".." in book_id or "/" in book_id or "\\" in book_id: + raise HTTPException(status_code=400, detail="Invalid book ID") + if ".." in image_name or "/" in image_name or "\\" in image_name: + raise HTTPException(status_code=400, detail="Invalid image name") -@app.get("/read/{book_id}/{chapter_index}", response_class=HTMLResponse) -async def read_chapter(request: Request, book_id: str, chapter_index: int): - """The main reader interface.""" + img_path = os.path.join(BOOKS_DIR, book_id, "images", image_name) + + if not os.path.exists(img_path): + raise HTTPException(status_code=404, detail="Image not found") + + return FileResponse(img_path) + +@app.get("/read/{book_id}/{chapter_ref:path}", response_class=HTMLResponse) +async def read_chapter(request: Request, book_id: str, chapter_ref: str): + """The main reader interface. Accepts either chapter index (0, 1, 2) or filename (part0008.html).""" + + # Try to parse as integer first + try: + chapter_index = int(chapter_ref) + except ValueError: + # It's a filename, need to find the corresponding chapter index + book = load_book_cached(book_id) + chapter_index = None + + # Search through spine to find matching filename + for idx, item in enumerate(book.spine): + if item.href == chapter_ref or item.href.endswith(chapter_ref): + chapter_index = idx + break + + if chapter_index is None: + raise HTTPException(status_code=404, detail=f"Chapter file '{chapter_ref}' not found") + + # Now proceed with the chapter_index book = load_book_cached(book_id) if not book: raise HTTPException(status_code=404, detail="Book not found") @@ -76,6 +196,12 @@ async def read_chapter(request: Request, book_id: str, chapter_index: int): prev_idx = chapter_index - 1 if chapter_index > 0 else None next_idx = chapter_index + 1 if chapter_index < len(book.spine) - 1 else None + # Get saved scroll position if returning to this chapter + progress_data = db.get_progress(book_id) + saved_scroll = 0 + if progress_data and progress_data['chapter_index'] == chapter_index: + saved_scroll = progress_data['scroll_position'] + return templates.TemplateResponse("reader.html", { "request": request, "book": book, @@ -83,28 +209,248 @@ async def read_chapter(request: Request, book_id: str, chapter_index: int): "chapter_index": chapter_index, "book_id": book_id, "prev_idx": prev_idx, - "next_idx": next_idx + "next_idx": next_idx, + "saved_scroll": saved_scroll }) -@app.get("/read/{book_id}/images/{image_name}") -async def serve_image(book_id: str, image_name: str): - """ - Serves images specifically for a book. - The HTML contains . - The browser resolves this to /read/{book_id}/images/pic.jpg. 
- """ - # Security check: ensure book_id is clean - safe_book_id = os.path.basename(book_id) - safe_image_name = os.path.basename(image_name) - img_path = os.path.join(BOOKS_DIR, safe_book_id, "images", safe_image_name) +# AI-related endpoints - if not os.path.exists(img_path): - raise HTTPException(status_code=404, detail="Image not found") +@app.post("/api/progress") +async def save_reading_progress(book_id: str, chapter_index: int, scroll_position: int = 0): + """Save reading progress.""" + try: + db.save_progress(book_id, chapter_index, scroll_position) + return {"status": "success"} + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +@app.post("/api/highlight") +async def create_highlight(req: HighlightRequest): + """Save a user highlight.""" + highlight = Highlight( + book_id=req.book_id, + chapter_index=req.chapter_index, + selected_text=req.selected_text, + context_before=req.context_before, + context_after=req.context_after, + created_at=datetime.now().isoformat() + ) + + highlight_id = db.save_highlight(highlight) + return {"highlight_id": highlight_id, "status": "success"} + + +@app.post("/api/ai/analyze") +async def analyze_text(req: AIRequest): + """Perform AI analysis (fact-check or discussion) without saving.""" + service = get_ai_service() + if not service: + raise HTTPException(status_code=500, detail="AI service not configured. Please set OPENAI_API_KEY.") + + # Call appropriate AI function + if req.analysis_type == "fact_check": + response = await service.fact_check(req.selected_text, req.context) + elif req.analysis_type == "discussion": + response = await service.discuss(req.selected_text, req.context) + else: + raise HTTPException(status_code=400, detail="Invalid analysis type") + + return { + "response": response, + "status": "success" + } + + +class SaveAnalysisRequest(BaseModel): + highlight_id: int + analysis_type: str + prompt: str + response: str + + +@app.post("/api/ai/save") +async def save_analysis(req: SaveAnalysisRequest): + """Save AI analysis to database.""" + analysis = AIAnalysis( + highlight_id=req.highlight_id, + analysis_type=req.analysis_type, + prompt=req.prompt, + response=req.response, + created_at=datetime.now().isoformat() + ) + + analysis_id = db.save_analysis(analysis) + + return { + "analysis_id": analysis_id, + "status": "success" + } + + +@app.get("/api/highlights/{book_id}/{chapter_index}") +async def get_highlights(book_id: str, chapter_index: int): + """Get all highlights for a chapter.""" + highlights = db.get_highlights_for_chapter(book_id, chapter_index) + + # Attach analyses to each highlight + for highlight in highlights: + highlight["analyses"] = db.get_analyses_for_highlight(highlight["id"]) + + return {"highlights": highlights} + + +@app.get("/highlights/{book_id}") +async def view_highlights(book_id: str, request: Request): + """View all highlights for a book.""" + try: + # Get all highlights for this book + all_highlights = db.get_all_highlights_for_book(book_id) + + # Attach analyses and flatten + highlights_with_analyses = [] + for highlight in all_highlights: + analyses = db.get_analyses_for_highlight(highlight["id"]) + if analyses: + for analysis in analyses: + highlights_with_analyses.append({ + **highlight, + "analysis_type": analysis["analysis_type"], + "response": analysis["response"], + "analysis_created_at": analysis["created_at"] + }) + else: + # Highlight without analysis + highlights_with_analyses.append({ + **highlight, + "analysis_type": None, + "response": None, + 
"analysis_created_at": None + }) + + # Sort by creation date (newest first) + highlights_with_analyses.sort(key=lambda x: x["created_at"], reverse=True) + + # Calculate stats + stats = { + "total": len(highlights_with_analyses), + "fact_check": sum(1 for h in highlights_with_analyses if h["analysis_type"] == "fact_check"), + "discussion": sum(1 for h in highlights_with_analyses if h["analysis_type"] == "discussion"), + "comment": sum(1 for h in highlights_with_analyses if h["analysis_type"] == "comment") + } + + # Get book title + book_title = book_id.replace("_data", "").replace("_", " ") + + return templates.TemplateResponse("highlights.html", { + "request": request, + "book_id": book_id, + "book_title": book_title, + "highlights": highlights_with_analyses, + "stats": stats + }) + + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +@app.put("/api/ai/update/{analysis_id}") +async def update_analysis(analysis_id: int, req: dict): + """Update an existing analysis (for editing comments).""" + try: + db.update_analysis(analysis_id, req.get("response", "")) + return {"status": "success"} + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +@app.delete("/api/ai/delete/{analysis_id}") +async def delete_analysis(analysis_id: int): + """Delete an analysis (and its highlight if no other analyses exist).""" + try: + db.delete_analysis(analysis_id) + return {"status": "success"} + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +@app.delete("/delete/{book_id}") +async def delete_book(book_id: str): + """Delete a book folder (but keep database entries).""" + try: + # Security check: ensure book_id doesn't contain path traversal + if ".." in book_id or "/" in book_id or "\\" in book_id: + raise HTTPException(status_code=400, detail="Invalid book ID") + + book_path = os.path.join(BOOKS_DIR, book_id) + + if not os.path.exists(book_path): + raise HTTPException(status_code=404, detail="Book not found") + + # Delete the book folder + shutil.rmtree(book_path) + + # Clear cache for this book + load_book_cached.cache_clear() + + return { + "message": f"Book deleted. 
Your highlights and analyses are preserved in the database.", + "status": "success" + } + + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +@app.post("/upload") +async def upload_book(file: UploadFile = File(...)): + """Upload and process an EPUB file.""" + # Validate file type + if not file.filename.endswith('.epub'): + raise HTTPException(status_code=400, detail="Only EPUB files are supported") + + try: + # Create temp directory if it doesn't exist + temp_dir = "temp" + os.makedirs(temp_dir, exist_ok=True) + + # Save uploaded file + temp_file_path = os.path.join(temp_dir, file.filename) + with open(temp_file_path, "wb") as buffer: + shutil.copyfileobj(file.file, buffer) + + # Process the EPUB file using reader3.py with uv + result = subprocess.run( + ["uv", "run", "reader3.py", temp_file_path], + capture_output=True, + text=True, + timeout=60 + ) + + # Clean up temp file + os.remove(temp_file_path) + + if result.returncode == 0: + # Extract book title from output + book_name = os.path.splitext(file.filename)[0] + return { + "message": f"Successfully processed '{book_name}'", + "status": "success" + } + else: + raise HTTPException( + status_code=500, + detail=f"Failed to process EPUB: {result.stderr}" + ) + + except subprocess.TimeoutExpired: + raise HTTPException(status_code=500, detail="Processing timeout (file too large?)") + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) - return FileResponse(img_path) if __name__ == "__main__": import uvicorn - print("Starting server at http://127.0.0.1:8123") - uvicorn.run(app, host="127.0.0.1", port=8123) + print("Starting server at http://0.0.0.0:8123 (accessible externally if firewall/NAT allow)") + uvicorn.run(app, host="0.0.0.0", port=8123) diff --git a/templates/highlights.html b/templates/highlights.html new file mode 100644 index 0000000..38a332e --- /dev/null +++ b/templates/highlights.html @@ -0,0 +1,362 @@ + + + + + + Highlights - {{ book_title }} - My Reader with AI + + + + + + +

+
+ ← Back to Library + +

{{ book_title }}

+
All your highlights and notes
+
+
+ 📋 + {{ stats.fact_check }} 解释说明 +
+
+ 💡 + {{ stats.discussion }} Discussions +
+
+ 💬 + {{ stats.comment }} Comments +
+
+ 📝 + {{ stats.total }} Total +
+
+
+ + {% if highlights %} +
+ + + + +
+ +
+ {% for item in highlights %} +
+
+
+ {% if item.analysis_type == 'fact_check' %} + 📋 解释说明 + {% elif item.analysis_type == 'discussion' %} + 💡 深入讨论 + {% elif item.analysis_type == 'comment' %} + 💬 Comment + {% endif %} +
+
{{ item.created_at }}
+
+ +
Chapter {{ item.chapter_index + 1 }}
+ +
"{{ item.selected_text }}"
+ + {% if item.response %} +
+
+ {% if item.analysis_type == 'comment' %} + Your Note: + {% else %} + AI Analysis: + {% endif %} +
+
{{ item.response }}
+
+ {% endif %} + + +
+ {% endfor %} +
+ {% else %} +
+
📚
+
No highlights yet
+
Start reading and highlight interesting passages!
+
+ Start Reading → +
+
+ {% endif %} +
+ + + + + + + diff --git a/templates/library.html b/templates/library.html index e7d094d..b504847 100644 --- a/templates/library.html +++ b/templates/library.html @@ -3,39 +3,407 @@ - My Library + My Reader with AI + + +
-

Library

+

My Reader with AI

- {% if not books %} -

No processed books found. Run reader3.py on an epub first.

- {% endif %} + +
+ + 🔍 +
+ +
+
+
+
+
+
+ {% for book in books %} -
-
{{ book.title }}
-
- {{ book.author }}
- {{ book.chapters }} sections +
+ +
+ {% if book.cover %} + {{ book.title }} + {% else %} + {{ book.title }} + {% endif %} +
+
+
+
{{ book.title }}
+
+ {{ book.author }}
+ {{ book.chapters }} sections + {% if book.folder_suffix %} +
{{ book.folder_suffix }} + {% endif %} +
+ {% if book.progress is not none %} +
+
+
+
{{ book.progress_percent }}% complete
+ {% endif %} +
+
+ Read Book +
+ + +
- Read Book
{% endfor %} + + +
+
+
+
Add New Book
Click to upload EPUB
+
+
+ + diff --git a/templates/reader.html b/templates/reader.html index c012edc..2d4e7e4 100644 --- a/templates/reader.html +++ b/templates/reader.html @@ -3,86 +3,295 @@ - {{ book.metadata.title }} + {{ book.metadata.title }} - My Reader with AI + + + + + + - + - +
-
+ +
+ {% if prev_idx is not none %} + ← 上一章 + {% else %} + ← 上一章 + {% endif %} + + + 第 {{ chapter_index + 1 }} / {{ book.spine|length }} 节 + + + {% if next_idx is not none %} + 下一章 → + {% else %} + 下一章 → + {% endif %} + +
+ +
+
+ +
+ + +
+
+ +
+ +
+ + + + + + + + +
+
+ +
+ + +
18px
+
+ +
+ + +
1.8
+
+
+
+
+ +
{{ current_chapter.content | safe }}
+
{% if prev_idx is not none %} - ← Previous + ← 上一章 {% else %} - ← Previous + ← 上一章 {% endif %} - Section {{ chapter_index + 1 }} of {{ book.spine|length }} + 第 {{ chapter_index + 1 }} / {{ book.spine|length }} 节 {% if next_idx is not none %} - Next → + 下一章 → {% else %} - Next → + 下一章 → {% endif %}
+ + + + +
+
+ 解释说明 +
+
+ 深入讨论 +
+
+ 添加笔记 +
+
+ + + + + + + + + + diff --git a/uv.lock b/uv.lock index e2e2f80..e84c5ac 100644 --- a/uv.lock +++ b/uv.lock @@ -48,6 +48,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/94/fe/3aed5d0be4d404d12d36ab97e2f1791424d9ca39c2f754a6285d59a3b01d/beautifulsoup4-4.14.2-py3-none-any.whl", hash = "sha256:5ef6fa3a8cbece8488d66985560f97ed091e22bbc4e9c2338508a9d5de6d4515", size = 106392, upload-time = "2025-09-29T10:05:43.771Z" }, ] +[[package]] +name = "certifi" +version = "2025.11.12" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/8c/58f469717fa48465e4a50c014a0400602d3c437d7c0c468e17ada824da3a/certifi-2025.11.12.tar.gz", hash = "sha256:d8ab5478f2ecd78af242878415affce761ca6bc54a22a27e026d7c25357c3316", size = 160538, upload-time = "2025-11-12T02:54:51.517Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/70/7d/9bc192684cea499815ff478dfcdc13835ddf401365057044fb721ec6bddb/certifi-2025.11.12-py3-none-any.whl", hash = "sha256:97de8790030bbd5c2d96b7ec782fc2f7820ef8dba6db909ccf95449f2d062d4b", size = 159438, upload-time = "2025-11-12T02:54:49.735Z" }, +] + [[package]] name = "click" version = "8.3.1" @@ -118,6 +127,34 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" }, ] +[[package]] +name = "httpcore" +version = "1.0.9" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "h11" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/06/94/82699a10bca87a5556c9c59b5963f2d039dbd239f25bc2a63907a05a14cb/httpcore-1.0.9.tar.gz", hash = "sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8", size = 85484, upload-time = "2025-04-24T22:06:22.219Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", size = 78784, upload-time = "2025-04-24T22:06:20.566Z" }, +] + +[[package]] +name = "httpx" +version = "0.28.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "certifi" }, + { name = "httpcore" }, + { name = "idna" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b1/df/48c586a5fe32a0f01324ee087459e112ebb7224f646c0b5023f5e79e9956/httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", size = 141406, upload-time = "2024-12-06T15:37:23.222Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload-time = "2024-12-06T15:37:21.509Z" }, +] + [[package]] name = "idna" version = "3.11" @@ -481,6 +518,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/36/c7/cfc8e811f061c841d7990b0201912c3556bfeb99cdcb7ed24adc8d6f8704/pydantic_core-2.41.5-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:56121965f7a4dc965bff783d70b907ddf3d57f6eba29b6d2e5dabfaf07799c51", size = 2145302, upload-time = "2025-11-04T13:43:46.64Z" }, ] +[[package]] +name = "python-multipart" +version = "0.0.20" +source = { 
registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f3/87/f44d7c9f274c7ee665a29b885ec97089ec5dc034c7f3fafa03da9e39a09e/python_multipart-0.0.20.tar.gz", hash = "sha256:8dd0cab45b8e23064ae09147625994d090fa46f5b0d1e13af944c331a7fa9d13", size = 37158, upload-time = "2024-12-16T19:45:46.972Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/45/58/38b5afbc1a800eeea951b9285d3912613f2603bdf897a4ab0f4bd7f405fc/python_multipart-0.0.20-py3-none-any.whl", hash = "sha256:8a62d3a8335e06589fe01f2a3e178cdcc632f3fbe0d492ad9ee0ec35aab1f104", size = 24546, upload-time = "2024-12-16T19:45:44.423Z" }, +] + [[package]] name = "reader3" version = "0.1.0" @@ -489,7 +535,9 @@ dependencies = [ { name = "beautifulsoup4" }, { name = "ebooklib" }, { name = "fastapi" }, + { name = "httpx" }, { name = "jinja2" }, + { name = "python-multipart" }, { name = "uvicorn" }, ] @@ -498,7 +546,9 @@ requires-dist = [ { name = "beautifulsoup4", specifier = ">=4.14.2" }, { name = "ebooklib", specifier = ">=0.20" }, { name = "fastapi", specifier = ">=0.121.2" }, + { name = "httpx", specifier = ">=0.27.0" }, { name = "jinja2", specifier = ">=3.1.6" }, + { name = "python-multipart", specifier = ">=0.0.6" }, { name = "uvicorn", specifier = ">=0.38.0" }, ]