-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
343 lines (286 loc) · 13.1 KB
/
main.py
File metadata and controls
343 lines (286 loc) · 13.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
#!/usr/bin/env python3
"""
TreeRAG CLI — Folder-based, MCTS-powered document retrieval.

Folder management:
    python main.py folder create "Project Alpha"
    python main.py folder add "Project Alpha" doc1.pdf doc2.pdf
    python main.py folder list
    python main.py folder info "Project Alpha"
    python main.py folder remove "Project Alpha" doc1.pdf
    python main.py folder health "Project Alpha"
    python main.py folder repair "Project Alpha"
    python main.py folder refresh "Project Alpha"
    python main.py folder delete "Project Alpha"

Search:
    python main.py search "Project Alpha" "What was the budget?"
    python main.py search-doc "Project Alpha" budget.pdf "Phase 2 costs"
    python main.py interactive "Project Alpha"
    python main.py chat

Standalone:
    python main.py query report.pdf "What was Q3 revenue?"
    python main.py inspect .treerag_data/folders/project/indices/doc_tree.json
"""
import sys
import argparse
from pathlib import Path
from rich.console import Console
from rich.table import Table
# Shared rich console used for this module's terminal output and prompts.
console = Console()
def _handle_errors(func):
"""Decorator to catch TreeRAG exceptions and print friendly messages."""
def wrapper(args):
try:
return func(args)
except KeyboardInterrupt:
console.print("\n[dim]Interrupted.[/dim]")
sys.exit(1)
except Exception as e:
error_type = type(e).__name__
console.print(f"\n[bold red]Error ({error_type}):[/bold red] {e}")
sys.exit(1)
return wrapper
# =============================================================================
# Folder commands
# =============================================================================
@_handle_errors
def cmd_folder(args):
    """Run one ``folder <action>`` sub-command against the folder manager.

    Reads ``args.action`` and, depending on the action, ``args.name`` and
    ``args.files``. If a required argument is missing, the matching usage
    hint is printed and the command returns without doing anything else.
    """
    from treerag.config import TreeRAGConfig
    from treerag.pipeline import TreeRAGPipeline

    config = TreeRAGConfig.from_env()
    manager = TreeRAGPipeline(config).folder
    action = args.action

    # Usage hint for every action that needs at least a folder name
    # ("list" is the only action that takes no arguments).
    usage_hints = {
        "create": "[red]Usage: folder create <name>[/red]",
        "add": "[red]Usage: folder add <name> file1.pdf file2.pdf ...[/red]",
        "remove": "[red]Usage: folder remove <name> filename.pdf[/red]",
        "info": "[red]Usage: folder info <name>[/red]",
        "health": "[red]Usage: folder health <name>[/red]",
        "repair": "[red]Usage: folder repair <name>[/red]",
        "refresh": "[red]Usage: folder refresh <name>[/red]",
        "delete": "[red]Usage: folder delete <name>[/red]",
    }
    if action in usage_hints:
        wants_files = action in ("add", "remove")
        if not args.name or (wants_files and not args.files):
            console.print(usage_hints[action])
            return

    if action == "create":
        manager.create_folder(args.name)

    elif action == "add":
        paths = args.files
        # A single file goes through the one-document call; several use the batch call.
        if len(paths) == 1:
            manager.add_document(args.name, paths[0])
        else:
            manager.add_documents_batch(args.name, paths)

    elif action == "remove":
        for filename in args.files:
            manager.remove_document(args.name, filename)

    elif action == "list":
        names = manager.list_folders()
        if not names:
            console.print("[dim]No folders found. Create one: folder create <name>[/dim]")
            return
        listing = Table(title="Folders")
        listing.add_column("Name", style="cyan")
        listing.add_column("Docs", justify="right")
        listing.add_column("Pages", justify="right")
        for folder in names:
            # Best effort: a folder that fails to load is still listed, with "?" counts.
            try:
                info = manager.load_folder(folder)
                listing.add_row(folder, str(info.total_documents), str(info.total_pages))
            except Exception:
                listing.add_row(folder, "?", "?")
        console.print(listing)

    elif action == "info":
        info = manager.load_folder(args.name)
        console.print(info.pretty_print())

    elif action == "health":
        report = manager.health_check(args.name)
        console.print(f"\n[bold]Health Check: {args.name}[/bold]")
        console.print(f" [green]Healthy: {len(report['healthy'])}[/green]")

        missing_pdfs = report["missing_pdfs"]
        if missing_pdfs:
            console.print(f" [red]Missing PDFs: {', '.join(missing_pdfs)}[/red]")
        missing_indices = report["missing_indices"]
        if missing_indices:
            console.print(f" [yellow]Missing indices: {', '.join(missing_indices)}[/yellow]")
        stale = report["stale_entries"]
        if stale:
            console.print(f" [cyan]Changed files: {', '.join(stale)}[/cyan]")
        orphaned = report["orphaned_indices"]
        if orphaned:
            console.print(f" [dim]Orphaned indices: {', '.join(orphaned)}[/dim]")

        problem_keys = ("missing_pdfs", "missing_indices", "stale_entries", "orphaned_indices")
        if any(report[key] for key in problem_keys):
            console.print(f"\n Run 'folder repair {args.name}' to fix issues.")
        else:
            console.print(" [green]Everything looks good![/green]")

    elif action == "repair":
        # The flag is detected as a literal token among the trailing file arguments.
        extra_tokens = args.files or []
        manager.repair_folder(args.name, remove_broken="--remove-broken" in extra_tokens)

    elif action == "refresh":
        manager.refresh_folder(args.name)

    elif action == "delete":
        manager.delete_folder(args.name)
# =============================================================================
# Search commands
# =============================================================================
@_handle_errors
def cmd_search(args):
    """Query the folder ``args.folder_name`` with ``args.query``.

    ``use_vision`` is passed as the negation of ``args.no_vision``.
    """
    from treerag.config import TreeRAGConfig
    from treerag.pipeline import TreeRAGPipeline

    engine = TreeRAGPipeline(TreeRAGConfig.from_env())
    vision_enabled = not args.no_vision
    engine.query_folder(args.query, args.folder_name, use_vision=vision_enabled)
@_handle_errors
def cmd_search_doc(args):
    """Query a single document (``args.filename``) inside ``args.folder_name``.

    If the folder has no entry for that filename, the filenames it does
    contain (or "none") are printed and no query is run.
    """
    from treerag.config import TreeRAGConfig
    from treerag.pipeline import TreeRAGPipeline

    engine = TreeRAGPipeline(TreeRAGConfig.from_env())
    manager = engine.folder
    folder = manager.load_folder(args.folder_name)

    entry = folder.get_document(args.filename)
    if entry:
        index = manager.load_document_index(entry)
        engine.query_document(args.query, index, use_vision=not args.no_vision)
        return

    # Unknown filename: show what the folder holds so the user can correct it.
    known = ', '.join(doc.filename for doc in folder.documents) or 'none'
    console.print(f"[red]'{args.filename}' not found in '{args.folder_name}'. Available: {known}[/red]")
def _print_health_summary(fm, folder_name):
    """Print the report returned by ``fm.health_check(folder_name)``.

    Uses the same report keys and wording as the ``folder health`` command.
    """
    issues = fm.health_check(folder_name)
    console.print(f"\n[bold]Health Check: {folder_name}[/bold]")
    console.print(f" [green]Healthy: {len(issues['healthy'])}[/green]")
    # (report key, rich style, label) for each problem category.
    categories = (
        ("missing_pdfs", "red", "Missing PDFs"),
        ("missing_indices", "yellow", "Missing indices"),
        ("stale_entries", "cyan", "Changed files"),
        ("orphaned_indices", "dim", "Orphaned indices"),
    )
    found_problem = False
    for key, style, label in categories:
        if issues[key]:
            found_problem = True
            console.print(f" [{style}]{label}: {', '.join(issues[key])}[/{style}]")
    if found_problem:
        console.print(f"\n Run 'folder repair {folder_name}' to fix issues.")
    else:
        console.print(" [green]Everything looks good![/green]")


def _run_folder_session(pipeline, folder_name, use_vision):
    """Run the prompt loop that sends every query to one folder."""
    fm = pipeline.folder
    fi = fm.load_folder(folder_name)
    console.print(f"\n[bold green]Interactive mode[/bold green] — {folder_name}")
    console.print(f" Documents: {fi.total_documents} | Pages: {fi.total_pages}")
    console.print(" Commands: 'quit', 'info', 'health'\n")
    while True:
        try:
            query = console.input("[bold cyan]Query>[/bold cyan] ").strip()
            command = query.lower()
            if command in ("quit", "exit", "q"):
                break
            if not query:
                continue
            if command == "info":
                console.print(fi.pretty_print())
                continue
            if command == "health":
                # Advertised in the banner above; a failing check must not end the session.
                try:
                    _print_health_summary(fm, folder_name)
                except Exception as e:
                    console.print(f"[red]Error: {e}[/red]")
                continue
            try:
                pipeline.query_folder(query, folder_name, use_vision=use_vision)
            except Exception as e:
                console.print(f"[red]Error: {e}[/red]")
            console.print()
        except (KeyboardInterrupt, EOFError):
            # Ctrl-C or end-of-input (Ctrl-D) ends the session cleanly.
            break


def _run_chat_session(pipeline, use_vision):
    """Run the unified chat loop, which calls ``pipeline.chat`` for each message."""
    from treerag.pipeline import ChatMessage

    console.print("\n[bold green]TreeRAG Chat[/bold green] — auto-routes across all folders")
    folders = pipeline.folder.list_folders()
    console.print(f" Available folders: {', '.join(folders) if folders else 'none'}")
    console.print(" Commands: 'quit', 'folders'\n")
    chat_history = []
    while True:
        try:
            query = console.input("[bold cyan]You>[/bold cyan] ").strip()
            command = query.lower()
            if command in ("quit", "exit", "q"):
                break
            if not query:
                continue
            if command == "folders":
                for f in pipeline.folder.list_folders():
                    # Same fallback as `folder list`: an unreadable folder shows "?".
                    try:
                        fi = pipeline.folder.load_folder(f)
                        console.print(f" {f}: {fi.total_documents} docs, {fi.total_pages} pages")
                    except Exception:
                        console.print(f" {f}: ? docs, ? pages")
                continue
            chat_history.append(ChatMessage(role="user", content=query))
            try:
                response = pipeline.chat(query, chat_history, use_vision=use_vision)
                chat_history.append(response)
                if response.folder_name:
                    console.print(f"\n[dim]📁 {response.folder_name}[/dim]")
                console.print(f"\n{response.content}")
                if response.sources:
                    console.print("\n[dim]Sources:[/dim]")
                    for s in response.sources:
                        console.print(f" [dim]{s['document']} → {s['section']} ({s['pages']}) [{s['score']:.0%}][/dim]")
                if response.stats:
                    console.print(f"[dim]{response.stats.get('total_time', '')} | {response.stats.get('llm_calls', '')} calls | {response.stats.get('cost', '')}[/dim]")
            except Exception as e:
                console.print(f"[red]Error: {e}[/red]")
            console.print()
        except (KeyboardInterrupt, EOFError):
            # Ctrl-C or end-of-input (Ctrl-D) ends the session cleanly.
            break


@_handle_errors
def cmd_interactive(args):
    """Start an interactive session and print ``pipeline.usage`` when it ends.

    Args:
        args: Namespace with ``mode``, ``target`` and ``no_vision``. When
            ``mode == "folder"`` every query goes to the folder named by
            ``target``; any other mode starts the unified chat loop.
    """
    from treerag.config import TreeRAGConfig
    from treerag.pipeline import TreeRAGPipeline

    config = TreeRAGConfig.from_env()
    pipeline = TreeRAGPipeline(config)
    use_vision = not args.no_vision

    if args.mode == "folder":
        _run_folder_session(pipeline, args.target, use_vision)
    else:
        _run_chat_session(pipeline, use_vision)

    console.print(f"\n[dim]Session: {pipeline.usage}[/dim]")
# =============================================================================
# Standalone commands
# =============================================================================
@_handle_errors
def cmd_query(args):
    """Query one document outside of any folder.

    When ``args.index`` is set, the document index is loaded from it;
    otherwise ``args.pdf_path`` is indexed first. The resulting index is
    then queried with ``args.query``.
    """
    from treerag.config import TreeRAGConfig
    from treerag.pipeline import TreeRAGPipeline

    engine = TreeRAGPipeline(TreeRAGConfig.from_env())
    saved_index = args.index
    doc_index = engine.load_index(saved_index) if saved_index else engine.index(args.pdf_path)
    engine.query_document(args.query, doc_index, use_vision=not args.no_vision)
@_handle_errors
def cmd_inspect(args):
    """Print a summary of the saved index at ``args.index_path``.

    Shows filename, page count, hash and description, followed by the
    tree and its node/leaf counts when the index has a root.
    """
    from treerag.models import DocumentIndex

    index = DocumentIndex.load(args.index_path)
    console.print(f"\n[bold]Document:[/bold] {index.filename} ({index.total_pages} pages)")
    console.print(f"[bold]Hash:[/bold] {index.file_hash}")
    console.print(f"[bold]Description:[/bold] {index.description}")

    if index.root:
        console.print(f"\n[bold]Tree:[/bold]")
        console.print(index.root.pretty_print())
        # NOTE(review): the source lost its indentation; the node statistics are
        # assumed to belong to this branch (only shown when a root exists) — confirm.
        all_nodes = index.get_all_nodes()
        leaf_count = sum(1 for node in all_nodes if node.is_leaf)
        console.print(f"\nNodes: {len(all_nodes)} | Leaves: {leaf_count}")
# =============================================================================
# Main
# =============================================================================
def main(argv=None):
    """Parse the command line and dispatch to the selected command handler.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``, which is the original behaviour.
    """
    parser = argparse.ArgumentParser(description="TreeRAG — MCTS-powered Vectorless Document Retrieval")
    subparsers = parser.add_subparsers(dest="command", required=True)

    # folder
    p = subparsers.add_parser("folder", help="Manage folders")
    p.add_argument(
        "action",
        choices=["create", "add", "remove", "list", "info", "health", "repair", "refresh", "delete"],
    )
    p.add_argument("name", nargs="?")
    p.add_argument("files", nargs="*")
    # Registered as a real option: otherwise argparse rejects the token as an
    # unrecognized argument before cmd_folder can ever see it.
    p.add_argument(
        "--remove-broken",
        action="store_true",
        help="Only with 'repair': enable its remove-broken mode",
    )
    p.set_defaults(func=cmd_folder)

    # search
    p = subparsers.add_parser("search", help="Search a folder")
    p.add_argument("folder_name")
    p.add_argument("query")
    p.add_argument("--no-vision", action="store_true")
    p.set_defaults(func=cmd_search)

    # search-doc
    p = subparsers.add_parser("search-doc", help="Search a specific document")
    p.add_argument("folder_name")
    p.add_argument("filename")
    p.add_argument("query")
    p.add_argument("--no-vision", action="store_true")
    p.set_defaults(func=cmd_search_doc)

    # chat (unified)
    p = subparsers.add_parser("chat", help="Unified chat — auto-routes to folders")
    p.add_argument("--no-vision", action="store_true")
    p.set_defaults(func=cmd_interactive, mode="chat", target=None)

    # interactive (folder-specific)
    p = subparsers.add_parser("interactive", help="Interactive search on a specific folder")
    p.add_argument("folder_name")
    p.add_argument("--no-vision", action="store_true")
    # cmd_interactive expects mode/target, so translate the parsed namespace.
    p.set_defaults(func=lambda a: cmd_interactive(
        argparse.Namespace(mode="folder", target=a.folder_name, no_vision=a.no_vision)
    ))

    # query (standalone)
    p = subparsers.add_parser("query", help="Standalone: index + query a PDF")
    p.add_argument("pdf_path")
    p.add_argument("query")
    p.add_argument("--index")
    p.add_argument("--no-vision", action="store_true")
    p.set_defaults(func=cmd_query)

    # inspect
    p = subparsers.add_parser("inspect", help="Inspect a saved index")
    p.add_argument("index_path")
    p.set_defaults(func=cmd_inspect)

    args = parser.parse_args(argv)

    # Only the folder sub-parser defines remove_broken, hence getattr().
    if getattr(args, "remove_broken", False):
        if args.action != "repair":
            parser.error("--remove-broken is only valid with 'folder repair'")
        # cmd_folder detects the flag by looking for this literal token in
        # args.files, so hand it over in that form.
        args.files = [*args.files, "--remove-broken"]

    args.func(args)
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()