"""Terminal rendering of parsed document text, plus a small example driver."""

import os
import re

# Three or more consecutive newlines, i.e. two or more blank lines in a row.
_EXCESS_BLANK_LINES = re.compile(r"\n{3,}")

# Markers that open or close a fenced code block in Markdown.
_FENCE_MARKERS = ("```", "~~~")

# Panel border colour per detected heading level; other levels get no panel.
_BORDER_STYLES = {1: "bright_blue", 2: "bright_green"}


class TerminalParserUI:
    """Split Markdown-like text on ``#``/``##`` headings and print it with rich."""

    def __init__(self):
        """Create the rich console used for all output."""
        # Imported here rather than at module level so that importing this
        # module (and using the pure-text helpers) does not require rich.
        from rich.console import Console

        self.console = Console()

    def clean_text(self, text):
        """Normalise parsed text before section detection.

        Args:
            text: A string, a list/tuple of items (joined with newlines after
                ``str()`` conversion), ``None``, or any other object (converted
                with ``str()``).

        Returns:
            str: The text with CRLF converted to LF, runs of three or more
            newlines collapsed to two, and surrounding whitespace stripped.
            ``None`` yields an empty string.
        """
        if text is None:
            return ""
        if isinstance(text, (list, tuple)):
            text = "\n".join(str(item) for item in text)
        elif not isinstance(text, str):
            text = str(text)
        # Without this, "\r\n" runs would never match the blank-line pattern
        # and a stray "\r" would end up at the end of every heading title.
        text = text.replace("\r\n", "\n")
        return _EXCESS_BLANK_LINES.sub("\n\n", text).strip()

    @staticmethod
    def _heading_level(line):
        """Return 1 for a ``# `` line, 2 for a ``## `` line, otherwise 0."""
        if line.startswith("## "):
            return 2
        if line.startswith("# "):
            return 1
        return 0

    def detect_sections(self, text):
        """Split text into sections at level-1 and level-2 headings.

        Lines inside fenced code blocks (``` or ~~~) are never treated as
        headings, so ``# comment`` lines in code samples stay in their section.

        Args:
            text: Newline-separated text, normally the output of ``clean_text``.

        Returns:
            list[tuple[str, int]]: ``(content, level)`` pairs in document
            order. For level 1 and 2 the first line of ``content`` is the
            heading text without its ``#`` prefix; level 0 is text that
            precedes the first heading. Empty text yields an empty list.
        """
        sections = []
        if not text:
            return sections

        current = []
        level = 0
        open_fence = None  # marker of the fence we are inside, if any

        for line in text.split("\n"):
            marker = line.lstrip()[:3]

            if open_fence is not None:
                # Only the marker that opened the block can close it.
                if marker == open_fence:
                    open_fence = None
                current.append(line)
                continue

            if marker in _FENCE_MARKERS:
                open_fence = marker
                current.append(line)
                continue

            heading = self._heading_level(line)
            if heading:
                if current:
                    sections.append(("\n".join(current), level))
                # Drop the "#"/"##" prefix and the space that follows it.
                current = [line[heading + 1:]]
                level = heading
            else:
                current.append(line)

        if current:
            sections.append(("\n".join(current), level))

        return sections

    def display(self, parsed_data):
        """Print parsed content to the console, one block per section.

        Level-1 and level-2 sections are drawn as panels (blue and green
        borders respectively) with the heading as the left-aligned panel
        title; any other section is printed as plain Markdown. A blank line
        is printed after each section.

        Args:
            parsed_data: Anything accepted by ``clean_text``.
        """
        from rich.markdown import Markdown
        from rich.panel import Panel
        from rich.text import Text

        cleaned = self.clean_text(parsed_data)
        sections = self.detect_sections(cleaned)

        if not sections:
            self.console.print(Markdown(cleaned))
            return

        for content, level in sections:
            border_style = _BORDER_STYLES.get(level)
            if border_style is None:
                self.console.print(Markdown(content))
            else:
                # The first line of a headed section is its heading text.
                title, _, body = content.partition("\n")
                self.console.print(
                    Panel.fit(
                        Markdown(body),
                        # Text() keeps square brackets in a heading from
                        # being parsed as rich console markup.
                        title=Text(title),
                        border_style=border_style,
                        title_align="left",
                    )
                )
            self.console.print()


def main():
    """Parse ``./data/qa.pdf`` with the terminal UI enabled and print the timing.

    Reads the API key from the ``CAMBIO_API_KEY`` environment variable after
    loading a ``.env`` file.
    """
    from dotenv import load_dotenv

    from any_parser import AnyParser

    load_dotenv(override=True)
    example_apikey = os.getenv("CAMBIO_API_KEY")

    ap = AnyParser(api_key=example_apikey)

    # show_ui=True makes parse() render the result itself; only the timing
    # is printed here.
    _markdown, total_time = ap.parse(
        file_path="./data/qa.pdf",
        show_ui=True,
    )

    print("\nProcessing Time (seconds):", total_time)


if __name__ == "__main__":
    main()