Skip to content
This repository was archived by the owner on Dec 15, 2025. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 13 additions & 4 deletions any_parser/any_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
from collections.abc import Iterable
from io import StringIO
from pathlib import Path
from .terminal_ui import TerminalParserUI


from any_parser.async_parser import AsyncParser
from any_parser.batch_parser import BatchParser
Expand Down Expand Up @@ -126,25 +128,32 @@ def parse(
file_content=None,
file_type=None,
extract_args=None,
show_ui=False # New optional parameter
):
"""Extract full content from a file synchronously.
"""Extract full content from a file synchronously with optional terminal UI.

Args:
file_path: Path to input file
file_content: Base64 encoded file content
file_type: File format extension
extract_args: Additional extraction parameters
show_ui: Whether to display formatted output in terminal (default: False)

Returns:
tuple: (result, timing_info) or (error_message, "")
"""
return self._sync_parse.parse(
result, timing = self._sync_parse.parse(
file_path=file_path,
file_content=file_content,
file_type=file_type,
extract_args=extract_args,
extract_args=extract_args
)


if show_ui:
TerminalParserUI().display(result)

return result, timing

@handle_file_processing
def parse_pro(
self,
Expand Down
66 changes: 66 additions & 0 deletions any_parser/terminal_ui.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
from rich.console import Console
from rich.panel import Panel
from rich.markdown import Markdown
import re

class TerminalParserUI:
    """Render parsed Markdown-like text in the terminal using ``rich``.

    The text is normalized, split into sections at level-1 (``# ``) and
    level-2 (``## ``) headers, and each section is printed either inside a
    colored panel (header sections) or as plain Markdown (text that precedes
    the first header).
    """

    # Runs of three or more newlines are collapsed to a single blank line.
    _BLANK_RUN = re.compile(r"\n{3,}")

    # Markers that open/close a fenced code block in Markdown.
    _FENCE_MARKERS = ("```", "~~~")

    # Panel border color per header level; levels not listed get no panel.
    _BORDER_STYLES = {1: "bright_blue", 2: "bright_green"}

    def __init__(self):
        self.console = Console()

    def clean_text(self, text) -> str:
        """Clean and normalize the parsed text.

        Args:
            text: A string, a list/tuple of fragments (joined with newlines),
                ``None``, or any other object (converted with ``str``).

        Returns:
            The text with runs of 3+ newlines collapsed to exactly two and
            leading/trailing whitespace removed. ``None`` yields ``""``.
        """
        if text is None:
            return ""
        if isinstance(text, (list, tuple)):
            # Coerce fragments so a stray non-string item cannot make
            # ``str.join`` raise; ``None`` fragments are dropped.
            text = "\n".join(str(part) for part in text if part is not None)
        elif not isinstance(text, str):
            text = str(text)
        return self._BLANK_RUN.sub("\n\n", text).strip()

    def detect_sections(self, text) -> list:
        """Split *text* into sections at ``# `` and ``## `` headers.

        Header lines inside fenced code blocks (``` or ~~~) are NOT treated
        as headers, so code comments such as ``# note`` do not break a code
        block apart.

        Args:
            text: The (already cleaned) text to split.

        Returns:
            A list of ``(content, level)`` tuples in document order. ``level``
            is 1 or 2 for a header section and 0 for text before the first
            header. For header sections the first line of ``content`` is the
            header text with its ``#`` marker removed. At least one tuple is
            returned for any string, including the empty string.
        """
        sections = []
        current_lines = []
        current_level = 0
        open_fence = None  # marker of the fence we are inside, if any

        for line in text.split("\n"):
            stripped = line.lstrip()
            if open_fence is None:
                for marker in self._FENCE_MARKERS:
                    if stripped.startswith(marker):
                        open_fence = marker
                        break
            elif stripped.startswith(open_fence):
                open_fence = None

            new_level = 0
            if open_fence is None:
                if line.startswith("## "):
                    new_level = 2
                elif line.startswith("# "):
                    new_level = 1

            if new_level:
                if current_lines:
                    sections.append(("\n".join(current_lines), current_level))
                # Drop the marker and the following space ("# " / "## ").
                current_lines = [line[new_level + 1:]]
                current_level = new_level
            else:
                current_lines.append(line)

        if current_lines:
            sections.append(("\n".join(current_lines), current_level))

        return sections

    def display(self, parsed_data):
        """Display parsed content with rich formatting.

        Level-1 sections are printed in a blue-bordered panel, level-2
        sections in a green-bordered panel, and header-less text as plain
        Markdown. A blank line is printed after every section.

        Args:
            parsed_data: Anything accepted by ``clean_text``.
        """
        clean_text = self.clean_text(parsed_data)
        sections = self.detect_sections(clean_text)

        # Defensive only: detect_sections yields at least one section for
        # any string, so this fallback is not expected to trigger.
        if not sections:
            self.console.print(Markdown(clean_text))
            return

        for content, level in sections:
            border_style = self._BORDER_STYLES.get(level)
            if border_style is None:
                self.console.print(Markdown(content))
            else:
                self.console.print(Panel.fit(
                    Markdown(content),
                    border_style=border_style,
                    title_align="left",
                ))
            self.console.print()
23 changes: 23 additions & 0 deletions main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import os
from dotenv import load_dotenv
from any_parser import AnyParser

# Read settings from a .env file; override=True lets values from the file
# replace variables that are already set in the process environment.
load_dotenv(override=True)

# Build the parser with the API key taken from CAMBIO_API_KEY.
parser = AnyParser(api_key=os.getenv("CAMBIO_API_KEY"))

# Parse the sample PDF; show_ui=True also renders the result in the terminal.
parsed_markdown, elapsed_seconds = parser.parse(
    file_path="./data/qa.pdf",
    show_ui=True,
)

# Report the timing value returned alongside the parsed content.
print("\nProcessing Time (seconds):", elapsed_seconds)