Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
543 changes: 245 additions & 298 deletions src/parxy_cli/commands/attach.py

Large diffs are not rendered by default.

219 changes: 40 additions & 179 deletions src/parxy_cli/commands/pdf.py
Original file line number Diff line number Diff line change
@@ -1,119 +1,18 @@
"""PDF manipulation commands."""

import re
from pathlib import Path
from typing import List, Annotated, Optional, Tuple
from typing import List, Annotated, Optional

import typer
import pymupdf

from parxy_cli.console.console import Console
from parxy_cli.services import PdfService, collect_pdf_files_with_ranges

app = typer.Typer()

console = Console()


def parse_input_with_pages(
    input_str: str,
) -> Tuple[str, Optional[int], Optional[int]]:
    """
    Parse input string to extract file path and page range.

    Supports formats:
    - file.pdf[1] - single page (1-based)
    - file.pdf[:2] - from start to page 2 (1-based, inclusive)
    - file.pdf[3:] - from page 3 to end (1-based)
    - file.pdf[3:5] - from page 3 to 5 (1-based, inclusive)
    - file.pdf - all pages

    Args:
        input_str: Input string with optional page range

    Returns:
        Tuple of (file_path, from_page, to_page) where pages are 0-based for PyMuPDF.
        Both pages are None when no range is given. With a range, from_page is
        0 when the start is omitted and to_page is None when the end is omitted
        (meaning "through the last page").

    Raises:
        ValueError: If the bracketed range contains anything other than
            integer page numbers (e.g. ``file.pdf[a:b]``).
    """
    # Match pattern: filename[range] (the range must be the trailing part)
    match = re.match(r'^(.+?)\[([^\]]+)\]$', input_str)
    if not match:
        # No page range specified
        return input_str, None, None

    file_path, page_range = match.groups()

    try:
        if ':' in page_range:
            # Range format [start:end]; either side may be empty
            start_str, end_str = (
                part.strip() for part in page_range.split(':', 1)
            )
            # Convert the 1-based user input to 0-based indices
            from_page = (int(start_str) - 1) if start_str else 0
            to_page = (int(end_str) - 1) if end_str else None
        else:
            # Single page [n]
            from_page = to_page = int(page_range) - 1
    except ValueError:
        # int() alone reports only "invalid literal"; tell the user which
        # input was wrong and what is accepted instead.
        raise ValueError(
            f"Invalid page range '[{page_range}]' in '{input_str}': "
            'expected [n], [start:end], [start:] or [:end] '
            'with integer page numbers'
        ) from None

    return file_path, from_page, to_page


def collect_pdf_files_with_ranges(
    inputs: List[str],
) -> List[Tuple[Path, Optional[int], Optional[int]]]:
    """
    Collect PDF files from the input list with optional page ranges.

    For folders, only files in the exact directory are collected (non-recursive).
    For files with page ranges (e.g., file.pdf[1:3]), parse and extract the range.
    The ``.pdf`` extension is matched case-insensitively for both explicit
    files and files discovered inside a directory.

    Inputs that are not PDFs, do not exist, or are directories without any
    PDF produce a console warning and are skipped.

    Args:
        inputs: List of file paths (with optional page ranges) and/or folder paths

    Returns:
        List of tuples: (Path, from_page, to_page) where pages are 0-based.
        from_page and to_page are None if all pages should be included.
    """
    files = []

    for input_str in inputs:
        # Parse the input to extract file path and page range
        file_path_str, from_page, to_page = parse_input_with_pages(input_str)
        path = Path(file_path_str)

        if path.is_file():
            # Check if it's a PDF
            if path.suffix.lower() == '.pdf':
                files.append((path, from_page, to_page))
            else:
                console.warning(f'Skipping non-PDF file: {file_path_str}')
        elif path.is_dir():
            # Directories cannot have page ranges; warn but still collect
            if from_page is not None or to_page is not None:
                console.warning(
                    f'Page ranges are not supported for directories: {input_str}'
                )
            # Non-recursive scan. Use the same case-insensitive suffix test as
            # the explicit-file branch (glob('*.pdf') would miss '*.PDF' on
            # case-sensitive filesystems) and ignore sub-directories.
            pdf_files = sorted(
                entry
                for entry in path.iterdir()
                if entry.is_file() and entry.suffix.lower() == '.pdf'
            )
            if pdf_files:
                # Add all PDFs from directory without page ranges
                files.extend((pdf_file, None, None) for pdf_file in pdf_files)
            else:
                console.warning(f'No PDF files found in directory: {file_path_str}')
        else:
            console.warning(f'Path not found: {file_path_str}')

    return files


@app.command(name='pdf:merge', help='Merge multiple PDF files into a single PDF')
def merge(
inputs: Annotated[
Expand Down Expand Up @@ -205,77 +104,40 @@ def merge(
# Create output directory if it doesn't exist
output_path.parent.mkdir(parents=True, exist_ok=True)

# Merge PDFs
# Merge PDFs using service
try:
with console.shimmer(f'Merging {len(files_with_ranges)} PDF files...'):
merged_pdf = pymupdf.open()

# Display progress for each file
for file_path, from_page, to_page in files_with_ranges:
try:
pdf = pymupdf.open(file_path)

# Determine page range to insert
if from_page is None and to_page is None:
# Insert all pages
page_info = 'all pages'
merged_pdf.insert_pdf(pdf)
# Determine page range info for display
if from_page is None and to_page is None:
page_info = 'all pages'
else:
actual_from = from_page if from_page is not None else 0
actual_to = to_page if to_page is not None else 'end'

if from_page == to_page:
page_info = f'page {from_page + 1}'
elif to_page is None:
page_info = f'pages {actual_from + 1}-end'
else:
# Insert specific page range
# PyMuPDF insert_pdf uses from_page and to_page (inclusive, 0-based)
actual_from = from_page if from_page is not None else 0
actual_to = to_page if to_page is not None else (len(pdf) - 1)

# Validate page range
if actual_from < 0 or actual_from >= len(pdf):
console.warning(
f'Invalid page range for {file_path.name}: page {actual_from + 1} does not exist'
)
pdf.close()
continue

if actual_to < 0 or actual_to >= len(pdf):
console.warning(
f'Invalid page range for {file_path.name}: page {actual_to + 1} does not exist'
)
pdf.close()
continue

if actual_from > actual_to:
console.warning(
f'Invalid page range for {file_path.name}: start page {actual_from + 1} > end page {actual_to + 1}'
)
pdf.close()
continue

# Format page info for display (1-based)
if actual_from == actual_to:
page_info = f'page {actual_from + 1}'
else:
page_info = f'pages {actual_from + 1}-{actual_to + 1}'

merged_pdf.insert_pdf(
pdf, from_page=actual_from, to_page=actual_to
)

console.print(
f'[faint]⎿ [/faint] Adding {file_path.name} ({page_info})'
)
pdf.close()

except Exception as e:
console.error(f'Error processing {file_path.name}: {str(e)}')
merged_pdf.close()
raise typer.Exit(1)

# Save the merged PDF
merged_pdf.save(str(output_path))
merged_pdf.close()
page_info = f'pages {actual_from + 1}-{to_page + 1}'

console.print(
f'[faint]⎿ [/faint] Adding {file_path.name} ({page_info})'
)

# Use service to merge PDFs
PdfService.merge_pdfs(files_with_ranges, output_path)

console.newline()
console.success(
f'Successfully merged {len(files_with_ranges)} files into {output_path}'
)

except (ValueError, FileNotFoundError) as e:
console.error(f'Error during merge: {str(e)}')
raise typer.Exit(1)
except Exception as e:
console.error(f'Error during merge: {str(e)}')
raise typer.Exit(1)
Expand Down Expand Up @@ -353,14 +215,17 @@ def split(
if prefix is None:
prefix = input_path.stem

# Open and process the PDF
# Split PDF using service
try:
# Get page count first to display info
import pymupdf

pdf = pymupdf.open(input_path)
total_pages = len(pdf)
pdf.close()

if total_pages == 0:
console.error('PDF file is empty (no pages)', panel=True)
pdf.close()
raise typer.Exit(1)

console.info(
Expand All @@ -371,27 +236,23 @@ def split(
)

with console.shimmer(f'Splitting PDF...'):
output_files = []

# Split into individual pages
for page_num in range(total_pages):
output_file = output_path / f'{prefix}_page_{page_num + 1}.pdf'
output_pdf = pymupdf.open()
output_pdf.insert_pdf(pdf, from_page=page_num, to_page=page_num)
output_pdf.save(str(output_file))
output_pdf.close()
output_files.append(output_file)
# Use service to split PDF
output_files = PdfService.split_pdf(input_path, output_path, prefix)

# Display created files
for idx, output_file in enumerate(output_files):
console.print(
f'[faint]⎿ [/faint] Created {output_file.name} (page {page_num + 1})'
f'[faint]⎿ [/faint] Created {output_file.name} (page {idx + 1})'
)

pdf.close()

console.newline()
console.success(
f'Successfully split PDF into {len(output_files)} file{"s" if len(output_files) > 1 else ""} in {output_path}'
)

except (ValueError, FileNotFoundError) as e:
console.error(f'Error during split: {str(e)}')
raise typer.Exit(1)
except Exception as e:
console.error(f'Error during split: {str(e)}')
raise typer.Exit(1)
19 changes: 19 additions & 0 deletions src/parxy_cli/services/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
"""PDF manipulation services."""

from parxy_cli.services.pdf_service import PdfService
from parxy_cli.services.pdf_utils import (
format_file_size,
validate_pdf_file,
is_binary_file,
parse_input_with_pages,
collect_pdf_files_with_ranges,
)

__all__ = [
'PdfService',
'format_file_size',
'validate_pdf_file',
'is_binary_file',
'parse_input_with_pages',
'collect_pdf_files_with_ranges',
]
Loading