diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000..b140f86 --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,57 @@ +## Description + + + +## Type of Change + + + +- [ ] `feat`: New feature +- [ ] `fix`: Bug fix +- [ ] `docs`: Documentation only +- [ ] `refactor`: Code refactoring (no behavior change) +- [ ] `perf`: Performance improvement +- [ ] `test`: Adding or updating tests +- [ ] `build`: Build system or dependencies +- [ ] `ci`: CI/workflow changes +- [ ] `chore`: Maintenance tasks + +## Scope + + + +- [ ] `cli` - CLI commands and interface +- [ ] `core` - Core library functionality +- [ ] `tools` - Tool integrations (qpdf, ghostscript, etc.) +- [ ] `pdf` - PDF-specific operations +- [ ] `packaging` - Package configurations +- [ ] Other: + +## Related Issues + + + +## Checklist + +- [ ] I have read the [commit convention](./COMMIT_CONVENTION.md) +- [ ] My commits follow the conventional format: `type(scope): description` +- [ ] I have added/updated tests as appropriate +- [ ] All tests pass locally (`cargo test`) +- [ ] Code passes lint checks (`cargo clippy` and `cargo fmt`) +- [ ] I have updated documentation if needed + +## Breaking Changes + + + +- [ ] This PR contains breaking changes + + + +## Screenshots / Output + + + +``` +# Example command output +``` diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..f88b522 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,77 @@ +name: CI + +on: + push: + branches: [main] + pull_request: + branches: ["*"] + +env: + CARGO_TERM_COLOR: always + RUST_BACKTRACE: 1 + +jobs: + test: + name: Test (${{ matrix.os }}) + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, macos-latest, windows-latest] + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Install Rust toolchain + uses: dtolnay/rust-toolchain@stable + + - name: Cache Rust 
dependencies + uses: Swatinem/rust-cache@v2 + + # Install dependencies for integration tests + - name: Install dependencies (Ubuntu) + if: runner.os == 'Linux' + run: | + sudo apt-get update + sudo apt-get install -y qpdf ghostscript + + - name: Install dependencies (macOS) + if: runner.os == 'macOS' + run: | + brew install qpdf ghostscript + + - name: Install dependencies (Windows) + if: runner.os == 'Windows' + run: | + choco install qpdf ghostscript --yes + + - name: Build + run: cargo build --all-targets + + - name: Run tests + run: cargo test --all-targets + + - name: Run doc tests + run: cargo test --doc + + lint: + name: Lint + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Install Rust toolchain + uses: dtolnay/rust-toolchain@stable + with: + components: clippy, rustfmt + + - name: Cache Rust dependencies + uses: Swatinem/rust-cache@v2 + + - name: Check formatting + run: cargo fmt --all -- --check + + - name: Clippy + run: cargo clippy --all-targets --all-features -- -D warnings diff --git a/.gitignore b/.gitignore index 915a26e..21c9eac 100644 --- a/.gitignore +++ b/.gitignore @@ -42,3 +42,6 @@ desktop.ini *.tmp *.temp + +# PDF files +*.pdf diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 0183b89..b1af3fd 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -59,7 +59,7 @@ ForgeKit is organized as a Rust workspace with two main crates: - A `JobSpec` describes what you want to do (merge PDFs, resize images, etc.) - It's pure data - no execution logic -- Examples: `PdfMerge`, `PdfSplit`, `ImageConvert` (coming soon) +- Examples: `PdfMerge`, `PdfSplit`, `PdfCompress` **Tools** (`crates/core/src/tools/`) @@ -94,13 +94,13 @@ ForgeKit is organized as a Rust workspace with two main crates: } ``` -2. **Create a tool adapter** (`crates/core/src/tools/pdfcpu.rs`): +2. 
**Create a tool adapter** (`crates/core/src/tools/gs.rs`): ```rust - pub struct PdfcpuTool; + pub struct GsTool; - impl Tool for PdfcpuTool { - fn name(&self) -> &'static str { "pdfcpu" } + impl Tool for GsTool { + fn name(&self) -> &'static str { "gs" } fn probe(&self, config: &ToolConfig) -> Result { /* ... */ } fn version(&self, path: &PathBuf) -> Result { /* ... */ } } @@ -178,7 +178,7 @@ Keep commits focused and descriptive: Add PDF compression support - Add PdfCompress JobSpec variant -- Implement pdfcpu tool adapter +- Implement Ghostscript tool adapter - Add pdf compress CLI subcommand - Add tests for compression levels ``` diff --git a/Cargo.lock b/Cargo.lock index d175bf8..dca0647 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -116,6 +116,12 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + [[package]] name = "forgekit" version = "0.0.3" @@ -133,6 +139,7 @@ dependencies = [ "anyhow", "serde", "serde_json", + "serde_yaml", "thiserror", "uuid", ] @@ -149,12 +156,28 @@ dependencies = [ "wasip2", ] +[[package]] +name = "hashbrown" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" + [[package]] name = "heck" version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "indexmap" +version = "2.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ad4bb2b565bca0645f4d68c5c9af97fba094e9791da685bf83cb5f3ce74acf2" +dependencies = [ + "equivalent", + "hashbrown", +] + [[package]] name = 
"is_terminal_polyfill" version = "1.70.2" @@ -280,6 +303,19 @@ dependencies = [ "serde_core", ] +[[package]] +name = "serde_yaml" +version = "0.9.34+deprecated" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" +dependencies = [ + "indexmap", + "itoa", + "ryu", + "serde", + "unsafe-libyaml", +] + [[package]] name = "strsim" version = "0.11.1" @@ -323,6 +359,12 @@ version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" +[[package]] +name = "unsafe-libyaml" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861" + [[package]] name = "utf8parse" version = "0.2.2" diff --git a/README.md b/README.md index 52712a7..82d8bcf 100644 --- a/README.md +++ b/README.md @@ -104,14 +104,14 @@ Then install them based on your platform: **macOS (Homebrew):** ```bash -brew install qpdf pdfcpu tesseract ffmpeg libvips exiftool +brew install qpdf ghostscript tesseract ffmpeg libvips exiftool pip3 install ocrmypdf ``` **Windows (winget/scoop):** ```powershell -winget install qpdf.qpdf pdfcpu.pdfcpu tesseract-ocr Gyan.FFmpeg +winget install qpdf.qpdf ArtifexSoftware.GhostScript tesseract-ocr Gyan.FFmpeg scoop install libvips exiftool pip install ocrmypdf ``` @@ -121,21 +121,21 @@ pip install ocrmypdf **Debian/Ubuntu:** ```bash -sudo apt install qpdf pdfcpu tesseract-ocr ffmpeg libvips-tools libimage-exiftool-perl python3-pip +sudo apt install qpdf ghostscript tesseract-ocr ffmpeg libvips-tools libimage-exiftool-perl python3-pip pip3 install ocrmypdf ``` **Fedora/RHEL:** ```bash -sudo dnf install qpdf pdfcpu tesseract ffmpeg libvips perl-Image-ExifTool python3-pip +sudo dnf install qpdf ghostscript tesseract ffmpeg libvips perl-Image-ExifTool python3-pip pip3 install ocrmypdf ``` 
**Arch Linux:** ```bash -sudo pacman -S qpdf pdfcpu tesseract ffmpeg libvips perl-image-exiftool python-pip +sudo pacman -S qpdf ghostscript tesseract ffmpeg libvips perl-image-exiftool python-pip pip3 install ocrmypdf ``` @@ -148,7 +148,7 @@ pip3 install ocrmypdf - **Merge**: Combine multiple PDFs into one - **Split**: Extract pages by ranges or keywords - **Linearize**: Optimize for fast web view -- **Compress**: Reduce file size with presets (coming soon) +- **Compress**: Reduce file size with Ghostscript presets - **OCR**: Add searchable text layer (coming soon) - **Metadata**: View/edit PDF metadata (coming soon) diff --git a/ROADMAP.md b/ROADMAP.md index 09ce037..939c6fb 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -1,8 +1,8 @@ # ForgeKit Beta-MVP Roadmap (CLI-First) **Last Updated**: December 2025 -**Status**: Core foundation complete, PDF merge/split implemented, dependency management complete -**Current Version**: v0.0.3 +**Status**: Core foundation complete, PDF merge/split/compress/extract implemented, dependency management complete +**Current Version**: v0.0.4 ## Versioning Strategy @@ -61,20 +61,20 @@ - ✅ Documentation for dependency installation per platform - ✅ Tests for dependency checking (10 integration tests covering tool probing, platform detection, install hints, and error handling) -#### v0.0.4 - PDF Advanced Operations 📋 +#### v0.0.4 - PDF Advanced Operations ✅ -**Status**: Pending +**Status**: Completed **Deliverables**: -- PDF compress command (`pdf compress`) with preset support -- PDF linearize command (`pdf linearize`) as standalone subcommand -- PDF reorder command (`pdf reorder`) with page ordering -- PDF extract command (`pdf extract`) with page selection -- pdfcpu tool adapter (probe, version, execute) -- Preset system foundation (YAML loader and parser) -- Tests for new PDF operations -- Integration with existing pages grammar parser +- ✅ PDF compress command (`pdf compress`) with preset support +- ✅ PDF linearize command (`pdf 
linearize`) as standalone subcommand +- ✅ PDF reorder command (`pdf reorder`) with page ordering +- ✅ PDF extract command (`pdf extract`) with page selection +- ✅ ghostscript tool adapter (probe, version, execute) +- ✅ Preset system foundation (YAML loader and parser) +- ✅ Tests for new PDF operations +- ✅ Integration with existing pages grammar parser #### v0.0.5 - PDF OCR and Metadata 📋 @@ -221,18 +221,18 @@ - **Testing**: 20 tests passing (13 unit, 2 integration, 5 doc tests) - **Dependency Checking**: `check-deps` command implemented -**Current Version**: v0.0.3 (completed) +**Current Version**: v0.0.4 (completed) ### 🔄 In Progress -- **v0.0.3**: Dependency Management (branch: `feat/dependency-management`) +- **v0.0.5**: PDF OCR and Metadata ### 📋 Pending Features -- PDF: compress, linearize (as subcommand), reorder, extract, OCR, metadata +- PDF: OCR, metadata - Image: convert, resize, strip - Media: transcode, audio convert/normalize -- Preset system (YAML) +- Preset system (YAML) ✅ - Package creation (deb, rpm, Homebrew, winget) - CI/CD setup with package building @@ -339,7 +339,7 @@ **Minimum versions**: - qpdf: 10.0+ -- pdfcpu: 0.4+ +- ghostscript: 9.50+ - ocrmypdf: 14.0+ (Python 3.8+) - tesseract: 5.0+ - ffmpeg: 5.0+ @@ -383,7 +383,7 @@ version: 1 presets: pdf-compress-web: - tool: pdfcpu + tool: ghostscript args: ["optimize", "-level=2"] image-webp-web: tool: libvips @@ -423,7 +423,7 @@ tools/ # External tool adapters mod.rs trait_def.rs # Tool trait: probe(), version(), execute() qpdf.rs # qpdf adapter - pdfcpu.rs # pdfcpu adapter + ghostscript.rs # ghostscript adapter ocrmypdf.rs # ocrmypdf adapter ffmpeg.rs # ffmpeg adapter libvips.rs # libvips adapter @@ -487,7 +487,7 @@ pub enum JobSpec { **qpdf**: Parse stderr for "Processing page X of Y" → `current=X, total=Y` -**pdfcpu**: Parse JSON output mode if available, else stderr "page X/Y" +**ghostscript**: Parse stdout "Processing pages 1 through Y" and per-page "Page X" lines → `current=X, total=Y` **ocrmypdf**: Parse progress bar: 
`[████████░░░░░░░░] 50%` → estimate from file size @@ -751,7 +751,7 @@ SEE ALSO: **Required dependencies**: - **qpdf** 11.x+ (PDF manipulation) -**pdfcpu** 0.4+ (PDF compression/optimization) +- **ghostscript** 9.50+ (PDF compression/optimization) - **tesseract** 5.0+ (OCR engine) - **ocrmypdf** 14.0+ (Python wrapper for OCR, installed via pip) - **ffmpeg** 5.0+ (audio/video processing) @@ -770,14 +770,14 @@ SEE ALSO: **Debian/Ubuntu** (`.deb` package): ```deb -Depends: qpdf (>= 11.0), pdfcpu, tesseract-ocr (>= 5.0), ffmpeg (>= 5.0), libvips-tools (>= 8.12), libimage-exiftool-perl (>= 12.0), python3, python3-pip +Depends: qpdf (>= 11.0), ghostscript, tesseract-ocr (>= 5.0), ffmpeg (>= 5.0), libvips-tools (>= 8.12), libimage-exiftool-perl (>= 12.0), python3, python3-pip ``` **macOS** (Homebrew formula): ```ruby depends_on "qpdf" => ">= 11.0" -depends_on "pdfcpu" +depends_on "ghostscript" depends_on "tesseract" => ">= 5.0" depends_on "ffmpeg" => ">= 5.0" depends_on "libvips" => ">= 8.12" @@ -791,7 +791,7 @@ depends_on "python@3" Dependencies: - PackageIdentifier: qpdf.qpdf MinimumVersion: 11.0.0 - - PackageIdentifier: pdfcpu.pdfcpu + - PackageIdentifier: ArtifexSoftware.GhostScript - PackageIdentifier: tesseract-ocr - PackageIdentifier: ffmpeg - PackageIdentifier: Python.Python.3 @@ -813,10 +813,10 @@ Dependencies: **macOS**: - **Homebrew formula** (primary): `brew install forgekit` - - Automatically installs dependencies (qpdf, ghostscript, tesseract, etc.) 
- Binary installed to `/opt/homebrew/bin/forgekit` or `/usr/local/bin/forgekit` - **Binary release** (fallback): `.tar.gz` with just the binary - - Users manually install dependencies via `brew install qpdf pdfcpu ...` + - Users manually install dependencies via `brew install qpdf ghostscript ...` - Optional: `.pkg` installer (post-beta-MVP) **Windows**: @@ -942,22 +942,22 @@ Dependencies: version: 1 presets: pdf-compress-web: - tool: pdfcpu + tool: ghostscript description: "Web-optimized (smallest size, lower quality)" args: ["optimize", "-level=2", "-compression=compress"] pdf-compress-screen: - tool: pdfcpu + tool: ghostscript description: "Screen viewing (balanced)" args: ["optimize", "-level=3", "-compression=compress"] pdf-compress-printer: - tool: pdfcpu + tool: ghostscript description: "Print quality (higher quality)" args: ["optimize", "-level=4", "-compression=compress"] pdf-compress-hq: - tool: pdfcpu + tool: ghostscript description: "High quality (minimal compression)" args: ["optimize", "-level=5", "-compression=compress"] ``` @@ -1075,7 +1075,7 @@ presets: **Acceptance criteria**: -- pdfcpu compress/optimize +- ghostscript compress/optimize - OCR with ocrmypdf (basic) - Metadata read/write with exiftool - CLI `pdf compress/ocr/metadata` subcommands diff --git a/crates/cli/src/commands/check.rs b/crates/cli/src/commands/check.rs index 9a3f331..904520a 100644 --- a/crates/cli/src/commands/check.rs +++ b/crates/cli/src/commands/check.rs @@ -1,3 +1,4 @@ +use forgekit_core::tools::gs::GsTool; use forgekit_core::tools::qpdf::QpdfTool; use forgekit_core::tools::{Tool, ToolConfig}; use forgekit_core::utils::error::Result; @@ -17,7 +18,8 @@ pub fn handle_check_deps() -> Result<()> { let tools: Vec<(&'static str, Box)> = vec![ ("qpdf", Box::new(QpdfTool)), - // TODO: Add other tools (pdfcpu, ocrmypdf, ffmpeg, libvips, etc.) + ("gs", Box::new(GsTool)), + // TODO: Add other tools (ocrmypdf, ffmpeg, libvips, etc.) 
]; let mut all_ok = true; @@ -57,26 +59,28 @@ pub fn handle_check_deps() -> Result<()> { println!("Install missing dependencies individually, or install all at once:\n"); match platform { Platform::MacOS => { - println!(" brew install qpdf pdfcpu tesseract ffmpeg libvips exiftool"); + println!(" brew install qpdf ghostscript tesseract ffmpeg libvips exiftool"); println!(" pip3 install ocrmypdf"); } Platform::Windows => { - println!(" winget install qpdf.qpdf pdfcpu.pdfcpu tesseract-ocr ffmpeg"); + println!( + " winget install qpdf.qpdf ArtifexSoftware.GhostScript tesseract-ocr ffmpeg" + ); println!(" scoop install libvips exiftool"); println!(" pip install ocrmypdf"); } Platform::Linux => { println!(" # Debian/Ubuntu:"); - println!(" sudo apt install qpdf pdfcpu tesseract-ocr ffmpeg libvips-tools libimage-exiftool-perl"); + println!(" sudo apt install qpdf ghostscript tesseract-ocr ffmpeg libvips-tools libimage-exiftool-perl"); println!(" pip3 install ocrmypdf"); println!("\n # Fedora/RHEL:"); println!( - " sudo dnf install qpdf pdfcpu tesseract ffmpeg libvips perl-Image-ExifTool" + " sudo dnf install qpdf ghostscript tesseract ffmpeg libvips perl-Image-ExifTool" ); println!(" pip3 install ocrmypdf"); println!("\n # Arch Linux:"); println!( - " sudo pacman -S qpdf pdfcpu tesseract ffmpeg libvips perl-image-exiftool" + " sudo pacman -S qpdf ghostscript tesseract ffmpeg libvips perl-image-exiftool" ); println!(" pip3 install ocrmypdf"); } diff --git a/crates/cli/src/commands/pdf.rs b/crates/cli/src/commands/pdf.rs index 00ce6ee..70599ac 100644 --- a/crates/cli/src/commands/pdf.rs +++ b/crates/cli/src/commands/pdf.rs @@ -9,42 +9,52 @@ pub enum PdfCommand { /// Merge multiple PDFs into a single file /// /// Examples: - /// # Merge two PDFs /// forgekit pdf merge doc1.pdf doc2.pdf --output merged.pdf - /// - /// # Merge with linearization and JSON progress - /// forgekit pdf merge *.pdf --output all.pdf --linearize --json - /// - /// # See what commands would run - /// 
forgekit pdf merge a.pdf b.pdf --output c.pdf --plan - /// - /// Exit codes: - /// 0 Success - /// 1 Processing failed (check logs) - /// 2 Tool not found (install qpdf) - /// 3 Invalid input (file not found or invalid format) - /// 4 Permission denied - /// 5 Disk full + /// forgekit pdf merge *.pdf --output all.pdf --linearize Merge(MergeArgs), /// Split PDF into separate files by page ranges /// /// Examples: - /// # Extract pages 1-3 - /// forgekit pdf split book.pdf --output-dir pages/ --pages 1-3 - /// - /// # Extract odd pages + /// forgekit pdf split book.pdf --output-dir pages/ --pages 1-5 /// forgekit pdf split book.pdf --output-dir pages/ --pages odd /// - /// # Extract complex range - /// forgekit pdf split book.pdf --output-dir pages/ --pages "1-5,10-20,odd" - /// - /// Page specification: - /// - Numbers: 1, 42 - /// - Ranges: 1-5, 10-20, 7- (7 to end), -10 (1 to 10) - /// - Keywords: odd, even, first, last - /// - Exclusions: !2, !5-10 - /// - Combined: 1-3,5,7-,odd,!2 + /// Page spec: numbers (1), ranges (1-5, 7-), keywords (odd, even), exclusions (!2) Split(SplitArgs), + /// Compress a PDF to reduce file size + /// + /// Examples: + /// forgekit pdf compress input.pdf --output compressed.pdf + /// forgekit pdf compress input.pdf --output compressed.pdf --level light + /// forgekit pdf compress input.pdf --output compressed.pdf --level xhigh + /// + /// Compression levels (using Ghostscript): + /// - light: High quality (~4.2MB) + /// - standard: Medium quality (~3.4MB, default) + /// - high: Low quality (~2.7MB) + /// - xhigh: Extreme compression (<1MB, significant quality loss) + Compress(CompressArgs), + /// Linearize a PDF for fast web viewing + /// + /// Example: + /// forgekit pdf linearize input.pdf --output linearized.pdf + Linearize(LinearizeArgs), + /// Reorder pages in a PDF + /// + /// Examples: + /// forgekit pdf reorder input.pdf --output reordered.pdf --pages 3,1,2 + /// forgekit pdf reorder input.pdf --output reversed.pdf --pages 
5,4,3,2,1 + Reorder(ReorderArgs), + /// Extract specific pages from a PDF + /// + /// Examples: + /// forgekit pdf extract book.pdf --output pages.pdf --pages 1-5 + /// forgekit pdf extract book.pdf --output odd.pdf --pages odd + /// forgekit pdf extract book.pdf --output-dir images/ --pages 1-5 --format png + /// forgekit pdf extract book.pdf --output-dir images/ --pages 1-5 --format jpeg + /// + /// Page spec: numbers (1), ranges (1-5, 7-), keywords (odd, even), exclusions (!2) + /// Formats: pdf (default), png, jpeg/jpg (image files per page) + Extract(ExtractArgs), } #[derive(Args, Clone)] @@ -77,20 +87,96 @@ pub struct SplitArgs { )] pub output_dir: PathBuf, - /// Page specification (e.g., "1-3,5,7-", "odd", "even", "!2") + /// Page specification (e.g., "1-5", "odd", "1-10,!2") + #[arg(short, long, required = true)] + pub pages: String, +} + +#[derive(Args, Clone)] +pub struct CompressArgs { + /// Input PDF file + #[arg(required = true, help = "Input PDF file")] + pub input: PathBuf, + + /// Output PDF file path + #[arg(short, long, required = true, help = "Output PDF file path")] + pub output: PathBuf, + + /// Compression level: light, standard (default), high, or xhigh (extreme) + #[arg(short, long, default_value = "standard")] + pub level: String, +} + +#[derive(Args, Clone)] +pub struct LinearizeArgs { + /// Input PDF file + #[arg(required = true, help = "Input PDF file")] + pub input: PathBuf, + + /// Output PDF file path + #[arg(short, long, required = true, help = "Output PDF file path")] + pub output: PathBuf, +} + +#[derive(Args, Clone)] +pub struct ReorderArgs { + /// Input PDF file + #[arg(required = true, help = "Input PDF file")] + pub input: PathBuf, + + /// Output PDF file path + #[arg(short, long, required = true, help = "Output PDF file path")] + pub output: PathBuf, + + /// Page order (comma-separated, 1-indexed). 
Example: "3,1,2" means page 3, then 1, then 2 #[arg( short, long, required = true, - help = "Page specification: numbers (1, 42), ranges (1-5, 7-), keywords (odd, even, first, last), or exclusions (!2)" + help = "Page order (comma-separated, 1-indexed). Example: 3,1,2 means page 3, then 1, then 2" )] pub pages: String, } +#[derive(Args, Clone)] +pub struct ExtractArgs { + /// Input PDF file + #[arg(required = true, help = "Input PDF file")] + pub input: PathBuf, + + /// Output PDF file path (required when format is 'pdf') + #[arg( + short, + long, + help = "Output PDF file path (required when format is 'pdf')" + )] + pub output: Option, + + /// Output directory path (required when format is 'png' or 'jpeg') + #[arg( + short = 'd', + long, + help = "Output directory path (required when format is 'png' or 'jpeg')" + )] + pub output_dir: Option, + + /// Page specification (e.g., "1-5", "odd", "1-10,!2") + #[arg(short, long, required = true)] + pub pages: String, + + /// Output format: pdf (default), png, or jpeg/jpg + #[arg(long, default_value = "pdf")] + pub format: String, +} + pub fn handle_pdf_command(cmd: PdfCommand, plan_only: bool, json_output: bool) -> Result<()> { match cmd { PdfCommand::Merge(args) => handle_merge(args, plan_only, json_output), PdfCommand::Split(args) => handle_split(args, plan_only, json_output), + PdfCommand::Compress(args) => handle_compress(args, plan_only, json_output), + PdfCommand::Linearize(args) => handle_linearize(args, plan_only, json_output), + PdfCommand::Reorder(args) => handle_reorder(args, plan_only, json_output), + PdfCommand::Extract(args) => handle_extract(args, plan_only, json_output), } } @@ -181,3 +267,210 @@ fn handle_split(args: SplitArgs, plan_only: bool, json_output: bool) -> Result<( Ok(()) } } + +fn handle_compress(args: CompressArgs, plan_only: bool, json_output: bool) -> Result<()> { + let spec = JobSpec::PdfCompress { + input: args.input, + output: args.output, + level: args.level, + }; + + if plan_only { + let 
plan = forgekit_core::job::executor::execute_job(&spec, true)?; + if json_output { + let event = forgekit_core::job::progress::ProgressEvent::Progress { + version: 1, + job_id: forgekit_core::job::progress::new_job_id(), + progress: forgekit_core::job::progress::ProgressInfo { + current: 0, + total: 1, + percent: 0, + stage: Some("plan".to_string()), + }, + message: plan.clone(), + }; + println!("{}", serde_json::to_string(&event).unwrap()); + } else { + println!("{}", plan); + } + Ok(()) + } else { + if json_output { + let reporter = forgekit_core::job::progress::JsonProgressReporter; + forgekit_core::job::executor::execute_job_with_progress(&spec, false, &reporter)?; + } else { + let result = forgekit_core::job::executor::execute_job(&spec, false)?; + println!("{}", result); + } + Ok(()) + } +} + +fn handle_linearize(args: LinearizeArgs, plan_only: bool, json_output: bool) -> Result<()> { + let spec = JobSpec::PdfLinearize { + input: args.input, + output: args.output, + }; + + if plan_only { + let plan = forgekit_core::job::executor::execute_job(&spec, true)?; + if json_output { + let event = forgekit_core::job::progress::ProgressEvent::Progress { + version: 1, + job_id: forgekit_core::job::progress::new_job_id(), + progress: forgekit_core::job::progress::ProgressInfo { + current: 0, + total: 1, + percent: 0, + stage: Some("plan".to_string()), + }, + message: plan.clone(), + }; + println!("{}", serde_json::to_string(&event).unwrap()); + } else { + println!("{}", plan); + } + Ok(()) + } else { + if json_output { + let reporter = forgekit_core::job::progress::JsonProgressReporter; + forgekit_core::job::executor::execute_job_with_progress(&spec, false, &reporter)?; + } else { + let result = forgekit_core::job::executor::execute_job(&spec, false)?; + println!("{}", result); + } + Ok(()) + } +} + +fn handle_reorder(args: ReorderArgs, plan_only: bool, json_output: bool) -> Result<()> { + // Parse page order from comma-separated string + let page_order: Vec = args + 
.pages + .split(',') + .map(|s| s.trim().parse::()) + .collect::, _>>() + .map_err( + |_| forgekit_core::utils::error::ForgeKitError::InvalidInput { + path: PathBuf::new(), + reason: + "Invalid page order format. Expected comma-separated numbers (e.g., '3,1,2')" + .to_string(), + }, + )?; + + if page_order.is_empty() { + return Err(forgekit_core::utils::error::ForgeKitError::InvalidInput { + path: PathBuf::new(), + reason: "Page order cannot be empty".to_string(), + }); + } + + let spec = JobSpec::PdfReorder { + input: args.input, + output: args.output, + page_order, + }; + + if plan_only { + let plan = forgekit_core::job::executor::execute_job(&spec, true)?; + if json_output { + let event = forgekit_core::job::progress::ProgressEvent::Progress { + version: 1, + job_id: forgekit_core::job::progress::new_job_id(), + progress: forgekit_core::job::progress::ProgressInfo { + current: 0, + total: 1, + percent: 0, + stage: Some("plan".to_string()), + }, + message: plan.clone(), + }; + println!("{}", serde_json::to_string(&event).unwrap()); + } else { + println!("{}", plan); + } + Ok(()) + } else { + if json_output { + let reporter = forgekit_core::job::progress::JsonProgressReporter; + forgekit_core::job::executor::execute_job_with_progress(&spec, false, &reporter)?; + } else { + let result = forgekit_core::job::executor::execute_job(&spec, false)?; + println!("{}", result); + } + Ok(()) + } +} + +fn handle_extract(args: ExtractArgs, plan_only: bool, json_output: bool) -> Result<()> { + // Validate format and required paths + match args.format.as_str() { + "pdf" => { + if args.output.is_none() { + return Err(forgekit_core::utils::error::ForgeKitError::InvalidInput { + path: PathBuf::new(), + reason: "Output file path required when format is 'pdf'. 
Use --output " + .to_string(), + }); + } + } + "png" | "jpeg" | "jpg" => { + if args.output_dir.is_none() { + return Err(forgekit_core::utils::error::ForgeKitError::InvalidInput { + path: PathBuf::new(), + reason: format!("Output directory path required when format is '{}'. Use --output-dir ", args.format), + }); + } + } + _ => { + return Err(forgekit_core::utils::error::ForgeKitError::InvalidInput { + path: PathBuf::new(), + reason: format!( + "Unknown format '{}'. Supported formats: pdf, png, jpeg, jpg", + args.format + ), + }); + } + } + + let pages = PageSpec::parse(&args.pages)?; + + let spec = JobSpec::PdfExtract { + input: args.input, + output: args.output, + output_dir: args.output_dir, + pages, + format: args.format, + }; + + if plan_only { + let plan = forgekit_core::job::executor::execute_job(&spec, true)?; + if json_output { + let event = forgekit_core::job::progress::ProgressEvent::Progress { + version: 1, + job_id: forgekit_core::job::progress::new_job_id(), + progress: forgekit_core::job::progress::ProgressInfo { + current: 0, + total: 1, + percent: 0, + stage: Some("plan".to_string()), + }, + message: plan.clone(), + }; + println!("{}", serde_json::to_string(&event).unwrap()); + } else { + println!("{}", plan); + } + Ok(()) + } else { + if json_output { + let reporter = forgekit_core::job::progress::JsonProgressReporter; + forgekit_core::job::executor::execute_job_with_progress(&spec, false, &reporter)?; + } else { + let result = forgekit_core::job::executor::execute_job(&spec, false)?; + println!("{}", result); + } + Ok(()) + } +} diff --git a/crates/cli/src/main.rs b/crates/cli/src/main.rs index e58823c..a7934d4 100644 --- a/crates/cli/src/main.rs +++ b/crates/cli/src/main.rs @@ -34,51 +34,27 @@ use forgekit_core::utils::error::{ExitCode, ForgeKitError}; /// /// Global flags apply to all commands. Subcommands are defined in the `Commands` enum. 
#[derive(Parser)] -#[command(name = "forgekit")] -#[command(about = "Local-first media and PDF toolkit", long_about = None)] +#[command( + name = "forgekit", + about = "Local-first media and PDF toolkit", + long_about = "ForgeKit - Fast, lightweight, and privacy-focused media toolkit.\n\nQuick Start:\n forgekit pdf merge doc1.pdf doc2.pdf --output merged.pdf\n forgekit pdf split book.pdf --output-dir pages/ --pages 1-5\n forgekit pdf compress large.pdf --output small.pdf --level high" +)] struct Cli { #[command(subcommand)] command: Option, - /// Output progress as NDJSON (newline-delimited JSON). - /// - /// Each line is a JSON object representing a progress event. Useful for - /// scripting and automation. Example: `forgekit pdf merge --json | jq .progress` - #[arg(long, global = true)] - json: bool, - - /// Show underlying commands without executing. - /// - /// Prints the exact command that would be run (e.g., `qpdf --linearize input.pdf output.pdf`). - /// Great for debugging and understanding what ForgeKit does under the hood. + /// Show what commands would be executed without running them #[arg(long, global = true)] plan: bool, - /// Validate inputs and show plan, don't execute. - /// - /// Like `--plan`, but also validates that input files exist and paths are valid. - /// Useful for checking if a command would work before actually running it. + /// Output progress as JSON (NDJSON format, one event per line) #[arg(long, global = true)] - dry_run: bool, - - /// Log level (debug, info, warn, error). - /// - /// Controls verbosity of internal logging. `debug` shows tool invocations, - /// temp file paths, and progress parsing details. - #[arg(long, global = true, default_value = "info")] - log_level: String, - - /// Overwrite existing output files without prompting. - /// - /// By default, ForgeKit will error if the output file already exists to prevent - /// accidental data loss. Use this flag to allow overwriting. 
- #[arg(short, long, global = true)] - force: bool, + json: bool, } #[derive(Subcommand)] enum Commands { - /// PDF operations + /// PDF operations (merge, split, compress, extract, etc.) #[command(subcommand)] Pdf(PdfCommand), @@ -88,7 +64,7 @@ enum Commands { fn main() { let cli = Cli::parse(); - let plan_only = cli.plan || cli.dry_run; + let plan_only = cli.plan; let json_output = cli.json; let result = match &cli.command { diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml index c3e402e..93f9d36 100644 --- a/crates/core/Cargo.toml +++ b/crates/core/Cargo.toml @@ -8,7 +8,8 @@ license.workspace = true [dependencies] anyhow = { workspace = true } thiserror = { workspace = true } -serde = { workspace = true } +serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true } uuid = { workspace = true } +serde_yaml = "0.9.34" diff --git a/crates/core/src/job/executor.rs b/crates/core/src/job/executor.rs index c0092c1..2309215 100644 --- a/crates/core/src/job/executor.rs +++ b/crates/core/src/job/executor.rs @@ -24,10 +24,13 @@ use crate::job::progress::{ new_job_id, ErrorInfo, JobResult, ProgressEvent, ProgressInfo, ProgressReporter, }; use crate::job::JobSpec; +use crate::presets::get_compression_strategy; +use crate::tools::gs::GsTool; use crate::tools::qpdf::QpdfTool; use crate::tools::{Tool, ToolConfig}; use crate::utils::error::{ForgeKitError, Result}; use crate::utils::pages::PageSpec; +use crate::utils::temp::create_temp_file; use std::time::Instant; /// Execute a job specification without progress reporting. 
@@ -72,6 +75,31 @@ pub fn execute_job_with_progress( output_dir, pages, } => execute_pdf_split(input, output_dir, pages, plan_only), + JobSpec::PdfCompress { + input, + output, + level, + } => execute_pdf_compress(input, output, level, plan_only), + JobSpec::PdfLinearize { input, output } => execute_pdf_linearize(input, output, plan_only), + JobSpec::PdfReorder { + input, + output, + page_order, + } => execute_pdf_reorder(input, output, page_order, plan_only), + JobSpec::PdfExtract { + input, + output, + output_dir, + pages, + format, + } => execute_pdf_extract( + input, + output.as_ref(), + output_dir.as_ref(), + pages, + format, + plan_only, + ), } } @@ -232,16 +260,24 @@ fn execute_pdf_split( let config = ToolConfig::default(); let tool_info = tool.probe(&config)?; - // For now, we'll assume total_pages = 100 (in real implementation, we'd query this) - // This is a simplified version - in production we'd need to get actual page count - let total_pages = 100; // TODO: Get actual page count from PDF + // Get actual page count + let total_pages = if plan_only { + // In plan mode, we might not have the file, or just want to show example + if input.exists() { + tool.get_page_count(&tool_info.path, input).unwrap_or(100) + } else { + 100 // Placeholder for plan + } + } else { + tool.get_page_count(&tool_info.path, input)? + }; if plan_only { // Generate plan showing the command that would be run - let qpdf_pages = PageSpec::to_qpdf_pages(pages, total_pages)?; + let qpdf_pages = PageSpec::to_qpdf_pages(pages, total_pages as usize)?; let output_file = output_dir.join("split_output.pdf"); return Ok(format!( - "qpdf {} --pages {} -- {}", + "qpdf {} --pages . 
{} -- {}", input.display(), qpdf_pages, output_file.display() @@ -252,12 +288,13 @@ fn execute_pdf_split( std::fs::create_dir_all(output_dir).map_err(ForgeKitError::Io)?; // Build qpdf command - let qpdf_pages = PageSpec::to_qpdf_pages(pages, total_pages)?; + let qpdf_pages = PageSpec::to_qpdf_pages(pages, total_pages as usize)?; let output_file = output_dir.join("split_output.pdf"); let mut cmd = Command::new(&tool_info.path); cmd.arg(input); cmd.arg("--pages"); + cmd.arg("."); cmd.arg(&qpdf_pages); cmd.arg("--"); cmd.arg(&output_file); @@ -282,3 +319,582 @@ fn execute_pdf_split( output_file.display() )) } + +fn execute_pdf_compress( + input: &PathBuf, + output: &PathBuf, + level: &str, + plan_only: bool, +) -> Result { + let strategy = get_compression_strategy(level); + + if plan_only { + // Generate plan showing the command that would be run + let mut cmd_parts = vec![strategy.tool.clone()]; + match strategy.tool.as_str() { + "gs" => { + // Ghostscript: gs -sOutputFile=output.pdf [flags] -f input.pdf + // -sOutputFile must come before -c (PostScript) flags + // -f separates the input file from preceding -c arguments + cmd_parts.push(format!("-sOutputFile={}", output.display())); + cmd_parts.extend(strategy.flags.iter().cloned()); + cmd_parts.push("-f".to_string()); + cmd_parts.push(input.display().to_string()); + } + "qpdf" => { + cmd_parts.extend(strategy.flags.iter().cloned()); + cmd_parts.push(input.display().to_string()); + cmd_parts.push(output.display().to_string()); + } + _ => { + return Err(ForgeKitError::Other(anyhow::anyhow!( + "Unknown compression tool: {}", + strategy.tool + ))); + } + } + return Ok(cmd_parts.join(" ")); + } + + if !input.exists() { + return Err(ForgeKitError::InvalidInput { + path: input.clone(), + reason: "Input file does not exist".to_string(), + }); + } + + match strategy.tool.as_str() { + "gs" => { + // Probe for Ghostscript + let tool = GsTool; + let config = ToolConfig::default(); + let tool_info = tool.probe(&config)?; + + // 
Build Ghostscript command
+            // gs -sOutputFile=output.pdf [flags] -f input.pdf
+            // Note: the input file comes LAST for Ghostscript
+            let mut cmd = Command::new(&tool_info.path);
+            // Argument order mirrors the plan branch above: the output file
+            // must come BEFORE any -c (PostScript) flags in the preset.
+            cmd.arg(format!("-sOutputFile={}", output.display()));
+            cmd.args(&strategy.flags);
+            // Must use -f to separate input file from preceding -c arguments
+            cmd.arg("-f");
+            cmd.arg(input);
+
+            // Execute
+            let output_result = cmd.output().map_err(|e| ForgeKitError::ProcessingFailed {
+                tool: "gs".to_string(),
+                stderr: format!("Failed to execute: {}", e),
+            })?;
+
+            if !output_result.status.success() {
+                let stderr = String::from_utf8_lossy(&output_result.stderr);
+                return Err(ForgeKitError::ProcessingFailed {
+                    tool: "gs".to_string(),
+                    stderr: stderr.to_string(),
+                });
+            }
+        }
+        "qpdf" => {
+            // Probe for qpdf (kept for future use, e.g., page operations)
+            let tool = QpdfTool;
+            let config = ToolConfig::default();
+            let tool_info = tool.probe(&config)?;
+
+            // Build qpdf command
+            let mut cmd = Command::new(&tool_info.path);
+            cmd.args(&strategy.flags);
+            cmd.arg(input);
+            cmd.arg(output);
+
+            // Execute
+            let output_result = cmd.output().map_err(|e| ForgeKitError::ProcessingFailed {
+                tool: "qpdf".to_string(),
+                stderr: format!("Failed to execute: {}", e),
+            })?;
+
+            if !output_result.status.success() {
+                let stderr = String::from_utf8_lossy(&output_result.stderr);
+                return Err(ForgeKitError::ProcessingFailed {
+                    tool: "qpdf".to_string(),
+                    stderr: stderr.to_string(),
+                });
+            }
+        }
+        _ => {
+            return Err(ForgeKitError::Other(anyhow::anyhow!(
+                "Unknown compression tool: {}",
+                strategy.tool
+            )));
+        }
+    }
+
+    Ok(format!(
+        "Successfully compressed PDF to {}",
+        output.display()
+    ))
+}
+
+fn execute_pdf_linearize(input: &PathBuf, output: &PathBuf, plan_only: bool) -> Result<String> {
+    if plan_only {
+        return Ok(format!(
+            "qpdf --linearize {} {}",
+            input.display(),
+            output.display()
+        ));
+    }
+
+    if
!input.exists() { + return Err(ForgeKitError::InvalidInput { + path: input.clone(), + reason: "Input file does not exist".to_string(), + }); + } + + // Probe for qpdf + let tool = QpdfTool; + let config = ToolConfig::default(); + let tool_info = tool.probe(&config)?; + + // Build qpdf command + let mut cmd = Command::new(&tool_info.path); + cmd.arg("--linearize"); + cmd.arg(input); + cmd.arg(output); + + // Execute + let output_result = cmd.output().map_err(|e| ForgeKitError::ProcessingFailed { + tool: "qpdf".to_string(), + stderr: format!("Failed to execute: {}", e), + })?; + + if !output_result.status.success() { + let stderr = String::from_utf8_lossy(&output_result.stderr); + return Err(ForgeKitError::ProcessingFailed { + tool: "qpdf".to_string(), + stderr: stderr.to_string(), + }); + } + + Ok(format!( + "Successfully linearized PDF to {}", + output.display() + )) +} + +fn execute_pdf_reorder( + input: &PathBuf, + output: &PathBuf, + page_order: &[u32], + plan_only: bool, +) -> Result { + if page_order.is_empty() { + return Err(ForgeKitError::InvalidInput { + path: PathBuf::new(), + reason: "Page order cannot be empty".to_string(), + }); + } + + // Convert page_order (1-indexed) to qpdf pages format + let pages_str = page_order + .iter() + .map(|p| p.to_string()) + .collect::>() + .join(","); + + if plan_only { + return Ok(format!( + "qpdf {} --pages . 
{} -- {}", + input.display(), + pages_str, + output.display() + )); + } + + if !input.exists() { + return Err(ForgeKitError::InvalidInput { + path: input.clone(), + reason: "Input file does not exist".to_string(), + }); + } + + // Probe for qpdf + let tool = QpdfTool; + let config = ToolConfig::default(); + let tool_info = tool.probe(&config)?; + + // Build qpdf command + let mut cmd = Command::new(&tool_info.path); + cmd.arg(input); + cmd.arg("--pages"); + cmd.arg("."); + cmd.arg(&pages_str); + cmd.arg("--"); + cmd.arg(output); + + // Execute + let output_result = cmd.output().map_err(|e| ForgeKitError::ProcessingFailed { + tool: "qpdf".to_string(), + stderr: format!("Failed to execute: {}", e), + })?; + + if !output_result.status.success() { + let stderr = String::from_utf8_lossy(&output_result.stderr); + return Err(ForgeKitError::ProcessingFailed { + tool: "qpdf".to_string(), + stderr: stderr.to_string(), + }); + } + + Ok(format!( + "Successfully reordered PDF pages to {}", + output.display() + )) +} + +fn extract_pages_to_pdf_helper( + input: &PathBuf, + output_path: &PathBuf, + pages: &[PageSpec], + plan_only: bool, +) -> Result> { + // Get actual page count + let total_pages = if plan_only { + if input.exists() { + let tool = QpdfTool; + let config = ToolConfig::default(); + if let Ok(info) = tool.probe(&config) { + tool.get_page_count(&info.path, input).unwrap_or(100) + } else { + 100 + } + } else { + 100 + } + } else { + // We need to probe for tool info first to get path + let tool = QpdfTool; + let config = ToolConfig::default(); + let tool_info = tool.probe(&config)?; + tool.get_page_count(&tool_info.path, input)? + }; + + if plan_only { + let qpdf_pages = PageSpec::to_qpdf_pages(pages, total_pages as usize)?; + return Ok(Some(format!( + "qpdf {} --pages . 
{} -- {}", + input.display(), + qpdf_pages, + output_path.display() + ))); + } + + if !input.exists() { + return Err(ForgeKitError::InvalidInput { + path: input.clone(), + reason: "Input file does not exist".to_string(), + }); + } + + // Probe for qpdf + let tool = QpdfTool; + let config = ToolConfig::default(); + let tool_info = tool.probe(&config)?; + + // Build qpdf command + let qpdf_pages = PageSpec::to_qpdf_pages(pages, total_pages as usize)?; + + let mut cmd = Command::new(&tool_info.path); + cmd.arg(input); + cmd.arg("--pages"); + cmd.arg("."); + cmd.arg(&qpdf_pages); + cmd.arg("--"); + cmd.arg(output_path); + + // Execute + let output_result = cmd.output().map_err(|e| ForgeKitError::ProcessingFailed { + tool: "qpdf".to_string(), + stderr: format!("Failed to execute: {}", e), + })?; + + if !output_result.status.success() { + let stderr = String::from_utf8_lossy(&output_result.stderr); + return Err(ForgeKitError::ProcessingFailed { + tool: "qpdf".to_string(), + stderr: stderr.to_string(), + }); + } + + Ok(None) +} + +fn execute_pdf_extract( + input: &PathBuf, + output: Option<&PathBuf>, + output_dir: Option<&PathBuf>, + pages: &[PageSpec], + format: &str, + plan_only: bool, +) -> Result { + if pages.is_empty() { + return Err(ForgeKitError::InvalidInput { + path: PathBuf::new(), + reason: "At least one page specification required".to_string(), + }); + } + + match format { + "pdf" => { + let output_path = output.ok_or_else(|| ForgeKitError::InvalidInput { + path: PathBuf::new(), + reason: "Output file path required when format is 'pdf'".to_string(), + })?; + + if let Some(cmd) = extract_pages_to_pdf_helper(input, output_path, pages, plan_only)? 
{ + return Ok(cmd); + } + + Ok(format!( + "Successfully extracted PDF pages to {}", + output_path.display() + )) + } + "png" | "jpeg" | "jpg" => { + let output_dir_path = output_dir.ok_or_else(|| ForgeKitError::InvalidInput { + path: PathBuf::new(), + reason: format!("Output directory path required when format is '{}'", format), + })?; + + if !plan_only { + std::fs::create_dir_all(output_dir_path).map_err(ForgeKitError::Io)?; + } + + // Normalization + let ext = if format == "png" { "png" } else { "jpg" }; + let device = if format == "png" { "png16m" } else { "jpeg" }; + + if plan_only { + let temp_path = input.with_file_name("temp_extract_pages.pdf"); + let qpdf_cmd = + extract_pages_to_pdf_helper(input, &temp_path, pages, true)?.unwrap(); + + let out_pattern = output_dir_path.join(format!("page_%d.{}", ext)); + let gs_cmd = format!( + "gs -sDEVICE={} -dNOPAUSE -dBATCH -dQUIET -r300 -sOutputFile={} {}", + device, + out_pattern.display(), + temp_path.display() + ); + + return Ok(format!( + "{} && {} && rm {}", + qpdf_cmd, + gs_cmd, + temp_path.display() + )); + } + + // Real execution + // 1. Extract to temp PDF + let temp_pdf = create_temp_file("extract", ".pdf")?; + extract_pages_to_pdf_helper(input, &temp_pdf, pages, false)?; + + // 2. Convert temp PDF to images using Ghostscript + let tool = GsTool; + let config = ToolConfig::default(); + let tool_info = tool.probe(&config)?; + + let mut cmd = Command::new(&tool_info.path); + cmd.arg(format!("-sDEVICE={}", device)); + cmd.arg("-dNOPAUSE"); + cmd.arg("-dBATCH"); + cmd.arg("-dQUIET"); + cmd.arg("-r300"); // 300 DPI for high quality extraction + + let out_pattern = output_dir_path.join(format!("page_%d.{}", ext)); + cmd.arg(format!("-sOutputFile={}", out_pattern.display())); + cmd.arg(&temp_pdf); + + let output_result = cmd.output().map_err(|e| ForgeKitError::ProcessingFailed { + tool: "gs".to_string(), + stderr: format!("Failed to execute gs: {}", e), + })?; + + // 3. 
Cleanup temp file explicitly + let _ = std::fs::remove_file(&temp_pdf); + + if !output_result.status.success() { + let stderr = String::from_utf8_lossy(&output_result.stderr); + return Err(ForgeKitError::ProcessingFailed { + tool: "gs".to_string(), + stderr: stderr.to_string(), + }); + } + + Ok(format!( + "Successfully extracted images to directory {}", + output_dir_path.display() + )) + } + "images" => Err(ForgeKitError::InvalidInput { + path: PathBuf::new(), + reason: "Format 'images' is ambiguous. Please specify 'png' or 'jpeg/jpg'.".to_string(), + }), + _ => Err(ForgeKitError::InvalidInput { + path: PathBuf::new(), + reason: format!( + "Unknown format '{}'. Supported formats: pdf, png, jpeg, jpg", + format + ), + }), + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::utils::pages::PageSpec; + + // Helper to create valid page specs + fn pages(spec: &str) -> Vec { + PageSpec::parse(spec).unwrap() + } + + #[test] + fn test_execute_pdf_linearize_plan() { + let input = PathBuf::from("input.pdf"); + let output = PathBuf::from("output.pdf"); + + let result = execute_pdf_linearize(&input, &output, true).unwrap(); + + assert!(result.contains("qpdf")); + assert!(result.contains("--linearize")); + assert!(result.contains("input.pdf")); + assert!(result.contains("output.pdf")); + } + + #[test] + fn test_execute_pdf_reorder_plan() { + let input = PathBuf::from("input.pdf"); + let output = PathBuf::from("output.pdf"); + let order_spec = pages("1,3,2"); + let order: Vec = order_spec + .iter() + .filter_map(|s| match s { + PageSpec::Page(n) => Some(*n as u32), + _ => None, + }) + .collect(); + + // This invokes probe(). If qpdf missing, skip? + // But in plan_only + input doesn't exist? + // execute_pdf_reorder probes first. 
+ + match execute_pdf_reorder(&input, &output, &order, true) { + Ok(result) => { + assert!(result.contains("qpdf")); + assert!(result.contains("--pages")); + assert!(result.contains(".")); + assert!(result.contains("1,3,2")); + } + Err(ForgeKitError::ToolNotFound { .. }) => { + println!("Skipping test_execute_pdf_reorder_plan: qpdf not found"); + } + Err(e) => panic!("Unexpected error: {:?}", e), + } + } + + #[test] + fn test_execute_pdf_extract_plan() { + let input = PathBuf::from("input.pdf"); + let output_path = PathBuf::from("output.pdf"); + let pages_spec = pages("1-3"); + + // execute_pdf_extract also probes page count if input exists. + // If input doesn't exist, it uses default 100. + // "input.pdf" likely doesn't exist. + + let result = + execute_pdf_extract(&input, Some(&output_path), None, &pages_spec, "pdf", true) + .unwrap(); + + // 1-3 of 100 is 1-3. + assert!(result.contains("qpdf")); + assert!(result.contains("--pages")); + assert!(result.contains(".")); + assert!(result.contains("1-3")); + } +} +#[cfg(test)] +mod image_tests { + use super::*; + use crate::utils::pages::PageSpec; + + fn pages(spec: &str) -> Vec { + PageSpec::parse(spec).unwrap() + } + + #[test] + fn test_execute_pdf_extract_images_ambiguous() { + let input = PathBuf::from("input.pdf"); + let output_dir = PathBuf::from("out_dir"); + let pages_spec = pages("1"); + + let result = execute_pdf_extract( + &input, + None, + Some(&output_dir), + &pages_spec, + "images", + false, + ); + + assert!(result.is_err()); + match result { + Err(ForgeKitError::InvalidInput { reason, .. 
}) => { + assert!(reason.to_lowercase().contains("ambiguous")); + } + _ => panic!("Expected InvalidInput error for ambiguous format"), + } + } + + #[test] + fn test_execute_pdf_extract_png_plan() { + let input = PathBuf::from("input.pdf"); + let output_dir = PathBuf::from("out_dir"); + let pages_spec = pages("1-3"); + + let result = execute_pdf_extract( + &input, + None, + Some(&output_dir), + &pages_spec, + "png", // png format + true, // plan only + ) + .unwrap(); + + // Expected plan: "qpdf ... && gs ... && rm ..." + assert!(result.contains("qpdf")); + assert!(result.contains("gs")); + assert!(result.contains("png16m")); // device + assert!(result.contains("page_%d.png")); + assert!(result.contains("rm")); // cleanup + } + + #[test] + fn test_execute_pdf_extract_jpeg_plan() { + let input = PathBuf::from("input.pdf"); + let output_dir = PathBuf::from("out_dir"); + let pages_spec = pages("odd"); + + let result = + execute_pdf_extract(&input, None, Some(&output_dir), &pages_spec, "jpeg", true) + .unwrap(); + + assert!(result.contains("jpeg")); // device + assert!(result.contains("page_%d.jpg")); + } +} diff --git a/crates/core/src/job/spec.rs b/crates/core/src/job/spec.rs index e6543ea..998f759 100644 --- a/crates/core/src/job/spec.rs +++ b/crates/core/src/job/spec.rs @@ -53,11 +53,55 @@ pub enum JobSpec { /// Page specifications defining which pages to extract. pages: Vec, }, - // TODO: Add more job types as we implement them: - // PdfCompress { input: PathBuf, output: PathBuf, level: CompressionLevel }, - // PdfOcr { input: PathBuf, output: PathBuf, language: String }, - // ImageConvert { input: PathBuf, output: PathBuf, format: ImageFormat }, - // etc. + /// Compress a PDF using Ghostscript with compression level control. + /// + /// Uses Ghostscript with optimized QFactor settings for fast compression. + PdfCompress { + /// Input PDF file to compress. + input: PathBuf, + /// Output PDF file path. 
+        output: PathBuf,
+        /// Compression level (light, standard, high, xhigh).
+        /// Default is "standard" (balanced compression).
+        level: String,
+    },
+    /// Linearize a PDF for fast web viewing.
+    ///
+    /// Reorganizes the PDF's internal structure so viewers can render the first
+    /// pages before the full file downloads. Standalone operation, separate from merge.
+    PdfLinearize {
+        /// Input PDF file to linearize.
+        input: PathBuf,
+        /// Output PDF file path.
+        output: PathBuf,
+    },
+    /// Reorder pages in a PDF.
+    ///
+    /// Reorders pages according to the specified order (1-indexed page numbers).
+    PdfReorder {
+        /// Input PDF file to reorder.
+        input: PathBuf,
+        /// Output PDF file path.
+        output: PathBuf,
+        /// Page order (1-indexed). Example: [3, 1, 2] means page 3, then 1, then 2.
+        page_order: Vec<u32>,
+    },
+    /// Extract specific pages from a PDF.
+    ///
+    /// Extracts pages matching the page specification. Can output to a single PDF
+    /// file or separate image files per page.
+    PdfExtract {
+        /// Input PDF file to extract from.
+        input: PathBuf,
+        /// Output PDF file path (required when format is "pdf").
+        output: Option<PathBuf>,
+        /// Output directory path (required when format is "png", "jpeg", or "jpg").
+        output_dir: Option<PathBuf>,
+        /// Page specifications defining which pages to extract.
+        pages: Vec<PageSpec>,
+        /// Output format: "pdf", "png", "jpeg", or "jpg".
+        format: String,
+    },
 }
 
 impl JobSpec {
@@ -75,6 +119,84 @@ impl JobSpec {
             JobSpec::PdfSplit { pages, .. } => {
                 format!("Split PDF into {} page spec(s)", pages.len())
             }
+            JobSpec::PdfCompress { level, .. } => {
+                format!("Compress PDF with {} compression", level)
+            }
+            JobSpec::PdfLinearize { .. } => "Linearize PDF".to_string(),
+            JobSpec::PdfReorder { page_order, .. } => {
+                format!("Reorder PDF pages ({} pages)", page_order.len())
+            }
+            JobSpec::PdfExtract { pages, format, ..
} => { + format!( + "Extract {} page spec(s) from PDF as {}", + pages.len(), + format + ) + } } } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::utils::pages::PageSpec; + + #[test] + fn test_pdf_compress_description_with_level() { + let spec = JobSpec::PdfCompress { + input: PathBuf::from("input.pdf"), + output: PathBuf::from("output.pdf"), + level: "high".to_string(), + }; + assert_eq!(spec.description(), "Compress PDF with high compression"); + } + + #[test] + fn test_pdf_compress_description_standard() { + let spec = JobSpec::PdfCompress { + input: PathBuf::from("input.pdf"), + output: PathBuf::from("output.pdf"), + level: "standard".to_string(), + }; + assert_eq!(spec.description(), "Compress PDF with standard compression"); + } + + #[test] + fn test_pdf_linearize_description() { + let spec = JobSpec::PdfLinearize { + input: PathBuf::from("input.pdf"), + output: PathBuf::from("output.pdf"), + }; + assert_eq!(spec.description(), "Linearize PDF"); + } + + #[test] + fn test_pdf_reorder_description() { + let spec = JobSpec::PdfReorder { + input: PathBuf::from("input.pdf"), + output: PathBuf::from("output.pdf"), + page_order: vec![3, 1, 2], + }; + assert_eq!(spec.description(), "Reorder PDF pages (3 pages)"); + } + + #[test] + fn test_pdf_extract_description() { + let pages = vec![ + PageSpec::Range { + start: 1, + end: Some(5), + }, + PageSpec::Page(10), + ]; + let spec = JobSpec::PdfExtract { + input: PathBuf::from("input.pdf"), + output: Some(PathBuf::from("output.pdf")), + output_dir: None, + pages, + format: "pdf".to_string(), + }; + assert_eq!(spec.description(), "Extract 2 page spec(s) from PDF as pdf"); + } +} diff --git a/crates/core/src/lib.rs b/crates/core/src/lib.rs index a7a8d17..675345a 100644 --- a/crates/core/src/lib.rs +++ b/crates/core/src/lib.rs @@ -36,7 +36,7 @@ //! ```rust,no_run //! use forgekit_core::tools::{Tool, ToolConfig, ToolInfo}; //! use forgekit_core::utils::error::Result; -//! use std::path::PathBuf; +//! 
use std::path::{Path, PathBuf}; //! //! pub struct MyTool; //! @@ -49,7 +49,7 @@ //! # todo!() //! } //! -//! fn version(&self, path: &PathBuf) -> Result { +//! fn version(&self, path: &Path) -> Result { //! // Run `mytool --version` and parse the output //! # todo!() //! } @@ -59,6 +59,7 @@ //! Then add a `JobSpec` variant and wire it up in the executor. That's it! pub mod job; +pub mod presets; pub mod tools; pub mod utils; diff --git a/crates/core/src/presets/loader.rs b/crates/core/src/presets/loader.rs new file mode 100644 index 0000000..23c89ae --- /dev/null +++ b/crates/core/src/presets/loader.rs @@ -0,0 +1,188 @@ +use crate::presets::model::PresetsConfig; +use crate::utils::error::Result; +use std::sync::OnceLock; + +// Embedded default presets +// These exactly match the specific QFactor logic we developed for fast compression. +const DEFAULT_PRESETS_YAML: &str = r#" +version: 1 +presets: + light: + tool: gs + description: "High quality (~300 dpi equivalent)" + args: + - "-sDEVICE=pdfwrite" + - "-dCompatibilityLevel=1.4" + - "-dNOPAUSE" + - "-dBATCH" + - "-dQUIET" + - "-dDownsampleColorImages=false" + - "-dDownsampleGrayImages=false" + - "-dDownsampleMonoImages=false" + - "-c" + - "<< /ColorACSImageDict << /QFactor 0.15 /Blend 1 /ColorTransform 1 /HSamples [1 1 1 1] /VSamples [1 1 1 1] >> >> setdistillerparams" + + standard: + tool: gs + description: "Medium quality (balanced defaults)" + args: + - "-sDEVICE=pdfwrite" + - "-dCompatibilityLevel=1.4" + - "-dNOPAUSE" + - "-dBATCH" + - "-dQUIET" + - "-dDownsampleColorImages=false" + - "-dDownsampleGrayImages=false" + - "-dDownsampleMonoImages=false" + - "-c" + - "<< /ColorACSImageDict << /QFactor 0.5 /Blend 1 /ColorTransform 1 /HSamples [1 1 1 1] /VSamples [1 1 1 1] >> >> setdistillerparams" + + high: + tool: gs + description: "Low quality (smallest size)" + args: + - "-sDEVICE=pdfwrite" + - "-dCompatibilityLevel=1.4" + - "-dNOPAUSE" + - "-dBATCH" + - "-dQUIET" + - "-dDownsampleColorImages=false" + - 
"-dDownsampleGrayImages=false" + - "-dDownsampleMonoImages=false" + - "-c" + - "<< /ColorACSImageDict << /QFactor 1.5 /Blend 1 /ColorTransform 1 /HSamples [1 1 1 1] /VSamples [1 1 1 1] >> >> setdistillerparams" + + xhigh: + tool: gs + description: "Extreme compression (sub-1MB target, significant quality loss)" + args: + - "-sDEVICE=pdfwrite" + - "-dCompatibilityLevel=1.4" + - "-dPDFSETTINGS=/screen" + - "-dNOPAUSE" + - "-dBATCH" + - "-dQUIET" + - "-dColorImageDownsampleType=/Bicubic" + - "-dColorImageResolution=36" + - "-dGrayImageDownsampleType=/Bicubic" + - "-dGrayImageResolution=36" + - "-dMonoImageDownsampleType=/Subsample" + - "-dMonoImageResolution=36" + - "-dDownsampleColorImages=true" + - "-dDownsampleGrayImages=true" + - "-dDownsampleMonoImages=true" + - "-c" + - "<< /ColorACSImageDict << /QFactor 2.4 /Blend 1 /ColorTransform 1 /HSamples [2 1 1 2] /VSamples [2 1 1 2] >> /GrayACSImageDict << /QFactor 2.4 /Blend 1 /HSamples [2 1 1 2] /VSamples [2 1 1 2] >> >> setdistillerparams" +"#; + +// Global cache for loaded presets +static PRESETS_CACHE: OnceLock = OnceLock::new(); + +/// Load presets from defaults (and future file overrides). +/// +/// Currently just returns the built-in defaults. +pub fn load_presets() -> Result<&'static PresetsConfig> { + Ok(PRESETS_CACHE.get_or_init(|| { + // Parse embedded defaults + serde_yaml::from_str(DEFAULT_PRESETS_YAML) + .expect("Failed to parse default presets! 
This is a bug in the embedded YAML.") + })) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_load_presets_returns_valid_config() { + let config = load_presets().expect("Failed to load presets"); + + // Verify version + assert_eq!(config.version, 1); + + // Verify all expected presets exist + assert!(config.presets.contains_key("light")); + assert!(config.presets.contains_key("standard")); + assert!(config.presets.contains_key("high")); + assert!(config.presets.contains_key("xhigh")); + } + + #[test] + fn test_load_presets_light_preset_correct() { + let config = load_presets().expect("Failed to load presets"); + let light = config.presets.get("light").expect("light preset missing"); + + assert_eq!(light.tool, "gs"); + assert!(light.description.is_some()); + assert!(light.args.iter().any(|a| a.contains("QFactor 0.15"))); + } + + #[test] + fn test_load_presets_standard_preset_correct() { + let config = load_presets().expect("Failed to load presets"); + let standard = config + .presets + .get("standard") + .expect("standard preset missing"); + + assert_eq!(standard.tool, "gs"); + assert!(standard.args.iter().any(|a| a.contains("QFactor 0.5"))); + } + + #[test] + fn test_load_presets_high_preset_correct() { + let config = load_presets().expect("Failed to load presets"); + let high = config.presets.get("high").expect("high preset missing"); + + assert_eq!(high.tool, "gs"); + assert!(high.args.iter().any(|a| a.contains("QFactor 1.5"))); + } + + #[test] + fn test_load_presets_xhigh_preset_correct() { + let config = load_presets().expect("Failed to load presets"); + let xhigh = config.presets.get("xhigh").expect("xhigh preset missing"); + + assert_eq!(xhigh.tool, "gs"); + // xhigh uses aggressive QFactor and downsampling at 36 dpi + assert!(xhigh.args.iter().any(|a| a.contains("QFactor 2.4"))); + assert!(xhigh + .args + .iter() + .any(|a| a.contains("ColorImageResolution=36"))); + assert!(xhigh.args.iter().any(|a| a.contains("PDFSETTINGS=/screen"))); + 
}
+
+    #[test]
+    fn test_load_presets_all_have_required_gs_flags() {
+        let config = load_presets().expect("Failed to load presets");
+
+        for (name, preset) in &config.presets {
+            assert!(
+                preset.args.iter().any(|a| a == "-sDEVICE=pdfwrite"),
+                "Preset '{}' missing -sDEVICE=pdfwrite",
+                name
+            );
+            assert!(
+                preset.args.iter().any(|a| a == "-dNOPAUSE"),
+                "Preset '{}' missing -dNOPAUSE",
+                name
+            );
+            assert!(
+                preset.args.iter().any(|a| a == "-dBATCH"),
+                "Preset '{}' missing -dBATCH",
+                name
+            );
+        }
+    }
+
+    #[test]
+    fn test_load_presets_is_cached() {
+        // Call twice - should return same reference (OnceLock behavior)
+        let config1 = load_presets().expect("First load failed");
+        let config2 = load_presets().expect("Second load failed");
+
+        // Both should point to the same static reference
+        assert!(std::ptr::eq(config1, config2));
+    }
+}
diff --git a/crates/core/src/presets/mod.rs b/crates/core/src/presets/mod.rs
new file mode 100644
index 0000000..786ba4e
--- /dev/null
+++ b/crates/core/src/presets/mod.rs
@@ -0,0 +1,122 @@
+//! # Preset System
+//!
+//! The preset system allows users to apply predefined quality/compression settings
+//! to operations without specifying all the tool flags manually.
+//!
+//! ## Implementation
+//!
+//! Built-in presets are embedded as YAML and parsed once by
+//! [`loader::load_presets`], which caches the result for the process lifetime.
+//!
+//! Planned extension: user overrides loaded from
+//! `~/.config/forgekit/presets.yaml` and layered over the built-in defaults.
+//! See `model::PresetsConfig` for the configuration schema.
+
+pub mod loader;
+pub mod model;
+
+use crate::presets::loader::load_presets;
+
+/// Compression strategy configuration for different compression levels.
+#[derive(Debug, Clone)]
+pub struct CompressionStrategy {
+    /// Tool to use: "gs" (Ghostscript) or "qpdf"
+    pub tool: String,
+    /// Additional flags/arguments for the tool
+    pub flags: Vec<String>,
+}
+
+/// Get compression strategy for a given compression level.
+/// +/// Maps compression levels to presets defined in the YAML configuration. +/// Defaults to "standard" if the requested level is not found. +pub fn get_compression_strategy(level: &str) -> CompressionStrategy { + // Load presets (cached) + let config = match load_presets() { + Ok(c) => c, + Err(e) => { + // This should effectively never happen with embedded defaults + eprintln!("Failed to load presets: {}", e); + // Fallback emergency hardcoded standard + return CompressionStrategy { + tool: "gs".to_string(), + flags: vec!["-sDEVICE=pdfwrite".to_string(), "-dQUIET".to_string()], // Minimal fallback + }; + } + }; + + // Look up preset, default to "standard" + let preset = config + .presets + .get(level) + .or_else(|| config.presets.get("standard")) + .expect("Standard preset missing from defaults!"); + + CompressionStrategy { + tool: preset.tool.clone(), + flags: preset.args.clone(), + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_get_compression_strategy_light() { + let strategy = get_compression_strategy("light"); + assert_eq!(strategy.tool, "gs"); + assert!(strategy.flags.contains(&"-sDEVICE=pdfwrite".to_string())); + + // Assert no downsampling + assert!(strategy + .flags + .contains(&"-dDownsampleColorImages=false".to_string())); + + // Assert QFactor 0.15 (High Quality) + let param_str = strategy.flags.last().unwrap(); + assert!(param_str.contains("/QFactor 0.15")); + } + + #[test] + fn test_get_compression_strategy_standard() { + let strategy = get_compression_strategy("standard"); + assert_eq!(strategy.tool, "gs"); + + // Assert QFactor 0.5 (Standard) + let param_str = strategy.flags.last().unwrap(); + assert!(param_str.contains("/QFactor 0.5")); + } + + #[test] + fn test_get_compression_strategy_high() { + let strategy = get_compression_strategy("high"); + + // Assert QFactor 1.5 (Low Quality) + let param_str = strategy.flags.last().unwrap(); + assert!(param_str.contains("/QFactor 1.5")); + } + + #[test] + fn 
test_get_compression_strategy_unknown() { + let strategy = get_compression_strategy("unknown"); + // Unknown defaults to standard (0.5) + let param_str = strategy.flags.last().unwrap(); + assert!(param_str.contains("/QFactor 0.5")); + } + + #[test] + fn test_compression_strategy_base_flags() { + // All strategies should have the base Ghostscript flags + for level in &["light", "standard", "high"] { + let strategy = get_compression_strategy(level); + assert!(strategy.flags.contains(&"-sDEVICE=pdfwrite".to_string())); + assert!(strategy.flags.contains(&"-dQUIET".to_string())); + assert!(strategy + .flags + .contains(&"-dDownsampleColorImages=false".to_string())); + // Ensure -c and params are separate args + assert!(strategy.flags.contains(&"-c".to_string())); + } + } +} diff --git a/crates/core/src/presets/model.rs b/crates/core/src/presets/model.rs new file mode 100644 index 0000000..b4356e4 --- /dev/null +++ b/crates/core/src/presets/model.rs @@ -0,0 +1,111 @@ +use serde::Deserialize; +use std::collections::HashMap; + +/// Root configuration file structure. +#[derive(Debug, Deserialize)] +pub struct PresetsConfig { + /// Version of the config schema (e.g., 1). + pub version: u32, + /// Map of preset names to their definitions. + pub presets: HashMap, +} + +/// Definition of a single preset. +#[derive(Debug, Deserialize, Clone)] +pub struct PresetDefinition { + /// The tool to use for this preset (e.g., "gs", "qpdf"). + pub tool: String, + /// List of arguments/flags to pass to the tool. + /// Can include template placeholders in future, but for now exact flags. + pub args: Vec, + /// Optional description of what this preset does. 
+ pub description: Option<String>, +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_presets_config_deserialize_minimal() { + let yaml = r#" +version: 1 +presets: {} +"#; + let config: PresetsConfig = serde_yaml::from_str(yaml).unwrap(); + assert_eq!(config.version, 1); + assert!(config.presets.is_empty()); + } + + #[test] + fn test_presets_config_deserialize_with_preset() { + let yaml = r#" +version: 2 +presets: + my-preset: + tool: qpdf + args: + - "--linearize" + - "--compress-streams=y" + description: "Test preset" +"#; + let config: PresetsConfig = serde_yaml::from_str(yaml).unwrap(); + assert_eq!(config.version, 2); + assert_eq!(config.presets.len(), 1); + + let preset = config.presets.get("my-preset").unwrap(); + assert_eq!(preset.tool, "qpdf"); + assert_eq!(preset.args.len(), 2); + assert_eq!(preset.args[0], "--linearize"); + assert_eq!(preset.description, Some("Test preset".to_string())); + } + + #[test] + fn test_preset_definition_deserialize_without_description() { + let yaml = r#" +tool: gs +args: + - "-dNOPAUSE" +"#; + let preset: PresetDefinition = serde_yaml::from_str(yaml).unwrap(); + assert_eq!(preset.tool, "gs"); + assert_eq!(preset.args, vec!["-dNOPAUSE"]); + assert!(preset.description.is_none()); + } + + #[test] + fn test_preset_definition_clone() { + let preset = PresetDefinition { + tool: "gs".to_string(), + args: vec!["-dBATCH".to_string()], + description: Some("Cloneable".to_string()), + }; + + let cloned = preset.clone(); + assert_eq!(cloned.tool, preset.tool); + assert_eq!(cloned.args, preset.args); + assert_eq!(cloned.description, preset.description); + } + + #[test] + fn test_presets_config_multiple_presets() { + let yaml = r#" +version: 1 +presets: + light: + tool: gs + args: ["-q"] + heavy: + tool: gs + args: ["-q", "-dCompressFonts=true"] + description: "Heavy compression" +"#; + let config: PresetsConfig = serde_yaml::from_str(yaml).unwrap(); + assert_eq!(config.presets.len(), 2); + 
assert!(config.presets.contains_key("light")); + assert!(config.presets.contains_key("heavy")); + + let heavy = config.presets.get("heavy").unwrap(); + assert_eq!(heavy.args.len(), 2); + } +} diff --git a/crates/core/src/tools/gs.rs b/crates/core/src/tools/gs.rs new file mode 100644 index 0000000..9630b19 --- /dev/null +++ b/crates/core/src/tools/gs.rs @@ -0,0 +1,144 @@ +//! # Ghostscript (gs) Tool Adapter +//! +//! Ghostscript is a command-line tool for PDF processing. We use it for: +//! - Compressing/recompressing PDFs with actual image downsampling +//! - PDF format conversion +//! +//! ## Why Ghostscript? +//! +//! Unlike tools that only optimize PDF structure, Ghostscript +//! actually recompresses images using direct JPEG Quality Factor (QFactor) +//! control without expensive downsampling. This achieves high speeds (~1s) while +//! maintaining file size differentiation: +//! - Light: High quality (QFactor 0.15) +//! - Standard: Medium quality (QFactor 0.50) +//! - High: Low quality (QFactor 1.50) +//! +//! ## Minimum Version +//! +//! Ghostscript 9.50+ is recommended. Older versions may work but aren't tested. + +use std::path::{Path, PathBuf}; +use std::process::Command; + +use crate::tools::{Tool, ToolConfig, ToolInfo}; +use crate::utils::error::{ForgeKitError, Result}; +use crate::utils::platform::ToolInstallHints; + +/// Ghostscript tool adapter. +/// +/// Implements the `Tool` trait for Ghostscript, handling detection, version checking, +/// and command construction for PDF compression. 
+pub struct GsTool; + +impl Tool for GsTool { + fn name(&self) -> &'static str { + "gs" + } + + fn probe(&self, config: &ToolConfig) -> Result<ToolInfo> { + // Check override path first + if let Some(ref path) = config.override_path { + if path.exists() { + let version = self.version(path)?; + return Ok(ToolInfo { + path: path.clone(), + version, + available: true, + }); + } + } + + // Probe PATH - try 'gs' first, then 'gswin64c' on Windows + let binary_names = if cfg!(target_os = "windows") { + vec!["gswin64c", "gswin32c", "gs"] + } else { + vec!["gs"] + }; + + for binary_name in binary_names { + let which_output = if cfg!(target_os = "windows") { + Command::new("where").arg(binary_name).output() + } else { + Command::new("which").arg(binary_name).output() + }; + + if let Ok(output) = which_output { + if output.status.success() { + let path_str = String::from_utf8_lossy(&output.stdout) + .lines() + .next() + .unwrap_or("") + .trim() + .to_string(); + + if !path_str.is_empty() { + let path = PathBuf::from(&path_str); + if let Ok(version) = self.version(&path) { + return Ok(ToolInfo { + path, + version, + available: true, + }); + } + } + } + } + } + + Err(ForgeKitError::ToolNotFound { + tool: "gs".to_string(), + hint: ToolInstallHints::for_tool("gs"), + }) + } + + fn version(&self, path: &Path) -> Result<String> { + let output = Command::new(path) + .arg("--version") + .output() + .map_err(|e| ForgeKitError::Other(anyhow::anyhow!("Failed to run gs: {}", e)))?; + + if !output.status.success() { + return Err(ForgeKitError::Other(anyhow::anyhow!("gs --version failed"))); + } + + let version = String::from_utf8_lossy(&output.stdout) + .lines() + .next() + .unwrap_or("") + .to_string(); + + Ok(version.trim().to_string()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_gs_name() { + let tool = GsTool; + assert_eq!(tool.name(), "gs"); + } + + #[test] + fn test_gs_probe() { + let tool = GsTool; + let config = ToolConfig::default(); + // This will only pass if gs is 
installed + let result = tool.probe(&config); + // We don't assert success here since gs may not be installed in test environment + // Just verify it doesn't panic + match result { + Ok(info) => { + assert!(info.available); + assert!(!info.version.is_empty()); + } + Err(ForgeKitError::ToolNotFound { .. }) => { + // Expected if gs is not installed + } + Err(e) => panic!("Unexpected error: {:?}", e), + } + } +} diff --git a/crates/core/src/tools/mod.rs b/crates/core/src/tools/mod.rs index 5a04148..7ea9205 100644 --- a/crates/core/src/tools/mod.rs +++ b/crates/core/src/tools/mod.rs @@ -1,3 +1,4 @@ +pub mod gs; pub mod qpdf; pub mod trait_def; diff --git a/crates/core/src/tools/qpdf.rs b/crates/core/src/tools/qpdf.rs index 7a8f5e8..9d178d3 100644 --- a/crates/core/src/tools/qpdf.rs +++ b/crates/core/src/tools/qpdf.rs @@ -115,6 +115,36 @@ impl Tool for QpdfTool { } } +impl QpdfTool { + /// Get the number of pages in a PDF file. + pub fn get_page_count(&self, tool_path: &Path, pdf_path: &Path) -> Result<usize> { + let output = Command::new(tool_path) + .arg("--show-npages") + .arg(pdf_path) + .output() + .map_err(|e| ForgeKitError::ProcessingFailed { + tool: "qpdf".to_string(), + stderr: format!("Failed to run qpdf --show-npages: {}", e), + })?; + + if !output.status.success() { + let stderr = String::from_utf8_lossy(&output.stderr); + return Err(ForgeKitError::ProcessingFailed { + tool: "qpdf".to_string(), + stderr: stderr.to_string(), + }); + } + + let count_str = String::from_utf8_lossy(&output.stdout).trim().to_string(); + count_str.parse::<usize>().map_err(|_| { + ForgeKitError::Other(anyhow::anyhow!( + "Failed to parse page count from qpdf output: '{}'", + count_str + )) + }) + } +} + #[cfg(test)] mod tests { use super::*; @@ -144,4 +174,80 @@ mod tests { Err(e) => panic!("Unexpected error: {:?}", e), } } + + #[test] + fn test_get_page_count_with_empty_pdf() { + let tool = QpdfTool; + let config = ToolConfig::default(); + + // Skip test if qpdf is not installed + let tool_info = 
match tool.probe(&config) { + Ok(info) => info, + Err(ForgeKitError::ToolNotFound { .. }) => { + println!("Skipping test_get_page_count_with_empty_pdf: qpdf not installed"); + return; + } + Err(e) => panic!("Unexpected error probing qpdf: {:?}", e), + }; + + // Use qpdf to create a valid empty PDF (0 pages) + let temp_dir = std::env::temp_dir(); + let temp_pdf = temp_dir.join("forgekit_test_page_count_empty.pdf"); + + // Create a valid 0-page PDF using qpdf --empty + let create_result = Command::new(&tool_info.path) + .arg("--empty") + .arg(&temp_pdf) + .output(); + + match create_result { + Ok(output) if output.status.success() => { + // PDF created successfully, now test page count + let result = tool.get_page_count(&tool_info.path, &temp_pdf); + + // Cleanup + let _ = std::fs::remove_file(&temp_pdf); + + // Verify result - qpdf --empty creates a PDF with 0 pages + match result { + Ok(count) => assert_eq!(count, 0, "Expected 0 pages in empty PDF"), + Err(e) => panic!("Failed to get page count: {:?}", e), + } + } + _ => { + // Cleanup and skip if we can't create the test PDF + let _ = std::fs::remove_file(&temp_pdf); + println!("Skipping test: couldn't create test PDF with qpdf --empty"); + } + } + } + + #[test] + fn test_get_page_count_nonexistent_file() { + let tool = QpdfTool; + let config = ToolConfig::default(); + + // Skip test if qpdf is not installed + let tool_info = match tool.probe(&config) { + Ok(info) => info, + Err(ForgeKitError::ToolNotFound { .. }) => { + println!("Skipping test_get_page_count_nonexistent_file: qpdf not installed"); + return; + } + Err(e) => panic!("Unexpected error probing qpdf: {:?}", e), + }; + + let nonexistent = PathBuf::from("/nonexistent/file.pdf"); + let result = tool.get_page_count(&tool_info.path, &nonexistent); + + // Should fail with ProcessingFailed error + assert!(result.is_err()); + match result { + Err(ForgeKitError::ProcessingFailed { tool, .. 
}) => { + assert_eq!(tool, "qpdf"); + } + Err(e) => panic!("Expected ProcessingFailed error, got: {:?}", e), + Ok(_) => panic!("Expected error for nonexistent file"), + } + } } diff --git a/crates/core/src/utils/pages.rs b/crates/core/src/utils/pages.rs index cf04e62..90e816e 100644 --- a/crates/core/src/utils/pages.rs +++ b/crates/core/src/utils/pages.rs @@ -25,7 +25,7 @@ use std::fmt; /// A page specification for selecting pages from a document. /// /// Pages are 1-indexed (first page is 1, not 0). This matches how users -/// think about pages and how most tools (qpdf, pdfcpu) work. +/// think about pages and how most PDF tools work. #[derive(Debug, Clone, PartialEq, Eq)] pub enum PageSpec { /// Single page number (1-indexed, e.g., `Page(5)` means page 5). diff --git a/crates/core/src/utils/platform.rs b/crates/core/src/utils/platform.rs index 7bbfd0d..d5324ad 100644 --- a/crates/core/src/utils/platform.rs +++ b/crates/core/src/utils/platform.rs @@ -121,7 +121,22 @@ impl ToolInstallHints { match tool { "qpdf" => platform.install_hint("qpdf"), - "pdfcpu" => platform.install_hint("pdfcpu"), + "gs" | "ghostscript" => match platform { + Platform::MacOS => "brew install ghostscript".to_string(), + Platform::Windows => "winget install ArtifexSoftware.GhostScript".to_string(), + Platform::Linux => { + let distro = detect_linux_distro(); + match distro { + LinuxDistro::Debian => "sudo apt install ghostscript".to_string(), + LinuxDistro::Fedora => "sudo dnf install ghostscript".to_string(), + LinuxDistro::Arch => "sudo pacman -S ghostscript".to_string(), + LinuxDistro::Unknown => { + "Install ghostscript using your package manager".to_string() + } + } + } + Platform::Unknown => "Install ghostscript using your package manager".to_string(), + }, "tesseract" => match platform { Platform::MacOS => "brew install tesseract".to_string(), Platform::Windows => "winget install tesseract-ocr".to_string(), diff --git a/crates/core/src/utils/temp.rs b/crates/core/src/utils/temp.rs 
index 4eca226..c961cca 100644 --- a/crates/core/src/utils/temp.rs +++ b/crates/core/src/utils/temp.rs @@ -89,16 +89,16 @@ pub fn create_temp_file_near(path: &Path, prefix: &str, suffix: &str) -> Result< /// ``` /// use forgekit_core::utils::temp::atomic_write; /// use forgekit_core::utils::error::Result; -/// use std::path::Path; /// /// # fn main() -> Result<()> { -/// let target_path = Path::new("/tmp/output.txt"); -/// atomic_write(target_path, |temp_path| { +/// let target_path = std::env::temp_dir().join("forgekit_test_output.txt"); +/// atomic_write(&target_path, |temp_path| { /// // Write your data to temp_path /// std::fs::write(temp_path, b"hello world")?; /// Ok(()) /// })?; /// // target_path now exists with the data, or an error was returned +/// # std::fs::remove_file(&target_path).ok(); // cleanup /// # Ok(()) /// # } /// ``` diff --git a/crates/core/tests/integration_test.rs b/crates/core/tests/integration_test.rs index 08c6fd6..e031fc6 100644 --- a/crates/core/tests/integration_test.rs +++ b/crates/core/tests/integration_test.rs @@ -53,23 +53,29 @@ fn test_json_complete_format() { #[test] fn test_tool_probe_with_override_path() { - // Test that override_path takes precedence over PATH + // Test that override_path that doesn't exist falls back to PATH + // (This is the designed fallback behavior) let tool = QpdfTool; let config = ToolConfig { override_path: Some(PathBuf::from("/nonexistent/qpdf")), }; let result = tool.probe(&config); - // Should fail because the override path doesn't exist - assert!(result.is_err()); - - // Verify it's a ToolNotFound error with a hint - if let Err(ForgeKitError::ToolNotFound { hint, .. 
}) = result { - assert!(!hint.is_empty()); - assert!(hint.contains("qpdf") || hint.contains("install")); - } else { - // If qpdf is not installed, we might get a different error - // That's okay - the important thing is we tested the override path logic + // Result depends on whether qpdf is installed on the system + // If qpdf is on PATH, probe succeeds (fallback worked) + // If qpdf is not on PATH, probe fails + match result { + Ok(info) => { + // qpdf is installed - fallback to PATH worked + assert!(info.available); + // The path should NOT be the nonexistent override path + assert_ne!(info.path, PathBuf::from("/nonexistent/qpdf")); + } + Err(ForgeKitError::ToolNotFound { hint, .. }) => { + // qpdf not installed - that's fine + assert!(!hint.is_empty()); + } + Err(e) => panic!("Unexpected error type: {:?}", e), } } @@ -123,7 +129,7 @@ fn test_tool_install_hints_platform_specific() { // Test that install hints are platform-specific let tools = vec![ "qpdf", - "pdfcpu", + "gs", "tesseract", "ocrmypdf", "ffmpeg", @@ -288,7 +294,7 @@ fn test_tool_version_parsing() { fn test_platform_install_hints_bulk() { // Test bulk install hints for multiple tools let platform = detect_platform(); - let tools = vec!["qpdf", "pdfcpu", "tesseract"]; + let tools = vec!["qpdf", "gs", "tesseract"]; let bulk_hint = platform.install_hints(&tools); assert!(!bulk_hint.is_empty()); diff --git a/packaging/README.md b/packaging/README.md index 8922568..e9f584a 100644 --- a/packaging/README.md +++ b/packaging/README.md @@ -10,25 +10,25 @@ all required external tools are automatically installed as dependencies. 
- **File**: `debian/control` - **Usage**: Included in `.deb` package metadata -- **Dependencies**: qpdf, pdfcpu, tesseract-ocr, ffmpeg, libvips-tools, libimage-exiftool-perl, python3-pip +- **Dependencies**: qpdf, ghostscript, tesseract-ocr, ffmpeg, libvips-tools, libimage-exiftool-perl, python3-pip ### Fedora/RHEL/CentOS (`.rpm`) - **File**: `rpm/forgekit.spec` - **Usage**: Used to build `.rpm` packages with `rpmbuild` -- **Dependencies**: qpdf, pdfcpu, tesseract, ffmpeg, libvips, perl-Image-ExifTool, python3-pip +- **Dependencies**: qpdf, ghostscript, tesseract, ffmpeg, libvips, perl-Image-ExifTool, python3-pip ### macOS (Homebrew) - **File**: `homebrew/forgekit.rb` - **Usage**: Homebrew formula file -- **Dependencies**: qpdf, pdfcpu, tesseract, ffmpeg, libvips, exiftool, python@3 +- **Dependencies**: qpdf, ghostscript, tesseract, ffmpeg, libvips, exiftool, python@3 ### Windows (winget) - **File**: `winget/forgekit.yaml` - **Usage**: winget manifest for Windows Package Manager -- **Dependencies**: qpdf.qpdf, pdfcpu.pdfcpu, tesseract-ocr, Gyan.FFmpeg, libvips.libvips, exiftool.exiftool, Python.Python.3 +- **Dependencies**: qpdf.qpdf, ArtifexSoftware.GhostScript, tesseract-ocr, Gyan.FFmpeg, libvips.libvips, exiftool.exiftool, Python.Python.3 ## Building Packages diff --git a/packaging/debian/control b/packaging/debian/control index 14e35cc..313b5d4 100644 --- a/packaging/debian/control +++ b/packaging/debian/control @@ -7,7 +7,7 @@ Version: 0.0.3 Section: utils Priority: optional Architecture: amd64 -Depends: qpdf, pdfcpu, tesseract-ocr, ffmpeg, libvips-tools, libimage-exiftool-perl, python3, python3-pip +Depends: qpdf, ghostscript, tesseract-ocr, ffmpeg, libvips-tools, libimage-exiftool-perl, python3, python3-pip Description: Local-first media and PDF toolkit ForgeKit is a fast, privacy-focused toolkit for everyday media and PDF tasks. 
It provides a unified CLI for operations like PDF merging, image conversion, diff --git a/packaging/homebrew/forgekit.rb b/packaging/homebrew/forgekit.rb index dc14d81..859141a 100644 --- a/packaging/homebrew/forgekit.rb +++ b/packaging/homebrew/forgekit.rb @@ -11,7 +11,7 @@ class Forgekit < Formula depends_on "rust" => :build depends_on "qpdf" - depends_on "pdfcpu" + depends_on "ghostscript" depends_on "tesseract" depends_on "ffmpeg" depends_on "libvips" diff --git a/packaging/rpm/forgekit.spec b/packaging/rpm/forgekit.spec index a3c5a21..89144af 100644 --- a/packaging/rpm/forgekit.spec +++ b/packaging/rpm/forgekit.spec @@ -12,7 +12,7 @@ Source0: %{name}-%{version}.tar.gz BuildRequires: rust Requires: qpdf -Requires: pdfcpu +Requires: ghostscript Requires: tesseract Requires: ffmpeg Requires: libvips diff --git a/packaging/winget/forgekit.yaml b/packaging/winget/forgekit.yaml index f5f2cd9..a52c716 100644 --- a/packaging/winget/forgekit.yaml +++ b/packaging/winget/forgekit.yaml @@ -23,7 +23,7 @@ Installers: ManifestType: version Dependencies: - PackageIdentifier: qpdf.qpdf - - PackageIdentifier: pdfcpu.pdfcpu + - PackageIdentifier: ArtifexSoftware.GhostScript - PackageIdentifier: tesseract-ocr - PackageIdentifier: Gyan.FFmpeg - PackageIdentifier: libvips.libvips