From 5e9d6add2cae8d18373180d1777678dbbbeb7bb0 Mon Sep 17 00:00:00 2001 From: Aiden Mitchell Date: Tue, 3 Feb 2026 10:03:34 -0800 Subject: [PATCH] feat: add PDF to Markdown conversion tool - Add pdfjs-dist dependency for PDF text extraction - Create /lib/pdf-to-markdown.ts with core extraction logic: - Heading detection based on font size ratios (configurable sensitivity) - List detection (bullets, numbered items, indentation-based) - Bold/italic detection from font names - Paragraph grouping by vertical gaps - Create /components/tools/pdf-to-markdown.tsx UI with: - Drag-and-drop file upload - Options panel (heading sensitivity, list detection, page breaks) - Preview (react-markdown) and Raw Markdown tabs - Download .md and copy to clipboard actions - Stats display (pages, words, headings, list items) - Register tool in Typography & Text category Co-Authored-By: Claude Opus 4.5 --- app/tools/[toolId]/page.tsx | 1 + components/tools/pdf-to-markdown.tsx | 346 ++++++++++++++++++++++++++ lib/pdf-to-markdown.ts | 352 +++++++++++++++++++++++++++ lib/tools.ts | 8 + package-lock.json | 289 +++++++++++++++++++++- package.json | 1 + 6 files changed, 996 insertions(+), 1 deletion(-) create mode 100644 components/tools/pdf-to-markdown.tsx create mode 100644 lib/pdf-to-markdown.ts diff --git a/app/tools/[toolId]/page.tsx b/app/tools/[toolId]/page.tsx index 5a03a1c..b3d8842 100644 --- a/app/tools/[toolId]/page.tsx +++ b/app/tools/[toolId]/page.tsx @@ -47,6 +47,7 @@ const toolComponents: Record = { "time-calc": dynamic(() => import("@/components/tools/time-calc").then(mod => mod.TimeCalcTool)), "unit-converter": dynamic(() => import("@/components/tools/unit-converter").then(mod => mod.UnitConverterTool)), "encoder": dynamic(() => import("@/components/tools/encoder").then(mod => mod.EncoderTool)), + "pdf-to-markdown": dynamic(() => import("@/components/tools/pdf-to-markdown").then(mod => mod.PdfToMarkdownTool)), }; interface ToolPageProps { diff --git a/components/tools/pdf-to-markdown.tsx b/components/tools/pdf-to-markdown.tsx new file mode 100644 index 0000000..4e346e8 --- /dev/null +++ b/components/tools/pdf-to-markdown.tsx @@ -0,0 +1,346 @@ +"use client"; + +import { useState, useCallback, useRef } from "react"; +import { Upload, Download, Copy, Check, FileText, Loader2 } from "lucide-react"; +import { Button } from "@/components/ui/button"; +import { Switch } from "@/components/ui/switch"; +import { Label } from "@/components/ui/label"; +import { Tabs, TabsContent, TabsList, TabsTrigger } from "@/components/ui/tabs"; +import { + Select, + SelectContent, + SelectItem, + SelectTrigger, + SelectValue, +} from "@/components/ui/select"; +import { cn } from "@/lib/utils"; +import ReactMarkdown from "react-markdown"; +import remarkGfm from "remark-gfm"; +import { + convertPdfToMarkdown, + ConversionOptions, + ConversionStats, +} from "@/lib/pdf-to-markdown"; + +export function PdfToMarkdownTool() { + const [file, setFile] = useState(null); + const [markdown, setMarkdown] = useState(""); + const [stats, setStats] = useState(null); + const [isConverting, setIsConverting] = useState(false); + const [progress, setProgress] = useState({ current: 0, total: 0 }); + const [copied, setCopied] = useState(false); + const [error, setError] = useState(null); + + // Options + const [headingSensitivity, setHeadingSensitivity] = useState< + ConversionOptions["headingSensitivity"] + >("medium"); + const [detectLists, setDetectLists] = useState(true); + const [addPageBreaks, setAddPageBreaks] = useState(true); + + const fileInputRef = useRef(null); + + const handleFileSelect = useCallback( + async (selectedFile: File) => { + if (!selectedFile.type.includes("pdf")) { + setError("Please select a PDF file"); + return; + } + + setFile(selectedFile); + setError(null); + setIsConverting(true); + setProgress({ current: 0, total: 0 }); + + try { + const arrayBuffer = await selectedFile.arrayBuffer(); + const result = await convertPdfToMarkdown( + arrayBuffer, + { + headingSensitivity, + detectLists, + addPageBreaks, + }, + (current, total) => { + setProgress({ current, total }); + } + ); + + setMarkdown(result.markdown); + setStats(result.stats); + } catch (err) { + console.error("PDF conversion failed:", err); + setError( + err instanceof Error ? err.message : "Failed to convert PDF" + ); + setMarkdown(""); + setStats(null); + } finally { + setIsConverting(false); + } + }, + [headingSensitivity, detectLists, addPageBreaks] + ); + + const handleDrop = useCallback( + (e: React.DragEvent) => { + e.preventDefault(); + const droppedFile = e.dataTransfer.files[0]; + if (droppedFile) { + handleFileSelect(droppedFile); + } + }, + [handleFileSelect] + ); + + const handleFileInputChange = useCallback( + (e: React.ChangeEvent) => { + const selectedFile = e.target.files?.[0]; + if (selectedFile) { + handleFileSelect(selectedFile); + } + }, + [handleFileSelect] + ); + + const reconvert = useCallback(async () => { + if (!file) return; + handleFileSelect(file); + }, [file, handleFileSelect]); + + const copyToClipboard = useCallback(async () => { + try { + await navigator.clipboard.writeText(markdown); + setCopied(true); + setTimeout(() => setCopied(false), 2000); + } catch (err) { + console.error("Failed to copy:", err); + } + }, [markdown]); + + const downloadMarkdown = useCallback(() => { + const blob = new Blob([markdown], { type: "text/markdown" }); + const url = URL.createObjectURL(blob); + const link = document.createElement("a"); + link.href = url; + const baseName = file?.name.replace(/\.pdf$/i, "") || "document"; + link.download = `${baseName}.md`; + link.click(); + URL.revokeObjectURL(url); + }, [markdown, file]); + + const clearFile = useCallback(() => { + setFile(null); + setMarkdown(""); + setStats(null); + setError(null); + if (fileInputRef.current) { + fileInputRef.current.value = ""; + } + }, []); + + return ( +
+ {/* Options Panel */} +
+
+ + +
+ +
+ + +
+ +
+ + +
+ + {file && ( + + )} +
+ + {/* Upload Zone */} + {!file && ( +
e.preventDefault()} + onClick={() => fileInputRef.current?.click()} + className={cn( + "border-2 border-dashed rounded-xl p-12 text-center transition-colors cursor-pointer", + "hover:border-primary/50 hover:bg-muted/30", + error && "border-destructive" + )} + > + + +

Drop a PDF here

+

+ or click to select a file +

+ {error && ( +

{error}

+ )} +
+ )} + + {/* Converting Progress */} + {isConverting && ( +
+ +

+ Converting page {progress.current} of {progress.total}... +

+
+ )} + + {/* Results */} + {file && !isConverting && markdown && ( +
+ {/* File Info & Stats */} +
+
+ + {file.name} +
+ + {stats && ( +
+ + {stats.pages} pages + + + {stats.words.toLocaleString()} words + + + {stats.headings} headings + + + {stats.lists} list items + +
+ )} + + +
+ + {/* Tabs: Preview / Raw */} + +
+ + Preview + Raw Markdown + + +
+ + +
+
+ + +
+
+ + {markdown} + +
+
+
+ + +