diff --git a/README.md b/README.md index dcbaae2..181fc77 100644 --- a/README.md +++ b/README.md @@ -110,6 +110,19 @@ const pdfBytes2 = convertToPdf(xlsxBytes, "xlsx"); Available functions: `convertToPdf(data, format)`, `convertDocxToPdf(data)`, `convertPptxToPdf(data)`, `convertXlsxToPdf(data)`. +Browser upload + preview demo: + +```sh +cd crates/office2pdf +wasm-pack build --target web --features wasm +python3 -m http.server 8000 +# open http://localhost:8000/examples/wasm-web/index.html +``` + +Demo source files: +- `crates/office2pdf/examples/wasm-web/index.html` +- `crates/office2pdf/examples/wasm-web/app.js` + ## CLI Options | Flag | Description | diff --git a/crates/office2pdf/Cargo.toml b/crates/office2pdf/Cargo.toml index 6b5cde9..674fcfe 100644 --- a/crates/office2pdf/Cargo.toml +++ b/crates/office2pdf/Cargo.toml @@ -7,45 +7,79 @@ license.workspace = true repository.workspace = true description = "Convert DOCX, XLSX, and PPTX files to PDF using pure Rust" readme = "../../README.md" -keywords = ["pdf", "docx", "xlsx", "pptx", "converter"] -categories = ["text-processing"] +keywords = [ + "pdf", + "docx", + "xlsx", + "pptx", + "converter", +] +categories = [ + "text-processing", +] [features] -wasm = ["wasm-bindgen"] -pdf-ops = ["lopdf"] -typescript = ["ts-rs"] +wasm = [ + "wasm-bindgen", + "console_error_panic_hook", +] +pdf-ops = [ + "lopdf", +] +typescript = [ + "ts-rs", +] [dependencies] thiserror = "2" lopdf = { version = "0.39", optional = true } typst = "0.14" typst-pdf = "0.14" -typst-kit = { version = "0.14", default-features = false, features = ["fonts", "embed-fonts"] } +typst-kit = { version = "0.14", default-features = false, features = [ + "fonts", + "embed-fonts", +] } comemo = "0.5" docx-rs = "0.4" serde = "1" serde_json = "1" -zip = { version = "0.6", default-features = false, features = ["deflate"] } +zip = { version = "0.6", default-features = false, features = [ + "deflate", +] } quick-xml = "0.38" umya-spreadsheet = "2" unicode-normalization = "0.1" image = "0.25" tracing = "0.1" wasm-bindgen = { version = "0.2", optional = true } +console_error_panic_hook = { version = "0.1", optional = true } ts-rs = { version = "12", optional = true } [target.'cfg(target_arch = "wasm32")'.dependencies] -getrandom_02 = { package = "getrandom", version = "0.2", features = ["js"] } -getrandom = { version = "0.3", features = ["wasm_js"] } +getrandom_02 = { package = "getrandom", version = "0.2", features = [ + "js", +] } +getrandom = { version = "0.3", features = [ + "wasm_js", +] } +js-sys = "0.3" [target.'cfg(target_arch = "wasm32")'.dev-dependencies] wasm-bindgen-test = "0.3" [dev-dependencies] -criterion = { version = "0.5", features = ["html_reports"] } +criterion = { version = "0.5", features = [ + "html_reports", +] } paste = "1" pdf-extract = "0.10" [[bench]] name = "conversion" harness = false + +[lib] +crate-type = [ + "cdylib", + "rlib", +] diff --git a/crates/office2pdf/examples/wasm-web/.gitignore b/crates/office2pdf/examples/wasm-web/.gitignore new file mode 100644 index 0000000..01d0a08 --- /dev/null +++ b/crates/office2pdf/examples/wasm-web/.gitignore @@ -0,0 +1 @@ +pkg/ diff --git a/crates/office2pdf/examples/wasm-web/app.js b/crates/office2pdf/examples/wasm-web/app.js new file mode 100644 index 0000000..8ac58c8 --- /dev/null +++ b/crates/office2pdf/examples/wasm-web/app.js @@ -0,0 +1,128 @@ +import init, { convertToPdf } from "../../pkg/office2pdf.js"; + +const fileInput = document.getElementById("fileInput"); +const formatSelect = document.getElementById("formatSelect"); +const convertButton = document.getElementById("convertButton"); +const status = document.getElementById("status"); +const viewer = document.getElementById("viewer"); +const previewFrame = document.getElementById("previewFrame"); +const downloadLink = document.getElementById("downloadLink"); + +const formatByExtension = new Map([ + ["docx", "docx"], + ["pptx", "pptx"], + ["xlsx", "xlsx"], +]); + +let wasmInitPromise = null; +let pdfObjectUrl = null; +let wasmUrl = ""; + +function setStatus(message, isError = false) { + status.textContent = message; + status.style.color = isError ? "#b91c1c" : "#64748b"; +} + +function detectFormat(fileName) { + const extension = fileName.split(".").pop()?.toLowerCase() ?? ""; + return formatByExtension.get(extension); +} + +function getSelectedFormat(file) { + if (formatSelect.value !== "auto") { + return formatSelect.value; + } + return detectFormat(file.name) ?? null; +} + +function getPdfFileName(inputFileName) { + const baseName = inputFileName.replace(/\.[^.]+$/, ""); + const safeName = baseName.length > 0 ? baseName : "converted"; + return `${safeName}.pdf`; +} + +async function ensureWasmReady() { + if (wasmInitPromise === null) { + if (window.location.protocol === "file:") { + throw new Error( + "Do not open this page with file://. Start a local server and use http://localhost (for example: python3 -m http.server).", + ); + } + + wasmInitPromise = (async () => { + const resolvedWasmUrl = new URL("../../pkg/office2pdf_bg.wasm", import.meta.url); + wasmUrl = resolvedWasmUrl.href; + setStatus(`Loading WASM module: ${resolvedWasmUrl.pathname}`); + + const response = await fetch(resolvedWasmUrl); + if (!response.ok) { + throw new Error(`Failed to fetch WASM (${response.status} ${response.statusText})`); + } + + const wasmBytes = await response.arrayBuffer(); + await init({ module_or_path: wasmBytes }); + setStatus("WASM module loaded."); + })(); + } + await wasmInitPromise; +} + +function updatePdfPreview(pdfBytes, sourceName) { + if (pdfObjectUrl) { + URL.revokeObjectURL(pdfObjectUrl); + } + + const pdfBlob = new Blob([pdfBytes], { type: "application/pdf" }); + pdfObjectUrl = URL.createObjectURL(pdfBlob); + + previewFrame.src = pdfObjectUrl; + downloadLink.href = pdfObjectUrl; + downloadLink.download = getPdfFileName(sourceName); + viewer.classList.add("visible"); +} + +async function handleConvertClick() { + const file = fileInput.files?.[0]; + if (!file) { + setStatus("Please select a DOCX, PPTX, or XLSX file first.", true); + return; + } + + const format = getSelectedFormat(file); + if (format === null) { + setStatus("Could not detect file format. Please choose it manually.", true); + return; + } + + convertButton.disabled = true; + setStatus(`Converting ${file.name} as ${format.toUpperCase()}...`); + + try { + await ensureWasmReady(); + const officeBytes = new Uint8Array(await file.arrayBuffer()); + const pdfBytes = convertToPdf(officeBytes, format); + updatePdfPreview(pdfBytes, file.name); + const wasmSourceHint = wasmUrl.length > 0 ? ` via ${wasmUrl}` : ""; + setStatus(`Done. Generated ${pdfBytes.length.toLocaleString()} bytes of PDF${wasmSourceHint}.`); + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + setStatus(`Conversion failed: ${message}`, true); + } finally { + convertButton.disabled = false; + } +} + +convertButton.addEventListener("click", () => { + void handleConvertClick(); +}); + +window.addEventListener("beforeunload", () => { + if (pdfObjectUrl) { + URL.revokeObjectURL(pdfObjectUrl); + } +}); + +void ensureWasmReady().catch((error) => { + const message = error instanceof Error ? error.message : String(error); + setStatus(`WASM preload failed: ${message}`, true); +}); diff --git a/crates/office2pdf/examples/wasm-web/index.html b/crates/office2pdf/examples/wasm-web/index.html new file mode 100644 index 0000000..ded50da --- /dev/null +++ b/crates/office2pdf/examples/wasm-web/index.html @@ -0,0 +1,168 @@ + + + + + + office2pdf WASM Browser Demo + + + +
+
+

office2pdf WASM Demo

+

Upload a DOCX, PPTX, or XLSX file and preview the converted PDF in the browser.

+
+ +
+
+ + + + + +
+ +
WASM not loaded yet.
+
+ +
+ Download PDF + +
+
+ + + + diff --git a/crates/office2pdf/src/lib.rs b/crates/office2pdf/src/lib.rs index f9ce3f3..dc43675 100644 --- a/crates/office2pdf/src/lib.rs +++ b/crates/office2pdf/src/lib.rs @@ -48,6 +48,8 @@ pub mod render; #[cfg(feature = "wasm")] pub mod wasm; +use std::time::Duration; +#[cfg(not(target_arch = "wasm32"))] use std::time::Instant; use config::{ConvertOptions, Format}; @@ -67,6 +69,29 @@ fn dedup_warnings(warnings: &mut Vec) { warnings.retain(|warning| seen.insert(warning.to_string())); } +#[cfg(not(target_arch = "wasm32"))] +type TimingMark = Instant; +#[cfg(target_arch = "wasm32")] +type TimingMark = (); + +#[cfg(not(target_arch = "wasm32"))] +fn start_timing() -> TimingMark { + Instant::now() +} + +#[cfg(target_arch = "wasm32")] +fn start_timing() -> TimingMark {} + +#[cfg(not(target_arch = "wasm32"))] +fn elapsed_since(start: TimingMark) -> Duration { + start.elapsed() +} + +#[cfg(target_arch = "wasm32")] +fn elapsed_since(_start: TimingMark) -> Duration { + Duration::ZERO +} + /// Extract a human-readable message from a caught panic payload. fn extract_panic_message(payload: &Box) -> String { if let Some(s) = payload.downcast_ref::() { @@ -178,7 +203,7 @@ pub fn convert_bytes( return convert_bytes_streaming_xlsx(data, options); } - let total_start = Instant::now(); + let total_start = start_timing(); let input_size_bytes = data.len() as u64; let parser: Box = match format { @@ -190,7 +215,7 @@ pub fn convert_bytes( // Stage 1: Parse (OOXML → IR) // Wrap with catch_unwind to convert upstream panics (e.g. unwrap() in // umya-spreadsheet / docx-rs) into ConvertError::Parse. - let parse_start = Instant::now(); + let parse_start = start_timing(); let parse_result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| parser.parse(data, options))); let (doc, mut warnings) = match parse_result { @@ -202,7 +227,7 @@ pub fn convert_bytes( ))); } }; - let parse_duration = parse_start.elapsed(); + let parse_duration = elapsed_since(parse_start); let page_count = doc.pages.len() as u32; #[cfg(not(target_arch = "wasm32"))] @@ -233,44 +258,56 @@ pub fn convert_bytes( ); dedup_warnings(&mut warnings); - // Stage 2: Codegen (IR → Typst) - let codegen_start = Instant::now(); - #[cfg(not(target_arch = "wasm32"))] - let output = render::typst_gen::generate_typst_with_options_and_font_context( - &doc, - options, - font_context.as_ref(), - )?; - #[cfg(target_arch = "wasm32")] - let output = render::typst_gen::generate_typst_with_options(&doc, options)?; - let codegen_duration = codegen_start.elapsed(); + // Stage 2+3: Codegen + Compile + let render_result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { + let codegen_start = start_timing(); + #[cfg(not(target_arch = "wasm32"))] + let output = render::typst_gen::generate_typst_with_options_and_font_context( + &doc, + options, + font_context.as_ref(), + )?; + #[cfg(target_arch = "wasm32")] + let output = render::typst_gen::generate_typst_with_options(&doc, options)?; + let codegen_duration = elapsed_since(codegen_start); - // Stage 3: Compile (Typst → PDF) - let compile_start = Instant::now(); - #[cfg(not(target_arch = "wasm32"))] - let pdf = render::pdf::compile_to_pdf( - &output.source, - &output.images, - options.pdf_standard, - font_context - .as_ref() - .map(|context| context.search_paths()) - .unwrap_or(&[]), - options.tagged, - options.pdf_ua, - )?; - #[cfg(target_arch = "wasm32")] - let pdf = render::pdf::compile_to_pdf( - &output.source, - &output.images, - options.pdf_standard, - &options.font_paths, - options.tagged, - options.pdf_ua, - )?; - let compile_duration = compile_start.elapsed(); - - let total_duration = total_start.elapsed(); + let compile_start = start_timing(); + #[cfg(not(target_arch = "wasm32"))] + let pdf = render::pdf::compile_to_pdf( + &output.source, + &output.images, + options.pdf_standard, + font_context + .as_ref() + .map(|context| context.search_paths()) + .unwrap_or(&[]), + options.tagged, + options.pdf_ua, + )?; + #[cfg(target_arch = "wasm32")] + let pdf = render::pdf::compile_to_pdf( + &output.source, + &output.images, + options.pdf_standard, + &options.font_paths, + options.tagged, + options.pdf_ua, + )?; + let compile_duration = elapsed_since(compile_start); + + Ok::<(Vec, Duration, Duration), ConvertError>((pdf, codegen_duration, compile_duration)) + })); + let (pdf, codegen_duration, compile_duration) = match render_result { + Ok(result) => result?, + Err(panic_info) => { + return Err(ConvertError::Render(format!( + "render pipeline panicked: {}", + extract_panic_message(&panic_info) + ))); + } + }; + + let total_duration = elapsed_since(total_start); let output_size_bytes = pdf.len() as u64; Ok(ConvertResult { @@ -298,7 +335,7 @@ fn convert_bytes_streaming_xlsx( data: &[u8], options: &ConvertOptions, ) -> Result { - let total_start = Instant::now(); + let total_start = start_timing(); let input_size_bytes = data.len() as u64; let chunk_size = options.streaming_chunk_size.unwrap_or(1000); @@ -306,7 +343,7 @@ fn convert_bytes_streaming_xlsx( // Stage 1: Parse into chunks // Wrap with catch_unwind (same rationale as convert_bytes). - let parse_start = Instant::now(); + let parse_start = start_timing(); let parse_result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { xlsx_parser.parse_streaming(data, options, chunk_size) })); @@ -319,7 +356,7 @@ fn convert_bytes_streaming_xlsx( ))); } }; - let parse_duration = parse_start.elapsed(); + let parse_duration = elapsed_since(parse_start); if chunk_docs.is_empty() { // Empty spreadsheet — produce a minimal empty PDF @@ -353,15 +390,15 @@ fn convert_bytes_streaming_xlsx( #[cfg(target_arch = "wasm32")] let pdf = render::pdf::compile_to_pdf(&output.source, &output.images, None, &[], false, false)?; - let total_duration = total_start.elapsed(); + let total_duration = elapsed_since(total_start); dedup_warnings(&mut warnings); return Ok(ConvertResult { pdf, warnings, metrics: Some(ConvertMetrics { parse_duration, - codegen_duration: std::time::Duration::ZERO, - compile_duration: std::time::Duration::ZERO, + codegen_duration: Duration::ZERO, + compile_duration: Duration::ZERO, total_duration, input_size_bytes, output_size_bytes: 0, @@ -372,8 +409,8 @@ fn convert_bytes_streaming_xlsx( // Stage 2+3: Codegen + Compile each chunk independently let mut all_pdfs: Vec> = Vec::with_capacity(chunk_docs.len()); - let mut codegen_duration_total = std::time::Duration::ZERO; - let mut compile_duration_total = std::time::Duration::ZERO; + let mut codegen_duration_total = Duration::ZERO; + let mut compile_duration_total = Duration::ZERO; let mut total_page_count = 0u32; #[cfg(not(target_arch = "wasm32"))] @@ -392,7 +429,7 @@ fn convert_bytes_streaming_xlsx( for chunk_doc in chunk_docs { total_page_count += chunk_doc.pages.len() as u32; - let codegen_start = Instant::now(); + let codegen_start = start_timing(); #[cfg(not(target_arch = "wasm32"))] let output = render::typst_gen::generate_typst_with_options_and_font_context( &chunk_doc, @@ -401,9 +438,9 @@ fn convert_bytes_streaming_xlsx( )?; #[cfg(target_arch = "wasm32")] let output = render::typst_gen::generate_typst_with_options(&chunk_doc, options)?; - codegen_duration_total += codegen_start.elapsed(); + codegen_duration_total += elapsed_since(codegen_start); - let compile_start = Instant::now(); + let compile_start = start_timing(); #[cfg(not(target_arch = "wasm32"))] let pdf = render::pdf::compile_to_pdf( &output.source, @@ -425,7 +462,7 @@ fn convert_bytes_streaming_xlsx( options.tagged, options.pdf_ua, )?; - compile_duration_total += compile_start.elapsed(); + compile_duration_total += elapsed_since(compile_start); all_pdfs.push(pdf); // chunk_doc and output are dropped here, freeing their memory @@ -439,7 +476,7 @@ fn convert_bytes_streaming_xlsx( pdf_ops::merge(&refs).map_err(|e| ConvertError::Render(format!("PDF merge failed: {e}")))? }; - let total_duration = total_start.elapsed(); + let total_duration = elapsed_since(total_start); let output_size_bytes = final_pdf.len() as u64; dedup_warnings(&mut warnings); diff --git a/crates/office2pdf/src/render/pdf.rs b/crates/office2pdf/src/render/pdf.rs index 694d0b0..5dc13a6 100644 --- a/crates/office2pdf/src/render/pdf.rs +++ b/crates/office2pdf/src/render/pdf.rs @@ -2,6 +2,7 @@ use std::collections::HashMap; #[cfg(not(target_arch = "wasm32"))] use std::path::PathBuf; use std::sync::{Arc, Mutex, OnceLock}; +#[cfg(not(target_arch = "wasm32"))] use std::time::{SystemTime, UNIX_EPOCH}; use typst::diag::FileResult; @@ -177,12 +178,19 @@ fn compile_to_pdf_inner( .map_err(|e| ConvertError::Render(format!("PDF standard configuration error: {e}")))? }; - // PDF/A and PDF/UA require a document creation timestamp - let needs_timestamp = pdf_standard.is_some() || pdf_ua; - let timestamp = if needs_timestamp { - Some(typst_pdf::Timestamp::new_utc(current_utc_datetime())) - } else { - None + #[cfg(target_arch = "wasm32")] + let timestamp = Some(typst_pdf::Timestamp::new_utc(current_utc_datetime())); + + // PDF/A and PDF/UA require a document creation timestamp. + // For regular PDFs on native, omit timestamp to keep behavior unchanged. + #[cfg(not(target_arch = "wasm32"))] + let timestamp = { + let needs_timestamp = pdf_standard.is_some() || pdf_ua; + if needs_timestamp { + Some(typst_pdf::Timestamp::new_utc(current_utc_datetime())) + } else { + None + } }; // Enable tagging when explicitly requested or when PDF/UA requires it @@ -205,6 +213,7 @@ fn compile_to_pdf_inner( /// Uses `std::time::SystemTime` to avoid an external chrono dependency. /// The civil date is computed from the Unix timestamp using Howard Hinnant's /// algorithm (). +#[cfg(not(target_arch = "wasm32"))] fn current_utc_datetime() -> Datetime { let duration = SystemTime::now() .duration_since(UNIX_EPOCH) @@ -234,6 +243,23 @@ fn current_utc_datetime() -> Datetime { .expect("valid date derived from SystemTime") } +/// Convert current JS `Date` to Typst UTC datetime on wasm32. +#[cfg(target_arch = "wasm32")] +fn current_utc_datetime() -> Datetime { + let now = js_sys::Date::new_0(); + let year: i32 = now.get_utc_full_year() as i32; + let month: u8 = now.get_utc_month() as u8 + 1; + let day: u8 = now.get_utc_date() as u8; + let hour: u8 = now.get_utc_hours() as u8; + let minute: u8 = now.get_utc_minutes() as u8; + let second: u8 = now.get_utc_seconds() as u8; + + Datetime::from_ymd_hms(year, month, day, hour, minute, second).unwrap_or_else(|| { + Datetime::from_ymd_hms(1970, 1, 1, 0, 0, 0) + .expect("epoch datetime should always be valid") + }) +} + /// Font data source: either a static reference to cached fonts or owned /// data for custom font path searches. enum FontSource { diff --git a/crates/office2pdf/src/wasm.rs b/crates/office2pdf/src/wasm.rs index 2e24ce9..a2e8dac 100644 --- a/crates/office2pdf/src/wasm.rs +++ b/crates/office2pdf/src/wasm.rs @@ -28,6 +28,12 @@ use wasm_bindgen::prelude::*; use crate::config::{ConvertOptions, Format}; use crate::convert_bytes; +/// Install panic hook so Rust panic messages are forwarded to JS console. +#[wasm_bindgen(start)] +pub fn wasm_start() { + console_error_panic_hook::set_once(); +} + /// Internal: convert with format string, returning a `String` error (testable on native). fn convert_to_pdf_inner(data: &[u8], format: &str) -> Result, String> { let fmt =