-
Notifications
You must be signed in to change notification settings - Fork 4
feature: added generate_pdf_from_html string alternative #1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
dbd35db
826c93c
919549d
795eb0d
870c7b1
bdca222
2bcace1
937b3fc
3caf785
af65779
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2,7 +2,6 @@ use std::collections::HashMap; | |
| use std::fmt::{Display, Formatter}; | ||
| use std::fs; | ||
| use std::path::{Path, PathBuf}; | ||
| use std::sync::atomic::{AtomicBool, Ordering}; | ||
|
|
||
| use base64::engine::general_purpose; | ||
| use base64::Engine; | ||
|
|
@@ -147,23 +146,19 @@ static BROWSER: Lazy<RwLock<Option<ChromiumInstance>>> = Lazy::new(|| RwLock::ne | |
| static TOKENS_AND_IMAGES_REGEX: Lazy<Regex> = Lazy::new(|| { | ||
| Regex::new(r#"(?:%%(?P<prop_name>.*)%%)|(?:<img[^>]*\ssrc="(?P<img_src>.*?)"[^>]*>)"#).unwrap() | ||
| }); | ||
|
|
||
/// Process-wide flag holding the value last passed to [`set_no_sandbox`].
/// Starts out as `false`.
static NO_SANDBOX: AtomicBool = AtomicBool::new(false);

/// Stores `val` in the process-wide [`NO_SANDBOX`] flag.
///
/// The flag is an independent boolean that guards no other data, so a relaxed
/// store is sufficient.
pub fn set_no_sandbox(val: bool) {
    NO_SANDBOX.store(val, Ordering::Relaxed);
}
|
|
||
| pub async fn generate_pdf( | ||
| pub async fn generate_pdf_from_html( | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. A
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would appreciate it if you could |
||
| html: String, | ||
| template: Template, | ||
| assets: &[Asset], | ||
| print_options: &PrintOptions, | ||
| ) -> Result<Vec<u8>, SimplePdfGeneratorError> { | ||
| let html = tokio::fs::read_to_string(template.html_path.clone()) | ||
| .await | ||
| .map_err(|e| { | ||
| SimplePdfGeneratorError::IoError(format!("Cannot read the html file: {}", e)) | ||
| })?; | ||
|
|
||
| let mut xpath_texts: Vec<String> = Vec::new(); | ||
| let html = TOKENS_AND_IMAGES_REGEX | ||
| .replace_all(&html, |caps: ®ex::Captures| { | ||
|
|
@@ -172,45 +167,43 @@ pub async fn generate_pdf( | |
| let mut result = String::new(); | ||
|
|
||
| if let Some(prop_name) = prop_name { | ||
| if let Some(property) = template.properties.get(prop_name) { | ||
| if property.is_none { | ||
| xpath_texts.push(format!("text() = '{}'", prop_name)); | ||
| result = prop_name.to_string(); | ||
| } else { | ||
| result = html_escape::encode_text(&property.val).to_string() | ||
| } | ||
| } | ||
| } else if let Some(img_src) = img_src { | ||
| if img_src.starts_with("data:image") { | ||
| result = img_src.to_string(); | ||
| } else { | ||
| let mime_type = mime_guess::from_path(img_src).first_raw(); | ||
| if let Some(mime_type) = mime_type { | ||
| let mut img_src_path = Path::new(img_src).to_owned(); | ||
| if img_src_path.is_relative() { | ||
| img_src_path = template | ||
| .html_path | ||
| .parent() | ||
| .unwrap_or_else(|| Path::new("")) | ||
| .join(img_src_path) | ||
| .canonicalize() | ||
| .unwrap_or_else(|_| PathBuf::new()); | ||
| } | ||
|
|
||
| let img_data = fs::read(img_src_path).unwrap_or(Vec::new()); | ||
| let image_base64 = general_purpose::STANDARD.encode(img_data); | ||
| let new_src = format!("data:{};base64,{}", mime_type, image_base64); | ||
|
|
||
| result = caps.get(0).unwrap().as_str().replace(img_src, &new_src); | ||
| } else { | ||
| result = img_src.to_string(); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| result | ||
| }) | ||
| .to_string(); | ||
| if let Some(property) = template.properties.get(prop_name) { | ||
| if property.is_none { | ||
| xpath_texts.push(format!("text() = '{}'", prop_name)); | ||
| result = prop_name.to_string(); | ||
| } else { | ||
| result = html_escape::encode_text(&property.val).to_string() | ||
| } | ||
| } | ||
| } else if let Some(img_src) = img_src { | ||
| if img_src.starts_with("data:image") { | ||
| result = img_src.to_string(); | ||
| } else { | ||
| let mime_type = mime_guess::from_path(img_src).first_raw(); | ||
| if let Some(mime_type) = mime_type { | ||
| let mut img_src_path = Path::new(img_src).to_owned(); | ||
| if img_src_path.is_relative() { | ||
| img_src_path = template | ||
| .html_path | ||
| .parent() | ||
| .unwrap_or_else(|| Path::new("")) | ||
| .join(img_src_path) | ||
| .canonicalize() | ||
| .unwrap_or_else(|_| PathBuf::new()); | ||
| } | ||
|
|
||
| let img_data = fs::read(img_src_path).unwrap_or(Vec::new()); | ||
| let image_base64 = general_purpose::STANDARD.encode(img_data); | ||
| let new_src = format!("data:{};base64,{}", mime_type, image_base64); | ||
| result = caps.get(0).unwrap().as_str().replace(img_src, &new_src); | ||
| } else { | ||
| result = img_src.to_string(); | ||
| } | ||
| } | ||
| } | ||
| result | ||
| }) | ||
| .to_string(); | ||
|
|
||
| let browser = get_browser().await; | ||
| let browser_instance = browser | ||
|
|
@@ -257,25 +250,25 @@ pub async fn generate_pdf( | |
| })?; | ||
|
|
||
| if !template.tables.is_empty() { | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why you removed the
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That makes sense! I removed it to make it work in my tests, but forgot to readd it. Will be readding it |
||
| let table_generator_js: &'static str = include_str!("../assets/js/table-generator.js"); | ||
|
|
||
| let mut tables_data = "tablesData = {".to_string(); | ||
| for (table_name, mut table_data) in template.tables { | ||
| if table_data.is_empty() { | ||
| table_data = "[]".to_string(); | ||
| xpath_texts.push(format!("@items = '{}'", table_name)); | ||
| } | ||
|
|
||
| tables_data.push_str(&format!("{}:{},", table_name, table_data)); | ||
| } | ||
| tables_data.push('}'); | ||
|
|
||
| let table_generator_js = | ||
| table_generator_js.replacen("tablesData", &html_escape::encode_text(&tables_data), 1); | ||
| _ = page.evaluate(table_generator_js).await.map_err(|e| { | ||
| SimplePdfGeneratorError::BrowserError(format!("Cannot evaluate the js: {}", e)) | ||
| })?; | ||
| } | ||
| let table_generator_js: &'static str = include_str!("../assets/js/table-generator.js"); | ||
|
|
||
| let mut tables_data = "tablesData = {".to_string(); | ||
| for (table_name, mut table_data) in template.tables { | ||
| if table_data.is_empty() { | ||
| table_data = "[]".to_string(); | ||
| xpath_texts.push(format!("@items = '{}'", table_name)); | ||
| } | ||
|
|
||
| tables_data.push_str(&format!("{}:{},", table_name, table_data)); | ||
| } | ||
| tables_data.push('}'); | ||
|
|
||
| let table_generator_js = | ||
| table_generator_js.replacen("tablesData", &html_escape::encode_text(&tables_data), 1); | ||
| _ = page.evaluate(table_generator_js).await.map_err(|e| { | ||
| SimplePdfGeneratorError::BrowserError(format!("Cannot evaluate the js: {}", e)) | ||
| })?; | ||
| } | ||
|
|
||
| if !xpath_texts.is_empty() { | ||
| let xpath_expression = format!( | ||
|
|
@@ -310,6 +303,163 @@ pub async fn generate_pdf( | |
| .map_err(|e| SimplePdfGeneratorError::PdfError(format!("Cannot create the pdf: {}", e))) | ||
| } | ||
|
|
||
| pub async fn generate_pdf( | ||
| template: Template, | ||
| assets: &[Asset], | ||
| print_options: &PrintOptions, | ||
| ) -> Result<Vec<u8>, SimplePdfGeneratorError> { | ||
| let html = tokio::fs::read_to_string(template.html_path.clone()) | ||
| .await | ||
| .map_err(|e| { | ||
| SimplePdfGeneratorError::IoError(format!("Cannot read the html file: {}", e)) | ||
| })?; | ||
|
|
||
| let mut xpath_texts: Vec<String> = Vec::new(); | ||
| let html = TOKENS_AND_IMAGES_REGEX | ||
| .replace_all(&html, |caps: ®ex::Captures| { | ||
| let prop_name = caps.name("prop_name").map(|prop_name| prop_name.as_str()); | ||
| let img_src = caps.name("img_src").map(|img_src| img_src.as_str()); | ||
| let mut result = String::new(); | ||
|
|
||
| if let Some(prop_name) = prop_name { | ||
| if let Some(property) = template.properties.get(prop_name) { | ||
| if property.is_none { | ||
| xpath_texts.push(format!("text() = '{}'", prop_name)); | ||
| result = prop_name.to_string(); | ||
| } else { | ||
| result = html_escape::encode_text(&property.val).to_string() | ||
| } | ||
| } | ||
| } else if let Some(img_src) = img_src { | ||
| if img_src.starts_with("data:image") { | ||
| result = img_src.to_string(); | ||
| } else { | ||
| let mime_type = mime_guess::from_path(img_src).first_raw(); | ||
| if let Some(mime_type) = mime_type { | ||
| let mut img_src_path = Path::new(img_src).to_owned(); | ||
| if img_src_path.is_relative() { | ||
| img_src_path = template | ||
| .html_path | ||
| .parent() | ||
| .unwrap_or_else(|| Path::new("")) | ||
| .join(img_src_path) | ||
| .canonicalize() | ||
| .unwrap_or_else(|_| PathBuf::new()); | ||
| } | ||
|
|
||
| let img_data = fs::read(img_src_path).unwrap_or(Vec::new()); | ||
| let image_base64 = general_purpose::STANDARD.encode(img_data); | ||
| let new_src = format!("data:{};base64,{}", mime_type, image_base64); | ||
|
|
||
| result = caps.get(0).unwrap().as_str().replace(img_src, &new_src); | ||
| } else { | ||
| result = img_src.to_string(); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| result | ||
| }) | ||
| .to_string(); | ||
|
|
||
| let browser = get_browser().await; | ||
| let browser_instance = browser | ||
| .as_ref() | ||
| .ok_or(SimplePdfGeneratorError::BrowserError( | ||
| "Cannot create the browser".to_string(), | ||
| ))?; | ||
| let page = browser_instance | ||
| .browser | ||
| .new_page("about:blank") | ||
| .await | ||
| .map_err(|e| { | ||
| SimplePdfGeneratorError::BrowserError(format!("Cannot create the page: {}", e)) | ||
| })?; | ||
| page.set_content(html).await.map_err(|e| { | ||
| SimplePdfGeneratorError::BrowserError(format!("Cannot set the content: {}", e)) | ||
| })?; | ||
|
|
||
| let mut asset_content_futures = Vec::new(); | ||
| for asset in assets { | ||
| asset_content_futures.push(tokio::fs::read_to_string(asset.path.clone())); | ||
| } | ||
|
|
||
| let asset_contents = try_join_all(asset_content_futures) | ||
| .await | ||
| .map_err(|e| SimplePdfGeneratorError::IoError(format!("Cannot read the asset: {}", e)))?; | ||
| let mut inject_futures_css = Vec::new(); | ||
| let mut inject_futures_js = Vec::new(); | ||
| for (index, asset_content) in asset_contents.into_iter().enumerate() { | ||
| match assets[index].r#type { | ||
| AssetType::Style => { | ||
| inject_futures_css.push(inject_css(&page, asset_content)); | ||
| } | ||
| AssetType::Script => { | ||
| inject_futures_js.push(inject_js(&page, asset_content)); | ||
| } | ||
| } | ||
| } | ||
| try_join_all(inject_futures_css).await.map_err(|e| { | ||
| SimplePdfGeneratorError::BrowserError(format!("Cannot inject the css: {}", e)) | ||
| })?; | ||
| try_join_all(inject_futures_js).await.map_err(|e| { | ||
| SimplePdfGeneratorError::BrowserError(format!("Cannot inject the js: {}", e)) | ||
| })?; | ||
|
|
||
| if !template.tables.is_empty() { | ||
| let table_generator_js: &'static str = include_str!("../assets/js/table-generator.js"); | ||
|
|
||
| let mut tables_data = "tablesData = {".to_string(); | ||
| for (table_name, mut table_data) in template.tables { | ||
| if table_data.is_empty() { | ||
| table_data = "[]".to_string(); | ||
| xpath_texts.push(format!("@items = '{}'", table_name)); | ||
| } | ||
|
|
||
| tables_data.push_str(&format!("{}:{},", table_name, table_data)); | ||
| } | ||
| tables_data.push('}'); | ||
|
|
||
| let table_generator_js = | ||
| table_generator_js.replacen("tablesData", &html_escape::encode_text(&tables_data), 1); | ||
| _ = page.evaluate(table_generator_js).await.map_err(|e| { | ||
| SimplePdfGeneratorError::BrowserError(format!("Cannot evaluate the js: {}", e)) | ||
| })?; | ||
| } | ||
|
|
||
| if !xpath_texts.is_empty() { | ||
| let xpath_expression = format!( | ||
| "//*[not(self::script or self::style or self::title) and ({})]", | ||
| xpath_texts.join(" or ") | ||
| ); | ||
| let js_script = format!( | ||
| " | ||
| () => {{ | ||
| const xpathExpression = `{}`; | ||
| const result = document.evaluate(xpathExpression, document, null, XPathResult.UNORDERED_NODE_SNAPSHOT_TYPE, null); | ||
|
|
||
| for (let i = 0; i < result.snapshotLength; i++) {{ | ||
| const targetElement = result.snapshotItem(i); | ||
| targetElement.style.display = 'none'; | ||
| }} | ||
| }} | ||
| ", | ||
| xpath_expression | ||
| ); | ||
|
|
||
| _ = page.evaluate(js_script).await.map_err(|e| { | ||
| SimplePdfGeneratorError::BrowserError(format!( | ||
| "Cannot evaluate the xPath script: {}", | ||
| e | ||
| )) | ||
| })?; | ||
| } | ||
|
|
||
| page.pdf(print_options.into()) | ||
| .await | ||
| .map_err(|e| SimplePdfGeneratorError::PdfError(format!("Cannot create the pdf: {}", e))) | ||
| } | ||
|
|
||
| async fn inject_js(page: &Page, js: String) -> Result<EvaluationResult, CdpError> { | ||
| let script = format!( | ||
| "() => {{ | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You should also add a usage example and update the
test_suite as well. There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Makes sense! I will be sure to do that in due time