Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ Simple PDF Generator:

## Quick Start

In order to have a template you must create struct with `PdfTemplate` derive:
In order to have a template you must create a struct with the `PdfTemplate` or `PdfTemplateForHtml` derive:
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You should also add a usage example and update the test suite.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense! I will be sure to do that in due time


```rust
use simple_pdf_generator::{Asset, AssetType};
Expand Down
4 changes: 1 addition & 3 deletions simple_pdf_generator/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,7 @@ repository = "https://github.com/Massimiliano-solutiontech/simple_pdf_generator"
edition = "2021"

[dependencies]
chromiumoxide = { version = "0.5.1", features = [
"tokio-runtime",
], default-features = false }
chromiumoxide = { version = "0.5.1", features = ["tokio-runtime"], default-features = false }
base64 = "0.21.3"
futures = "0.3.28"
html-escape = "0.2.13"
Expand Down
282 changes: 216 additions & 66 deletions simple_pdf_generator/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ use std::collections::HashMap;
use std::fmt::{Display, Formatter};
use std::fs;
use std::path::{Path, PathBuf};
use std::sync::atomic::{AtomicBool, Ordering};

use base64::engine::general_purpose;
use base64::Engine;
Expand Down Expand Up @@ -147,23 +146,19 @@ static BROWSER: Lazy<RwLock<Option<ChromiumInstance>>> = Lazy::new(|| RwLock::ne
/// Matches the two constructs that are rewritten in the template HTML:
///
/// - `%%name%%` placeholders, whose name is captured as `prop_name`;
/// - `<img ... src="...">` tags, whose `src` value is captured as `img_src`.
///
/// The placeholder group is lazy (`.*?`) so that several placeholders on the same
/// line are matched one by one instead of as a single span running from the first
/// `%%` to the last `%%` of the line.
static TOKENS_AND_IMAGES_REGEX: Lazy<Regex> = Lazy::new(|| {
    Regex::new(r#"(?:%%(?P<prop_name>.*?)%%)|(?:<img[^>]*\ssrc="(?P<img_src>.*?)"[^>]*>)"#)
        .expect("TOKENS_AND_IMAGES_REGEX is a valid pattern")
});

/// Process-wide switch written by [`set_no_sandbox`]; starts out `false`.
static NO_SANDBOX: AtomicBool = AtomicBool::new(false);

/// Records `val` in the global [`NO_SANDBOX`] flag using relaxed ordering.
///
/// NOTE(review): presumably the flag is consulted when the browser is launched;
/// the reader is not visible in this excerpt, so confirm against the launcher.
pub fn set_no_sandbox(val: bool) {
    let flag = &NO_SANDBOX;
    flag.store(val, Ordering::Relaxed);
}

pub async fn generate_pdf(
pub async fn generate_pdf_from_html(
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A &str is more correct because the parameter isn't mutable. If you prefer you can leave it as is and I will change it before the release

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would appreciate it if you could

html: String,
template: Template,
assets: &[Asset],
print_options: &PrintOptions,
) -> Result<Vec<u8>, SimplePdfGeneratorError> {
let html = tokio::fs::read_to_string(template.html_path.clone())
.await
.map_err(|e| {
SimplePdfGeneratorError::IoError(format!("Cannot read the html file: {}", e))
})?;

let mut xpath_texts: Vec<String> = Vec::new();
let html = TOKENS_AND_IMAGES_REGEX
.replace_all(&html, |caps: &regex::Captures| {
Expand All @@ -172,45 +167,43 @@ pub async fn generate_pdf(
let mut result = String::new();

if let Some(prop_name) = prop_name {
if let Some(property) = template.properties.get(prop_name) {
if property.is_none {
xpath_texts.push(format!("text() = '{}'", prop_name));
result = prop_name.to_string();
} else {
result = html_escape::encode_text(&property.val).to_string()
}
}
} else if let Some(img_src) = img_src {
if img_src.starts_with("data:image") {
result = img_src.to_string();
} else {
let mime_type = mime_guess::from_path(img_src).first_raw();
if let Some(mime_type) = mime_type {
let mut img_src_path = Path::new(img_src).to_owned();
if img_src_path.is_relative() {
img_src_path = template
.html_path
.parent()
.unwrap_or_else(|| Path::new(""))
.join(img_src_path)
.canonicalize()
.unwrap_or_else(|_| PathBuf::new());
}

let img_data = fs::read(img_src_path).unwrap_or(Vec::new());
let image_base64 = general_purpose::STANDARD.encode(img_data);
let new_src = format!("data:{};base64,{}", mime_type, image_base64);

result = caps.get(0).unwrap().as_str().replace(img_src, &new_src);
} else {
result = img_src.to_string();
}
}
}

result
})
.to_string();
if let Some(property) = template.properties.get(prop_name) {
if property.is_none {
xpath_texts.push(format!("text() = '{}'", prop_name));
result = prop_name.to_string();
} else {
result = html_escape::encode_text(&property.val).to_string()
}
}
} else if let Some(img_src) = img_src {
if img_src.starts_with("data:image") {
result = img_src.to_string();
} else {
let mime_type = mime_guess::from_path(img_src).first_raw();
if let Some(mime_type) = mime_type {
let mut img_src_path = Path::new(img_src).to_owned();
if img_src_path.is_relative() {
img_src_path = template
.html_path
.parent()
.unwrap_or_else(|| Path::new(""))
.join(img_src_path)
.canonicalize()
.unwrap_or_else(|_| PathBuf::new());
}

let img_data = fs::read(img_src_path).unwrap_or(Vec::new());
let image_base64 = general_purpose::STANDARD.encode(img_data);
let new_src = format!("data:{};base64,{}", mime_type, image_base64);
result = caps.get(0).unwrap().as_str().replace(img_src, &new_src);
} else {
result = img_src.to_string();
}
}
}
result
})
.to_string();

let browser = get_browser().await;
let browser_instance = browser
Expand Down Expand Up @@ -257,25 +250,25 @@ pub async fn generate_pdf(
})?;

if !template.tables.is_empty() {
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why did you remove the tables feature if it is still present in the derive? I would like to keep this feature in order to have more coherent behavior across the library

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That makes sense! I removed it to make it work in my tests, but forgot to readd it. Will be readding it

let table_generator_js: &'static str = include_str!("../assets/js/table-generator.js");

let mut tables_data = "tablesData = {".to_string();
for (table_name, mut table_data) in template.tables {
if table_data.is_empty() {
table_data = "[]".to_string();
xpath_texts.push(format!("@items = '{}'", table_name));
}

tables_data.push_str(&format!("{}:{},", table_name, table_data));
}
tables_data.push('}');

let table_generator_js =
table_generator_js.replacen("tablesData", &html_escape::encode_text(&tables_data), 1);
_ = page.evaluate(table_generator_js).await.map_err(|e| {
SimplePdfGeneratorError::BrowserError(format!("Cannot evaluate the js: {}", e))
})?;
}
let table_generator_js: &'static str = include_str!("../assets/js/table-generator.js");

let mut tables_data = "tablesData = {".to_string();
for (table_name, mut table_data) in template.tables {
if table_data.is_empty() {
table_data = "[]".to_string();
xpath_texts.push(format!("@items = '{}'", table_name));
}

tables_data.push_str(&format!("{}:{},", table_name, table_data));
}
tables_data.push('}');

let table_generator_js =
table_generator_js.replacen("tablesData", &html_escape::encode_text(&tables_data), 1);
_ = page.evaluate(table_generator_js).await.map_err(|e| {
SimplePdfGeneratorError::BrowserError(format!("Cannot evaluate the js: {}", e))
})?;
}

if !xpath_texts.is_empty() {
let xpath_expression = format!(
Expand Down Expand Up @@ -310,6 +303,163 @@ pub async fn generate_pdf(
.map_err(|e| SimplePdfGeneratorError::PdfError(format!("Cannot create the pdf: {}", e)))
}

/// Renders the HTML file referenced by `template` to a PDF and returns its bytes.
///
/// The function performs these steps in order:
///
/// 1. Reads the HTML file at `template.html_path`.
/// 2. Rewrites the HTML: every `%%name%%` token found in `template.properties` is
///    replaced by its HTML-escaped value (tokens with no matching property are
///    removed), and every `<img>` whose `src` has a recognisable MIME type gets that
///    `src` replaced by a base64 `data:` URI built from the file on disk.
/// 3. Opens a blank browser page, sets the rewritten HTML as its content and injects
///    `assets`; style sheets are awaited before scripts.
/// 4. If the template has tables, evaluates the bundled table generator script with
///    the tables data.
/// 5. Hides every element bound to a `None` property or to an empty table.
/// 6. Prints the page using `print_options`.
///
/// # Errors
///
/// - `SimplePdfGeneratorError::IoError` if the HTML file or an asset cannot be read.
/// - `SimplePdfGeneratorError::BrowserError` if the browser is unavailable, or the
///   page cannot be created, filled, or scripted.
/// - `SimplePdfGeneratorError::PdfError` if the page cannot be printed.
pub async fn generate_pdf(
    template: Template,
    assets: &[Asset],
    print_options: &PrintOptions,
) -> Result<Vec<u8>, SimplePdfGeneratorError> {
    let html = tokio::fs::read_to_string(&template.html_path)
        .await
        .map_err(|e| {
            SimplePdfGeneratorError::IoError(format!("Cannot read the html file: {}", e))
        })?;

    // XPath predicates selecting the elements that must be hidden after rendering.
    let mut xpath_texts: Vec<String> = Vec::new();
    let html = TOKENS_AND_IMAGES_REGEX
        .replace_all(&html, |caps: &regex::Captures| {
            if let Some(prop_name) = caps.name("prop_name").map(|m| m.as_str()) {
                return match template.properties.get(prop_name) {
                    // A `None` property leaves its bare name as text so that the
                    // element containing it can be located and hidden later on.
                    Some(property) if property.is_none => {
                        xpath_texts.push(format!("text() = '{}'", prop_name));
                        prop_name.to_string()
                    }
                    Some(property) => html_escape::encode_text(&property.val).to_string(),
                    None => String::new(),
                };
            }

            let img_src = match caps.name("img_src").map(|m| m.as_str()) {
                Some(img_src) => img_src,
                None => return String::new(),
            };

            // The regex matched the whole `<img ...>` tag, so whatever is returned
            // here replaces the entire tag, not just its `src` attribute.
            let whole_tag = caps.get(0).unwrap().as_str();

            // Already inlined: keep the tag exactly as written.
            if img_src.starts_with("data:image") {
                return whole_tag.to_string();
            }

            // Unknown MIME type: nothing can be inlined, keep the tag as written.
            let mime_type = match mime_guess::from_path(img_src).first_raw() {
                Some(mime_type) => mime_type,
                None => return whole_tag.to_string(),
            };

            let mut img_src_path = Path::new(img_src).to_owned();
            if img_src_path.is_relative() {
                // Relative sources are resolved against the HTML file's directory.
                img_src_path = template
                    .html_path
                    .parent()
                    .unwrap_or_else(|| Path::new(""))
                    .join(img_src_path)
                    .canonicalize()
                    .unwrap_or_else(|_| PathBuf::new());
            }

            // Best effort: an unreadable image yields an empty data URI rather than
            // failing the whole PDF generation.
            let img_data = fs::read(img_src_path).unwrap_or_default();
            let image_base64 = general_purpose::STANDARD.encode(img_data);
            let new_src = format!("data:{};base64,{}", mime_type, image_base64);

            whole_tag.replace(img_src, &new_src)
        })
        .to_string();

    let browser = get_browser().await;
    let browser_instance = browser.as_ref().ok_or_else(|| {
        SimplePdfGeneratorError::BrowserError("Cannot create the browser".to_string())
    })?;
    let page = browser_instance
        .browser
        .new_page("about:blank")
        .await
        .map_err(|e| {
            SimplePdfGeneratorError::BrowserError(format!("Cannot create the page: {}", e))
        })?;
    page.set_content(html).await.map_err(|e| {
        SimplePdfGeneratorError::BrowserError(format!("Cannot set the content: {}", e))
    })?;

    // Read every asset concurrently; the first read error aborts the generation.
    let asset_content_futures: Vec<_> = assets
        .iter()
        .map(|asset| tokio::fs::read_to_string(&asset.path))
        .collect();
    let asset_contents = try_join_all(asset_content_futures)
        .await
        .map_err(|e| SimplePdfGeneratorError::IoError(format!("Cannot read the asset: {}", e)))?;

    let mut inject_futures_css = Vec::new();
    let mut inject_futures_js = Vec::new();
    // `try_join_all` keeps the input order, so contents line up with `assets`.
    for (asset, asset_content) in assets.iter().zip(asset_contents) {
        match asset.r#type {
            AssetType::Style => {
                inject_futures_css.push(inject_css(&page, asset_content));
            }
            AssetType::Script => {
                inject_futures_js.push(inject_js(&page, asset_content));
            }
        }
    }
    try_join_all(inject_futures_css).await.map_err(|e| {
        SimplePdfGeneratorError::BrowserError(format!("Cannot inject the css: {}", e))
    })?;
    try_join_all(inject_futures_js).await.map_err(|e| {
        SimplePdfGeneratorError::BrowserError(format!("Cannot inject the js: {}", e))
    })?;

    if !template.tables.is_empty() {
        let table_generator_js: &'static str = include_str!("../assets/js/table-generator.js");

        // Build the `tablesData = {name:data,...}` assignment that replaces the first
        // `tablesData` occurrence in the generator script.
        let mut tables_data = "tablesData = {".to_string();
        for (table_name, mut table_data) in template.tables {
            if table_data.is_empty() {
                // Empty tables become `[]` and their bound elements get hidden.
                table_data = "[]".to_string();
                xpath_texts.push(format!("@items = '{}'", table_name));
            }

            tables_data.push_str(&format!("{}:{},", table_name, table_data));
        }
        tables_data.push('}');

        let table_generator_js =
            table_generator_js.replacen("tablesData", &html_escape::encode_text(&tables_data), 1);
        _ = page.evaluate(table_generator_js).await.map_err(|e| {
            SimplePdfGeneratorError::BrowserError(format!("Cannot evaluate the js: {}", e))
        })?;
    }

    if !xpath_texts.is_empty() {
        let xpath_expression = format!(
            "//*[not(self::script or self::style or self::title) and ({})]",
            xpath_texts.join(" or ")
        );
        // The script text is kept flush-left so the string sent to the browser is
        // unchanged.
        let js_script = format!(
            "
() => {{
const xpathExpression = `{}`;
const result = document.evaluate(xpathExpression, document, null, XPathResult.UNORDERED_NODE_SNAPSHOT_TYPE, null);

for (let i = 0; i < result.snapshotLength; i++) {{
const targetElement = result.snapshotItem(i);
targetElement.style.display = 'none';
}}
}}
",
            xpath_expression
        );

        _ = page.evaluate(js_script).await.map_err(|e| {
            SimplePdfGeneratorError::BrowserError(format!(
                "Cannot evaluate the xPath script: {}",
                e
            ))
        })?;
    }

    page.pdf(print_options.into())
        .await
        .map_err(|e| SimplePdfGeneratorError::PdfError(format!("Cannot create the pdf: {}", e)))
}

async fn inject_js(page: &Page, js: String) -> Result<EvaluationResult, CdpError> {
let script = format!(
"() => {{
Expand Down
Loading