From ec088acfd89ea4435337662457896ceb1dcb116e Mon Sep 17 00:00:00 2001 From: gbrlmtrz Date: Mon, 3 May 2021 18:53:36 -0300 Subject: [PATCH 01/10] - remove rule so we can insert DIV tags into P tags, since we use it for our ad manager DD-70 --- src/spec/tag/omission.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/spec/tag/omission.rs b/src/spec/tag/omission.rs index 8720c6eb..7c93259c 100644 --- a/src/spec/tag/omission.rs +++ b/src/spec/tag/omission.rs @@ -90,7 +90,6 @@ lazy_static! { followed_by.insert(b"aside"); followed_by.insert(b"blockquote"); followed_by.insert(b"details"); - followed_by.insert(b"div"); followed_by.insert(b"dl"); followed_by.insert(b"fieldset"); followed_by.insert(b"figcaption"); From 1f42ca1e47495047824d3a05c0e68ea1c43ec365 Mon Sep 17 00:00:00 2001 From: gbrlmtrz Date: Mon, 3 May 2021 18:54:20 -0300 Subject: [PATCH 02/10] + allow for void tags to have closing tags to be a bit more tolerant with invalid HTML DD-70 --- src/unit/tag.rs | 35 ++++++++++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/src/unit/tag.rs b/src/unit/tag.rs index 45290b2a..e2c3c3d1 100644 --- a/src/unit/tag.rs +++ b/src/unit/tag.rs @@ -199,6 +199,31 @@ pub fn process_tag( proc.write_slice(b"/>"); }; }; + + if is_void_tag { + let closing_tag_checkpoint = ReadCheckpoint::new(proc); + let closing_tag = match proc.m(IsSeq(b" + match proc.m(WhileInLookup(TAG_NAME_CHAR), Discard).require("closing tag name".to_string()){ + Ok(tag) => Some(tag), + Err(_) => None, + } + , + Err(_) => None + }; + + return match closing_tag { + Some(tag) => { + proc.make_lowercase(tag); + if proc[tag] != proc[tag_name] { + closing_tag_checkpoint.restore(proc); + } + Ok(MaybeClosingTag(None)) + }, + None => Ok(MaybeClosingTag(None)) + } + + } return Ok(MaybeClosingTag(None)); }; @@ -222,8 +247,8 @@ pub fn process_tag( }; let closing_tag_checkpoint = ReadCheckpoint::new(proc); - proc.m(IsSeq(b"'), Discard).require("closing tag end")?; + proc.m(IsChar(b'>'), Discard).require(formated)?; Ok(MaybeClosingTag(Some(tag_name))) } } From c4c83908a26c58dc60333f32253795bfb419294b Mon Sep 17 00:00:00 2001 From: gbrlmtrz Date: Mon, 3 May 2021 21:10:40 -0300 Subject: [PATCH 03/10] + added support for lazy styles and lazy scripts DD-70 --- src/unit/style.rs | 10 ++++++++-- src/unit/tag.rs | 24 ++++++++++++++++++++++-- 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/src/unit/style.rs b/src/unit/style.rs index 0b1d99ef..9388d6e0 100644 --- a/src/unit/style.rs +++ b/src/unit/style.rs @@ -27,14 +27,20 @@ lazy_static! { lazy_static! { static ref STYLE_END: AhoCorasick = AhoCorasickBuilder::new().ascii_case_insensitive(true).build(&[" ProcessingResult<()> { +pub fn process_style(proc: &mut Processor, cfg: &Cfg, as_script: bool) -> ProcessingResult<()> { #[cfg(feature = "js-esbuild")] let start = WriteCheckpoint::new(proc); proc.require_not_at_end()?; - proc.m(WhileNotSeq(&STYLE_END), Keep); + + if as_script { + proc.m(WhileNotSeq( &SCRIPT_STYLE_END ), Keep); + } else { + proc.m(WhileNotSeq( &STYLE_END ), Keep); + } // `process_tag` will require closing tag. // TODO This is copied from script.rs. diff --git a/src/unit/tag.rs b/src/unit/tag.rs index e2c3c3d1..15494bdd 100644 --- a/src/unit/tag.rs +++ b/src/unit/tag.rs @@ -36,14 +36,22 @@ lazy_static! { s.insert(b"text/livescript"); s.insert(b"text/x-ecmascript"); s.insert(b"text/x-javascript"); + s.insert(b"text/rmscript"); s }; + + pub static ref SCRIPTSTYLES_MIME_TYPES: HashSet<&'static [u8]> = { + let mut s = HashSet::<&'static [u8]>::new(); + s.insert(b"text/rmstyle"); + s + }; } #[derive(Copy, Clone)] enum TagType { ScriptJs, ScriptData, + ScriptStyle, Style, Other, } @@ -161,11 +169,22 @@ pub fn process_tag( let script_tag_type_is_js = value .filter(|v| !JAVASCRIPT_MIME_TYPES.contains(&proc[*v])) .is_none(); + if script_tag_type_is_js { erase_attr = true; } else { // Tag does not contain JS, don't minify JS. - tag_type = TagType::ScriptData; + + let script_tag_type_is_style = value + .filter(|v| !SCRIPTSTYLES_MIME_TYPES.contains(&proc[*v])) + .is_none(); + + if script_tag_type_is_style { + tag_type = TagType::ScriptStyle; + erase_attr = false; + } else { + tag_type = TagType::ScriptData; + } }; } (_, name) => { @@ -237,7 +256,8 @@ pub fn process_tag( match tag_type { TagType::ScriptData => process_script(proc, cfg, false)?, TagType::ScriptJs => process_script(proc, cfg, true)?, - TagType::Style => process_style(proc, cfg)?, + TagType::ScriptStyle => process_style(proc, cfg, true)?, + TagType::Style => process_style(proc, cfg, false)?, _ => closing_tag_omitted = process_content(proc, cfg, child_ns, Some(tag_name), descendant_of_pre)?.closing_tag_omitted, }; From a13ebcaaaaf9cf2b554d517c987cbbc0d2c4c863 Mon Sep 17 00:00:00 2001 From: gbrlmtrz Date: Mon, 3 May 2021 21:31:36 -0300 Subject: [PATCH 04/10] + minified JSON as well DD-70 --- Cargo.toml | 3 ++- src/proc/mod.rs | 13 +++++++++++-- src/unit/script.rs | 44 +++++++++++++++++++++++++++++--------------- src/unit/style.rs | 4 ++-- 4 files changed, 44 insertions(+), 20 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index e8b63fb6..ec073903 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,11 +17,12 @@ maintenance = { status = "actively-developed" } [features] default = [] -js-esbuild = ["crossbeam", "esbuild-rs"] +js-esbuild = ["crossbeam", "esbuild-rs", "minify"] [dependencies] aho-corasick = "0.7" crossbeam = { version = "0.7", optional = true } esbuild-rs = { version = "0.8.30", optional = true } +minify = { version = "1.2", optional = true } lazy_static = "1.4" memchr = "2" diff --git a/src/proc/mod.rs b/src/proc/mod.rs index 5904fe16..6f8d1035 100644 --- a/src/proc/mod.rs +++ b/src/proc/mod.rs @@ -51,10 +51,16 @@ pub enum MatchAction { MatchOnly, } +#[cfg(feature = "js-esbuild")] +pub enum ResultType{ + EsBuildResult(TransformResult), + StringResult(String), +} + #[cfg(feature = "js-esbuild")] pub struct EsbuildSection { pub src: ProcessorRange, - pub result: TransformResult, + pub result: ResultType, } // Processing state of a file. Single use only; create one per processing. @@ -385,7 +391,10 @@ impl<'d> Processor<'d> { // Resulting minified JS/CSS to write. // TODO Verify. // TODO Handle potential `` in output code, which could be in string (e.g. orig. ""), comment, or expression (e.g. orig. `a < /script>/.exec(b)?.length`). - let min_code = result.code.as_str().trim(); + let min_code = match result { + ResultType::EsBuildResult(es_result) => es_result.code.as_str().trim(), + ResultType::StringResult(string_result) => &string_result[..].trim() + }; let min_len = if min_code.len() < src.len() { self.code[write_next..write_next + min_code.len()].copy_from_slice(min_code.as_bytes()); min_code.len() diff --git a/src/unit/script.rs b/src/unit/script.rs index 091da90c..a0fc37a1 100644 --- a/src/unit/script.rs +++ b/src/unit/script.rs @@ -9,7 +9,8 @@ use crate::proc::Processor; use { std::sync::Arc, esbuild_rs::{TransformOptionsBuilder, TransformOptions}, - crate::proc::EsbuildSection, + minify::json::minify, + crate::proc::{EsbuildSection, ResultType}, crate::proc::checkpoint::WriteCheckpoint, }; @@ -38,23 +39,36 @@ pub fn process_script(proc: &mut Processor, cfg: &Cfg, js: bool) -> ProcessingRe // TODO This is copied from style.rs. #[cfg(feature = "js-esbuild")] - if js && cfg.minify_js { - let (wg, results) = proc.new_esbuild_section(); + if cfg.minify_js { let src = start.written_range(proc); - unsafe { - esbuild_rs::transform_direct_unmanaged(&proc[src], &TRANSFORM_OPTIONS.clone(), move |result| { - let mut guard = results.lock().unwrap(); - guard.push(EsbuildSection { - src, - result, + let (wg, results) = proc.new_esbuild_section(); + if js { + unsafe { + esbuild_rs::transform_direct_unmanaged(&proc[src], &TRANSFORM_OPTIONS.clone(), move |result| { + let mut guard = results.lock().unwrap(); + guard.push(EsbuildSection { + src, + result: ResultType::EsBuildResult(result), + }); + // Drop Arc reference and Mutex guard before marking task as complete as it's possible proc::finish + // waiting on WaitGroup will resume before Arc/Mutex is dropped after exiting this function. + drop(guard); + drop(results); + drop(wg); }); - // Drop Arc reference and Mutex guard before marking task as complete as it's possible proc::finish - // waiting on WaitGroup will resume before Arc/Mutex is dropped after exiting this function. - drop(guard); - drop(results); - drop(wg); + }; + } else { + let raw_json = unsafe { String::from_utf8_unchecked(proc[src].to_vec()) }; + let result = minify(&raw_json[..]); + let mut guard = results.lock().unwrap(); + guard.push(EsbuildSection { + src, + result: ResultType::StringResult(result), }); - }; + drop(guard); + drop(results); + drop(wg); + } }; Ok(()) diff --git a/src/unit/style.rs b/src/unit/style.rs index 9388d6e0..e3720e68 100644 --- a/src/unit/style.rs +++ b/src/unit/style.rs @@ -8,7 +8,7 @@ use crate::proc::Processor; use { std::sync::Arc, esbuild_rs::{Loader, TransformOptionsBuilder, TransformOptions}, - crate::proc::EsbuildSection, + crate::proc::{EsbuildSection, ResultType}, crate::proc::checkpoint::WriteCheckpoint, }; use crate::Cfg; @@ -53,7 +53,7 @@ pub fn process_style(proc: &mut Processor, cfg: &Cfg, as_script: bool) -> Proces let mut guard = results.lock().unwrap(); guard.push(EsbuildSection { src, - result, + result: ResultType::EsBuildResult(result), }); // Drop Arc reference and Mutex guard before marking task as complete as it's possible proc::finish // waiting on WaitGroup will resume before Arc/Mutex is dropped after exiting this function. From 249f36a3d28734d7417c20b16abc9164703dbfcb Mon Sep 17 00:00:00 2001 From: gbrlmtrz Date: Mon, 3 May 2021 22:59:24 -0300 Subject: [PATCH 05/10] - removed leftover > DD-70 --- src/unit/tag.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/unit/tag.rs b/src/unit/tag.rs index 15494bdd..fde2d667 100644 --- a/src/unit/tag.rs +++ b/src/unit/tag.rs @@ -234,7 +234,9 @@ pub fn process_tag( return match closing_tag { Some(tag) => { proc.make_lowercase(tag); - if proc[tag] != proc[tag_name] { + if proc[tag] == proc[tag_name] { + proc.m(IsSeq(b">"), Discard); + }else{ closing_tag_checkpoint.restore(proc); } Ok(MaybeClosingTag(None)) From 2717e9ef0ed7c7dd22e865b0703476034883ec2c Mon Sep 17 00:00:00 2001 From: gbrlmtrz Date: Tue, 11 May 2021 17:05:14 -0300 Subject: [PATCH 06/10] + moved json minification to its own module DD-70 --- src/unit/jsonscript.rs | 46 ++++++++++++++++++++++++++++++++++++++++++ src/unit/mod.rs | 1 + src/unit/script.rs | 42 +++++++++++++------------------------- src/unit/tag.rs | 23 +++++++++++++++++++-- 4 files changed, 82 insertions(+), 30 deletions(-) create mode 100644 src/unit/jsonscript.rs diff --git a/src/unit/jsonscript.rs b/src/unit/jsonscript.rs new file mode 100644 index 00000000..81cd10b4 --- /dev/null +++ b/src/unit/jsonscript.rs @@ -0,0 +1,46 @@ +use aho_corasick::{AhoCorasick, AhoCorasickBuilder}; +use lazy_static::lazy_static; +use crate::cfg::Cfg; +use crate::err::ProcessingResult; +use crate::proc::MatchAction::*; +use crate::proc::MatchMode::*; +use crate::proc::Processor; +#[cfg(feature = "js-esbuild")] +use { + std::sync::Arc, + minify::json::minify, + crate::proc::{EsbuildSection, ResultType}, + crate::proc::checkpoint::WriteCheckpoint, +}; + +lazy_static! { + static ref SCRIPT_END: AhoCorasick = AhoCorasickBuilder::new().ascii_case_insensitive(true).build(&[" ProcessingResult<()> { + #[cfg(feature = "js-esbuild")] + let start = WriteCheckpoint::new(proc); + proc.require_not_at_end()?; + proc.m(WhileNotSeq(&SCRIPT_END), Keep); + // `process_tag` will require closing tag. + + // TODO This is copied from style.rs. + #[cfg(feature = "js-esbuild")] + if cfg.minify_js { + let src = start.written_range(proc); + let (wg, results) = proc.new_esbuild_section(); + let raw_json = unsafe { String::from_utf8_unchecked(proc[src].to_vec()) }; + let result = minify(&raw_json[..]); + let mut guard = results.lock().unwrap(); + guard.push(EsbuildSection { + src, + result: ResultType::StringResult(result), + }); + drop(guard); + drop(results); + drop(wg); + }; + + Ok(()) +} diff --git a/src/unit/mod.rs b/src/unit/mod.rs index c45f54cb..40cf9998 100644 --- a/src/unit/mod.rs +++ b/src/unit/mod.rs @@ -3,6 +3,7 @@ pub mod bang; pub mod comment; pub mod content; pub mod instruction; +pub mod jsonscript; pub mod script; pub mod style; pub mod tag; diff --git a/src/unit/script.rs b/src/unit/script.rs index a0fc37a1..d5645199 100644 --- a/src/unit/script.rs +++ b/src/unit/script.rs @@ -9,7 +9,6 @@ use crate::proc::Processor; use { std::sync::Arc, esbuild_rs::{TransformOptionsBuilder, TransformOptions}, - minify::json::minify, crate::proc::{EsbuildSection, ResultType}, crate::proc::checkpoint::WriteCheckpoint, }; @@ -39,36 +38,23 @@ pub fn process_script(proc: &mut Processor, cfg: &Cfg, js: bool) -> ProcessingRe // TODO This is copied from style.rs. #[cfg(feature = "js-esbuild")] - if cfg.minify_js { - let src = start.written_range(proc); + if js && cfg.minify_js { let (wg, results) = proc.new_esbuild_section(); - if js { - unsafe { - esbuild_rs::transform_direct_unmanaged(&proc[src], &TRANSFORM_OPTIONS.clone(), move |result| { - let mut guard = results.lock().unwrap(); - guard.push(EsbuildSection { - src, - result: ResultType::EsBuildResult(result), - }); - // Drop Arc reference and Mutex guard before marking task as complete as it's possible proc::finish - // waiting on WaitGroup will resume before Arc/Mutex is dropped after exiting this function. - drop(guard); - drop(results); - drop(wg); + let src = start.written_range(proc); + unsafe { + esbuild_rs::transform_direct_unmanaged(&proc[src], &TRANSFORM_OPTIONS.clone(), move |result| { + let mut guard = results.lock().unwrap(); + guard.push(EsbuildSection { + src, + result: ResultType::EsBuildResult(result), }); - }; - } else { - let raw_json = unsafe { String::from_utf8_unchecked(proc[src].to_vec()) }; - let result = minify(&raw_json[..]); - let mut guard = results.lock().unwrap(); - guard.push(EsbuildSection { - src, - result: ResultType::StringResult(result), + // Drop Arc reference and Mutex guard before marking task as complete as it's possible proc::finish + // waiting on WaitGroup will resume before Arc/Mutex is dropped after exiting this function. + drop(guard); + drop(results); + drop(wg); }); - drop(guard); - drop(results); - drop(wg); - } + }; }; Ok(()) diff --git a/src/unit/tag.rs b/src/unit/tag.rs index fde2d667..b195744c 100644 --- a/src/unit/tag.rs +++ b/src/unit/tag.rs @@ -10,6 +10,7 @@ use crate::spec::tag::void::VOID_TAGS; use crate::unit::attr::{AttrType, process_attr, ProcessedAttr}; use crate::unit::content::process_content; use crate::unit::script::process_script; +use crate::unit::jsonscript::process_json; use crate::unit::style::process_style; use crate::gen::attrs::{ATTRS, AttributeMinification}; use crate::spec::tag::ns::Namespace; @@ -39,6 +40,13 @@ lazy_static! { s.insert(b"text/rmscript"); s }; + + pub static ref JSON_MIME_TYPES: HashSet<&'static [u8]> = { + let mut s = HashSet::<&'static [u8]>::new(); + s.insert(b"application/json"); + s.insert(b"application/ld+json"); + s + }; pub static ref SCRIPTSTYLES_MIME_TYPES: HashSet<&'static [u8]> = { let mut s = HashSet::<&'static [u8]>::new(); @@ -50,6 +58,7 @@ lazy_static! { #[derive(Copy, Clone)] enum TagType { ScriptJs, + JsonData, ScriptData, ScriptStyle, Style, @@ -183,7 +192,16 @@ pub fn process_tag( tag_type = TagType::ScriptStyle; erase_attr = false; } else { - tag_type = TagType::ScriptData; + let script_tag_type_is_json = value + .filter(|v| !JSON_MIME_TYPES.contains(&proc[*v])) + .is_none(); + + if script_tag_type_is_json { + tag_type = TagType::JsonData; + erase_attr = false; + }else{ + tag_type = TagType::ScriptData; + } } }; } @@ -256,7 +274,8 @@ pub fn process_tag( let mut closing_tag_omitted = false; match tag_type { - TagType::ScriptData => process_script(proc, cfg, false)?, + TagType::ScriptData => process_json(proc, cfg)?, + TagType::JsonData => process_json(proc, cfg)?, TagType::ScriptJs => process_script(proc, cfg, true)?, TagType::ScriptStyle => process_style(proc, cfg, true)?, TagType::Style => process_style(proc, cfg, false)?, From 60b25959025ae905e0f408e5bc093d21c888b04d Mon Sep 17 00:00:00 2001 From: gbrlmtrz Date: Tue, 11 May 2021 17:13:17 -0300 Subject: [PATCH 07/10] - removed unneeded type after merge DD-70 --- src/proc/mod.rs | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/proc/mod.rs b/src/proc/mod.rs index ade2723a..88ea1527 100644 --- a/src/proc/mod.rs +++ b/src/proc/mod.rs @@ -50,12 +50,6 @@ pub enum MatchAction { MatchOnly, } -#[cfg(feature = "js-esbuild")] -pub enum ResultType{ - EsBuildResult(TransformResult), - StringResult(String), -} - #[cfg(feature = "js-esbuild")] pub struct EsbuildSection { pub src: ProcessorRange, From cca96b15a6bb8ff1971c0fa9e2bee342173e896e Mon Sep 17 00:00:00 2001 From: gbrlmtrz Date: Tue, 11 May 2021 19:19:50 -0300 Subject: [PATCH 08/10] + made json script work like the other modules of its type DD-70 --- src/unit/jsonscript.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/unit/jsonscript.rs b/src/unit/jsonscript.rs index 81cd10b4..bdcf87ec 100644 --- a/src/unit/jsonscript.rs +++ b/src/unit/jsonscript.rs @@ -9,7 +9,7 @@ use crate::proc::Processor; use { std::sync::Arc, minify::json::minify, - crate::proc::{EsbuildSection, ResultType}, + crate::proc::EsbuildSection, crate::proc::checkpoint::WriteCheckpoint, }; @@ -35,7 +35,7 @@ pub fn process_json(proc: &mut Processor, cfg: &Cfg) -> ProcessingResult<()> { let mut guard = results.lock().unwrap(); guard.push(EsbuildSection { src, - result: ResultType::StringResult(result), + escaped: result.as_bytes().to_vec(), }); drop(guard); drop(results); From 9c0c5646ed0b00e67688edb0060308a51128600c Mon Sep 17 00:00:00 2001 From: gbrlmtrz Date: Tue, 11 May 2021 19:20:01 -0300 Subject: [PATCH 09/10] + simplified logic a bit DD-70 --- src/unit/tag.rs | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/unit/tag.rs b/src/unit/tag.rs index b195744c..48f217b8 100644 --- a/src/unit/tag.rs +++ b/src/unit/tag.rs @@ -179,18 +179,15 @@ pub fn process_tag( .filter(|v| !JAVASCRIPT_MIME_TYPES.contains(&proc[*v])) .is_none(); - if script_tag_type_is_js { - erase_attr = true; - } else { + erase_attr = false; + if !script_tag_type_is_js { // Tag does not contain JS, don't minify JS. - let script_tag_type_is_style = value .filter(|v| !SCRIPTSTYLES_MIME_TYPES.contains(&proc[*v])) .is_none(); if script_tag_type_is_style { tag_type = TagType::ScriptStyle; - erase_attr = false; } else { let script_tag_type_is_json = value .filter(|v| !JSON_MIME_TYPES.contains(&proc[*v])) @@ -198,8 +195,8 @@ pub fn process_tag( if script_tag_type_is_json { tag_type = TagType::JsonData; - erase_attr = false; }else{ + erase_attr = true; tag_type = TagType::ScriptData; } } @@ -274,7 +271,7 @@ pub fn process_tag( let mut closing_tag_omitted = false; match tag_type { - TagType::ScriptData => process_json(proc, cfg)?, + TagType::ScriptData => process_script(proc, cfg, false)?, TagType::JsonData => process_json(proc, cfg)?, TagType::ScriptJs => process_script(proc, cfg, true)?, TagType::ScriptStyle => process_style(proc, cfg, true)?, From 5d7c79bd8c8add77b4945691e7db2630550c4e0c Mon Sep 17 00:00:00 2001 From: gbrlmtrz Date: Tue, 11 May 2021 19:20:10 -0300 Subject: [PATCH 10/10] + changed permission DD-70 --- prebuild.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 prebuild.sh diff --git a/prebuild.sh b/prebuild.sh old mode 100644 new mode 100755