diff --git a/CHANGELOG.md b/CHANGELOG.md
index a587bd1..82ddce1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]
### Added
+- Proxy now converts HTML responses to Markdown on the fly when clients send `Accept: text/markdown`, compatible with Cloudflare's Markdown for Agents standard; responses include `Content-Type: text/markdown`, `Vary: Accept`, and `X-Markdown-Tokens` headers; SSE, WebSocket, and responses over 2 MB pass through unchanged
- MCP (Model Context Protocol) server with 210 tools across 30 domain modules (`mcp/`)
- OpenAPI SDK auto-generated via `@hey-api/openapi-ts` for MCP server
- WebSocket support for container runtime logs in MCP server
diff --git a/Cargo.lock b/Cargo.lock
index 3cc4294..c20459b 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2476,6 +2476,29 @@ dependencies = [
"typenum",
]
+[[package]]
+name = "cssparser"
+version = "0.36.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dae61cf9c0abb83bd659dab65b7e4e38d8236824c85f0f804f173567bda257d2"
+dependencies = [
+ "cssparser-macros",
+ "dtoa-short",
+ "itoa",
+ "phf 0.13.1",
+ "smallvec",
+]
+
+[[package]]
+name = "cssparser-macros"
+version = "0.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331"
+dependencies = [
+ "quote",
+ "syn 2.0.108",
+]
+
[[package]]
name = "ctr"
version = "0.9.2"
@@ -2991,6 +3014,21 @@ version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75b325c5dbd37f80359721ad39aca5a29fb04c89279657cffdda8736d0c0b9d2"
+[[package]]
+name = "dtoa"
+version = "1.0.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4c3cf4824e2d5f025c7b531afcb2325364084a16806f6d47fbc1f5fbd9960590"
+
+[[package]]
+name = "dtoa-short"
+version = "0.3.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cd1511a7b6a56299bd043a9c167a6d2bfb37bf84a6dfceaba651168adfb43c87"
+dependencies = [
+ "dtoa",
+]
+
[[package]]
name = "dunce"
version = "1.0.5"
@@ -3054,6 +3092,12 @@ dependencies = [
"zeroize",
]
+[[package]]
+name = "ego-tree"
+version = "0.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b2972feb8dffe7bc8c5463b1dacda1b0dfbed3710e50f977d965429692d74cd8"
+
[[package]]
name = "either"
version = "1.15.0"
@@ -3596,6 +3640,16 @@ version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c"
+[[package]]
+name = "futf"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843"
+dependencies = [
+ "mac",
+ "new_debug_unreachable",
+]
+
[[package]]
name = "futures"
version = "0.3.31"
@@ -3735,6 +3789,15 @@ dependencies = [
"libc",
]
+[[package]]
+name = "getopts"
+version = "0.2.24"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cfe4fbac503b8d1f88e6676011885f34b7174f46e59956bba534ba83abded4df"
+dependencies = [
+ "unicode-width 0.2.2",
+]
+
[[package]]
name = "getrandom"
version = "0.2.16"
@@ -4186,6 +4249,38 @@ dependencies = [
"windows-link 0.1.3",
]
+[[package]]
+name = "htmd"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "60ae59466542f2346e43d4a5e9b4432a1fc915b279c9fc0484e9ed7379121454"
+dependencies = [
+ "html5ever 0.35.0",
+ "markup5ever_rcdom",
+ "phf 0.13.1",
+]
+
+[[package]]
+name = "html5ever"
+version = "0.35.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "55d958c2f74b664487a2035fe1dadb032c48718a03b63f3ab0b8537db8549ed4"
+dependencies = [
+ "log",
+ "markup5ever 0.35.0",
+ "match_token",
+]
+
+[[package]]
+name = "html5ever"
+version = "0.36.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6452c4751a24e1b99c3260d505eaeee76a050573e61f30ac2c924ddc7236f01e"
+dependencies = [
+ "log",
+ "markup5ever 0.36.1",
+]
+
[[package]]
name = "http"
version = "0.2.12"
@@ -5355,6 +5450,12 @@ dependencies = [
"pkg-config",
]
+[[package]]
+name = "mac"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
+
[[package]]
name = "macro_magic"
version = "0.5.1"
@@ -5435,12 +5536,57 @@ version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d"
+[[package]]
+name = "markup5ever"
+version = "0.35.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "311fe69c934650f8f19652b3946075f0fc41ad8757dbb68f1ca14e7900ecc1c3"
+dependencies = [
+ "log",
+ "tendril",
+ "web_atoms 0.1.3",
+]
+
+[[package]]
+name = "markup5ever"
+version = "0.36.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6c3294c4d74d0742910f8c7b466f44dda9eb2d5742c1e430138df290a1e8451c"
+dependencies = [
+ "log",
+ "tendril",
+ "web_atoms 0.2.3",
+]
+
+[[package]]
+name = "markup5ever_rcdom"
+version = "0.35.0+unofficial"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c8bcd53df4748257345b8bc156d620340ce0f015ec1c7ef1cff475543888a31d"
+dependencies = [
+ "html5ever 0.35.0",
+ "markup5ever 0.35.0",
+ "tendril",
+ "xml5ever",
+]
+
[[package]]
name = "match_cfg"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ffbee8634e0d45d258acb448e7eaab3fce7a0a467395d4d9f228e3c1f01fb2e4"
+[[package]]
+name = "match_token"
+version = "0.35.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ac84fd3f360fcc43dc5f5d186f02a94192761a080e8bc58621ad4d12296a58cf"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.108",
+]
+
[[package]]
name = "matchers"
version = "0.2.0"
@@ -6717,6 +6863,15 @@ dependencies = [
"serde",
]
+[[package]]
+name = "phf"
+version = "0.11.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078"
+dependencies = [
+ "phf_shared 0.11.3",
+]
+
[[package]]
name = "phf"
version = "0.12.1"
@@ -6732,20 +6887,51 @@ version = "0.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c1562dc717473dbaa4c1f85a36410e03c047b2e7df7f45ee938fbef64ae7fadf"
dependencies = [
+ "phf_macros",
"phf_shared 0.13.1",
"serde",
]
+[[package]]
+name = "phf_codegen"
+version = "0.11.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a"
+dependencies = [
+ "phf_generator 0.11.3",
+ "phf_shared 0.11.3",
+]
+
[[package]]
name = "phf_codegen"
version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "efbdcb6f01d193b17f0b9c3360fa7e0e620991b193ff08702f78b3ce365d7e61"
dependencies = [
- "phf_generator",
+ "phf_generator 0.12.1",
"phf_shared 0.12.1",
]
+[[package]]
+name = "phf_codegen"
+version = "0.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "49aa7f9d80421bca176ca8dbfebe668cc7a2684708594ec9f3c0db0805d5d6e1"
+dependencies = [
+ "phf_generator 0.13.1",
+ "phf_shared 0.13.1",
+]
+
+[[package]]
+name = "phf_generator"
+version = "0.11.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d"
+dependencies = [
+ "phf_shared 0.11.3",
+ "rand 0.8.5",
+]
+
[[package]]
name = "phf_generator"
version = "0.12.1"
@@ -6756,6 +6942,38 @@ dependencies = [
"phf_shared 0.12.1",
]
+[[package]]
+name = "phf_generator"
+version = "0.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "135ace3a761e564ec88c03a77317a7c6b80bb7f7135ef2544dbe054243b89737"
+dependencies = [
+ "fastrand",
+ "phf_shared 0.13.1",
+]
+
+[[package]]
+name = "phf_macros"
+version = "0.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "812f032b54b1e759ccd5f8b6677695d5268c588701effba24601f6932f8269ef"
+dependencies = [
+ "phf_generator 0.13.1",
+ "phf_shared 0.13.1",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.108",
+]
+
+[[package]]
+name = "phf_shared"
+version = "0.11.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5"
+dependencies = [
+ "siphasher",
+]
+
[[package]]
name = "phf_shared"
version = "0.12.1"
@@ -7236,6 +7454,12 @@ dependencies = [
"zerocopy",
]
+[[package]]
+name = "precomputed-hash"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"
+
[[package]]
name = "predicates"
version = "3.1.3"
@@ -7971,7 +8195,7 @@ dependencies = [
"pest",
"pest_derive",
"phf 0.12.1",
- "phf_codegen",
+ "phf_codegen 0.12.1",
"proc-macro2",
"quote",
"relay-protocol",
@@ -8750,6 +8974,21 @@ version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
+[[package]]
+name = "scraper"
+version = "0.25.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "93cecd86d6259499c844440546d02f55f3e17bd286e529e48d1f9f67e92315cb"
+dependencies = [
+ "cssparser",
+ "ego-tree",
+ "getopts",
+ "html5ever 0.36.1",
+ "precomputed-hash",
+ "selectors",
+ "tendril",
+]
+
[[package]]
name = "scroll"
version = "0.12.0"
@@ -9040,6 +9279,25 @@ dependencies = [
"libc",
]
+[[package]]
+name = "selectors"
+version = "0.33.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "feef350c36147532e1b79ea5c1f3791373e61cbd9a6a2615413b3807bb164fb7"
+dependencies = [
+ "bitflags 2.10.0",
+ "cssparser",
+ "derive_more 2.0.1",
+ "log",
+ "new_debug_unreachable",
+ "phf 0.13.1",
+ "phf_codegen 0.13.1",
+ "precomputed-hash",
+ "rustc-hash 2.1.1",
+ "servo_arc",
+ "smallvec",
+]
+
[[package]]
name = "semver"
version = "1.0.27"
@@ -9276,6 +9534,15 @@ dependencies = [
"syn 2.0.108",
]
+[[package]]
+name = "servo_arc"
+version = "0.4.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "170fb83ab34de17dc69aa7c67482b22218ddb85da56546f9bd6b929e32a05930"
+dependencies = [
+ "stable_deref_trait",
+]
+
[[package]]
name = "sfv"
version = "0.10.4"
@@ -9834,6 +10101,55 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
+[[package]]
+name = "string_cache"
+version = "0.8.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bf776ba3fa74f83bf4b63c3dcbbf82173db2632ed8452cb2d891d33f459de70f"
+dependencies = [
+ "new_debug_unreachable",
+ "parking_lot",
+ "phf_shared 0.11.3",
+ "precomputed-hash",
+ "serde",
+]
+
+[[package]]
+name = "string_cache"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a18596f8c785a729f2819c0f6a7eae6ebeebdfffbfe4214ae6b087f690e31901"
+dependencies = [
+ "new_debug_unreachable",
+ "parking_lot",
+ "phf_shared 0.13.1",
+ "precomputed-hash",
+]
+
+[[package]]
+name = "string_cache_codegen"
+version = "0.5.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c711928715f1fe0fe509c53b43e993a9a557babc2d0a3567d0a3006f1ac931a0"
+dependencies = [
+ "phf_generator 0.11.3",
+ "phf_shared 0.11.3",
+ "proc-macro2",
+ "quote",
+]
+
+[[package]]
+name = "string_cache_codegen"
+version = "0.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "585635e46db231059f76c5849798146164652513eb9e8ab2685939dd90f29b69"
+dependencies = [
+ "phf_generator 0.13.1",
+ "phf_shared 0.13.1",
+ "proc-macro2",
+ "quote",
+]
+
[[package]]
name = "stringprep"
version = "0.1.5"
@@ -11222,6 +11538,7 @@ dependencies = [
"cookie 0.18.1",
"flate2",
"hex",
+ "htmd",
"http-body-util",
"hyper 1.7.0",
"hyper-util",
@@ -11241,6 +11558,7 @@ dependencies = [
"regex",
"rustls 0.23.34",
"rustls-pemfile",
+ "scraper",
"sea-orm",
"sea-orm-migration",
"serde",
@@ -11523,6 +11841,17 @@ dependencies = [
"uuid",
]
+[[package]]
+name = "tendril"
+version = "0.4.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d24a120c5fc464a3458240ee02c299ebcb9d67b5249c8848b09d639dca8d7bb0"
+dependencies = [
+ "futf",
+ "mac",
+ "utf-8",
+]
+
[[package]]
name = "termcolor"
version = "1.4.1"
@@ -12849,6 +13178,30 @@ dependencies = [
"wasm-bindgen",
]
+[[package]]
+name = "web_atoms"
+version = "0.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "57ffde1dc01240bdf9992e3205668b235e59421fd085e8a317ed98da0178d414"
+dependencies = [
+ "phf 0.11.3",
+ "phf_codegen 0.11.3",
+ "string_cache 0.8.9",
+ "string_cache_codegen 0.5.4",
+]
+
+[[package]]
+name = "web_atoms"
+version = "0.2.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "57a9779e9f04d2ac1ce317aee707aa2f6b773afba7b931222bff6983843b1576"
+dependencies = [
+ "phf 0.13.1",
+ "phf_codegen 0.13.1",
+ "string_cache 0.9.0",
+ "string_cache_codegen 0.6.1",
+]
+
[[package]]
name = "webdriver"
version = "0.50.0"
@@ -13522,6 +13875,16 @@ dependencies = [
"rustix",
]
+[[package]]
+name = "xml5ever"
+version = "0.35.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ee3f1e41afb31a75aef076563b0ad3ecc24f5bd9d12a72b132222664eb76b494"
+dependencies = [
+ "log",
+ "markup5ever 0.35.0",
+]
+
[[package]]
name = "xmlparser"
version = "0.13.6"
diff --git a/crates/temps-proxy/Cargo.toml b/crates/temps-proxy/Cargo.toml
index 38d8149..168c070 100644
--- a/crates/temps-proxy/Cargo.toml
+++ b/crates/temps-proxy/Cargo.toml
@@ -29,6 +29,8 @@ sea-orm-migration = { workspace = true }
flate2 = { workspace = true }
tracing = { workspace = true }
memchr = "2.7"
+htmd = "0.5"
+scraper = "0.25"
pingora = { version = "0.7.0", features = ["lb"] }
pingora-core = { version = "0.7.0", features = ["openssl", "connection_filter"] }
pingora-http = "0.7.0"
diff --git a/crates/temps-proxy/src/proxy.rs b/crates/temps-proxy/src/proxy.rs
index 796c2d5..f8c4935 100644
--- a/crates/temps-proxy/src/proxy.rs
+++ b/crates/temps-proxy/src/proxy.rs
@@ -29,6 +29,264 @@ use uuid::Uuid;
// Constants
pub const VISITOR_ID_COOKIE: &str = "_temps_visitor_id";
+
+/// Maximum HTML body size (in bytes) eligible for Markdown conversion.
+/// Mirrors Cloudflare's "Markdown for Agents" 2 MB limit.
+const MAX_MARKDOWN_BODY_BYTES: usize = 2 * 1024 * 1024;
+
+/// Estimate the number of tokens in a Markdown document using a simple
+/// word-count heuristic (tokens ≈ words × 1.33, i.e. words / 0.75).
+/// This matches the rough estimate used by the Cloudflare `x-markdown-tokens` header.
+fn estimate_markdown_tokens(markdown: &str) -> usize {
+ let word_count = markdown.split_whitespace().count();
+ // 1 token ≈ 0.75 words → tokens ≈ words / 0.75 ≈ words * 4 / 3
+ word_count * 4 / 3
+}
+
+/// Metadata extracted from a page's `
` for the YAML front-matter block.
+struct PageMeta {
+ title: Option,
+ description: Option,
+ image: Option,
+}
+
+impl PageMeta {
+ /// Return a YAML front-matter block, or `None` if no metadata was found.
+ fn to_frontmatter(&self) -> Option {
+ if self.title.is_none() && self.description.is_none() && self.image.is_none() {
+ return None;
+ }
+ let mut fm = String::from("---\n");
+ if let Some(t) = &self.title {
+ fm.push_str(&format!("title: {}\n", t));
+ }
+ if let Some(d) = &self.description {
+ fm.push_str(&format!("description: {}\n", d));
+ }
+ if let Some(i) = &self.image {
+ fm.push_str(&format!("image: {}\n", i));
+ }
+ fm.push_str("---\n\n");
+ Some(fm)
+ }
+}
+
+/// Parse YAML front-matter metadata from `` meta tags.
+///
+/// Priority for `title`:
+/// 1. `` — the short title without site-name suffix.
+/// 2. `` — fallback, used when og:title is absent.
+///
+/// Priority for `description`:
+/// 1. `` — canonical description.
+/// 2. `` — fallback.
+///
+/// Priority for `image`:
+/// 1. `` (Cloudflare convention).
+/// 2. ``.
+fn extract_page_meta(document: &scraper::Html) -> PageMeta {
+ use scraper::Selector;
+
+ // Helper: return the `content` attribute of the first element matching `sel`.
+ let first_content = |sel: &str| -> Option {
+ Selector::parse(sel).ok().and_then(|s| {
+ document
+ .select(&s)
+ .next()
+ .and_then(|el| el.attr("content"))
+ .map(|v| v.to_owned())
+ })
+ };
+
+ // Title: prefer og:title (short), fall back to text content.
+ let title = first_content(r#"meta[property="og:title"]"#).or_else(|| {
+ Selector::parse("title").ok().and_then(|s| {
+ document
+ .select(&s)
+ .next()
+ .map(|el| el.text().collect::())
+ .filter(|t| !t.is_empty())
+ })
+ });
+
+ let description = first_content(r#"meta[name="description"]"#)
+ .or_else(|| first_content(r#"meta[property="og:description"]"#));
+
+ let image = first_content(r#"meta[property="image"]"#)
+ .or_else(|| first_content(r#"meta[property="og:image"]"#));
+
+ PageMeta {
+ title,
+ description,
+ image,
+ }
+}
+
+/// Extract the inner HTML of the content node to convert to Markdown.
+///
+/// Strategy (matches Cloudflare's Markdown for Agents behaviour):
+/// 1. First `` element found at shallowest depth (document order).
+/// 2. Fall back to `` if no `` is present.
+/// 3. Fall back to the full document string if neither is found (e.g. plain
+/// HTML fragments without a body element).
+///
+/// `
+
+ Clean content
+
+ "#;
+ let extracted = extract(html);
+ assert!(
+ extracted.contains("Clean content"),
+ "Expected content in: {}",
+ extracted
+ );
+ assert!(
+ !extracted.contains("window.foo"),
+ "Expected inline script stripped, got: {}",
+ extracted
+ );
+ assert!(
+ !extracted.contains("schema.org"),
+ "Expected JSON-LD stripped, got: {}",
+ extracted
+ );
+ }
+
+ #[test]
+ fn test_extract_style_inside_main_stripped() {
+ let html = r#"
+
+
+ Article text
+
+ "#;
+ let extracted = extract(html);
+ assert!(
+ extracted.contains("Article text"),
+ "Expected content in: {}",
+ extracted
+ );
+ assert!(
+ !extracted.contains("color: red"),
+ "Expected style stripped, got: {}",
+ extracted
+ );
+ }
+
+ #[test]
+ fn test_extract_script_outside_main_not_in_output() {
+ let html = r#"
+
+ Clean content
+ "#;
+ let extracted = extract(html);
+ assert!(!extracted.contains("window.bar"));
+ assert!(!extracted.contains("color: red"));
+ }
+
+ #[test]
+ fn test_extract_fallback_to_original_when_no_body() {
+ let fragment = "Just a heading
";
+ let extracted = extract(fragment);
+ assert!(
+ extracted.contains("Just a heading"),
+ "Expected heading in: {}",
+ extracted
+ );
+ }
+
+ // ── extract_page_meta / frontmatter ──────────────────────────────────────
+
+ #[test]
+ fn test_frontmatter_from_og_title_and_description() {
+ let html = r#"
+ My Page · Site Name
+
+
+ Content
"#;
+ let doc = scraper::Html::parse_document(html);
+ let meta = extract_page_meta(&doc);
+ // og:title preferred over
+ assert_eq!(meta.title.as_deref(), Some("My Page"));
+ assert_eq!(
+ meta.description.as_deref(),
+ Some("A great page about things.")
+ );
+ assert!(meta.image.is_none());
+
+ let fm = meta.to_frontmatter().unwrap();
+ assert!(fm.starts_with("---\n"), "Expected YAML fence: {}", fm);
+ assert!(fm.contains("title: My Page"), "got: {}", fm);
+ assert!(
+ fm.contains("description: A great page about things."),
+ "got: {}",
+ fm
+ );
+ assert!(fm.ends_with("---\n\n"), "Expected closing fence: {}", fm);
+ }
+
+ #[test]
+ fn test_frontmatter_falls_back_to_title_tag() {
+ let html = r#"Fallback Title
+ x
"#;
+ let doc = scraper::Html::parse_document(html);
+ let meta = extract_page_meta(&doc);
+ assert_eq!(meta.title.as_deref(), Some("Fallback Title"));
+ }
+
+ #[test]
+ fn test_frontmatter_image_from_og_image() {
+ let html = r#"
+
+ x
"#;
+ let doc = scraper::Html::parse_document(html);
+ let meta = extract_page_meta(&doc);
+ assert_eq!(meta.image.as_deref(), Some("https://example.com/img.png"));
+ }
+
+ #[test]
+ fn test_frontmatter_image_prefers_property_image_over_og_image() {
+ let html = r#"
+
+
+ x
"#;
+ let doc = scraper::Html::parse_document(html);
+ let meta = extract_page_meta(&doc);
+ assert_eq!(
+ meta.image.as_deref(),
+ Some("https://example.com/preview.png")
+ );
+ }
+
+ #[test]
+ fn test_frontmatter_none_when_no_meta() {
+ let html = r#"x
"#;
+ let doc = scraper::Html::parse_document(html);
+ let meta = extract_page_meta(&doc);
+ assert!(meta.to_frontmatter().is_none());
+ }
+
+ #[test]
+ fn test_body_filter_converts_html_to_markdown_with_frontmatter() {
+ let mut ctx = make_ctx();
+ ctx.wants_markdown = true;
+
+ // Full page with meta + main + noise — frontmatter should be prepended,
+ // nav/footer stripped, script inside main stripped.
+ let html = br#"
+
+
+
+
+
+
+ Hello
World
+
+
+ "#;
+ let result = run_body_filter_single_chunk(&mut ctx, html);
+
+ let md = String::from_utf8(result.unwrap().to_vec()).unwrap();
+ // Frontmatter present
+ assert!(md.starts_with("---\n"), "Expected frontmatter: {}", md);
+ assert!(md.contains("title: Hello Page"), "got: {}", md);
+ assert!(md.contains("description: A test page."), "got: {}", md);
+ // Article content present
+ assert!(md.contains("Hello"), "got: {}", md);
+ assert!(md.contains("World"), "got: {}", md);
+ // Noise absent
+ assert!(!md.contains("Nav"), "got: {}", md);
+ assert!(!md.contains("Footer"), "got: {}", md);
+ assert!(!md.contains("window.noise"), "got: {}", md);
+ }
+
+ #[test]
+ fn test_body_filter_passthrough_when_wants_markdown_false() {
+ let mut ctx = make_ctx();
+ ctx.wants_markdown = false;
+
+ let html = b"Hello
";
+ let result = run_body_filter_single_chunk(&mut ctx, html);
+
+ // Should return unchanged bytes
+ assert!(result.is_some());
+ assert_eq!(result.unwrap().as_ref(), html);
+ }
+
+ #[test]
+ fn test_body_filter_size_guard_disables_conversion() {
+ let mut ctx = make_ctx();
+ ctx.wants_markdown = true;
+
+ // Create a body slightly larger than 2 MB
+ let oversized = vec![b'x'; MAX_MARKDOWN_BODY_BYTES + 1];
+ let result = run_body_filter_single_chunk(&mut ctx, &oversized);
+
+ // Should fall back to passthrough — returns original bytes, conversion disabled
+ assert!(
+ !ctx.wants_markdown,
+ "wants_markdown should be reset to false"
+ );
+ assert!(result.is_some());
+ assert_eq!(result.unwrap().len(), oversized.len());
+ }
+
+ #[test]
+ fn test_body_filter_multi_chunk_accumulation() {
+ let mut ctx = make_ctx();
+ ctx.wants_markdown = true;
+
+ // Simulate two chunks arriving before end_of_stream (split mid-tag)
+ let chunk1 = Bytes::from_static(b"Greet");
+ let chunk2 = Bytes::from_static(b"ings
");
+
+ // First chunk — not end of stream
+ {
+ let mut body: Option = Some(chunk1);
+ if ctx.wants_markdown {
+ if let Some(c) = body.take() {
+ ctx.markdown_buffer.extend_from_slice(&c);
+ }
+ // end_of_stream = false → return None (suppress)
+ }
+ }
+
+ // Second chunk — end of stream
+ {
+ let mut body: Option = Some(chunk2);
+ let end_of_stream = true;
+ if ctx.wants_markdown {
+ if let Some(c) = body.take() {
+ ctx.markdown_buffer.extend_from_slice(&c);
+ }
+ if end_of_stream {
+ let html_str = String::from_utf8_lossy(&ctx.markdown_buffer);
+ let document = scraper::Html::parse_document(&html_str);
+ let content = extract_content_html(&document);
+ let markdown = htmd::convert(&content).unwrap_or_default();
+ ctx.markdown_buffer = Vec::new();
+ body = Some(Bytes::from(markdown));
+ }
+ }
+
+ let result = body;
+ assert!(result.is_some());
+ let md = String::from_utf8(result.unwrap().to_vec()).unwrap();
+ assert!(md.contains("Greetings"), "Expected 'Greetings' in: {}", md);
+ }
+ }
+
+ // ── SSE passthrough (critical safety test) ────────────────────────────────
+
+ #[test]
+ fn test_sse_passthrough_unaffected() {
+ // Even if wants_markdown was somehow set, SSE responses must never be buffered.
+ // The upstream_response_filter resets wants_markdown for SSE, but we also
+ // guard in response_body_filter. Verify the guard works.
+ let mut ctx = make_ctx();
+ ctx.wants_markdown = true; // pretend the guard in upstream_response_filter was skipped
+ ctx.is_sse = true;
+
+ let sse_chunk = Bytes::from_static(b"data: hello\n\n");
+
+ // Replicate the response_body_filter guard for SSE
+ if ctx.is_sse || ctx.is_websocket {
+ // pass through immediately — no buffering, no conversion
+ } else if ctx.wants_markdown {
+ panic!("Should not reach markdown conversion branch for SSE");
+ }
+
+ // body should be unchanged (the SSE branch never touches it)
+ assert_eq!(sse_chunk.as_ref(), b"data: hello\n\n");
+ }
+}
+
+// ── Pipeline integration tests ────────────────────────────────────────────────
+//
+// These tests exercise the full gate → header-rewrite → body-filter pipeline
+// without needing a live Pingora session. They construct `ResponseHeader` and
+// `ProxyContext` directly and call the extracted free functions
+// (`apply_markdown_upstream_gate`, `apply_markdown_response_headers`) plus the
+// body-filter logic that `run_body_filter_single_chunk` (in markdown_tests)
+// already covers, so here we focus on the header and gate behaviour and on
+// every edge-case the body filter must handle gracefully.
+#[cfg(test)]
+mod markdown_pipeline_tests {
+ use super::*;
+ use bytes::Bytes;
+ use std::time::Instant;
+
+ // ── Helpers ──────────────────────────────────────────────────────────────
+
+ fn make_ctx() -> ProxyContext {
+ ProxyContext {
+ response_modified: false,
+ response_compressed: false,
+ upstream_response_headers: None,
+ content_type: None,
+ buffer: vec![],
+ project: None,
+ environment: None,
+ deployment: None,
+ request_id: "test-req".to_string(),
+ start_time: Instant::now(),
+ method: "GET".to_string(),
+ path: "/".to_string(),
+ query_string: None,
+ host: "example.com".to_string(),
+ user_agent: "TestAgent/1.0".to_string(),
+ referrer: None,
+ ip_address: Some("127.0.0.1".to_string()),
+ visitor_id: None,
+ visitor_id_i32: None,
+ session_id: None,
+ session_id_i32: None,
+ is_new_session: false,
+ request_headers: None,
+ response_headers: None,
+ request_visitor_cookie: None,
+ request_session_cookie: None,
+ is_sse: false,
+ is_websocket: false,
+ skip_tracking: false,
+ routing_status: "pending".to_string(),
+ error_message: None,
+ upstream_host: None,
+ container_id: None,
+ tls_fingerprint: None,
+ tls_version: None,
+ tls_cipher: None,
+ sni_hostname: None,
+ upstream_body_bytes_received: 0,
+ wants_markdown: false,
+ markdown_buffer: Vec::new(),
+ }
+ }
+
+ /// Build a `ResponseHeader` with an explicit status and optional `Content-Type`.
+ fn make_response(status: u16, content_type: Option<&str>) -> ResponseHeader {
+ let mut resp = ResponseHeader::build(status, None).unwrap();
+ if let Some(ct) = content_type {
+ resp.insert_header("Content-Type", ct).unwrap();
+ }
+ resp
+ }
+
+ /// Simulate the full pipeline for a single-chunk body.
+ /// Returns (final_ctx, outbound_response_header, body_bytes).
+ fn run_pipeline(
+ mut ctx: ProxyContext,
+ mut resp: ResponseHeader,
+ body: &[u8],
+ ) -> (ProxyContext, ResponseHeader, Option) {
+ // Phase 1: upstream_response_filter — gate
+ apply_markdown_upstream_gate(&mut resp, &mut ctx);
+
+ // Phase 2: response_filter — header rewrite
+ apply_markdown_response_headers(&mut resp, &ctx);
+
+ // Phase 3: response_body_filter — buffer + convert (single-chunk, end_of_stream=true)
+ let body_out = if ctx.is_sse || ctx.is_websocket {
+ Some(Bytes::copy_from_slice(body))
+ } else if ctx.wants_markdown {
+ let chunk = Bytes::copy_from_slice(body);
+ if ctx.markdown_buffer.len() + chunk.len() > MAX_MARKDOWN_BODY_BYTES {
+ ctx.wants_markdown = false;
+ let mut flushed = std::mem::take(&mut ctx.markdown_buffer);
+ flushed.extend_from_slice(&chunk);
+ Some(Bytes::from(flushed))
+ } else {
+ ctx.markdown_buffer.extend_from_slice(&chunk);
+ let html = String::from_utf8_lossy(&ctx.markdown_buffer);
+ let document = scraper::Html::parse_document(&html);
+ let meta = extract_page_meta(&document);
+ let content = extract_content_html(&document);
+ let markdown = htmd::convert(&content).unwrap_or_default();
+ ctx.markdown_buffer = Vec::new();
+ let final_md = match meta.to_frontmatter() {
+ Some(fm) => fm + &markdown,
+ None => markdown,
+ };
+ Some(Bytes::from(final_md))
+ }
+ } else {
+ Some(Bytes::copy_from_slice(body))
+ };
+
+ (ctx, resp, body_out)
+ }
+
+ // ── Gate tests ────────────────────────────────────────────────────────────
+
+ #[test]
+ fn gate_allows_200_text_html() {
+ let mut ctx = make_ctx();
+ ctx.wants_markdown = true;
+ let mut resp = make_response(200, Some("text/html; charset=utf-8"));
+ apply_markdown_upstream_gate(&mut resp, &mut ctx);
+ assert!(ctx.wants_markdown, "200 text/html should be allowed");
+ assert_eq!(
+ resp.headers.get("vary").and_then(|v| v.to_str().ok()),
+ Some("Accept"),
+ "Vary: Accept must be set"
+ );
+ }
+
+ #[test]
+ fn gate_cancels_non_html_content_type() {
+ for ct in &[
+ "application/json",
+ "text/plain",
+ "image/png",
+ "application/octet-stream",
+ ] {
+ let mut ctx = make_ctx();
+ ctx.wants_markdown = true;
+ let mut resp = make_response(200, Some(ct));
+ apply_markdown_upstream_gate(&mut resp, &mut ctx);
+ assert!(
+ !ctx.wants_markdown,
+ "wants_markdown must be false for Content-Type: {}",
+ ct
+ );
+ }
+ }
+
+ #[test]
+ fn gate_cancels_missing_content_type() {
+ let mut ctx = make_ctx();
+ ctx.wants_markdown = true;
+ let mut resp = make_response(200, None);
+ apply_markdown_upstream_gate(&mut resp, &mut ctx);
+ assert!(
+ !ctx.wants_markdown,
+ "missing Content-Type must cancel conversion"
+ );
+ }
+
+ #[test]
+ fn gate_cancels_4xx_even_with_html() {
+ for status in &[400u16, 401, 403, 404, 422, 429] {
+ let mut ctx = make_ctx();
+ ctx.wants_markdown = true;
+ let mut resp = make_response(*status, Some("text/html; charset=utf-8"));
+ apply_markdown_upstream_gate(&mut resp, &mut ctx);
+ assert!(
+ !ctx.wants_markdown,
+ "wants_markdown must be false for status {}",
+ status
+ );
+ }
+ }
+
+ #[test]
+ fn gate_cancels_5xx_even_with_html() {
+ for status in &[500u16, 502, 503, 504] {
+ let mut ctx = make_ctx();
+ ctx.wants_markdown = true;
+ let mut resp = make_response(*status, Some("text/html; charset=utf-8"));
+ apply_markdown_upstream_gate(&mut resp, &mut ctx);
+ assert!(
+ !ctx.wants_markdown,
+ "wants_markdown must be false for status {}",
+ status
+ );
+ }
+ }
+
+ #[test]
+ fn gate_cancels_3xx_redirect() {
+ let mut ctx = make_ctx();
+ ctx.wants_markdown = true;
+ let mut resp = make_response(302, Some("text/html"));
+ apply_markdown_upstream_gate(&mut resp, &mut ctx);
+ assert!(!ctx.wants_markdown, "302 redirect should cancel conversion");
+ }
+
+ #[test]
+ fn gate_handles_uppercase_content_type() {
+ // Some upstreams send "TEXT/HTML" — must still be recognised.
+ let mut ctx = make_ctx();
+ ctx.wants_markdown = true;
+ let mut resp = make_response(200, Some("TEXT/HTML; CHARSET=UTF-8"));
+ apply_markdown_upstream_gate(&mut resp, &mut ctx);
+ assert!(ctx.wants_markdown, "uppercase TEXT/HTML must be allowed");
+ }
+
+ #[test]
+ fn gate_cancels_sse_even_with_html() {
+ let mut ctx = make_ctx();
+ ctx.wants_markdown = true;
+ ctx.is_sse = true;
+ let mut resp = make_response(200, Some("text/html"));
+ apply_markdown_upstream_gate(&mut resp, &mut ctx);
+ assert!(!ctx.wants_markdown, "SSE must cancel conversion");
+ }
+
+ #[test]
+ fn gate_cancels_websocket_even_with_html() {
+ let mut ctx = make_ctx();
+ ctx.wants_markdown = true;
+ ctx.is_websocket = true;
+ let mut resp = make_response(200, Some("text/html"));
+ apply_markdown_upstream_gate(&mut resp, &mut ctx);
+ assert!(!ctx.wants_markdown, "WebSocket must cancel conversion");
+ }
+
+    /// When the client never asked for markdown, the gate must leave the
+    /// response completely untouched — in particular, no Vary header.
+    #[test]
+    fn gate_noop_when_wants_markdown_false() {
+        let mut ctx = make_ctx(); // wants_markdown defaults to false
+        let mut resp = make_response(200, Some("text/html"));
+        apply_markdown_upstream_gate(&mut resp, &mut ctx);
+        assert!(!ctx.wants_markdown);
+        assert!(
+            resp.headers.get("vary").is_none(),
+            "Vary must NOT be added when wants_markdown is false"
+        );
+    }
+
+ // ── Header-rewrite tests ──────────────────────────────────────────────────
+
+    /// After conversion the Content-Type must become markdown, stale
+    /// body-framing headers must be dropped, and the token-count
+    /// placeholder must be present.
+    #[test]
+    fn header_rewrite_sets_markdown_content_type() {
+        let mut ctx = make_ctx();
+        ctx.wants_markdown = true;
+        let mut resp = make_response(200, Some("text/html; charset=utf-8"));
+        // Upstream framing headers that become invalid once the body is rewritten.
+        resp.insert_header("Content-Length", "1234").unwrap();
+        resp.insert_header("Content-Encoding", "gzip").unwrap();
+
+        apply_markdown_response_headers(&mut resp, &ctx);
+
+        // Small reader so each assertion stays on one line.
+        let header = |name: &str| {
+            resp.headers
+                .get(name)
+                .and_then(|v| v.to_str().ok())
+                .map(str::to_owned)
+        };
+        assert_eq!(
+            header("content-type").as_deref(),
+            Some("text/markdown; charset=utf-8")
+        );
+        assert!(
+            header("content-length").is_none(),
+            "Content-Length must be removed"
+        );
+        assert!(
+            header("content-encoding").is_none(),
+            "Content-Encoding must be removed"
+        );
+        assert_eq!(
+            header("x-markdown-tokens").as_deref(),
+            Some("0"),
+            "X-Markdown-Tokens placeholder must be present"
+        );
+    }
+
+    /// The header rewrite is a strict no-op when conversion was never requested.
+    #[test]
+    fn header_rewrite_noop_when_wants_markdown_false() {
+        let ctx = make_ctx(); // wants_markdown = false
+        let mut resp = make_response(200, Some("text/html"));
+        apply_markdown_response_headers(&mut resp, &ctx);
+        let content_type = resp
+            .headers
+            .get("content-type")
+            .and_then(|v| v.to_str().ok());
+        assert_eq!(
+            content_type,
+            Some("text/html"),
+            "Content-Type must be unchanged when wants_markdown is false"
+        );
+        assert!(resp.headers.get("x-markdown-tokens").is_none());
+    }
+
+ // ── Full pipeline tests ───────────────────────────────────────────────────
+
+    /// End-to-end happy path: an HTML 200 response is rewritten to Markdown
+    /// with the matching headers, and both heading and paragraph text survive.
+    #[test]
+    fn pipeline_converts_html_to_markdown() {
+        let mut ctx = make_ctx();
+        ctx.wants_markdown = true;
+        let resp = make_response(200, Some("text/html; charset=utf-8"));
+        // NOTE(review): literal reconstructed from the assertions below —
+        // the markup tags were lost in transit.
+        let html = b"<h1>Hello World</h1><p>A paragraph.</p>";
+
+        let (_ctx, out_resp, body) = run_pipeline(ctx, resp, html);
+
+        // Headers
+        assert_eq!(
+            out_resp
+                .headers
+                .get("content-type")
+                .and_then(|v| v.to_str().ok()),
+            Some("text/markdown; charset=utf-8")
+        );
+        assert!(out_resp.headers.get("x-markdown-tokens").is_some());
+
+        // Body
+        let md = String::from_utf8(body.unwrap().to_vec()).unwrap();
+        assert!(
+            md.contains("Hello World"),
+            "heading must appear in output: {}",
+            md
+        );
+        assert!(
+            md.contains("A paragraph"),
+            "paragraph must appear in output: {}",
+            md
+        );
+    }
+
+    /// JSON bodies pass through untouched even when the client asked for markdown.
+    #[test]
+    fn pipeline_passthrough_on_non_html_content_type() {
+        let mut ctx = make_ctx();
+        ctx.wants_markdown = true;
+        let resp = make_response(200, Some("application/json"));
+        let json = br#"{"key":"value"}"#;
+
+        let (ctx_after, response, body) = run_pipeline(ctx, resp, json);
+
+        assert!(
+            !ctx_after.wants_markdown,
+            "gate must have cancelled conversion"
+        );
+        let content_type = response
+            .headers
+            .get("content-type")
+            .and_then(|v| v.to_str().ok());
+        assert_eq!(
+            content_type,
+            Some("application/json"),
+            "Content-Type must be unchanged"
+        );
+        assert!(response.headers.get("x-markdown-tokens").is_none());
+        assert_eq!(body.unwrap().as_ref(), json);
+    }
+
+    /// No Content-Type at all means we cannot prove the body is HTML — pass through.
+    #[test]
+    fn pipeline_passthrough_on_missing_content_type() {
+        let mut ctx = make_ctx();
+        ctx.wants_markdown = true;
+        let resp = make_response(200, None);
+        let payload = b"some raw bytes";
+
+        let (ctx_after, response, body) = run_pipeline(ctx, resp, payload);
+
+        assert!(!ctx_after.wants_markdown);
+        assert!(response.headers.get("content-type").is_none());
+        assert!(response.headers.get("x-markdown-tokens").is_none());
+        assert_eq!(body.unwrap().as_ref(), payload);
+    }
+
+ #[test]
+ fn pipeline_passthrough_on_404() {
+ let mut ctx = make_ctx();
+ ctx.wants_markdown = true;
+ let html = b"Not Found
";
+ let resp = make_response(404, Some("text/html; charset=utf-8"));
+
+ let (final_ctx, out_resp, body) = run_pipeline(ctx, resp, html);
+
+ assert!(!final_ctx.wants_markdown, "404 must cancel conversion");
+ assert_eq!(
+ out_resp
+ .headers
+ .get("content-type")
+ .and_then(|v| v.to_str().ok()),
+ Some("text/html; charset=utf-8"),
+ "Content-Type must be unchanged for 404"
+ );
+ // Body must be the original HTML, not markdown
+ assert_eq!(body.unwrap().as_ref(), html);
+ }
+
+    /// 500 responses pass through with their original HTML body.
+    #[test]
+    fn pipeline_passthrough_on_500() {
+        let mut ctx = make_ctx();
+        ctx.wants_markdown = true;
+        // NOTE(review): literal reconstructed — markup tags lost in transit.
+        let html = b"<h1>Internal Error</h1>";
+        let resp = make_response(500, Some("text/html"));
+
+        let (final_ctx, _out_resp, body) = run_pipeline(ctx, resp, html);
+
+        assert!(!final_ctx.wants_markdown);
+        assert_eq!(body.unwrap().as_ref(), html);
+    }
+
+    /// Redirects pass through with Location, Content-Type, and (empty) body untouched.
+    #[test]
+    fn pipeline_passthrough_on_302_redirect() {
+        let mut ctx = make_ctx();
+        ctx.wants_markdown = true;
+        let mut resp = make_response(302, Some("text/html"));
+        resp.insert_header("Location", "https://example.com/new")
+            .unwrap();
+
+        let (ctx_after, response, body) = run_pipeline(ctx, resp, b"");
+
+        assert!(!ctx_after.wants_markdown);
+        let content_type = response
+            .headers
+            .get("content-type")
+            .and_then(|v| v.to_str().ok());
+        assert_eq!(content_type, Some("text/html"));
+        assert!(response.headers.get("x-markdown-tokens").is_none());
+        assert_eq!(body.unwrap().as_ref(), b"");
+    }
+
+    /// Without `Accept: text/markdown` the whole pipeline is a no-op:
+    /// wants_markdown stays false and headers/body are untouched.
+    #[test]
+    fn pipeline_passthrough_when_not_requesting_markdown() {
+        let ctx = make_ctx(); // wants_markdown = false
+        let resp = make_response(200, Some("text/html"));
+        // NOTE(review): literal reconstructed — markup tags lost in transit.
+        let html = b"<p>Hello</p>";
+
+        let (final_ctx, out_resp, body) = run_pipeline(ctx, resp, html);
+
+        assert!(!final_ctx.wants_markdown);
+        assert_eq!(
+            out_resp
+                .headers
+                .get("content-type")
+                .and_then(|v| v.to_str().ok()),
+            Some("text/html")
+        );
+        // Body unchanged
+        assert_eq!(body.unwrap().as_ref(), html);
+    }
+
+    /// An uppercase "TEXT/HTML" Content-Type must still be converted end-to-end.
+    #[test]
+    fn pipeline_converts_uppercase_content_type() {
+        let mut ctx = make_ctx();
+        ctx.wants_markdown = true;
+        let resp = make_response(200, Some("TEXT/HTML"));
+        // NOTE(review): literal reconstructed — markup tags lost in transit.
+        let html = b"<p>Content</p>";
+
+        let (_ctx, out_resp, body) = run_pipeline(ctx, resp, html);
+
+        assert_eq!(
+            out_resp
+                .headers
+                .get("content-type")
+                .and_then(|v| v.to_str().ok()),
+            Some("text/markdown; charset=utf-8")
+        );
+        let md = String::from_utf8(body.unwrap().to_vec()).unwrap();
+        assert!(
+            md.contains("Content"),
+            "body text must survive conversion: {}",
+            md
+        );
+    }
+
+    /// Bodies larger than MAX_MARKDOWN_BODY_BYTES skip conversion and are
+    /// returned byte-for-byte.
+    #[test]
+    fn pipeline_size_guard_passthrough_on_oversized_body() {
+        let mut ctx = make_ctx();
+        ctx.wants_markdown = true;
+        let resp = make_response(200, Some("text/html; charset=utf-8"));
+        let oversized = vec![b'x'; MAX_MARKDOWN_BODY_BYTES + 1];
+
+        let (ctx_after, _resp_after, body) = run_pipeline(ctx, resp, &oversized);
+
+        assert!(
+            !ctx_after.wants_markdown,
+            "size guard must disable conversion"
+        );
+        let returned = body.unwrap();
+        assert_eq!(
+            returned.len(),
+            oversized.len(),
+            "original bytes must be returned unchanged"
+        );
+    }
+
+    /// og:title / description meta tags become YAML frontmatter that
+    /// precedes the converted body text.
+    #[test]
+    fn pipeline_includes_frontmatter_when_meta_present() {
+        let mut ctx = make_ctx();
+        ctx.wants_markdown = true;
+        let resp = make_response(200, Some("text/html; charset=utf-8"));
+        // NOTE(review): HTML fixture reconstructed from the assertions below —
+        // the original tags were lost in transit.
+        let html = br#"<html>
+<head>
+<meta property="og:title" content="My Article">
+<meta name="description" content="A great read">
+</head>
+<body>
+<p>Body text.</p>
+</body>
+</html>"#;
+
+        let (_ctx, _out_resp, body) = run_pipeline(ctx, resp, html);
+        let md = String::from_utf8(body.unwrap().to_vec()).unwrap();
+
+        assert!(
+            md.starts_with("---\n"),
+            "output must start with YAML frontmatter"
+        );
+        assert!(
+            md.contains("title: My Article"),
+            "og:title must be in frontmatter"
+        );
+        assert!(
+            md.contains("description: A great read"),
+            "description must be in frontmatter"
+        );
+        assert!(
+            md.contains("Body text."),
+            "article body must appear after frontmatter"
+        );
+    }
+
+    /// Vary: Accept is added exactly when conversion proceeds — never when
+    /// the gate cancels it.
+    #[test]
+    fn pipeline_vary_header_set_only_on_conversion() {
+        // Conversion case: HTML 200 → Vary: Accept appears.
+        let mut ctx_yes = make_ctx();
+        ctx_yes.wants_markdown = true;
+        let mut resp_yes = make_response(200, Some("text/html"));
+        apply_markdown_upstream_gate(&mut resp_yes, &mut ctx_yes);
+        let vary = resp_yes.headers.get("vary").and_then(|v| v.to_str().ok());
+        assert_eq!(vary, Some("Accept"));
+
+        // Cancelled case: JSON → no Vary header.
+        let mut ctx_no = make_ctx();
+        ctx_no.wants_markdown = true;
+        let mut resp_no = make_response(200, Some("application/json"));
+        apply_markdown_upstream_gate(&mut resp_no, &mut ctx_no);
+        assert!(
+            resp_no.headers.get("vary").is_none(),
+            "Vary must NOT be added when conversion is cancelled"
+        );
+    }
+}