diff --git a/CHANGELOG.md b/CHANGELOG.md index 8adf055..d2c26ee 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,18 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Changed + +#### Spec Serialization Compliance +- **Breaking:** `Mark::Math { value }` field renamed to `Mark::Math { source }` to match spec +- **Breaking:** Simple marks (Bold, Italic, etc.) now serialize as strings (`"bold"`) instead of objects (`{"type":"bold"}`) +- **Breaking:** Extension marks serialize with colon-delimited type (`"semantic:citation"`) instead of wrapper (`{"type":"extension","namespace":"semantic","markType":"citation"}`) +- **Breaking:** Extension blocks serialize with colon-delimited type (`"academic:theorem"`) instead of wrapper format +- **Breaking:** `Block::block_type()` returns `Cow<'_, str>` instead of `&'static str`; extension blocks return `"namespace:blockType"` instead of `"extension"` +- `FigCaption` block type serializes as `"figcaption"` (lowercase) instead of `"figCaption"` +- All old formats are accepted on deserialization for backward compatibility +- Added conformance test suite (`tests/conformance.rs`) to prevent future spec drift + ## [0.4.0] - 2026-02-16 ### Added diff --git a/cdx-cli/src/commands/inspect.rs b/cdx-cli/src/commands/inspect.rs index b22324f..87d23b7 100644 --- a/cdx-cli/src/commands/inspect.rs +++ b/cdx-cli/src/commands/inspect.rs @@ -126,7 +126,7 @@ fn display_text( println!( " {}. {}", i + 1, - format_block_type(block.block_type()).cyan(), + format_block_type(&block.block_type()).cyan(), ); } } @@ -188,7 +188,7 @@ fn format_block_type(block_type: &str) -> String { "svg" => "SVG", "barcode" => "Barcode", "figure" => "Figure", - "figCaption" => "Figure Caption", + "figcaption" => "Figure Caption", other => other, } .to_string() diff --git a/cdx-core/src/content/block.rs b/cdx-core/src/content/block.rs index 496d6c5..0e572d8 100644 --- a/cdx-core/src/content/block.rs +++ b/cdx-core/src/content/block.rs @@ -1,5 +1,9 @@ //! Content block types. +use std::borrow::Cow; + +use serde::de; +use serde::ser::SerializeMap; use serde::{Deserialize, Serialize}; use super::Text; @@ -82,27 +86,29 @@ impl BlockAttributes { /// /// Blocks are the structural elements of a document, containing /// either other blocks (containers) or text content (leaves). -#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -#[serde(tag = "type", rename_all = "camelCase")] +/// +/// # Serialization +/// +/// Blocks serialize as JSON objects with a `"type"` field. Core block types +/// use camelCase names (e.g., `"paragraph"`, `"codeBlock"`). Extension blocks +/// use colon-delimited types (e.g., `"forms:textInput"`, `"academic:theorem"`). +#[derive(Debug, Clone, PartialEq)] pub enum Block { /// Standard paragraph block. Paragraph { /// Optional unique identifier. - #[serde(default, skip_serializing_if = "Option::is_none")] id: Option, /// Text content. children: Vec, /// Block attributes. - #[serde(default, skip_serializing_if = "BlockAttributes::is_empty")] attributes: BlockAttributes, }, /// Section heading (levels 1-6). Heading { /// Optional unique identifier. - #[serde(default, skip_serializing_if = "Option::is_none")] id: Option, /// Heading level (1-6). @@ -112,93 +118,78 @@ pub enum Block { children: Vec, /// Block attributes. - #[serde(default, skip_serializing_if = "BlockAttributes::is_empty")] attributes: BlockAttributes, }, /// Ordered or unordered list. List { /// Optional unique identifier. - #[serde(default, skip_serializing_if = "Option::is_none")] id: Option, /// Whether the list is ordered (numbered). ordered: bool, /// Starting number for ordered lists. - #[serde(default, skip_serializing_if = "Option::is_none")] start: Option, /// List items (must be `ListItem` blocks). children: Vec, /// Block attributes. - #[serde(default, skip_serializing_if = "BlockAttributes::is_empty")] attributes: BlockAttributes, }, /// Item within a list. ListItem { /// Optional unique identifier. - #[serde(default, skip_serializing_if = "Option::is_none")] id: Option, /// Checkbox state (None = not a checkbox). - #[serde(default, skip_serializing_if = "Option::is_none")] checked: Option, /// Block content. children: Vec, /// Block attributes. - #[serde(default, skip_serializing_if = "BlockAttributes::is_empty")] attributes: BlockAttributes, }, /// Quoted content block. Blockquote { /// Optional unique identifier. - #[serde(default, skip_serializing_if = "Option::is_none")] id: Option, /// Block content. children: Vec, /// Block attributes. - #[serde(default, skip_serializing_if = "BlockAttributes::is_empty")] attributes: BlockAttributes, }, /// Source code or preformatted text. CodeBlock { /// Optional unique identifier. - #[serde(default, skip_serializing_if = "Option::is_none")] id: Option, /// Programming language identifier. - #[serde(default, skip_serializing_if = "Option::is_none")] language: Option, /// Syntax highlighting theme. - #[serde(default, skip_serializing_if = "Option::is_none")] highlighting: Option, /// Pre-tokenized syntax highlighting. - #[serde(default, skip_serializing_if = "Option::is_none")] tokens: Option>, /// Code content (single text node, no marks). children: Vec, /// Block attributes. - #[serde(default, skip_serializing_if = "BlockAttributes::is_empty")] attributes: BlockAttributes, }, /// Thematic break between sections. HorizontalRule { /// Optional unique identifier. - #[serde(default, skip_serializing_if = "Option::is_none")] id: Option, }, @@ -208,32 +199,27 @@ pub enum Block { /// Tabular data. Table { /// Optional unique identifier. - #[serde(default, skip_serializing_if = "Option::is_none")] id: Option, /// Table rows. children: Vec, /// Block attributes. - #[serde(default, skip_serializing_if = "BlockAttributes::is_empty")] attributes: BlockAttributes, }, /// Row within a table. TableRow { /// Optional unique identifier. - #[serde(default, skip_serializing_if = "Option::is_none")] id: Option, /// Whether this is a header row. - #[serde(default, skip_serializing_if = "std::ops::Not::not")] header: bool, /// Table cells. children: Vec, /// Block attributes. - #[serde(default, skip_serializing_if = "BlockAttributes::is_empty")] attributes: BlockAttributes, }, @@ -246,7 +232,6 @@ pub enum Block { /// Line break within a block. Break { /// Optional unique identifier. - #[serde(default, skip_serializing_if = "Option::is_none")] id: Option, }, @@ -256,42 +241,36 @@ pub enum Block { /// Definition item (term + description pair). DefinitionItem { /// Optional unique identifier. - #[serde(default, skip_serializing_if = "Option::is_none")] id: Option, /// Children (typically `DefinitionTerm` and `DefinitionDescription`). children: Vec, /// Block attributes. - #[serde(default, skip_serializing_if = "BlockAttributes::is_empty")] attributes: BlockAttributes, }, /// Definition term. DefinitionTerm { /// Optional unique identifier. - #[serde(default, skip_serializing_if = "Option::is_none")] id: Option, /// Term text content. children: Vec, /// Block attributes. - #[serde(default, skip_serializing_if = "BlockAttributes::is_empty")] attributes: BlockAttributes, }, /// Definition description. DefinitionDescription { /// Optional unique identifier. - #[serde(default, skip_serializing_if = "Option::is_none")] id: Option, /// Description content (blocks). children: Vec, /// Block attributes. - #[serde(default, skip_serializing_if = "BlockAttributes::is_empty")] attributes: BlockAttributes, }, @@ -1057,6 +1036,736 @@ impl DefinitionListBlock { } } +/// Helper: serialize a Block variant by converting to Value, injecting "type", then serializing. +fn serialize_block_as_map( + type_str: &str, + value: &serde_json::Value, + serializer: S, +) -> Result { + use serde::ser::Error; + let obj = value + .as_object() + .ok_or_else(|| S::Error::custom("expected object"))?; + let mut map = serializer.serialize_map(Some(1 + obj.len()))?; + map.serialize_entry("type", type_str)?; + for (k, v) in obj { + map.serialize_entry(k, v)?; + } + map.end() +} + +/// Helper struct for serializing inline struct variants (Paragraph, Heading, etc.). +/// These don't have their own named struct, so we serialize field-by-field. +#[derive(Serialize)] +struct InlineParagraph<'a> { + #[serde(skip_serializing_if = "Option::is_none")] + id: &'a Option, + children: &'a Vec, + #[serde(skip_serializing_if = "BlockAttributes::is_empty")] + attributes: &'a BlockAttributes, +} + +#[derive(Serialize)] +struct InlineHeading<'a> { + #[serde(skip_serializing_if = "Option::is_none")] + id: &'a Option, + level: u8, + children: &'a Vec, + #[serde(skip_serializing_if = "BlockAttributes::is_empty")] + attributes: &'a BlockAttributes, +} + +#[derive(Serialize)] +struct InlineList<'a> { + #[serde(skip_serializing_if = "Option::is_none")] + id: &'a Option, + ordered: bool, + #[serde(skip_serializing_if = "Option::is_none")] + start: &'a Option, + children: &'a Vec, + #[serde(skip_serializing_if = "BlockAttributes::is_empty")] + attributes: &'a BlockAttributes, +} + +#[derive(Serialize)] +#[serde(rename_all = "camelCase")] +struct InlineListItem<'a> { + #[serde(skip_serializing_if = "Option::is_none")] + id: &'a Option, + #[serde(skip_serializing_if = "Option::is_none")] + checked: &'a Option, + children: &'a Vec, + #[serde(skip_serializing_if = "BlockAttributes::is_empty")] + attributes: &'a BlockAttributes, +} + +#[derive(Serialize)] +struct InlineContainer<'a> { + #[serde(skip_serializing_if = "Option::is_none")] + id: &'a Option, + children: &'a Vec, + #[serde(skip_serializing_if = "BlockAttributes::is_empty")] + attributes: &'a BlockAttributes, +} + +#[derive(Serialize)] +#[serde(rename_all = "camelCase")] +struct InlineCodeBlock<'a> { + #[serde(skip_serializing_if = "Option::is_none")] + id: &'a Option, + #[serde(skip_serializing_if = "Option::is_none")] + language: &'a Option, + #[serde(skip_serializing_if = "Option::is_none")] + highlighting: &'a Option, + #[serde(skip_serializing_if = "Option::is_none")] + tokens: &'a Option>, + children: &'a Vec, + #[serde(skip_serializing_if = "BlockAttributes::is_empty")] + attributes: &'a BlockAttributes, +} + +#[derive(Serialize)] +struct InlineTableRow<'a> { + #[serde(skip_serializing_if = "Option::is_none")] + id: &'a Option, + #[serde(skip_serializing_if = "std::ops::Not::not")] + header: bool, + children: &'a Vec, + #[serde(skip_serializing_if = "BlockAttributes::is_empty")] + attributes: &'a BlockAttributes, +} + +#[derive(Serialize)] +struct InlineTextContainer<'a> { + #[serde(skip_serializing_if = "Option::is_none")] + id: &'a Option, + children: &'a Vec, + #[serde(skip_serializing_if = "BlockAttributes::is_empty")] + attributes: &'a BlockAttributes, +} + +#[derive(Serialize)] +struct InlineIdOnly<'a> { + #[serde(skip_serializing_if = "Option::is_none")] + id: &'a Option, +} + +impl Serialize for Block { + #[allow(clippy::too_many_lines)] + fn serialize(&self, serializer: S) -> Result { + use serde::ser::Error; + + match self { + Self::Paragraph { + id, + children, + attributes, + } => { + let inner = InlineParagraph { + id, + children, + attributes, + }; + let val = serde_json::to_value(&inner).map_err(S::Error::custom)?; + serialize_block_as_map("paragraph", &val, serializer) + } + Self::Heading { + id, + level, + children, + attributes, + } => { + let inner = InlineHeading { + id, + level: *level, + children, + attributes, + }; + let val = serde_json::to_value(&inner).map_err(S::Error::custom)?; + serialize_block_as_map("heading", &val, serializer) + } + Self::List { + id, + ordered, + start, + children, + attributes, + } => { + let inner = InlineList { + id, + ordered: *ordered, + start, + children, + attributes, + }; + let val = serde_json::to_value(&inner).map_err(S::Error::custom)?; + serialize_block_as_map("list", &val, serializer) + } + Self::ListItem { + id, + checked, + children, + attributes, + } => { + let inner = InlineListItem { + id, + checked, + children, + attributes, + }; + let val = serde_json::to_value(&inner).map_err(S::Error::custom)?; + serialize_block_as_map("listItem", &val, serializer) + } + Self::Blockquote { + id, + children, + attributes, + } => { + let inner = InlineContainer { + id, + children, + attributes, + }; + let val = serde_json::to_value(&inner).map_err(S::Error::custom)?; + serialize_block_as_map("blockquote", &val, serializer) + } + Self::CodeBlock { + id, + language, + highlighting, + tokens, + children, + attributes, + } => { + let inner = InlineCodeBlock { + id, + language, + highlighting, + tokens, + children, + attributes, + }; + let val = serde_json::to_value(&inner).map_err(S::Error::custom)?; + serialize_block_as_map("codeBlock", &val, serializer) + } + Self::HorizontalRule { id } => { + let inner = InlineIdOnly { id }; + let val = serde_json::to_value(&inner).map_err(S::Error::custom)?; + serialize_block_as_map("horizontalRule", &val, serializer) + } + Self::Image(img) => { + let val = serde_json::to_value(img).map_err(S::Error::custom)?; + serialize_block_as_map("image", &val, serializer) + } + Self::Table { + id, + children, + attributes, + } => { + let inner = InlineContainer { + id, + children, + attributes, + }; + let val = serde_json::to_value(&inner).map_err(S::Error::custom)?; + serialize_block_as_map("table", &val, serializer) + } + Self::TableRow { + id, + header, + children, + attributes, + } => { + let inner = InlineTableRow { + id, + header: *header, + children, + attributes, + }; + let val = serde_json::to_value(&inner).map_err(S::Error::custom)?; + serialize_block_as_map("tableRow", &val, serializer) + } + Self::TableCell(cell) => { + let val = serde_json::to_value(cell).map_err(S::Error::custom)?; + serialize_block_as_map("tableCell", &val, serializer) + } + Self::Math(math) => { + let val = serde_json::to_value(math).map_err(S::Error::custom)?; + serialize_block_as_map("math", &val, serializer) + } + Self::Break { id } => { + let inner = InlineIdOnly { id }; + let val = serde_json::to_value(&inner).map_err(S::Error::custom)?; + serialize_block_as_map("break", &val, serializer) + } + Self::DefinitionList(dl) => { + let val = serde_json::to_value(dl).map_err(S::Error::custom)?; + serialize_block_as_map("definitionList", &val, serializer) + } + Self::DefinitionItem { + id, + children, + attributes, + } => { + let inner = InlineContainer { + id, + children, + attributes, + }; + let val = serde_json::to_value(&inner).map_err(S::Error::custom)?; + serialize_block_as_map("definitionItem", &val, serializer) + } + Self::DefinitionTerm { + id, + children, + attributes, + } => { + let inner = InlineTextContainer { + id, + children, + attributes, + }; + let val = serde_json::to_value(&inner).map_err(S::Error::custom)?; + serialize_block_as_map("definitionTerm", &val, serializer) + } + Self::DefinitionDescription { + id, + children, + attributes, + } => { + let inner = InlineContainer { + id, + children, + attributes, + }; + let val = serde_json::to_value(&inner).map_err(S::Error::custom)?; + serialize_block_as_map("definitionDescription", &val, serializer) + } + Self::Measurement(m) => { + let val = serde_json::to_value(m).map_err(S::Error::custom)?; + serialize_block_as_map("measurement", &val, serializer) + } + Self::Signature(sig) => { + let val = serde_json::to_value(sig).map_err(S::Error::custom)?; + serialize_block_as_map("signature", &val, serializer) + } + Self::Svg(svg) => { + let val = serde_json::to_value(svg).map_err(S::Error::custom)?; + serialize_block_as_map("svg", &val, serializer) + } + Self::Barcode(bc) => { + let val = serde_json::to_value(bc).map_err(S::Error::custom)?; + serialize_block_as_map("barcode", &val, serializer) + } + Self::Figure(fig) => { + let val = serde_json::to_value(fig).map_err(S::Error::custom)?; + serialize_block_as_map("figure", &val, serializer) + } + Self::FigCaption(fc) => { + let val = serde_json::to_value(fc).map_err(S::Error::custom)?; + serialize_block_as_map("figcaption", &val, serializer) + } + Self::Admonition(adm) => { + let val = serde_json::to_value(adm).map_err(S::Error::custom)?; + serialize_block_as_map("admonition", &val, serializer) + } + Self::Extension(ext) => { + // Extension blocks: type is "namespace:blockType", flatten id/children/fallback/attributes + let type_str = ext.full_type(); + let attr_count = ext.attributes.as_object().map_or(0, serde_json::Map::len); + let mut count = 1; // type + if ext.id.is_some() { + count += 1; + } + if !ext.children.is_empty() { + count += 1; + } + if ext.fallback.is_some() { + count += 1; + } + count += attr_count; + + let mut map = serializer.serialize_map(Some(count))?; + map.serialize_entry("type", &type_str)?; + if let Some(id) = &ext.id { + map.serialize_entry("id", id)?; + } + if !ext.children.is_empty() { + map.serialize_entry("children", &ext.children)?; + } + if let Some(fallback) = &ext.fallback { + map.serialize_entry("fallback", fallback)?; + } + if let Some(obj) = ext.attributes.as_object() { + for (k, v) in obj { + map.serialize_entry(k, v)?; + } + } + map.end() + } + } + } +} + +impl<'de> Deserialize<'de> for Block { + #[allow(clippy::too_many_lines)] + fn deserialize>(deserializer: D) -> Result { + // Deserialize into a generic Value first, then dispatch based on "type" + let mut value = serde_json::Value::deserialize(deserializer)?; + + let obj = value + .as_object_mut() + .ok_or_else(|| de::Error::custom("block must be an object"))?; + + let type_str = obj + .get("type") + .and_then(serde_json::Value::as_str) + .ok_or_else(|| de::Error::missing_field("type"))? + .to_string(); + + // Remove "type" before deserializing into inner structs (they don't expect it) + obj.remove("type"); + + match type_str.as_str() { + "paragraph" => { + #[derive(Deserialize)] + struct Inner { + #[serde(default)] + id: Option, + #[serde(default)] + children: Vec, + #[serde(default)] + attributes: BlockAttributes, + } + let inner: Inner = serde_json::from_value(value).map_err(de::Error::custom)?; + Ok(Block::Paragraph { + id: inner.id, + children: inner.children, + attributes: inner.attributes, + }) + } + "heading" => { + #[derive(Deserialize)] + struct Inner { + #[serde(default)] + id: Option, + level: u8, + #[serde(default)] + children: Vec, + #[serde(default)] + attributes: BlockAttributes, + } + let inner: Inner = serde_json::from_value(value).map_err(de::Error::custom)?; + Ok(Block::Heading { + id: inner.id, + level: inner.level, + children: inner.children, + attributes: inner.attributes, + }) + } + "list" => { + #[derive(Deserialize)] + struct Inner { + #[serde(default)] + id: Option, + #[serde(default)] + ordered: bool, + #[serde(default)] + start: Option, + #[serde(default)] + children: Vec, + #[serde(default)] + attributes: BlockAttributes, + } + let inner: Inner = serde_json::from_value(value).map_err(de::Error::custom)?; + Ok(Block::List { + id: inner.id, + ordered: inner.ordered, + start: inner.start, + children: inner.children, + attributes: inner.attributes, + }) + } + "listItem" => { + #[derive(Deserialize)] + struct Inner { + #[serde(default)] + id: Option, + #[serde(default)] + checked: Option, + #[serde(default)] + children: Vec, + #[serde(default)] + attributes: BlockAttributes, + } + let inner: Inner = serde_json::from_value(value).map_err(de::Error::custom)?; + Ok(Block::ListItem { + id: inner.id, + checked: inner.checked, + children: inner.children, + attributes: inner.attributes, + }) + } + "blockquote" => { + #[derive(Deserialize)] + struct Inner { + #[serde(default)] + id: Option, + #[serde(default)] + children: Vec, + #[serde(default)] + attributes: BlockAttributes, + } + let inner: Inner = serde_json::from_value(value).map_err(de::Error::custom)?; + Ok(Block::Blockquote { + id: inner.id, + children: inner.children, + attributes: inner.attributes, + }) + } + "codeBlock" => { + #[derive(Deserialize)] + struct Inner { + #[serde(default)] + id: Option, + #[serde(default)] + language: Option, + #[serde(default)] + highlighting: Option, + #[serde(default)] + tokens: Option>, + #[serde(default)] + children: Vec, + #[serde(default)] + attributes: BlockAttributes, + } + let inner: Inner = serde_json::from_value(value).map_err(de::Error::custom)?; + Ok(Block::CodeBlock { + id: inner.id, + language: inner.language, + highlighting: inner.highlighting, + tokens: inner.tokens, + children: inner.children, + attributes: inner.attributes, + }) + } + "horizontalRule" => { + #[derive(Deserialize)] + struct Inner { + #[serde(default)] + id: Option, + } + let inner: Inner = serde_json::from_value(value).map_err(de::Error::custom)?; + Ok(Block::HorizontalRule { id: inner.id }) + } + "image" => { + let img: ImageBlock = serde_json::from_value(value).map_err(de::Error::custom)?; + Ok(Block::Image(img)) + } + "table" => { + #[derive(Deserialize)] + struct Inner { + #[serde(default)] + id: Option, + #[serde(default)] + children: Vec, + #[serde(default)] + attributes: BlockAttributes, + } + let inner: Inner = serde_json::from_value(value).map_err(de::Error::custom)?; + Ok(Block::Table { + id: inner.id, + children: inner.children, + attributes: inner.attributes, + }) + } + "tableRow" => { + #[derive(Deserialize)] + struct Inner { + #[serde(default)] + id: Option, + #[serde(default)] + header: bool, + #[serde(default)] + children: Vec, + #[serde(default)] + attributes: BlockAttributes, + } + let inner: Inner = serde_json::from_value(value).map_err(de::Error::custom)?; + Ok(Block::TableRow { + id: inner.id, + header: inner.header, + children: inner.children, + attributes: inner.attributes, + }) + } + "tableCell" => { + let cell: TableCellBlock = + serde_json::from_value(value).map_err(de::Error::custom)?; + Ok(Block::TableCell(cell)) + } + "math" => { + let math: MathBlock = serde_json::from_value(value).map_err(de::Error::custom)?; + Ok(Block::Math(math)) + } + "break" => { + #[derive(Deserialize)] + struct Inner { + #[serde(default)] + id: Option, + } + let inner: Inner = serde_json::from_value(value).map_err(de::Error::custom)?; + Ok(Block::Break { id: inner.id }) + } + "definitionList" => { + let dl: DefinitionListBlock = + serde_json::from_value(value).map_err(de::Error::custom)?; + Ok(Block::DefinitionList(dl)) + } + "definitionItem" => { + #[derive(Deserialize)] + struct Inner { + #[serde(default)] + id: Option, + #[serde(default)] + children: Vec, + #[serde(default)] + attributes: BlockAttributes, + } + let inner: Inner = serde_json::from_value(value).map_err(de::Error::custom)?; + Ok(Block::DefinitionItem { + id: inner.id, + children: inner.children, + attributes: inner.attributes, + }) + } + "definitionTerm" => { + #[derive(Deserialize)] + struct Inner { + #[serde(default)] + id: Option, + #[serde(default)] + children: Vec, + #[serde(default)] + attributes: BlockAttributes, + } + let inner: Inner = serde_json::from_value(value).map_err(de::Error::custom)?; + Ok(Block::DefinitionTerm { + id: inner.id, + children: inner.children, + attributes: inner.attributes, + }) + } + "definitionDescription" => { + #[derive(Deserialize)] + struct Inner { + #[serde(default)] + id: Option, + #[serde(default)] + children: Vec, + #[serde(default)] + attributes: BlockAttributes, + } + let inner: Inner = serde_json::from_value(value).map_err(de::Error::custom)?; + Ok(Block::DefinitionDescription { + id: inner.id, + children: inner.children, + attributes: inner.attributes, + }) + } + "measurement" => { + let m: MeasurementBlock = + serde_json::from_value(value).map_err(de::Error::custom)?; + Ok(Block::Measurement(m)) + } + "signature" => { + let sig: SignatureBlock = + serde_json::from_value(value).map_err(de::Error::custom)?; + Ok(Block::Signature(sig)) + } + "svg" => { + let svg: SvgBlock = serde_json::from_value(value).map_err(de::Error::custom)?; + Ok(Block::Svg(svg)) + } + "barcode" => { + let bc: BarcodeBlock = serde_json::from_value(value).map_err(de::Error::custom)?; + Ok(Block::Barcode(bc)) + } + "figure" => { + let fig: FigureBlock = serde_json::from_value(value).map_err(de::Error::custom)?; + Ok(Block::Figure(fig)) + } + "figcaption" | "figCaption" => { + // Accept both "figcaption" (spec) and "figCaption" (old format) + let fc: FigCaptionBlock = + serde_json::from_value(value).map_err(de::Error::custom)?; + Ok(Block::FigCaption(fc)) + } + "admonition" => { + let adm: AdmonitionBlock = + serde_json::from_value(value).map_err(de::Error::custom)?; + Ok(Block::Admonition(adm)) + } + + // Old format backward compat: {"type": "extension", "namespace": "...", "blockType": "..."} + "extension" => { + let ext: ExtensionBlock = + serde_json::from_value(value).map_err(de::Error::custom)?; + Ok(Block::Extension(ext)) + } + + // Colon-delimited extension type (new format) + other if other.contains(':') => { + let (namespace, block_type) = other.split_once(':').unwrap(); + let obj = value + .as_object() + .ok_or_else(|| de::Error::custom("expected object"))?; + + let id = obj + .get("id") + .and_then(serde_json::Value::as_str) + .map(ToString::to_string); + let children: Vec = obj + .get("children") + .map(|v| serde_json::from_value(v.clone())) + .transpose() + .map_err(de::Error::custom)? + .unwrap_or_default(); + let fallback: Option> = obj + .get("fallback") + .map(|v| serde_json::from_value(v.clone())) + .transpose() + .map_err(de::Error::custom)?; + + // Collect remaining keys as attributes + let reserved = ["id", "children", "fallback"]; + let mut attrs = serde_json::Map::new(); + for (k, v) in obj { + if !reserved.contains(&k.as_str()) { + attrs.insert(k.clone(), v.clone()); + } + } + let attributes = if attrs.is_empty() { + serde_json::Value::Null + } else { + serde_json::Value::Object(attrs) + }; + + Ok(Block::Extension(ExtensionBlock { + namespace: namespace.to_string(), + block_type: block_type.to_string(), + id, + attributes, + children, + fallback, + })) + } + + unknown => Err(de::Error::custom(format!("unknown block type: {unknown}"))), + } + } +} + // Convenience constructors impl Block { /// Create a paragraph block. @@ -1305,36 +2014,36 @@ impl Block { /// Get the block type as a string. /// - /// For extension blocks, this returns "extension". Use [`ExtensionBlock::full_type()`] - /// to get the namespaced type like "forms:textInput". + /// For core blocks, returns the camelCase type name. + /// For extension blocks, returns the colon-delimited type (e.g., `"forms:textInput"`). #[must_use] - pub fn block_type(&self) -> &'static str { + pub fn block_type(&self) -> Cow<'_, str> { match self { - Self::Paragraph { .. } => "paragraph", - Self::Heading { .. } => "heading", - Self::List { .. } => "list", - Self::ListItem { .. } => "listItem", - Self::Blockquote { .. } => "blockquote", - Self::CodeBlock { .. } => "codeBlock", - Self::HorizontalRule { .. } => "horizontalRule", - Self::Image(_) => "image", - Self::Table { .. } => "table", - Self::TableRow { .. } => "tableRow", - Self::TableCell(_) => "tableCell", - Self::Math(_) => "math", - Self::Break { .. } => "break", - Self::DefinitionList(_) => "definitionList", - Self::DefinitionItem { .. } => "definitionItem", - Self::DefinitionTerm { .. } => "definitionTerm", - Self::DefinitionDescription { .. } => "definitionDescription", - Self::Measurement(_) => "measurement", - Self::Signature(_) => "signature", - Self::Svg(_) => "svg", - Self::Barcode(_) => "barcode", - Self::Figure(_) => "figure", - Self::FigCaption(_) => "figCaption", - Self::Admonition(_) => "admonition", - Self::Extension(_) => "extension", + Self::Paragraph { .. } => Cow::Borrowed("paragraph"), + Self::Heading { .. } => Cow::Borrowed("heading"), + Self::List { .. } => Cow::Borrowed("list"), + Self::ListItem { .. } => Cow::Borrowed("listItem"), + Self::Blockquote { .. } => Cow::Borrowed("blockquote"), + Self::CodeBlock { .. } => Cow::Borrowed("codeBlock"), + Self::HorizontalRule { .. } => Cow::Borrowed("horizontalRule"), + Self::Image(_) => Cow::Borrowed("image"), + Self::Table { .. } => Cow::Borrowed("table"), + Self::TableRow { .. } => Cow::Borrowed("tableRow"), + Self::TableCell(_) => Cow::Borrowed("tableCell"), + Self::Math(_) => Cow::Borrowed("math"), + Self::Break { .. } => Cow::Borrowed("break"), + Self::DefinitionList(_) => Cow::Borrowed("definitionList"), + Self::DefinitionItem { .. } => Cow::Borrowed("definitionItem"), + Self::DefinitionTerm { .. } => Cow::Borrowed("definitionTerm"), + Self::DefinitionDescription { .. } => Cow::Borrowed("definitionDescription"), + Self::Measurement(_) => Cow::Borrowed("measurement"), + Self::Signature(_) => Cow::Borrowed("signature"), + Self::Svg(_) => Cow::Borrowed("svg"), + Self::Barcode(_) => Cow::Borrowed("barcode"), + Self::Figure(_) => Cow::Borrowed("figure"), + Self::FigCaption(_) => Cow::Borrowed("figcaption"), + Self::Admonition(_) => Cow::Borrowed("admonition"), + Self::Extension(ext) => Cow::Owned(ext.full_type()), } } @@ -1535,7 +2244,7 @@ mod tests { fn test_extension_block() { let ext = Block::extension("forms", "textInput"); assert!(ext.is_extension()); - assert_eq!(ext.block_type(), "extension"); + assert_eq!(ext.block_type(), "forms:textInput"); if let Block::Extension(inner) = &ext { assert_eq!(inner.namespace, "forms"); @@ -1687,7 +2396,7 @@ mod tests { #[test] fn test_figcaption() { let fc = Block::figcaption(vec![Text::plain("Caption text")]); - assert_eq!(fc.block_type(), "figCaption"); + assert_eq!(fc.block_type(), "figcaption"); } #[test] diff --git a/cdx-core/src/content/text.rs b/cdx-core/src/content/text.rs index e64bef3..24eb787 100644 --- a/cdx-core/src/content/text.rs +++ b/cdx-core/src/content/text.rs @@ -1,5 +1,7 @@ //! Text nodes and formatting marks. +use serde::de::{self, MapAccess, Visitor}; +use serde::ser::SerializeMap; use serde::{Deserialize, Serialize}; use crate::content::block::MathFormat; @@ -105,8 +107,13 @@ impl Text { /// /// Marks represent inline formatting such as bold, italic, links, etc. /// Multiple marks can be applied to the same text node. -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] -#[serde(tag = "type", rename_all = "camelCase")] +/// +/// # Serialization +/// +/// Simple marks (Bold, Italic, etc.) serialize as plain strings: `"bold"`. +/// Complex marks (Link, Math, etc.) serialize as objects with a `"type"` field. +/// Extension marks serialize with their namespaced type as `"type"`. +#[derive(Debug, Clone, PartialEq, Eq)] pub enum Mark { /// Bold/strong text. Bold, @@ -135,7 +142,6 @@ pub enum Mark { href: String, /// Optional link title. - #[serde(default, skip_serializing_if = "Option::is_none")] title: Option, }, @@ -153,7 +159,6 @@ pub enum Mark { number: u32, /// Optional unique identifier for cross-referencing. - #[serde(default, skip_serializing_if = "Option::is_none")] id: Option, }, @@ -162,8 +167,8 @@ pub enum Mark { /// Math format (latex or mathml). format: MathFormat, - /// The mathematical expression. - value: String, + /// The mathematical expression source. + source: String, }, /// Extension mark for custom/unknown mark types. @@ -422,6 +427,247 @@ impl ExtensionMark { } } +/// Infer the extension namespace from a mark type string. +/// +/// Used during deserialization when an unknown type string is encountered +/// without explicit namespace information. +fn infer_mark_namespace(mark_type: &str) -> &'static str { + match mark_type { + "citation" | "entity" | "glossary" => "semantic", + "theorem-ref" | "equation-ref" | "algorithm-ref" => "academic", + "cite" => "legal", + "highlight" => "collaboration", + "index" => "presentation", + _ => "", + } +} + +impl Serialize for Mark { + fn serialize(&self, serializer: S) -> Result { + match self { + // Simple marks serialize as plain strings + Self::Bold => serializer.serialize_str("bold"), + Self::Italic => serializer.serialize_str("italic"), + Self::Underline => serializer.serialize_str("underline"), + Self::Strikethrough => serializer.serialize_str("strikethrough"), + Self::Code => serializer.serialize_str("code"), + Self::Superscript => serializer.serialize_str("superscript"), + Self::Subscript => serializer.serialize_str("subscript"), + + // Complex marks serialize as objects with "type" field + Self::Link { href, title } => { + let len = 2 + usize::from(title.is_some()); + let mut map = serializer.serialize_map(Some(len))?; + map.serialize_entry("type", "link")?; + map.serialize_entry("href", href)?; + if let Some(t) = title { + map.serialize_entry("title", t)?; + } + map.end() + } + Self::Anchor { id } => { + let mut map = serializer.serialize_map(Some(2))?; + map.serialize_entry("type", "anchor")?; + map.serialize_entry("id", id)?; + map.end() + } + Self::Footnote { number, id } => { + let len = 2 + usize::from(id.is_some()); + let mut map = serializer.serialize_map(Some(len))?; + map.serialize_entry("type", "footnote")?; + map.serialize_entry("number", number)?; + if let Some(i) = id { + map.serialize_entry("id", i)?; + } + map.end() + } + Self::Math { format, source } => { + let mut map = serializer.serialize_map(Some(3))?; + map.serialize_entry("type", "math")?; + map.serialize_entry("format", format)?; + map.serialize_entry("source", source)?; + map.end() + } + + // Extension marks: type is "namespace:markType", attributes flattened + Self::Extension(ext) => { + let type_str = ext.full_type(); + let attr_count = ext.attributes.as_object().map_or(0, serde_json::Map::len); + let mut map = serializer.serialize_map(Some(1 + attr_count))?; + map.serialize_entry("type", &type_str)?; + if let Some(obj) = ext.attributes.as_object() { + for (k, v) in obj { + map.serialize_entry(k, v)?; + } + } + map.end() + } + } + } +} + +impl<'de> Deserialize<'de> for Mark { + #[allow(clippy::too_many_lines)] + fn deserialize>(deserializer: D) -> Result { + struct MarkVisitor; + + impl<'de> Visitor<'de> for MarkVisitor { + type Value = Mark; + + fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + formatter.write_str("a string (simple mark) or an object (complex mark)") + } + + // Simple marks can be plain strings + fn visit_str(self, v: &str) -> Result { + match v { + "bold" => Ok(Mark::Bold), + "italic" => Ok(Mark::Italic), + "underline" => Ok(Mark::Underline), + "strikethrough" => Ok(Mark::Strikethrough), + "code" => Ok(Mark::Code), + "superscript" => Ok(Mark::Superscript), + "subscript" => Ok(Mark::Subscript), + other => { + // Unknown string: treat as extension mark + let (ns, mt) = if let Some((ns, mt)) = other.split_once(':') { + (ns.to_string(), mt.to_string()) + } else { + (infer_mark_namespace(other).to_string(), other.to_string()) + }; + Ok(Mark::Extension(ExtensionMark::new(ns, mt))) + } + } + } + + // Complex marks are objects with a "type" field + #[allow(clippy::too_many_lines)] + fn visit_map>(self, mut map: A) -> Result { + let mut type_str: Option = None; + let mut fields = serde_json::Map::new(); + + while let Some(key) = map.next_key::()? { + if key == "type" { + type_str = Some(map.next_value()?); + } else { + let value: serde_json::Value = map.next_value()?; + fields.insert(key, value); + } + } + + let type_str = type_str.ok_or_else(|| de::Error::missing_field("type"))?; + + match type_str.as_str() { + // Simple marks in object form + "bold" => Ok(Mark::Bold), + "italic" => Ok(Mark::Italic), + "underline" => Ok(Mark::Underline), + "strikethrough" => Ok(Mark::Strikethrough), + "code" => Ok(Mark::Code), + "superscript" => Ok(Mark::Superscript), + "subscript" => Ok(Mark::Subscript), + + // Complex core marks + "link" => { + let href = fields + .get("href") + .and_then(serde_json::Value::as_str) + .ok_or_else(|| de::Error::missing_field("href"))? + .to_string(); + let title = fields + .get("title") + .and_then(serde_json::Value::as_str) + .map(ToString::to_string); + Ok(Mark::Link { href, title }) + } + "anchor" => { + let id = fields + .get("id") + .and_then(serde_json::Value::as_str) + .ok_or_else(|| de::Error::missing_field("id"))? + .to_string(); + Ok(Mark::Anchor { id }) + } + "footnote" => { + let number = fields + .get("number") + .and_then(serde_json::Value::as_u64) + .ok_or_else(|| de::Error::missing_field("number"))?; + let id = fields + .get("id") + .and_then(serde_json::Value::as_str) + .map(ToString::to_string); + Ok(Mark::Footnote { + number: u32::try_from(number) + .map_err(|_| de::Error::custom("footnote number too large"))?, + id, + }) + } + "math" => { + let format_val = fields + .get("format") + .ok_or_else(|| de::Error::missing_field("format"))?; + let format: MathFormat = serde_json::from_value(format_val.clone()) + .map_err(de::Error::custom)?; + // Accept both "source" and "value" (backward compat) + let source = fields + .get("source") + .or_else(|| fields.get("value")) + .and_then(serde_json::Value::as_str) + .ok_or_else(|| de::Error::missing_field("source"))? + .to_string(); + Ok(Mark::Math { format, source }) + } + + // Old format backward compat: {"type": "extension", "namespace": "...", "markType": "..."} + "extension" => { + let namespace = fields + .get("namespace") + .and_then(serde_json::Value::as_str) + .unwrap_or("") + .to_string(); + let mark_type = fields + .get("markType") + .and_then(serde_json::Value::as_str) + .unwrap_or("") + .to_string(); + let attributes = fields + .get("attributes") + .cloned() + .unwrap_or(serde_json::Value::Null); + Ok(Mark::Extension(ExtensionMark { + namespace, + mark_type, + attributes, + })) + } + + // Colon-delimited extension type or unknown type + other => { + let (namespace, mark_type) = if let Some((ns, mt)) = other.split_once(':') { + (ns.to_string(), mt.to_string()) + } else { + (infer_mark_namespace(other).to_string(), other.to_string()) + }; + let attributes = if fields.is_empty() { + serde_json::Value::Null + } else { + serde_json::Value::Object(fields) + }; + Ok(Mark::Extension(ExtensionMark { + namespace, + mark_type, + attributes, + })) + } + } + } + } + + deserializer.deserialize_any(MarkVisitor) + } +} + impl Mark { /// Get the type of this mark. #[must_use] @@ -525,15 +771,30 @@ mod tests { let text = Text::bold("Test"); let json = serde_json::to_string(&text).unwrap(); assert!(json.contains("\"value\":\"Test\"")); - assert!(json.contains("\"type\":\"bold\"")); + // Simple marks serialize as strings + assert!(json.contains("\"bold\"")); } #[test] fn test_text_deserialization() { + // New format: simple marks as strings + let json = r#"{"value":"Test","marks":["bold","italic"]}"#; + let text: Text = serde_json::from_str(json).unwrap(); + assert_eq!(text.value, "Test"); + assert_eq!(text.marks.len(), 2); + assert_eq!(text.marks[0], Mark::Bold); + assert_eq!(text.marks[1], Mark::Italic); + } + + #[test] + fn test_text_deserialization_object_format() { + // Old format: simple marks as objects (backward compat) let json = r#"{"value":"Test","marks":[{"type":"bold"},{"type":"italic"}]}"#; let text: Text = serde_json::from_str(json).unwrap(); assert_eq!(text.value, "Test"); assert_eq!(text.marks.len(), 2); + assert_eq!(text.marks[0], Mark::Bold); + assert_eq!(text.marks[1], Mark::Italic); } #[test] @@ -602,7 +863,7 @@ mod tests { let mark = Mark::Math { format: MathFormat::Latex, - value: "E = mc^2".to_string(), + source: "E = mc^2".to_string(), }; assert_eq!(mark.mark_type(), MarkType::Math); } @@ -613,23 +874,23 @@ mod tests { let mark = Mark::Math { format: MathFormat::Latex, - value: "\\frac{1}{2}".to_string(), + source: "\\frac{1}{2}".to_string(), }; let json = serde_json::to_string(&mark).unwrap(); assert!(json.contains("\"type\":\"math\"")); assert!(json.contains("\"format\":\"latex\"")); - assert!(json.contains("\"value\":\"\\\\frac{1}{2}\"")); + assert!(json.contains("\"source\":\"\\\\frac{1}{2}\"")); } #[test] fn test_math_mark_deserialization() { use crate::content::block::MathFormat; - let json = r#"{"type":"math","format":"mathml","value":"..."}"#; + let json = r#"{"type":"math","format":"mathml","source":"..."}"#; let mark: Mark = serde_json::from_str(json).unwrap(); - if let Mark::Math { format, value } = mark { + if let Mark::Math { format, source } = mark { assert_eq!(format, MathFormat::Mathml); - assert_eq!(value, "..."); + assert_eq!(source, "..."); } else { panic!("Expected Math mark"); } @@ -643,7 +904,7 @@ mod tests { "x²", vec![Mark::Math { format: MathFormat::Latex, - value: "x^2".to_string(), + source: "x^2".to_string(), }], ); assert!(text.has_mark(MarkType::Math)); @@ -713,14 +974,38 @@ mod tests { let mark = Mark::Extension(ext); let json = serde_json::to_string(&mark).unwrap(); - assert!(json.contains("\"type\":\"extension\"")); - assert!(json.contains("\"namespace\":\"semantic\"")); - assert!(json.contains("\"markType\":\"citation\"")); + // New format: type is "namespace:markType", attributes flattened + assert!(json.contains("\"type\":\"semantic:citation\"")); assert!(json.contains("\"ref\":\"smith2023\"")); + // Should NOT contain old wrapper fields + assert!(!json.contains("\"namespace\"")); + assert!(!json.contains("\"markType\"")); + } + + #[test] + fn test_extension_mark_deserialization_new_format() { + // New format: colon-delimited type with flattened attributes + let json = r#"{ + "type": "legal:cite", + "citation": "Brown v. Board of Education" + }"#; + let mark: Mark = serde_json::from_str(json).unwrap(); + + if let Mark::Extension(ext) = mark { + assert_eq!(ext.namespace, "legal"); + assert_eq!(ext.mark_type, "cite"); + assert_eq!( + ext.get_string_attribute("citation"), + Some("Brown v. Board of Education") + ); + } else { + panic!("Expected Extension mark"); + } } #[test] - fn test_extension_mark_deserialization() { + fn test_extension_mark_deserialization_old_format() { + // Old format backward compat: "extension" wrapper with namespace/markType let json = r#"{ "type": "extension", "namespace": "legal", diff --git a/cdx-core/src/content/validation.rs b/cdx-core/src/content/validation.rs index 3891109..8670760 100644 --- a/cdx-core/src/content/validation.rs +++ b/cdx-core/src/content/validation.rs @@ -461,7 +461,7 @@ fn validate_figcaption( if parent != Some(ParentContext::Figure) { errors.push(ValidationError { path: path.to_string(), - message: "figCaption should be a child of figure".to_string(), + message: "figcaption should be a child of figure".to_string(), }); } validate_text_children(children, path, errors); diff --git a/cdx-core/tests/conformance.rs b/cdx-core/tests/conformance.rs new file mode 100644 index 0000000..56aa107 --- /dev/null +++ b/cdx-core/tests/conformance.rs @@ -0,0 +1,398 @@ +//! Spec conformance tests. +//! +//! These tests verify that cdx-core's JSON wire format matches the Codex file +//! format specification. Each test compares serialization output against spec +//! examples and verifies backward-compatible deserialization of old formats. + +use cdx_core::content::{Block, Mark, MathFormat, Text}; +use cdx_core::extensions::ExtensionBlock; + +// ============================================================================ +// Mark format conformance +// ============================================================================ + +#[test] +fn simple_marks_serialize_as_strings() { + let marks = vec![ + (Mark::Bold, "\"bold\""), + (Mark::Italic, "\"italic\""), + (Mark::Underline, "\"underline\""), + (Mark::Strikethrough, "\"strikethrough\""), + (Mark::Code, "\"code\""), + (Mark::Superscript, "\"superscript\""), + (Mark::Subscript, "\"subscript\""), + ]; + + for (mark, expected) in marks { + let json = serde_json::to_string(&mark).unwrap(); + assert_eq!( + json, expected, + "Mark::{mark:?} should serialize as {expected}" + ); + } +} + +#[test] +fn simple_marks_deserialize_from_string() { + let cases = vec![ + ("\"bold\"", Mark::Bold), + ("\"italic\"", Mark::Italic), + ("\"underline\"", Mark::Underline), + ("\"strikethrough\"", Mark::Strikethrough), + ("\"code\"", Mark::Code), + ("\"superscript\"", Mark::Superscript), + ("\"subscript\"", Mark::Subscript), + ]; + + for (json, expected) in cases { + let mark: Mark = serde_json::from_str(json).unwrap(); + assert_eq!( + mark, expected, + "String {json} should deserialize to {expected:?}" + ); + } +} + +#[test] +fn simple_marks_deserialize_from_object() { + // Backward compat: old format used objects for simple marks + let cases = vec![ + (r#"{"type":"bold"}"#, Mark::Bold), + (r#"{"type":"italic"}"#, Mark::Italic), + (r#"{"type":"code"}"#, Mark::Code), + ]; + + for (json, expected) in cases { + let mark: Mark = serde_json::from_str(json).unwrap(); + assert_eq!( + mark, expected, + "Object {json} should deserialize to {expected:?}" + ); + } +} + +#[test] +fn mixed_mark_array_deserializes() { + // Mix of string and object marks in a single array + let json = r#"["bold", {"type":"link","href":"https://example.com"}, "italic"]"#; + let marks: Vec = serde_json::from_str(json).unwrap(); + + assert_eq!(marks.len(), 3); + assert_eq!(marks[0], Mark::Bold); + assert!(matches!(&marks[1], Mark::Link { href, .. } if href == "https://example.com")); + assert_eq!(marks[2], Mark::Italic); +} + +#[test] +fn extension_mark_serializes_without_wrapper() { + use cdx_core::content::ExtensionMark; + + let mark = Mark::Extension(ExtensionMark::citation("smith2023")); + let json = serde_json::to_string(&mark).unwrap(); + let val: serde_json::Value = serde_json::from_str(&json).unwrap(); + + // Type should be "semantic:citation", not "extension" + assert_eq!(val["type"], "semantic:citation"); + assert_eq!(val["ref"], "smith2023"); + + // Should NOT have wrapper fields + assert!(val.get("namespace").is_none()); + assert!(val.get("markType").is_none()); +} + +#[test] +fn extension_mark_deserializes_new_format() { + let json = r#"{"type":"semantic:citation","ref":"smith2023"}"#; + let mark: Mark = serde_json::from_str(json).unwrap(); + + if let Mark::Extension(ext) = &mark { + assert_eq!(ext.namespace, "semantic"); + assert_eq!(ext.mark_type, "citation"); + assert_eq!(ext.get_string_attribute("ref"), Some("smith2023")); + } else { + panic!("Expected Extension mark, got {mark:?}"); + } +} + +#[test] +fn extension_mark_deserializes_old_format() { + // Backward compat: old "extension" wrapper format + let json = r#"{"type":"extension","namespace":"semantic","markType":"citation","attributes":{"ref":"smith2023"}}"#; + let mark: Mark = serde_json::from_str(json).unwrap(); + + if let Mark::Extension(ext) = &mark { + assert_eq!(ext.namespace, "semantic"); + assert_eq!(ext.mark_type, "citation"); + assert_eq!(ext.get_string_attribute("ref"), Some("smith2023")); + } else { + panic!("Expected Extension mark, got {mark:?}"); + } +} + +#[test] +fn math_mark_uses_source_field() { + let mark = Mark::Math { + format: MathFormat::Latex, + source: "E=mc^2".to_string(), + }; + let json = serde_json::to_string(&mark).unwrap(); + let val: serde_json::Value = serde_json::from_str(&json).unwrap(); + + assert_eq!(val["type"], "math"); + assert_eq!(val["source"], "E=mc^2"); + // "value" should NOT be present + assert!(val.get("value").is_none()); +} + +#[test] +fn math_mark_backward_compat_value_field() { + // Old format used "value" instead of "source" + let json = r#"{"type":"math","format":"latex","value":"E=mc^2"}"#; + let mark: Mark = serde_json::from_str(json).unwrap(); + + if let Mark::Math { format, source } = &mark { + assert_eq!(*format, MathFormat::Latex); + assert_eq!(source, "E=mc^2"); + } else { + panic!("Expected Math mark, got {mark:?}"); + } +} + +// ============================================================================ +// Block format conformance +// ============================================================================ + +#[test] +fn figcaption_serializes_lowercase() { + let fc = Block::figcaption(vec![Text::plain("Figure 1")]); + let json = serde_json::to_string(&fc).unwrap(); + let val: serde_json::Value = serde_json::from_str(&json).unwrap(); + + assert_eq!(val["type"], "figcaption"); +} + +#[test] +fn figcaption_backward_compat_camel_case() { + // Old format used "figCaption" (camelCase) + let json = r#"{"type":"figCaption","children":[{"value":"Caption"}]}"#; + let block: Block = serde_json::from_str(json).unwrap(); + assert_eq!(block.block_type(), "figcaption"); +} + +#[test] +fn extension_block_serializes_with_colon_type() { + let ext = ExtensionBlock::new("academic", "theorem") + .with_id("thm-1") + .with_attributes(serde_json::json!({"variant": "lemma", "numbered": true})); + let block = Block::Extension(ext); + + let json = serde_json::to_string(&block).unwrap(); + let val: serde_json::Value = serde_json::from_str(&json).unwrap(); + + // Type should be "academic:theorem" + assert_eq!(val["type"], "academic:theorem"); + assert_eq!(val["id"], "thm-1"); + // Attributes should be flattened + assert_eq!(val["variant"], "lemma"); + assert_eq!(val["numbered"], true); + // Should NOT have wrapper fields + assert!(val.get("namespace").is_none()); + assert!(val.get("blockType").is_none()); + assert!(val.get("attributes").is_none()); +} + +#[test] +fn extension_block_deserializes_new_format() { + let json = r#"{"type":"academic:theorem","id":"thm-1","variant":"lemma","numbered":true}"#; + let block: Block = serde_json::from_str(json).unwrap(); + + if let Block::Extension(ext) = &block { + assert_eq!(ext.namespace, "academic"); + assert_eq!(ext.block_type, "theorem"); + assert_eq!(ext.id, Some("thm-1".to_string())); + assert_eq!(ext.get_string_attribute("variant"), Some("lemma")); + assert_eq!(ext.get_bool_attribute("numbered"), Some(true)); + } else { + panic!("Expected Extension block, got paragraph/etc"); + } +} + +#[test] +fn extension_block_deserializes_old_format() { + // Backward compat: old "extension" wrapper format + let json = r#"{"type":"extension","namespace":"forms","blockType":"textInput","id":"name-field","attributes":{"label":"Name","required":true}}"#; + let block: Block = serde_json::from_str(json).unwrap(); + + if let Block::Extension(ext) = &block { + assert_eq!(ext.namespace, "forms"); + assert_eq!(ext.block_type, "textInput"); + assert_eq!(ext.id, Some("name-field".to_string())); + assert_eq!(ext.get_string_attribute("label"), Some("Name")); + assert_eq!(ext.get_bool_attribute("required"), Some(true)); + } else { + panic!("Expected Extension block"); + } +} + +#[test] +fn extension_block_type_returns_colon_format() { + let block = Block::extension("forms", "textInput"); + assert_eq!(block.block_type(), "forms:textInput"); +} + +// ============================================================================ +// Spec example round-trips +// ============================================================================ + +#[test] +fn spec_example_text_with_bold_string_mark() { + // Spec: bold marks are strings in the marks array + let spec_json = r#"{"value":"Important","marks":["bold"]}"#; + + // Deserialize + let text: Text = serde_json::from_str(spec_json).unwrap(); + assert_eq!(text.value, "Important"); + assert_eq!(text.marks, vec![Mark::Bold]); + + // Re-serialize matches spec format + let output = serde_json::to_string(&text).unwrap(); + let output_val: serde_json::Value = serde_json::from_str(&output).unwrap(); + let spec_val: serde_json::Value = serde_json::from_str(spec_json).unwrap(); + assert_eq!(output_val, spec_val); +} + +#[test] +fn spec_example_text_with_citation_mark() { + // Spec: extension marks use "namespace:markType" as type, attributes flattened + let spec_json = + r#"{"value":"important claim","marks":[{"type":"semantic:citation","ref":"smith2023"}]}"#; + + let text: Text = serde_json::from_str(spec_json).unwrap(); + assert_eq!(text.value, "important claim"); + assert_eq!(text.marks.len(), 1); + + if let Mark::Extension(ext) = &text.marks[0] { + assert_eq!(ext.namespace, "semantic"); + assert_eq!(ext.mark_type, "citation"); + assert_eq!(ext.get_string_attribute("ref"), Some("smith2023")); + } else { + panic!("Expected Extension mark"); + } + + // Re-serialize matches spec format + let output = serde_json::to_string(&text).unwrap(); + let output_val: serde_json::Value = serde_json::from_str(&output).unwrap(); + let spec_val: serde_json::Value = serde_json::from_str(spec_json).unwrap(); + assert_eq!(output_val, spec_val); +} + +#[test] +fn spec_example_extension_block_academic_theorem() { + // Spec: extension blocks use "namespace:blockType" as type + let spec_json = r#"{ + "type": "academic:theorem", + "id": "thm-pythagoras", + "variant": "theorem", + "children": [ + {"type": "paragraph", "children": [{"value": "In a right triangle..."}]} + ] + }"#; + + let block: Block = serde_json::from_str(spec_json).unwrap(); + if let Block::Extension(ext) = &block { + assert_eq!(ext.namespace, "academic"); + assert_eq!(ext.block_type, "theorem"); + assert_eq!(ext.id, Some("thm-pythagoras".to_string())); + assert_eq!(ext.get_string_attribute("variant"), Some("theorem")); + assert_eq!(ext.children.len(), 1); + } else { + panic!("Expected Extension block"); + } + + // Re-serialize and verify format + let output = serde_json::to_string(&block).unwrap(); + let output_val: serde_json::Value = serde_json::from_str(&output).unwrap(); + assert_eq!(output_val["type"], "academic:theorem"); + assert_eq!(output_val["id"], "thm-pythagoras"); + assert_eq!(output_val["variant"], "theorem"); +} + +#[test] +fn spec_example_figure_with_figcaption() { + // Spec: figcaption uses lowercase "figcaption" type + let spec_json = r#"{ + "type": "figure", + "children": [ + {"type": "image", "src": "photo.png", "alt": "A photo"}, + {"type": "figcaption", "children": [{"value": "Figure 1: A photo"}]} + ] + }"#; + + let block: Block = serde_json::from_str(spec_json).unwrap(); + if let Block::Figure(fig) = &block { + assert_eq!(fig.children.len(), 2); + assert_eq!(fig.children[1].block_type(), "figcaption"); + } else { + panic!("Expected Figure block"); + } + + // Re-serialize and verify format + let output = serde_json::to_string(&block).unwrap(); + let output_val: serde_json::Value = serde_json::from_str(&output).unwrap(); + assert_eq!(output_val["children"][1]["type"], "figcaption"); +} + +#[test] +fn spec_example_math_inline_mark() { + // Spec: inline math mark uses "source" field + let spec_json = r#"{"value":"x²","marks":[{"type":"math","format":"latex","source":"x^2"}]}"#; + + let text: Text = serde_json::from_str(spec_json).unwrap(); + if let Mark::Math { format, source } = &text.marks[0] { + assert_eq!(*format, MathFormat::Latex); + assert_eq!(source, "x^2"); + } else { + panic!("Expected Math mark"); + } + + // Re-serialize matches spec + let output = serde_json::to_string(&text).unwrap(); + let output_val: serde_json::Value = serde_json::from_str(&output).unwrap(); + let spec_val: serde_json::Value = serde_json::from_str(spec_json).unwrap(); + assert_eq!(output_val, spec_val); +} + +#[test] +fn extension_block_roundtrip_preserves_format() { + // Create → serialize → deserialize → serialize again, format should match + let ext = ExtensionBlock::new("forms", "textInput") + .with_id("name-field") + .with_attributes(serde_json::json!({"label": "Full Name", "required": true})); + let block = Block::Extension(ext); + + let json1 = serde_json::to_string(&block).unwrap(); + let parsed: Block = serde_json::from_str(&json1).unwrap(); + let json2 = serde_json::to_string(&parsed).unwrap(); + + let val1: serde_json::Value = serde_json::from_str(&json1).unwrap(); + let val2: serde_json::Value = serde_json::from_str(&json2).unwrap(); + assert_eq!(val1, val2); +} + +#[test] +fn extension_mark_roundtrip_preserves_format() { + use cdx_core::content::ExtensionMark; + + let mark = Mark::Extension(ExtensionMark::theorem_ref_formatted( + "#thm-1", + "{variant} {number}", + )); + + let json1 = serde_json::to_string(&mark).unwrap(); + let parsed: Mark = serde_json::from_str(&json1).unwrap(); + let json2 = serde_json::to_string(&parsed).unwrap(); + + let val1: serde_json::Value = serde_json::from_str(&json1).unwrap(); + let val2: serde_json::Value = serde_json::from_str(&json2).unwrap(); + assert_eq!(val1, val2); +} diff --git a/cdx-swift-bridge/src/content.rs b/cdx-swift-bridge/src/content.rs index 829a53a..9881b87 100644 --- a/cdx-swift-bridge/src/content.rs +++ b/cdx-swift-bridge/src/content.rs @@ -358,8 +358,10 @@ impl From<&cdx_core::content::Mark> for CdxTextMark { number: *number, id: id.clone(), }, - cdx_core::content::Mark::Math { format: _, value } => CdxTextMark::Math { - value: value.clone(), + cdx_core::content::Mark::Math { + format: _, source, .. + } => CdxTextMark::Math { + value: source.clone(), display: false, }, cdx_core::content::Mark::Extension(_) => { diff --git a/cdx-swift-bridge/src/document.rs b/cdx-swift-bridge/src/document.rs index 30af8db..0c989a4 100644 --- a/cdx-swift-bridge/src/document.rs +++ b/cdx-swift-bridge/src/document.rs @@ -624,7 +624,7 @@ pub fn convert_text_to_core(span: &CdxTextSpan) -> cdx_core::content::Text { }, CdxTextMark::Math { value, display: _ } => cdx_core::content::Mark::Math { format: cdx_core::content::MathFormat::Latex, - value: value.clone(), + source: value.clone(), }, }) .collect();