diff --git a/packages/docs-builder/src/parse.spec.ts b/packages/docs-builder/src/parse.spec.ts
new file mode 100644
index 0000000..dcb1980
--- /dev/null
+++ b/packages/docs-builder/src/parse.spec.ts
@@ -0,0 +1,278 @@
+// Copyright (c) 2025 Climate Interactive / New Venture Fund. All rights reserved.
+
+import { describe, expect, it } from 'vitest'
+
+import type { Config } from './config'
+import { Context } from './context'
+import { parseMarkdownPageContent } from './parse'
+
+const config: Config = {
+ mode: 'development',
+ baseProjDir: 'xxx',
+ sourceDir: 'xxx',
+ outDir: 'xxx',
+ version: '25.1.0',
+ langs: [],
+ formats: [],
+ template: 'default',
+ author: 'Climate Interactive',
+ logoPath: 'xxx',
+ defs: [],
+ pages: ['page_1.md'],
+ untranslated: [],
+ options: {}
+}
+
+describe('parseMarkdownPageContent', () => {
+ it('should parse valid Markdown content', () => {
+ const mdCommon = `\
+
+
+
+This text was defined in \`common.md\`.
+
+
+This page was generated by [\`docs-builder\`](https://github.com/climateinteractive/docs-builder).
+
+
+
+
+Appendix
+
+
+
+
+Examples
+
+
+Footnotes
+`
+
+ const mdGlossary = `\
+# Glossary
+
+##
+
+**page**: one side of a sheet of paper in a collection of sheets bound together
+`
+
+ const mdPage = `\
+---
+fragments:
+ head: ['example']
+---
+
+# Page 1
+
+
+This is the first [page][glossary_page].
+
+## :section_examples:
+
+
+
+This is a sentence with __bold__ and _italic_ text.
+
+This block has two paragraphs that are captured using a \`begin-def\` / \`end-def\` pair.
+
+
+
+
+this text was defined using "hidden" flag
+
+
+Use \`def[hidden]\` to define some text without making it appear, then "use" it later on the page (like this: _:page_1__examples__hidden_text:_).
+
+:content_placeholder:
+
+
+This sentence refers to a footnote. footnote-ref:fn_example
+
+| person | age |
+|--|--|
+| Alice | 42 |
+| Bob | 99 |
+
+
+\`\`\`js
+// This is a code block
+const one = 1
+const two = 2
+const answer = one + two
+\`\`\`
+
+## :section_footnotes:
+
+footnote:fn_example This is a footnote (only in English for now).
+
+_:github_project:_
+`
+
+ // Parse the common and glossary strings so that the strings are part of the context
+ const enContext = new Context(config, 'en')
+ parseMarkdownPageContent(enContext, 'common.md', mdCommon)
+ parseMarkdownPageContent(enContext, 'glossary.md', mdGlossary)
+
+ // Verify that the page parses successfully and that the frontmatter and resolved Markdown are as expected
+ const parsed = parseMarkdownPageContent(enContext, 'page_1.md', mdPage)
+ expect(parsed.frontmatter).toEqual({
+ fragments: {
+ head: ['example']
+ }
+ })
+ expect(parsed.raw).toEqual(`
+# Page 1🔗
+
+This is the first [page](glossary:page).
+
+## Examples🔗
+
+This is a sentence with **bold** and _italic_ text.
+
+This block has two paragraphs that are captured using a \`begin-def\` / \`end-def\` pair.
+
+
+
+Use \`def[hidden]\` to define some text without making it appear, then "use" it later on the page (like this: _this text was defined using "hidden" flag_).
+
+This text was defined in \`common.md\`.
+
+This sentence refers to a footnote. footnote-ref:fn_example
+
+| person | age |
+|--|--|
+| Alice | 42 |
+| Bob | 99 |
+
+\`\`\`js
+// This is a code block
+const one = 1
+const two = 2
+const answer = one + two
+\`\`\`
+
+## Footnotes🔗
+
+footnote:fn_example This is a footnote (only in English for now).
+
+_This page was generated by [\`docs-builder\`](https://github.com/climateinteractive/docs-builder)._
+
+
+`)
+ })
+
+ it('should throw an error if an unknown command is used', () => {
+ const md = `\
+
+`
+ const enContext = new Context(config, 'en')
+ expect(() => parseMarkdownPageContent(enContext, 'page_1.md', md)).toThrow(
+ `Unknown command 'somecommand' (page=page_1.md)`
+ )
+ })
+
+ it('should throw an error if a command is used with an invalid identifier', () => {
+ const md = `\
+
+Hello
+`
+ const enContext = new Context(config, 'en')
+ expect(() => parseMarkdownPageContent(enContext, 'page_1.md', md)).toThrow(
+ `Identifier (key_with_INVALID_chars) must contain only lowercase letters, digits, and underscores (page=page_1.md)`
+ )
+ })
+
+ it('should throw an error if a begin-def command is not closed', () => {
+ const md = `\
+
+
+Hello
+`
+ const enContext = new Context(config, 'en')
+ expect(() => parseMarkdownPageContent(enContext, 'page_1.md', md)).toThrow(
+ `Command 'begin-def' was not resolved or closed (page=page_1.md)`
+ )
+ })
+
+ it('should throw an error if an unexpected command is used before a begin-def is closed', () => {
+ const md = `\
+
+
+Hello
+
+
+`
+ const enContext = new Context(config, 'en')
+ expect(() => parseMarkdownPageContent(enContext, 'page_1.md', md)).toThrow(
+ `Unexpected command 'def' while current command 'begin-def' (id=example_1) is in effect (page=page_1.md)`
+ )
+ })
+
+ it('should throw an error if a begin-def / end-def pair does not contain any text', () => {
+ const md = `\
+
+
+
+`
+
+ const enContext = new Context(config, 'en')
+ expect(() => parseMarkdownPageContent(enContext, 'page_1.md', md)).toThrow(
+ `Saw 'end-def' but no tokens were included (page=page_1.md)`
+ )
+ })
+
+ it('should throw an error if an end-def command is used without a corresponding begin-def command', () => {
+ const md = `\
+Hello
+
+
+`
+
+ const enContext = new Context(config, 'en')
+ expect(() => parseMarkdownPageContent(enContext, 'page_1.md', md)).toThrow(
+ `Saw 'end-def' without corresponding 'begin-def' (page=page_1.md)`
+ )
+ })
+
+ it('should throw an error if text replacement syntax is used with an unknown string identifier', () => {
+ const md = `\
+Hello
+
+:some_id:
+`
+ const enContext = new Context(config, 'en')
+ expect(() => parseMarkdownPageContent(enContext, 'page_1.md', md)).toThrow(
+ 'Unknown replacement for id=some_id (page=page_1.md)'
+ )
+ })
+
+ it('should throw an error if two spaces are detected at the end of a line', () => {
+ const spaces = ' '.repeat(2)
+ const md = `\
+# Section 1
+
+Hello${spaces}
+there
+`
+
+ let expectedMsg = ''
+ expectedMsg += 'Detected two or more spaces at the end of a text line. '
+ expectedMsg += 'Markdown interprets this as a line break, which can be surprising. '
+ expectedMsg += 'If the spaces were added unintentionally, remove the extra spaces. '
+ expectedMsg += `If you do want a line break, use an explicit HTML 'br' tag instead. `
+ expectedMsg += '(page=page_1.md scope=section_1)'
+
+ const enContext = new Context(config, 'en')
+ expect(() => parseMarkdownPageContent(enContext, 'page_1.md', md)).toThrow(expectedMsg)
+ })
+
+ it('should throw an error if a reference-style link cannot be resolved', () => {
+ const md = `\
+[This is a link][unknown_ref]
+`
+ const enContext = new Context(config, 'en')
+ expect(() => parseMarkdownPageContent(enContext, 'page_1.md', md)).toThrow(
+ 'Unresolved reference-style link found for lang=en link=[This is a link][unknown_ref]'
+ )
+ })
+})
diff --git a/packages/docs-builder/src/parse.ts b/packages/docs-builder/src/parse.ts
index 3770950..1cddb18 100644
--- a/packages/docs-builder/src/parse.ts
+++ b/packages/docs-builder/src/parse.ts
@@ -38,23 +38,41 @@ type Links = {
* @return The translated Markdown content.
*/
export function parseMarkdownPage(context: Context, relPath: string): MarkdownPage {
+ // Read the Markdown file; `relPath` is resolved against the configured base project directory
+ const filePath = resolve(context.config.baseProjDir, relPath)
+ const origMarkdownWithFrontmatter = readTextFile(filePath)
+
+ // Delegate parsing to `parseMarkdownPageContent`, which takes the content as a string
+ return parseMarkdownPageContent(context, relPath, origMarkdownWithFrontmatter)
+}
+
+/**
+ * Parse the given base (English) Markdown content and translate it into the language associated
+ * with the context. The content is supplied by the caller (`parseMarkdownPage` reads it from a file).
+ *
+ * @param context The language-specific context.
+ * @param relPath The path to the Markdown file, relative to the base project directory.
+ * @param origMarkdownWithFrontmatter The Markdown content with optional frontmatter.
+ * @return The translated Markdown content.
+ */
+export function parseMarkdownPageContent(
+ context: Context,
+ relPath: string,
+ origMarkdownWithFrontmatter: string
+): MarkdownPage {
// Configure marked.js
marked.setOptions({
headerIds: false
})
- // Set the current page (for error reporting)
- context.setCurrentPage(relPath)
-
- // Read the Markdown file
- const filePath = resolve(context.config.baseProjDir, relPath)
- const origMarkdownWithFrontmatter = readTextFile(filePath)
-
// Separate frontmatter from the content
const origMarkdownSeparated = matter(origMarkdownWithFrontmatter)
const origMarkdown = origMarkdownSeparated.content
const frontmatter = origMarkdownSeparated.data
+ // Set the current page (for error reporting)
+ context.setCurrentPage(relPath)
+
// Append synthesized link info for glossary references so that source files can
// use `[link text][glossary_term]` without manually defining a reference for
// "glossary_term"; it will be converted to `[...](glossary:term)` automatically
@@ -445,7 +463,7 @@ function processToken(context: Context, state: ProcessState, token: marked.Token
let msg = 'Detected two or more spaces at the end of a text line.'
msg += ' Markdown interprets this as a line break, which can be surprising.'
msg += ' If the spaces were added unintentionally, remove the extra spaces.'
- msg += ' If you do want a line break, use an explicit HTML `<br>` tag instead.'
+ msg += ` If you do want a line break, use an explicit HTML 'br' tag instead.`
throw new Error(context.getScopedMessage(msg))
}
default: