diff --git a/packages/docs-builder/src/parse.spec.ts b/packages/docs-builder/src/parse.spec.ts new file mode 100644 index 0000000..dcb1980 --- /dev/null +++ b/packages/docs-builder/src/parse.spec.ts @@ -0,0 +1,278 @@ +// Copyright (c) 2025 Climate Interactive / New Venture Fund. All rights reserved. + +import { describe, expect, it } from 'vitest' + +import type { Config } from './config' +import { Context } from './context' +import { parseMarkdownPageContent } from './parse' + +const config: Config = { + mode: 'development', + baseProjDir: 'xxx', + sourceDir: 'xxx', + outDir: 'xxx', + version: '25.1.0', + langs: [], + formats: [], + template: 'default', + author: 'Climate Interactive', + logoPath: 'xxx', + defs: [], + pages: ['page_1.md'], + untranslated: [], + options: {} +} + +describe('parseMarkdownPageContent', () => { + it('should parse valid Markdown content', () => { + const mdCommon = `\ + + + +This text was defined in \`common.md\`. + + +This page was generated by [\`docs-builder\`](https://github.com/climateinteractive/docs-builder). + + + + +Appendix + + + + +Examples + + +Footnotes +` + + const mdGlossary = `\ +# Glossary + +## + +**page**: one side of a sheet of paper in a collection of sheets bound together +` + + const mdPage = `\ +--- +fragments: + head: ['example'] +--- + +# Page 1 + + +This is the first [page][glossary_page]. + +## :section_examples: + + + +This is a sentence with __bold__ and _italic_ text. + +This block has two paragraphs that are captured using a \`begin-def\` / \`end-def\` pair. + + + + +this text was defined using "hidden" flag + + +Use \`def[hidden]\` to define some text without making it appear, then "use" it later on the page (like this: _:page_1__examples__hidden_text:_). + +:content_placeholder: + + +This sentence refers to a footnote. footnote-ref:fn_example + +| person | age | +|--|--| +| Alice | 42 | +| Bob | 99 | + + +\`\`\`js +// This is a code block +const one = 1 +const two = 2 +const answer = one + two +\`\`\` + +## :section_footnotes: + +footnote:fn_example This is a footnote (only in English for now). + +_:github_project:_ +` + + // Parse the common and glossary strings so that the strings are part of the context + const enContext = new Context(config, 'en') + parseMarkdownPageContent(enContext, 'common.md', mdCommon) + parseMarkdownPageContent(enContext, 'glossary.md', mdGlossary) + + // Verify that an error is thrown if the English content contains invalid link syntax + const parsed = parseMarkdownPageContent(enContext, 'page_1.md', mdPage) + expect(parsed.frontmatter).toEqual({ + fragments: { + head: ['example'] + } + }) + expect(parsed.raw).toEqual(` +# Page 1🔗 + +This is the first [page](glossary:page). + +## Examples🔗 + +This is a sentence with **bold** and _italic_ text. + +This block has two paragraphs that are captured using a \`begin-def\` / \`end-def\` pair. + + + +Use \`def[hidden]\` to define some text without making it appear, then "use" it later on the page (like this: _this text was defined using "hidden" flag_). + +This text was defined in \`common.md\`. + +This sentence refers to a footnote. footnote-ref:fn_example + +| person | age | +|--|--| +| Alice | 42 | +| Bob | 99 | + +\`\`\`js +// This is a code block +const one = 1 +const two = 2 +const answer = one + two +\`\`\` + +## Footnotes🔗 + +footnote:fn_example This is a footnote (only in English for now). + +_This page was generated by [\`docs-builder\`](https://github.com/climateinteractive/docs-builder)._ + + +`) + }) + + it('should throw an error if an unknown command is used', () => { + const md = `\ + +` + const enContext = new Context(config, 'en') + expect(() => parseMarkdownPageContent(enContext, 'page_1.md', md)).toThrow( + `Unknown command 'somecommand' (page=page_1.md)` + ) + }) + + it('should throw an error if a command is used with an invalid identifier', () => { + const md = `\ + +Hello +` + const enContext = new Context(config, 'en') + expect(() => parseMarkdownPageContent(enContext, 'page_1.md', md)).toThrow( + `Identifier (key_with_INVALID_chars) must contain only lowercase letters, digits, and underscores (page=page_1.md)` + ) + }) + + it('should throw an error if a begin-def command is not closed', () => { + const md = `\ + + +Hello +` + const enContext = new Context(config, 'en') + expect(() => parseMarkdownPageContent(enContext, 'page_1.md', md)).toThrow( + `Command 'begin-def' was not resolved or closed (page=page_1.md)` + ) + }) + + it('should throw an error if an unexpected command is used before a begin-def is closed', () => { + const md = `\ + + +Hello + + +` + const enContext = new Context(config, 'en') + expect(() => parseMarkdownPageContent(enContext, 'page_1.md', md)).toThrow( + `Unexpected command 'def' while current command 'begin-def' (id=example_1) is in effect (page=page_1.md)` + ) + }) + + it('should throw an error if a begin-def / end-def pair does not contain any text', () => { + const md = `\ + + + +` + + const enContext = new Context(config, 'en') + expect(() => parseMarkdownPageContent(enContext, 'page_1.md', md)).toThrow( + `Saw 'end-def' but no tokens were included (page=page_1.md)` + ) + }) + + it('should throw an error if an end-def command is used without a corresponding begin-def command', () => { + const md = `\ +Hello + + +` + + const enContext = new Context(config, 'en') + expect(() => parseMarkdownPageContent(enContext, 'page_1.md', md)).toThrow( + `Saw 'end-def' without corresponding 'begin-def' (page=page_1.md)` + ) + }) + + it('should throw an error if text replacement syntax is used with an unknown string identifier', () => { + const md = `\ +Hello + +:some_id: +` + const enContext = new Context(config, 'en') + expect(() => parseMarkdownPageContent(enContext, 'page_1.md', md)).toThrow( + 'Unknown replacement for id=some_id (page=page_1.md)' + ) + }) + + it('should throw an error if two spaces are detected at the end of a line', () => { + const spaces = ' '.repeat(2) + const md = `\ +# Section 1 + +Hello${spaces} +there +` + + let expectedMsg = '' + expectedMsg += 'Detected two or more spaces at the end of a text line. ' + expectedMsg += 'Markdown interprets this as a line break, which can be surprising. ' + expectedMsg += 'If the spaces were added unintentionally, remove the extra spaces. ' + expectedMsg += `If you do want a line break, use an explicit HTML 'br' tag instead. ` + expectedMsg += '(page=page_1.md scope=section_1)' + + const enContext = new Context(config, 'en') + expect(() => parseMarkdownPageContent(enContext, 'page_1.md', md)).toThrow(expectedMsg) + }) + + it('should throw an error if a reference-style link cannot be resolved', () => { + const md = `\ +[This is a link][unknown_ref] +` + const enContext = new Context(config, 'en') + expect(() => parseMarkdownPageContent(enContext, 'page_1.md', md)).toThrow( + 'Unresolved reference-style link found for lang=en link=[This is a link][unknown_ref]' + ) + }) +}) diff --git a/packages/docs-builder/src/parse.ts b/packages/docs-builder/src/parse.ts index 3770950..1cddb18 100644 --- a/packages/docs-builder/src/parse.ts +++ b/packages/docs-builder/src/parse.ts @@ -38,23 +38,41 @@ type Links = { * @return The translated Markdown content. */ export function parseMarkdownPage(context: Context, relPath: string): MarkdownPage { + // Read the Markdown file + const filePath = resolve(context.config.baseProjDir, relPath) + const origMarkdownWithFrontmatter = readTextFile(filePath) + + // Parse the Markdown content + return parseMarkdownPageContent(context, relPath, origMarkdownWithFrontmatter) +} + +/** + * Parse the given base (English) Markdown page and translate it into the language + * associated with the context. + * + * @param context The language-specific context. + * @param relPath The path to the Markdown file, relative to the base project directory. + * @param origMarkdownWithFrontmatter The Markdown content with optional frontmatter. + * @return The translated Markdown content. + */ +export function parseMarkdownPageContent( + context: Context, + relPath: string, + origMarkdownWithFrontmatter: string +): MarkdownPage { // Configure marked.js marked.setOptions({ headerIds: false }) - // Set the current page (for error reporting) - context.setCurrentPage(relPath) - - // Read the Markdown file - const filePath = resolve(context.config.baseProjDir, relPath) - const origMarkdownWithFrontmatter = readTextFile(filePath) - // Separate frontmatter from the content const origMarkdownSeparated = matter(origMarkdownWithFrontmatter) const origMarkdown = origMarkdownSeparated.content const frontmatter = origMarkdownSeparated.data + // Set the current page (for error reporting) + context.setCurrentPage(relPath) + // Append synthesized link info for glossary references so that source files can // use `[link text][glossary_term]` without manually defining a reference for // "glossary_term"; it will be converted to `[...](glossary:term)` automatically @@ -445,7 +463,7 @@ function processToken(context: Context, state: ProcessState, token: marked.Token let msg = 'Detected two or more spaces at the end of a text line.' msg += ' Markdown interprets this as a line break, which can be surprising.' msg += ' If the spaces were added unintentionally, remove the extra spaces.' - msg += ' If you do want a line break, use an explicit HTML `
` tag instead.' + msg += ` If you do want a line break, use an explicit HTML 'br' tag instead.` throw new Error(context.getScopedMessage(msg)) } default: