Skip to content

Commit 28196b4

Browse files
test: add tests for parsing phase
1 parent 6137402 commit 28196b4

2 files changed

Lines changed: 304 additions & 8 deletions

File tree

Lines changed: 278 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,278 @@
1+
// Copyright (c) 2025 Climate Interactive / New Venture Fund. All rights reserved.
2+
3+
import { describe, expect, it } from 'vitest'
4+
5+
import type { Config } from './config'
6+
import { Context } from './context'
7+
import { parseMarkdownPageContent } from './parse'
8+
9+
const config: Config = {
10+
mode: 'development',
11+
baseProjDir: 'xxx',
12+
sourceDir: 'xxx',
13+
outDir: 'xxx',
14+
version: '25.1.0',
15+
langs: [],
16+
formats: [],
17+
template: 'default',
18+
author: 'Climate Interactive',
19+
logoPath: 'xxx',
20+
defs: [],
21+
pages: ['page_1.md'],
22+
untranslated: [],
23+
options: {}
24+
}
25+
26+
describe('parseMarkdownPageContent', () => {
27+
it('should parse valid Markdown content', () => {
28+
const mdCommon = `\
29+
<!-- Common strings -->
30+
31+
<!-- def:content_placeholder -->
32+
This text was defined in \`common.md\`.
33+
34+
<!-- def:github_project -->
35+
This page was generated by [\`docs-builder\`](https://github.com/climateinteractive/docs-builder).
36+
37+
<!-- Titles for untranslated pages -->
38+
39+
<!-- def:appendix__title -->
40+
Appendix
41+
42+
<!-- Common section headers -->
43+
44+
<!-- def:section_examples -->
45+
Examples
46+
47+
<!-- def:section_footnotes -->
48+
Footnotes
49+
`
50+
51+
const mdGlossary = `\
52+
# <!-- section:glossary --><!-- def:title -->Glossary
53+
54+
## <!-- section:page -->
55+
<!-- def:def -->
56+
**page**: one side of a sheet of paper in a collection of sheets bound together
57+
`
58+
59+
const mdPage = `\
60+
---
61+
fragments:
62+
head: ['example']
63+
---
64+
65+
# <!-- section:page_1 --><!-- def:title -->Page 1
66+
67+
<!-- def:intro -->
68+
This is the first [page][glossary_page].
69+
70+
## <!-- section:examples -->:section_examples:
71+
72+
<!-- begin-def:example_1 -->
73+
74+
This is a sentence with __bold__ and _italic_ text.
75+
76+
This block has two paragraphs that are captured using a \`begin-def\` / \`end-def\` pair.
77+
78+
<!-- end-def -->
79+
80+
<!-- def[hidden]:hidden_text -->
81+
this text was defined using "hidden" flag
82+
83+
<!-- def:example_2 -->
84+
Use \`def[hidden]\` to define some text without making it appear, then "use" it later on the page (like this: _:page_1__examples__hidden_text:_).
85+
86+
:content_placeholder:
87+
88+
<!-- def:example_3 -->
89+
This sentence refers to a footnote. footnote-ref:fn_example
90+
91+
| <!-- def:header_person-->person | <!-- def:header_age -->age |
92+
|--|--|
93+
| Alice | 42 |
94+
| Bob | 99 |
95+
96+
97+
\`\`\`js
98+
// This is a code block
99+
const one = 1
100+
const two = 2
101+
const answer = one + two
102+
\`\`\`
103+
104+
## <!-- section:footnotes -->:section_footnotes:
105+
106+
footnote:fn_example This is a footnote (only in English for now).
107+
108+
_:github_project:_
109+
`
110+
111+
// Parse the common and glossary strings so that the strings are part of the context
112+
const enContext = new Context(config, 'en')
113+
parseMarkdownPageContent(enContext, 'common.md', mdCommon)
114+
parseMarkdownPageContent(enContext, 'glossary.md', mdGlossary)
115+
116+
// Verify that the valid English page is parsed into the expected frontmatter and raw Markdown
117+
const parsed = parseMarkdownPageContent(enContext, 'page_1.md', mdPage)
118+
expect(parsed.frontmatter).toEqual({
119+
fragments: {
120+
head: ['example']
121+
}
122+
})
123+
expect(parsed.raw).toEqual(`
124+
# <a name="page_1"></a>Page 1<a class="heading-link" href="#page_1">&#128279;</a>
125+
126+
This is the first [page](glossary:page).
127+
128+
## <a name="page_1__examples"></a>Examples<a class="heading-link" href="#page_1__examples">&#128279;</a>
129+
130+
This is a sentence with **bold** and _italic_ text.
131+
132+
This block has two paragraphs that are captured using a \`begin-def\` / \`end-def\` pair.
133+
134+
135+
136+
Use \`def[hidden]\` to define some text without making it appear, then &quot;use&quot; it later on the page (like this: _this text was defined using "hidden" flag_).
137+
138+
This text was defined in \`common.md\`.
139+
140+
This sentence refers to a footnote. footnote-ref:fn_example
141+
142+
| person | age |
143+
|--|--|
144+
| Alice | 42 |
145+
| Bob | 99 |
146+
147+
\`\`\`js
148+
// This is a code block
149+
const one = 1
150+
const two = 2
151+
const answer = one + two
152+
\`\`\`
153+
154+
## <a name="page_1__footnotes"></a>Footnotes<a class="heading-link" href="#page_1__footnotes">&#128279;</a>
155+
156+
footnote:fn_example This is a footnote (only in English for now).
157+
158+
_This page was generated by [\`docs-builder\`](https://github.com/climateinteractive/docs-builder)._
159+
160+
161+
`)
162+
})
163+
164+
it('should throw an error if an unknown command is used', () => {
165+
const md = `\
166+
<!-- somecommand:key -->
167+
`
168+
const enContext = new Context(config, 'en')
169+
expect(() => parseMarkdownPageContent(enContext, 'page_1.md', md)).toThrow(
170+
`Unknown command 'somecommand' (page=page_1.md)`
171+
)
172+
})
173+
174+
it('should throw an error if a command is used with an invalid identifier', () => {
175+
const md = `\
176+
<!-- def:key_with_INVALID_chars -->
177+
Hello
178+
`
179+
const enContext = new Context(config, 'en')
180+
expect(() => parseMarkdownPageContent(enContext, 'page_1.md', md)).toThrow(
181+
`Identifier (key_with_INVALID_chars) must contain only lowercase letters, digits, and underscores (page=page_1.md)`
182+
)
183+
})
184+
185+
it('should throw an error if a begin-def command is not closed', () => {
186+
const md = `\
187+
<!-- begin-def:example_1 -->
188+
189+
Hello
190+
`
191+
const enContext = new Context(config, 'en')
192+
expect(() => parseMarkdownPageContent(enContext, 'page_1.md', md)).toThrow(
193+
`Command 'begin-def' was not resolved or closed (page=page_1.md)`
194+
)
195+
})
196+
197+
it('should throw an error if an unexpected command is used before a begin-def is closed', () => {
198+
const md = `\
199+
<!-- begin-def:example_1 -->
200+
201+
Hello
202+
203+
<!-- def:example_2 -->
204+
`
205+
const enContext = new Context(config, 'en')
206+
expect(() => parseMarkdownPageContent(enContext, 'page_1.md', md)).toThrow(
207+
`Unexpected command 'def' while current command 'begin-def' (id=example_1) is in effect (page=page_1.md)`
208+
)
209+
})
210+
211+
it('should throw an error if a begin-def / end-def pair does not contain any text', () => {
212+
const md = `\
213+
<!-- begin-def:example_1 -->
214+
215+
<!-- end-def -->
216+
`
217+
218+
const enContext = new Context(config, 'en')
219+
expect(() => parseMarkdownPageContent(enContext, 'page_1.md', md)).toThrow(
220+
`Saw 'end-def' but no tokens were included (page=page_1.md)`
221+
)
222+
})
223+
224+
it('should throw an error if an end-def command is used without a corresponding begin-def command', () => {
225+
const md = `\
226+
Hello
227+
228+
<!-- end-def -->
229+
`
230+
231+
const enContext = new Context(config, 'en')
232+
expect(() => parseMarkdownPageContent(enContext, 'page_1.md', md)).toThrow(
233+
`Saw 'end-def' without corresponding 'begin-def' (page=page_1.md)`
234+
)
235+
})
236+
237+
it('should throw an error if text replacement syntax is used with an unknown string identifier', () => {
238+
const md = `\
239+
Hello
240+
241+
:some_id:
242+
`
243+
const enContext = new Context(config, 'en')
244+
expect(() => parseMarkdownPageContent(enContext, 'page_1.md', md)).toThrow(
245+
'Unknown replacement for id=some_id (page=page_1.md)'
246+
)
247+
})
248+
249+
it('should throw an error if two spaces are detected at the end of a line', () => {
250+
const spaces = ' '.repeat(2)
251+
const md = `\
252+
# <!-- section:section_1 -->Section 1
253+
254+
Hello${spaces}
255+
there
256+
`
257+
258+
let expectedMsg = ''
259+
expectedMsg += 'Detected two or more spaces at the end of a text line. '
260+
expectedMsg += 'Markdown interprets this as a line break, which can be surprising. '
261+
expectedMsg += 'If the spaces were added unintentionally, remove the extra spaces. '
262+
expectedMsg += `If you do want a line break, use an explicit HTML 'br' tag instead. `
263+
expectedMsg += '(page=page_1.md scope=section_1)'
264+
265+
const enContext = new Context(config, 'en')
266+
expect(() => parseMarkdownPageContent(enContext, 'page_1.md', md)).toThrow(expectedMsg)
267+
})
268+
269+
it('should throw an error if a reference-style link cannot be resolved', () => {
270+
const md = `\
271+
[This is a link][unknown_ref]
272+
`
273+
const enContext = new Context(config, 'en')
274+
expect(() => parseMarkdownPageContent(enContext, 'page_1.md', md)).toThrow(
275+
'Unresolved reference-style link found for lang=en link=[This is a link][unknown_ref]'
276+
)
277+
})
278+
})

packages/docs-builder/src/parse.ts

Lines changed: 26 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -38,23 +38,41 @@ type Links = {
3838
* @return The translated Markdown content.
3939
*/
4040
export function parseMarkdownPage(context: Context, relPath: string): MarkdownPage {
41+
// Read the Markdown file
42+
const filePath = resolve(context.config.baseProjDir, relPath)
43+
const origMarkdownWithFrontmatter = readTextFile(filePath)
44+
45+
// Parse the Markdown content
46+
return parseMarkdownPageContent(context, relPath, origMarkdownWithFrontmatter)
47+
}
48+
49+
/**
50+
* Parse the given base (English) Markdown page content (supplied as a string) and translate it into the language
51+
* associated with the context.
52+
*
53+
* @param context The language-specific context.
54+
* @param relPath The path to the Markdown file, relative to the base project directory.
55+
* @param origMarkdownWithFrontmatter The Markdown content with optional frontmatter.
56+
* @return The translated Markdown content.
57+
*/
58+
export function parseMarkdownPageContent(
59+
context: Context,
60+
relPath: string,
61+
origMarkdownWithFrontmatter: string
62+
): MarkdownPage {
4163
// Configure marked.js
4264
marked.setOptions({
4365
headerIds: false
4466
})
4567

46-
// Set the current page (for error reporting)
47-
context.setCurrentPage(relPath)
48-
49-
// Read the Markdown file
50-
const filePath = resolve(context.config.baseProjDir, relPath)
51-
const origMarkdownWithFrontmatter = readTextFile(filePath)
52-
5368
// Separate frontmatter from the content
5469
const origMarkdownSeparated = matter(origMarkdownWithFrontmatter)
5570
const origMarkdown = origMarkdownSeparated.content
5671
const frontmatter = origMarkdownSeparated.data
5772

73+
// Set the current page (for error reporting)
74+
context.setCurrentPage(relPath)
75+
5876
// Append synthesized link info for glossary references so that source files can
5977
// use `[link text][glossary_term]` without manually defining a reference for
6078
// "glossary_term"; it will be converted to `[...](glossary:term)` automatically
@@ -445,7 +463,7 @@ function processToken(context: Context, state: ProcessState, token: marked.Token
445463
let msg = 'Detected two or more spaces at the end of a text line.'
446464
msg += ' Markdown interprets this as a line break, which can be surprising.'
447465
msg += ' If the spaces were added unintentionally, remove the extra spaces.'
448-
msg += ' If you do want a line break, use an explicit HTML `<br/>` tag instead.'
466+
msg += ` If you do want a line break, use an explicit HTML 'br' tag instead.`
449467
throw new Error(context.getScopedMessage(msg))
450468
}
451469
default:

0 commit comments

Comments
 (0)