Skip to content

Commit 63cf56d

Browse files
fix: move invalid syntax detection to gen HTML phase
1 parent bfae031 commit 63cf56d

4 files changed

Lines changed: 132 additions & 65 deletions

File tree

packages/docs-builder/src/gen-html.spec.ts

Lines changed: 102 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,108 @@
22

33
import { describe, expect, it } from 'vitest'
44

5-
import { convertMarkdownToHtml, subscriptify } from './gen-html'
5+
import type { Config } from './config'
6+
import { Context } from './context'
7+
import { convertMarkdownToHtml, generateHtml, subscriptify } from './gen-html'
8+
import { parseMarkdownPageContent } from './parse'
9+
10+
// Minimal `Config` fixture shared by the `generateHtml` tests in this file.
// All path-like fields are set to the dummy value 'xxx', a single page
// ('page_1.md') is listed, and 'de' is the only entry in `langs`.
const config: Config = {
11+
mode: 'development',
12+
baseProjDir: 'xxx',
13+
sourceDir: 'xxx',
14+
outDir: 'xxx',
15+
version: '25.1.0',
16+
langs: [{ code: 'de', version: '25.1.0' }],
17+
formats: [],
18+
template: 'default',
19+
author: 'Climate Interactive',
20+
logoPath: 'xxx',
21+
defs: [],
22+
pages: ['page_1.md'],
23+
untranslated: [],
24+
options: {}
25+
}
26+
27+
describe('generateHtml', () => {
  // Well-formed inline and reference-style links (with or without trailing
  // parenthesized text) must be converted to anchors without raising an error.
  it('should convert valid Markdown', () => {
    const md = `\
This is a valid normal link: [page](https://climateinteractive.org)

This is a valid reference-style link: [page][ref]

This is a valid normal link: [page](https://climateinteractive.org) (with parentheses after) and more text

This is a valid reference-style link: [page][ref] (with parentheses after) and more text

[ref]: https://climateinteractive.org
`

    const html = generateHtml(new Context(config, 'en'), 'page_1.md', { raw: md })
    expect(html.baseName).toBe('page_1')
    expect(html.relPath).toBe('page_1.html')
    expect(html.body).toBe(`\
<p>This is a valid normal link: <a href="https://climateinteractive.org">page</a></p>
<p>This is a valid reference-style link: <a href="https://climateinteractive.org">page</a></p>
<p>This is a valid normal link: <a href="https://climateinteractive.org">page</a> (with parentheses after) and more text</p>
<p>This is a valid reference-style link: <a href="https://climateinteractive.org">page</a> (with parentheses after) and more text</p>
`)
  })

  // This test must not be focused with `.only`: a focused test causes every
  // other test in this file to be skipped.
  it('should throw an error if invalid link syntax is detected', () => {
    const links = `\
This is a valid normal link: [page](https://climateinteractive.org)

This is a valid reference-style link: [page][ref]

This is a valid normal link: [page](https://climateinteractive.org) (with parentheses after) and more text

This is a valid reference-style link: [page][ref] (with parentheses after) and more text

This is an invalid normal link: [page] (https://climateinteractive.org) (with parentheses after) and more text

This is an invalid reference-style link: [page] [ref] (with parentheses after) and more text
`

    const md = `\
# <!-- section:section_1 -->Section 1

<!-- begin-def:block_1 -->

${links}

<!-- end-def -->

[ref]: https://climateinteractive.org
`

    // Verify that an error is thrown if the English content contains invalid link syntax.
    // Note that in the English case, the invalid ref link will be converted to an HTML link.
    const enContext = new Context(config, 'en')
    const enMd = parseMarkdownPageContent(enContext, 'page_1.md', md)
    expect(() => generateHtml(enContext, 'page_1.md', { raw: enMd.raw })).toThrow(`\
Detected invalid Markdown link syntax in the generated HTML:
[page] (&lt;a href
[page] &lt;a href
To fix, ensure there are no spaces between link text and link url/reference, for example: [text](url) or [text][ref] (page=page_1.md)`)

    // Verify that an error is thrown if the translated content contains invalid link syntax.
    // Note that in the non-English case, the invalid ref link target will not be converted
    // to an HTML link (unlike the English case above), so the error message will be different.
    const deContext = enContext.derive(
      'de',
      new Map([
        ['section_1__title', 'Section 1'],
        ['section_1__block_1', links]
      ])
    )
    const deMd = parseMarkdownPageContent(deContext, 'page_1.md', md)
    expect(() => generateHtml(deContext, 'page_1.md', { raw: deMd.raw })).toThrow(`\
Detected invalid Markdown link syntax in the generated HTML:
[page] (&lt;a href
[page] [ref]
To fix, ensure there are no spaces between link text and link url/reference, for example: [text](url) or [text][ref] (lang=de page=page_1.md)`)
  })
})
6107

7108
describe('subscriptify', () => {
8109
it('should convert chemical formulas', () => {

packages/docs-builder/src/gen-html.ts

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,9 @@ export function generateHtml(context: Context, mdRelPath: string, mdPage: Markdo
144144
// Convert the Markdown content to HTML
145145
const body = convertMarkdownToHtml(context, md)
146146

147+
// Check for evidence of invalid Markdown link syntax that remains in the generated HTML
148+
checkForInvalidLinkSyntax(context, body)
149+
147150
// Save the names of the `<head>` fragments to include
148151
const headFragments = mdPage.frontmatter?.fragments?.head || []
149152

@@ -617,3 +620,30 @@ export function subscriptify(s: string): string {
617620
return subscriptMap.get(m1)
618621
})
619622
}
623+
624+
// This will match cases where a space in the Markdown link syntax caused the link parts
// to be converted to separate elements in the HTML output, for example:
//   Markdown:  [text] (https://example.com)
//   HTML:      [text] (<a href="https://example.com">https://example.com</a>)
//   Markdown:  [text] [ref]
//   HTML (en): [text] <a href="https://climateinteractive.org">ref</a>
//   HTML (xx): [text] [ref]
// Note that the generated HTML in the second example is different for the English and
// non-English cases (due to different parsing code paths), so we need to detect both.
//
// The regex carries the `g` flag so that `String.prototype.match` returns every
// occurrence; `match` resets `lastIndex` itself, so sharing this instance is safe.
const invalidLinkRegExp = /\[([^\]]+)\]\s+(\(?<a\s\w+|\[([^\]]+)\])/g

/**
 * Throw an error if the given HTML text contains evidence of invalid Markdown link syntax.
 *
 * @param context The context used to scope the error message (via `getScopedMessage`).
 * @param md The text to check. Despite the parameter name, the caller passes the
 * generated HTML body, not the Markdown source.
 * @throws Error listing every offending fragment (one per line, with each `<`
 * escaped as `&lt;`) followed by a hint describing how to fix the source.
 */
function checkForInvalidLinkSyntax(context: Context, md: string): void {
  const matches = md.match(invalidLinkRegExp)
  if (!matches) {
    return
  }

  // Escape *every* `<` in each fragment, not just the first one: the bracketed
  // link text may itself contain markup, and a partially escaped fragment
  // would put raw tag openers into the error message.
  const fragments = matches.map(match => match.replace(/</g, '&lt;'))

  const msg = [
    'Detected invalid Markdown link syntax in the generated HTML:',
    ...fragments,
    'To fix, ensure there are no spaces between link text and link url/reference, for example: [text](url) or [text][ref]'
  ].join('\n')
  throw new Error(context.getScopedMessage(msg))
}

packages/docs-builder/src/parse.spec.ts

Lines changed: 0 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -275,49 +275,4 @@ there
275275
'Unresolved reference-style link found for lang=en link=[This is a link][unknown_ref]'
276276
)
277277
})
278-
279-
it('should throw an error if invalid link syntax is detected', () => {
280-
const links = `\
281-
This is a valid normal link: [page](https://climateinteractive.org)
282-
283-
This is a valid reference-style link: [page][ref]
284-
285-
This is an invalid normal link: [page] (https://climateinteractive.org)
286-
287-
This is an invalid reference-style link: [page] [ref]
288-
`
289-
290-
const md = `\
291-
# <!-- section:section_1 -->Section 1
292-
293-
<!-- begin-def:block_1 -->
294-
295-
${links}
296-
297-
<!-- end-def -->
298-
299-
[ref]: https://climateinteractive.org
300-
`
301-
302-
function expectedMsg(lang: string) {
303-
const langPart = lang === 'en' ? '' : `lang=${lang} `
304-
return `\
305-
Detected invalid link syntax:
306-
[page] (https://climateinteractive.org)
307-
[page] [ref]
308-
To fix, ensure there are no spaces between link text and link url/reference, for example: [text](url) or [text][ref] (${langPart}page=page_1.md scope=section_1)`
309-
}
310-
311-
// Verify that an error is thrown if the English content contains invalid link syntax
312-
const enContext = new Context(config, 'en')
313-
expect(() => parseMarkdownPageContent(enContext, 'page_1.md', md)).toThrow(expectedMsg('en'))
314-
315-
// Verify that an error is thrown if the translated content contains invalid link syntax
316-
const deBlocks = new Map([
317-
['section_1__title', 'Section 1'],
318-
['section_1__block_1', links]
319-
])
320-
const deContext = new Context(config, 'de', undefined, deBlocks)
321-
expect(() => parseMarkdownPageContent(deContext, 'page_1.md', md)).toThrow(expectedMsg('de'))
322-
})
323278
})

packages/docs-builder/src/parse.ts

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -517,7 +517,6 @@ function addBlockForTokens(
517517
sep = '\n\n'
518518
}
519519
const blockText = blockParts.join(sep)
520-
checkForInvalidLinkSyntax(context, blockText)
521520
context.addBlock(localBlockId, blockText)
522521
return tokens
523522
} else {
@@ -526,7 +525,6 @@ function addBlockForTokens(
526525
const blockText = context.getTranslatedBlockText(fullBlockId)
527526
if (blockText) {
528527
// Parse the translated tokens and insert them
529-
checkForInvalidLinkSyntax(context, blockText)
530528
// XXX: If there is more than one token (even if some are whitespace only), we
531529
// need to include extra newlines after the last block, otherwise marked will
532530
// not parse the text correctly
@@ -541,23 +539,6 @@ function addBlockForTokens(
541539
}
542540
}
543541

544-
/**
545-
* Throw an error if the given text contains invalid link syntax.
546-
*/
547-
function checkForInvalidLinkSyntax(context: Context, md: string): void {
548-
const invalidLinkRegExp = /\[([^\]]+)\]\s+\(([^)]+)\)|\[([^\]]+)\]\s+\[([^\]]+)\]/g
549-
const matches = md.match(invalidLinkRegExp)
550-
if (matches) {
551-
let msg = 'Detected invalid link syntax:\n'
552-
for (const match of matches) {
553-
msg += `${match}\n`
554-
}
555-
msg +=
556-
'To fix, ensure there are no spaces between link text and link url/reference, for example: [text](url) or [text][ref]'
557-
throw new Error(context.getScopedMessage(msg))
558-
}
559-
}
560-
561542
/**
562543
* Log a warning if the given text token is on a translatable page but is
563544
* not included in a `def` or `begin/end-def` pair.

0 commit comments

Comments
 (0)