Skip to content

Commit 806e1cf

Browse files
fix: throw error if invalid link syntax is detected (#93)
Fixes #91
1 parent 3d25e27 commit 806e1cf

3 files changed

Lines changed: 136 additions & 8 deletions

File tree

packages/docs-builder/src/gen-html.spec.ts

Lines changed: 103 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,108 @@
22

33
import { describe, expect, it } from 'vitest'
44

5-
import { convertMarkdownToHtml, subscriptify } from './gen-html'
5+
import type { Config } from './config'
6+
import { Context } from './context'
7+
import { convertMarkdownToHtml, generateHtml, subscriptify } from './gen-html'
8+
import { parseMarkdownPageContent } from './parse'
9+
10+
const config: Config = {
11+
mode: 'development',
12+
baseProjDir: 'xxx',
13+
sourceDir: 'xxx',
14+
outDir: 'xxx',
15+
version: '25.1.0',
16+
langs: [{ code: 'de', version: '25.1.0' }],
17+
formats: [],
18+
template: 'default',
19+
author: 'Climate Interactive',
20+
logoPath: 'xxx',
21+
defs: [],
22+
pages: ['page_1.md'],
23+
untranslated: [],
24+
options: {}
25+
}
26+
27+
describe('generateHtml', () => {
28+
it('should convert valid Markdown', () => {
29+
const md = `\
30+
This is a valid normal link: [page](https://climateinteractive.org)
31+
32+
This is a valid reference-style link: [page][ref]
33+
34+
This is a valid normal link: [page](https://climateinteractive.org) (with parentheses after) and more text
35+
36+
This is a valid reference-style link: [page][ref] (with parentheses after) and more text
37+
38+
[ref]: https://climateinteractive.org
39+
`
40+
41+
const html = generateHtml(new Context(config, 'en'), 'page_1.md', { raw: md })
42+
expect(html.baseName).toBe('page_1')
43+
expect(html.relPath).toBe('page_1.html')
44+
expect(html.body).toBe(`\
45+
<p>This is a valid normal link: <a href="https://climateinteractive.org">page</a></p>
46+
<p>This is a valid reference-style link: <a href="https://climateinteractive.org">page</a></p>
47+
<p>This is a valid normal link: <a href="https://climateinteractive.org">page</a> (with parentheses after) and more text</p>
48+
<p>This is a valid reference-style link: <a href="https://climateinteractive.org">page</a> (with parentheses after) and more text</p>
49+
`)
50+
})
51+
52+
it('should throw an error if invalid link syntax is detected', () => {
53+
const links = `\
54+
This is a valid normal link: [page](https://climateinteractive.org)
55+
56+
This is a valid reference-style link: [page][ref]
57+
58+
This is a valid normal link: [page](https://climateinteractive.org) (with parentheses after) and more text
59+
60+
This is a valid reference-style link: [page][ref] (with parentheses after) and more text
61+
62+
This is an invalid normal link: [page] (https://climateinteractive.org) (with parentheses after) and more text
63+
64+
This is an invalid reference-style link: [page] [ref] (with parentheses after) and more text
65+
`
66+
67+
const md = `\
68+
# <!-- section:section_1 -->Section 1
69+
70+
<!-- begin-def:block_1 -->
71+
72+
${links}
73+
74+
<!-- end-def -->
75+
76+
[ref]: https://climateinteractive.org
77+
`
78+
79+
// Verify that an error is thrown if the English content contains invalid link syntax.
80+
// Note that in the English case, the invalid ref link will be converted to an HTML link.
81+
const enContext = new Context(config, 'en')
82+
const enMd = parseMarkdownPageContent(enContext, 'page_1.md', md)
83+
expect(() => generateHtml(enContext, 'page_1.md', { raw: enMd.raw })).toThrow(`\
84+
Detected invalid Markdown link syntax in the generated HTML:
85+
[page] (&lt;a href
86+
[page] &lt;a href
87+
To fix, ensure there are no spaces between link text and link url/reference, for example: [text](url) or [text][ref] (page=page_1.md)`)
88+
89+
// Verify that an error is thrown if the translated content contains invalid link syntax.
90+
// Note that in the non-English case, the invalid ref link target will not be converted
91+
// to an HTML link (unlike the English case above), so the error message will be different.
92+
const deContext = enContext.derive(
93+
'de',
94+
new Map([
95+
['section_1__title', 'Section 1'],
96+
['section_1__block_1', links]
97+
])
98+
)
99+
const deMd = parseMarkdownPageContent(deContext, 'page_1.md', md)
100+
expect(() => generateHtml(deContext, 'page_1.md', { raw: deMd.raw })).toThrow(`\
101+
Detected invalid Markdown link syntax in the generated HTML:
102+
[page] (&lt;a href
103+
[page] [ref]
104+
To fix, ensure there are no spaces between link text and link url/reference, for example: [text](url) or [text][ref] (lang=de page=page_1.md)`)
105+
})
106+
})
6107

7108
describe('subscriptify', () => {
8109
it('should convert chemical formulas', () => {
@@ -29,7 +130,7 @@ describe('convertMarkdownToHtml', () => {
29130
'<p>This is -CO<sub>2</sub>-</p>\n'
30131
)
31132
expect(convertMarkdownToHtml(undefined, '# This is CO2')).toBe(
32-
'<h1 id="this-is-co2">This is CO<sub>2</sub></h1>\n'
133+
'<h1>This is CO<sub>2</sub></h1>\n'
33134
)
34135
expect(convertMarkdownToHtml(undefined, '> This is _CO2_')).toBe(
35136
'<blockquote>\n<p>This is <em>CO<sub>2</sub></em></p>\n</blockquote>\n'

packages/docs-builder/src/gen-html.ts

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,9 @@ export function generateHtml(context: Context, mdRelPath: string, mdPage: Markdo
144144
// Convert the Markdown content to HTML
145145
const body = convertMarkdownToHtml(context, md)
146146

147+
// Check for evidence of invalid Markdown link syntax that remains in the generated HTML
148+
checkForInvalidLinkSyntax(context, body)
149+
147150
// Save the names of the `<head>` fragments to include
148151
const headFragments = mdPage.frontmatter?.fragments?.head || []
149152

@@ -592,7 +595,9 @@ export function convertMarkdownToHtml(context: Context, md: string): string {
592595
})
593596

594597
// Parse the Markdown into HTML
595-
return marked.parse(md)
598+
return marked.parse(md, {
599+
headerIds: false
600+
})
596601
}
597602

598603
/**
@@ -617,3 +622,30 @@ export function subscriptify(s: string): string {
617622
return subscriptMap.get(m1)
618623
})
619624
}
625+
626+
// This will match cases where a space in the Markdown link syntax caused the link parts
627+
// to be converted to separate elements in the HTML output, for example:
628+
// Markdown: [text] (https://example.com)
629+
// HTML: [text] (<a href="https://example.com">https://example.com</a>)
630+
// Markdown: [text] [ref]
631+
// HTML (en): [text] <a href="https://climateinteractive.org">ref</a>
632+
// HTML (xx): [text] [ref]
633+
// Note that the generated HTML in the second example is different for the English and
634+
// non-English cases (due to different parsing code paths), so we need to detect both.
635+
const invalidLinkRegExp = /\[([^\]]+)\]\s+(\(?<a\s\w+|\[([^\]]+)\])/g
636+
/**
 * Throw an error if the given HTML text contains evidence of invalid Markdown link syntax.
 *
 * A space between the link text and the link url/reference prevents the Markdown from
 * being parsed as a link, which leaves tell-tale fragments (matched by
 * `invalidLinkRegExp`) in the generated HTML.
 *
 * @param context The context used to scope the error message (the message is passed
 * through `context.getScopedMessage` before being thrown).
 * @param md The text to scan. Note that despite the parameter name, the caller passes
 * the generated HTML here, not the Markdown source.
 * @throws Error if one or more invalid link fragments are found; the message lists each
 * fragment on its own line followed by a hint on how to fix the Markdown.
 */
function checkForInvalidLinkSyntax(context: Context, md: string): void {
  // The regex has the global flag, so `match` returns every matching substring
  // (or null if there are none)
  const matches = md.match(invalidLinkRegExp)
  if (!matches) {
    return
  }

  let msg = 'Detected invalid Markdown link syntax in the generated HTML:\n'
  for (const match of matches) {
    // Escape every `<` in the fragment, not just the first one; the bracketed link text
    // may itself contain `<`, in which case a string pattern would leave the `<a` that
    // follows unescaped
    msg += `${match.replace(/</g, '&lt;')}\n`
  }
  msg +=
    'To fix, ensure there are no spaces between link text and link url/reference, for example: [text](url) or [text][ref]'
  throw new Error(context.getScopedMessage(msg))
}

packages/docs-builder/src/parse.ts

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -60,11 +60,6 @@ export function parseMarkdownPageContent(
6060
relPath: string,
6161
origMarkdownWithFrontmatter: string
6262
): MarkdownPage {
63-
// Configure marked.js
64-
marked.setOptions({
65-
headerIds: false
66-
})
67-
6863
// Separate frontmatter from the content
6964
const origMarkdownSeparated = matter(origMarkdownWithFrontmatter)
7065
const origMarkdown = origMarkdownSeparated.content

0 commit comments

Comments
 (0)