From 4f20ec0ea4490946bc260a02a84f70d6856caa79 Mon Sep 17 00:00:00 2001 From: Zach Young Date: Sun, 18 Aug 2024 23:34:34 -0700 Subject: [PATCH 1/5] ref links work; but regression in HTML roundtrip for 163 --- link.go | 54 +++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 43 insertions(+), 11 deletions(-) diff --git a/link.go b/link.go index 2b3e4a1..2773f1d 100644 --- a/link.go +++ b/link.go @@ -13,6 +13,15 @@ import ( "golang.org/x/text/cases" ) +type RefStyle int + +const ( + NoRef RefStyle = iota + Full + Collpased + Shortcut +) + // Note: Link and Image are the same underlying struct by design, // so that code can safely convert between *Link and *Image. @@ -24,6 +33,9 @@ type Link struct { URL string Title string TitleChar byte // ', " or ) + + Label string + RefStyle RefStyle } // An Image is an [Inline] representing an [image] ( tag). @@ -34,6 +46,9 @@ type Image struct { URL string Title string TitleChar byte + + Label string + RefStyle RefStyle } func (*Link) Inline() {} @@ -53,18 +68,30 @@ func (x *Link) printHTML(p *printer) { } func (x *Link) printMarkdown(p *printer) { + fmt.Printf("%+v\n", x) p.WriteByte('[') for _, c := range x.Inner { c.printMarkdown(p) } - p.WriteString("](") - u := mdLinkEscaper.Replace(x.URL) - if u == "" || strings.ContainsAny(u, " ") { - u = "<" + u + ">" + p.WriteByte(']') + switch x.RefStyle { + case NoRef: + p.WriteByte('(') + u := mdLinkEscaper.Replace(x.URL) + if u == "" || strings.ContainsAny(u, " ") { + u = "<" + u + ">" + } + p.WriteString(u) + printLinkTitleMarkdown(p, x.Title, x.TitleChar) + p.WriteByte(')') + case Full: + p.WriteByte('[') + p.WriteString(x.Label) + p.WriteByte(']') + case Collpased: + p.WriteString("[]") + case Shortcut: } - p.WriteString(u) - printLinkTitleMarkdown(p, x.Title, x.TitleChar) - p.WriteByte(')') } func printLinkTitleMarkdown(p *printer, title string, titleChar byte) { @@ -193,8 +220,9 @@ func parseLinkClose(p *parser, s string, start int, open *openPlain) (*Link, int if !ok { break } - if link, ok := p.links[normalizeLabel(label)]; ok { - return &Link{URL: link.URL, Title: link.Title}, i, true + label = normalizeLabel(label) + if link, ok := p.links[label]; ok { + return &Link{URL: link.URL, Title: link.Title, Label: label, RefStyle: Full}, i, true } // Note: Could break here, but CommonMark dingus does not // fall back to trying Text for [Text][Label] when Label is unknown. @@ -205,12 +233,16 @@ func parseLinkClose(p *parser, s string, start int, open *openPlain) (*Link, int // Collapsed or shortcut reference link: [Text][] or [Text]. end := i + 1 + refStyle := Shortcut if strings.HasPrefix(s[end:], "[]") { end += 2 + refStyle = Collpased } - if link, ok := p.links[normalizeLabel(s[open.i:i])]; ok { - return &Link{URL: link.URL, Title: link.Title}, end, true + label := normalizeLabel(s[open.i:i]) + fmt.Printf("s:%q, label:%q\n", s, label) + if link, ok := p.links[label]; ok { + return &Link{URL: link.URL, Title: link.Title, Label: label, RefStyle: refStyle}, end, true } return nil, 0, false } From 69475597d74094383bf9a3adb9ad8fd57ec3fd93 Mon Sep 17 00:00:00 2001 From: Zach Young Date: Sun, 18 Aug 2024 23:50:24 -0700 Subject: [PATCH 2/5] remove debug print statements; added test --- link.go | 2 -- testdata/linkref_fmt.txt | 12 ++++++++++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/link.go b/link.go index 2773f1d..3b42b1c 100644 --- a/link.go +++ b/link.go @@ -68,7 +68,6 @@ func (x *Link) printHTML(p *printer) { } func (x *Link) printMarkdown(p *printer) { - fmt.Printf("%+v\n", x) p.WriteByte('[') for _, c := range x.Inner { c.printMarkdown(p) @@ -240,7 +239,6 @@ func parseLinkClose(p *parser, s string, start int, open *openPlain) (*Link, int } label := normalizeLabel(s[open.i:i]) - fmt.Printf("s:%q, label:%q\n", s, label) if link, ok := p.links[label]; ok { return &Link{URL: link.URL, Title: link.Title, Label: label, RefStyle: refStyle}, end, true } diff --git a/testdata/linkref_fmt.txt b/testdata/linkref_fmt.txt index 35f95e8..3210106 100644 --- a/testdata/linkref_fmt.txt +++ b/testdata/linkref_fmt.txt @@ -37,3 +37,15 @@ A document. [r1]: u1 (title1) [r2]: u2 "title2" [r3]: u3 'title3' +-- reflink-full -- +[full][full] + +[full]: u1 +-- reflink-collapsed -- +[collapsed][] + +[collapsed]: u1 +-- reflink-shortcut -- +[shortcut] + +[shortcut]: u1 From ff538e99b2d27adca6c7d848f80c57590adef727 Mon Sep 17 00:00:00 2001 From: Zach Young Date: Sun, 18 Aug 2024 23:58:15 -0700 Subject: [PATCH 3/5] added casing to test --- link.go | 8 ++++---- testdata/linkref_fmt.txt | 12 ++++++------ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/link.go b/link.go index 3b42b1c..307af5e 100644 --- a/link.go +++ b/link.go @@ -219,8 +219,8 @@ func parseLinkClose(p *parser, s string, start int, open *openPlain) (*Link, int if !ok { break } - label = normalizeLabel(label) - if link, ok := p.links[label]; ok { + if link, ok := p.links[normalizeLabel(label)]; ok { + fmt.Println("label:", label) return &Link{URL: link.URL, Title: link.Title, Label: label, RefStyle: Full}, i, true } // Note: Could break here, but CommonMark dingus does not @@ -238,8 +238,8 @@ func parseLinkClose(p *parser, s string, start int, open *openPlain) (*Link, int refStyle = Collpased } - label := normalizeLabel(s[open.i:i]) - if link, ok := p.links[label]; ok { + label := s[open.i:i] + if link, ok := p.links[normalizeLabel(label)]; ok { return &Link{URL: link.URL, Title: link.Title, Label: label, RefStyle: refStyle}, end, true } return nil, 0, false diff --git a/testdata/linkref_fmt.txt b/testdata/linkref_fmt.txt index 3210106..869bf9e 100644 --- a/testdata/linkref_fmt.txt +++ b/testdata/linkref_fmt.txt @@ -38,14 +38,14 @@ A document. [r2]: u2 "title2" [r3]: u3 'title3' -- reflink-full -- -[full][full] +[Foo bar][Baz] -[full]: u1 +[Baz]: u1 -- reflink-collapsed -- -[collapsed][] +[Foo bar][] -[collapsed]: u1 +[Foo bar]: u1 -- reflink-shortcut -- -[shortcut] +[Foo bar] -[shortcut]: u1 +[Foo bar]: u1 From 796704f3a2cbafeb4564d974965a4c21d82c8378 Mon Sep 17 00:00:00 2001 From: Zach Young Date: Mon, 19 Aug 2024 09:06:06 -0700 Subject: [PATCH 4/5] added tests to roundTripFailures --- link.go | 22 +++++++++++----------- md_test.go | 6 ++++++ testdata/linkref_fmt.txt | 8 ++++---- 3 files changed, 21 insertions(+), 15 deletions(-) diff --git a/link.go b/link.go index 307af5e..57a2b13 100644 --- a/link.go +++ b/link.go @@ -219,8 +219,8 @@ func parseLinkClose(p *parser, s string, start int, open *openPlain) (*Link, int if !ok { break } - if link, ok := p.links[normalizeLabel(label)]; ok { - fmt.Println("label:", label) + label = normalizeLabel(label) + if link, ok := p.links[label]; ok { return &Link{URL: link.URL, Title: link.Title, Label: label, RefStyle: Full}, i, true } // Note: Could break here, but CommonMark dingus does not @@ -238,8 +238,8 @@ func parseLinkClose(p *parser, s string, start int, open *openPlain) (*Link, int refStyle = Collpased } - label := s[open.i:i] - if link, ok := p.links[normalizeLabel(label)]; ok { + label := normalizeLabel(s[open.i:i]) + if link, ok := p.links[label]; ok { return &Link{URL: link.URL, Title: link.Title, Label: label, RefStyle: refStyle}, end, true } return nil, 0, false @@ -426,13 +426,13 @@ func parseLinkLabel(p *parser, s string, i int) (string, int, bool) { // normalizeLabel returns the normalized label for s, for uniquely identifying that label. func normalizeLabel(s string) string { - if strings.Contains(s, "[") || strings.Contains(s, "]") { - // Labels cannot have [ ] so avoid the work of translating. - // This is especially important for pathlogical cases like - // [[[[[[[[[[a]]]]]]]]]] which would otherwise generate quadratic - // amounts of garbage. - return "" - } + // if strings.Contains(s, "[") || strings.Contains(s, "]") { + // // Labels cannot have [ ] so avoid the work of translating. + // // This is especially important for pathlogical cases like + // // [[[[[[[[[[a]]]]]]]]]] which would otherwise generate quadratic + // // amounts of garbage. + // return "" + // } // “To normalize a label, strip off the opening and closing brackets, // perform the Unicode case fold, strip leading and trailing spaces, tabs, and line endings, diff --git a/md_test.go b/md_test.go index 4af400b..bf3df04 100644 --- a/md_test.go +++ b/md_test.go @@ -43,6 +43,7 @@ var roundTripFailures = map[string]bool{ "TestToHTML/spec0.29/57": true, // setext heading "TestToHTML/spec0.29/63": true, // setext heading "TestToHTML/spec0.29/65": true, // newline in heading + "TestToHTML/spec0.29/163": true, // escaped bracket in label "TestToHTML/spec0.29/171": true, // link ref def "TestToHTML/spec0.29/208": true, // weird list "TestToHTML/spec0.29/227": true, // weird list @@ -58,6 +59,7 @@ var roundTripFailures = map[string]bool{ "TestToHTML/spec0.29/331": true, // backtick spaces "TestToHTML/spec0.29/349": true, // backticks "TestToHTML/spec0.29/502": true, // escape quotes + "TestToHTML/spec0.29/545": true, // escaped bracket in label "TestToHTML/spec0.30/26": true, // escape plain "TestToHTML/spec0.30/37": true, // escape plain @@ -72,6 +74,7 @@ var roundTripFailures = map[string]bool{ "TestToHTML/spec0.30/87": true, // setext heading "TestToHTML/spec0.30/93": true, // setext heading "TestToHTML/spec0.30/95": true, // newline in heading + "TestToHTML/spec0.30/194": true, // escaped bracket in label "TestToHTML/spec0.30/202": true, // link ref def "TestToHTML/spec0.30/238": true, // weird list "TestToHTML/spec0.30/257": true, // weird list @@ -81,6 +84,7 @@ var roundTripFailures = map[string]bool{ "TestToHTML/spec0.30/331": true, // backtick spaces "TestToHTML/spec0.30/349": true, // backticks "TestToHTML/spec0.30/505": true, // escape quotes + "TestToHTML/spec0.30/548": true, // escaped bracket in label "TestToHTML/spec0.31.2/26": true, // escape plain "TestToHTML/spec0.31.2/37": true, // escape plain @@ -95,6 +99,7 @@ var roundTripFailures = map[string]bool{ "TestToHTML/spec0.31.2/87": true, // setext heading "TestToHTML/spec0.31.2/93": true, // setext heading "TestToHTML/spec0.31.2/95": true, // newline in heading + "TestToHTML/spec0.31.2/194": true, // escaped bracket in label "TestToHTML/spec0.31.2/202": true, // link ref def "TestToHTML/spec0.31.2/238": true, // weird list "TestToHTML/spec0.31.2/257": true, // weird list @@ -104,6 +109,7 @@ var roundTripFailures = map[string]bool{ "TestToHTML/spec0.31.2/331": true, // backtick spaces "TestToHTML/spec0.31.2/349": true, // backticks "TestToHTML/spec0.31.2/506": true, // escape quotes + "TestToHTML/spec0.31.2/549": true, // escaped bracket in label "TestToHTML/table/gfm200": true, // table "TestToHTML/table/2": true, // table diff --git a/testdata/linkref_fmt.txt b/testdata/linkref_fmt.txt index 869bf9e..f0a23c0 100644 --- a/testdata/linkref_fmt.txt +++ b/testdata/linkref_fmt.txt @@ -38,14 +38,14 @@ A document. [r2]: u2 "title2" [r3]: u3 'title3' -- reflink-full -- -[Foo bar][Baz] +[Foo bar][baz] -[Baz]: u1 +[baz]: u1 -- reflink-collapsed -- [Foo bar][] -[Foo bar]: u1 +[foo bar]: u1 -- reflink-shortcut -- [Foo bar] -[Foo bar]: u1 +[foo bar]: u1 From a003029db9a27eee763fab633d83a362e8ee4e96 Mon Sep 17 00:00:00 2001 From: Zach Young Date: Mon, 19 Aug 2024 09:09:49 -0700 Subject: [PATCH 5/5] normalizeLabel: added bracket guard back --- link.go | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/link.go b/link.go index 57a2b13..3b42b1c 100644 --- a/link.go +++ b/link.go @@ -426,13 +426,13 @@ func parseLinkLabel(p *parser, s string, i int) (string, int, bool) { // normalizeLabel returns the normalized label for s, for uniquely identifying that label. func normalizeLabel(s string) string { - // if strings.Contains(s, "[") || strings.Contains(s, "]") { - // // Labels cannot have [ ] so avoid the work of translating. - // // This is especially important for pathlogical cases like - // // [[[[[[[[[[a]]]]]]]]]] which would otherwise generate quadratic - // // amounts of garbage. - // return "" - // } + if strings.Contains(s, "[") || strings.Contains(s, "]") { + // Labels cannot have [ ] so avoid the work of translating. + // This is especially important for pathlogical cases like + // [[[[[[[[[[a]]]]]]]]]] which would otherwise generate quadratic + // amounts of garbage. + return "" + } // “To normalize a label, strip off the opening and closing brackets, // perform the Unicode case fold, strip leading and trailing spaces, tabs, and line endings,