From 2e97618f175f69e441e6d89bfb822aa946b574ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=98yvind=20R=C3=B8nningstad?= Date: Sat, 13 Jan 2024 21:06:44 +0100 Subject: [PATCH 1/2] Improve linkRegExp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Solves/improves the following: - Setting a custom linkRegExp messes with the logic for adding protocol or mailto. Use named capture groups in the regex, and add a case in the href generation that just uses the match directly if neither the url nor the email regex matches, which would mean that the user's custom regex matched instead. - Make https the default protocol instead of http - Use a more general regex for matching the protocol. Signed-off-by: Øyvind Rønningstad --- README.md | 2 +- source/Editor.ts | 20 +++++++++++--------- test/squire.spec.ts | 7 ++++--- 3 files changed, 16 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 6710e80d..a42f94e2 100644 --- a/README.md +++ b/README.md @@ -475,4 +475,4 @@ This is useful when the document needs to be changed programmatically, but those ### linkRegExp -This is the regular expression used to automatically mark up links when inserting HTML or after pressing space. You can change it if you want to use a custom regular expression for detecting links, or set to `/[]/` to turn off link detection. +This is the regular expression used to automatically mark up links when inserting HTML or after pressing space. You can change it if you want to use a custom regular expression for detecting links, or set to `/[]/` to turn off link detection. To append to the existing regex, set it to `linkRegExp.source + '|' + newLinkRegExp`. diff --git a/source/Editor.ts b/source/Editor.ts index 0440229d..cbc0b3fa 100644 --- a/source/Editor.ts +++ b/source/Editor.ts @@ -1741,7 +1741,7 @@ class Squire { // Only look on boundaries '\\b(?:' + // Capture group 1: URLs - '(' + + '(?' 
+ // Add links to URLS // Starts with: '(?:' + @@ -1775,7 +1775,7 @@ class Squire { '\\([^\\s()<>]+\\)' + ')' + // Capture group 2: Emails - ')|(' + + ')|(?' + // Add links to emails '[\\w\\-.%+]+@(?:[\\w\\-]+\\.)+[a-z]{2,}\\b' + // Allow query parameters in the mailto: style @@ -1788,8 +1788,8 @@ class Squire { ); */ linkRegExp = - /\b(?:((?:(?:ht|f)tps?:\/\/|www\d{0,3}[.]|[a-z0-9][a-z0-9.\-]*[.][a-z]{2,}\/)(?:[^\s()<>]+|\([^\s()<>]+\))+(?:[^\s?&`!()\[\]{};:'".,<>«»“”‘’]|\([^\s()<>]+\)))|([\w\-.%+]+@(?:[\w\-]+\.)+[a-z]{2,}\b(?:[?][^&?\s]+=[^\s?&`!()\[\]{};:'".,<>«»“”‘’]+(?:&[^&?\s]+=[^\s?&`!()\[\]{};:'".,<>«»“”‘’]+)*)?))/i; - + /\b(?:(?(?:(?:[a-z+]+:)?\/\/|www\d{0,3}[.]|[a-z0-9][a-z0-9.\-]*[.][a-z]{2,}\/)(?:[^\s()<>]+|\([^\s()<>]+\))+(?:[^\s?&`!()\[\]{};:'".,<>«»“”‘’]|\([^\s()<>]+\)))|(?[\w\-.%+]+@(?:[\w\-]+\.)+[a-z]{2,}\b(?:[?][^&?\s]+=[^\s?&`!()\[\]{};:'".,<>«»“”‘’]+(?:&[^&?\s]+=[^\s?&`!()\[\]{};:'".,<>«»“”‘’]+)*)?))/i; + linkRegExpHandlers = {} addDetectedLinks( searchInNode: DocumentFragment | Node, root?: DocumentFragment | HTMLElement, @@ -1819,11 +1819,13 @@ class Squire { 'A', Object.assign( { - href: match[1] - ? /^(?:ht|f)tps?:/i.test(match[1]) - ? match[1] - : 'http://' + match[1] - : 'mailto:' + match[0], + href: match.groups['url'] + ? /^(?:[a-z+]+:)?\/\//i.test(match[0]) + ? match[0] + : 'https://' + match[0] + : match.groups['email'] + ? 
'mailto:' + match[0] + : match[0], }, defaultAttributes, ), diff --git a/test/squire.spec.ts b/test/squire.spec.ts index ad978cfa..7fbc2bfe 100644 --- a/test/squire.spec.ts +++ b/test/squire.spec.ts @@ -472,9 +472,9 @@ describe('Squire RTE', () => { 'https://google.com': 'https://google.com/', 'https://www.google.com': 'https://www.google.com/', 'https://www.google.com/': 'https://www.google.com/', - 'https://google.com/?': 'https://google.com/', - 'https://google.com?': 'https://google.com/', - 'https://google.com?a': 'https://google.com/?a', + 'HTTPS://google.com/?': 'https://google.com/', // Test protocol matching + 'ftp://google.com?': 'ftp://google.com/', // Test protocol matching + 'redis://google.com?a': 'redis://google.com?a', // Test protocol matching 'https://google.com?a=': 'https://google.com/?a=', 'https://google.com?a=b': 'https://google.com/?a=b', 'https://google.com?a=b?': 'https://google.com/?a=b', @@ -486,6 +486,7 @@ describe('Squire RTE', () => { 'https://google.com?a=b&c=d&': 'https://google.com/?a=b&c=d', 'https://google.com?a=b&c=d&e=': 'https://google.com/?a=b&c=d&e=', 'https://google.com?a=b&c=d&e=f': 'https://google.com/?a=b&c=d&e=f', + 'www.google.com': 'https://www.google.com/', // Test prepending protocol }; Object.keys(LINK_MAP).forEach((input) => { From ec0c3e612ab7e9fc9d0cf09486db572783996a3b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=98yvind=20R=C3=B8nningstad?= Date: Sat, 13 Jan 2024 22:15:39 +0100 Subject: [PATCH 2/2] Introduce linkRegExpHandlers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A map of capture groups to functions for producing the href of a link. By default, it is e.g. used to prepend https:// to links matching on www. so they are not interpreted as relative. The user can add their own handlers if modifying linkRegExp. 
Signed-off-by: Øyvind Rønningstad --- README.md | 6 +++++- source/Editor.ts | 17 +++++++++-------- test/squire.spec.ts | 4 ++++ 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index a42f94e2..89e5e8ba 100644 --- a/README.md +++ b/README.md @@ -475,4 +475,8 @@ This is useful when the document needs to be changed programmatically, but those ### linkRegExp -This is the regular expression used to automatically mark up links when inserting HTML or after pressing space. You can change it if you want to use a custom regular expression for detecting links, or set to `/[]/` to turn off link detection. To append to the existing regex, set it to `linkRegExp.source + '|' + newLinkRegExp`. +This is the regular expression used to automatically mark up links when inserting HTML or after pressing space. You can change it if you want to use a custom regular expression for detecting links, or set to `/[]/` to turn off link detection. To append to the existing regex, set it to `linkRegExp.source + '|' + newLinkRegExp`. For compatibility with linkRegExpHandlers, use named capture groups (`?<name>`). + +### linkRegExpHandlers + +This is a map of handlers for different types of matches in linkRegExp. For example, linkRegExp has a named group 'url' that matches urls, and a named group 'email' that matches emails. linkRegExpHandlers['url'] and linkRegExpHandlers['email'] are functions that take in the matching string and return what the link should have in its 'href'.
diff --git a/source/Editor.ts b/source/Editor.ts index cbc0b3fa..c97ba763 100644 --- a/source/Editor.ts +++ b/source/Editor.ts @@ -1789,7 +1789,11 @@ class Squire { */ linkRegExp = /\b(?:(?(?:(?:[a-z+]+:)?\/\/|www\d{0,3}[.]|[a-z0-9][a-z0-9.\-]*[.][a-z]{2,}\/)(?:[^\s()<>]+|\([^\s()<>]+\))+(?:[^\s?&`!()\[\]{};:'".,<>«»“”‘’]|\([^\s()<>]+\)))|(?[\w\-.%+]+@(?:[\w\-]+\.)+[a-z]{2,}\b(?:[?][^&?\s]+=[^\s?&`!()\[\]{};:'".,<>«»“”‘’]+(?:&[^&?\s]+=[^\s?&`!()\[\]{};:'".,<>«»“”‘’]+)*)?))/i; - linkRegExpHandlers = {} + linkRegExpHandlers = { + 'url': (m) => {return /^(?:[a-z+]+:)?\/\//i.test(m) + ? m : 'https://' + m}, + 'email': (m) => {return 'mailto:' + m}, + 'default': (m) => {return m}}; addDetectedLinks( searchInNode: DocumentFragment | Node, root?: DocumentFragment | HTMLElement, @@ -1815,17 +1819,14 @@ class Squire { node, ); } + let handler = Object.keys(this.linkRegExpHandlers).filter( + key => Object.keys(match.groups).includes(key) && match.groups[key])[0] + || 'default'; const child = createElement( 'A', Object.assign( { - href: match.groups['url'] - ? /^(?:[a-z+]+:)?\/\//i.test(match[0]) - ? match[0] - : 'https://' + match[0] - : match.groups['email'] - ? 
'mailto:' + match[0] - : match[0], + href: this.linkRegExpHandlers[handler](match[0]) }, defaultAttributes, ), diff --git a/test/squire.spec.ts b/test/squire.spec.ts index 7fbc2bfe..38f72510 100644 --- a/test/squire.spec.ts +++ b/test/squire.spec.ts @@ -487,10 +487,14 @@ describe('Squire RTE', () => { 'https://google.com?a=b&c=d&e=': 'https://google.com/?a=b&c=d&e=', 'https://google.com?a=b&c=d&e=f': 'https://google.com/?a=b&c=d&e=f', 'www.google.com': 'https://www.google.com/', // Test prepending protocol + 'foobar': 'http://localhost/foobar', // Test default handler + 'search': 'http://localhost/replace', // Test custom handler }; Object.keys(LINK_MAP).forEach((input) => { it('should auto convert links to anchor: ' + input, () => { + editor.linkRegExp = new RegExp(editor.linkRegExp.source + "|(foobar)|(?search)", editor.linkRegExp.flags); + editor.linkRegExpHandlers['extra'] = (m) => {return 'replace'}; editor.insertHTML(input); const link = document.querySelector('a')!; expect(link.href).toBe(LINK_MAP[input]);