From ef14d97abe9b72d6171a98482c45bd7316e8ad1b Mon Sep 17 00:00:00 2001
From: Ilya Titov
Date: Tue, 20 Jan 2026 17:18:23 +0000
Subject: [PATCH] Fix duplicate feed links extracted from HTTP pages

---
 core/pages/add.ts           | 40 +++++++++++++++++++++++++++++--------
 core/test/pages/add.test.ts | 39 ++++++++++++++++++++++++++++++++++++
 2 files changed, 71 insertions(+), 8 deletions(-)

diff --git a/core/pages/add.ts b/core/pages/add.ts
index fe4fb9ba..58e0150f 100644
--- a/core/pages/add.ts
+++ b/core/pages/add.ts
@@ -146,20 +146,44 @@ export const addPage = createPage('add', () => {
     $url.set(normalizedUrl)
   })
 
+  /**
+   * Removes duplicated feed links differing only by protocol.
+   *
+   * A link stays at the position of its first occurrence. If both `http://`
+   * and `https://` variants are present, the `https://` one is returned
+   * regardless of which came first.
+   */
+  function dedupeLinks(links: string[]): string[] {
+    let secure = /^https:\/\//i
+    let unique = new Map<string, string>()
+    for (let link of links) {
+      // URL schemes are case-insensitive: HTTP:// and http:// share one key
+      let key = link.replace(/^https?:\/\//i, 'https://')
+      let kept = unique.get(key)
+      if (kept === undefined || (!secure.test(kept) && secure.test(link))) {
+        // Setting an existing Map key keeps its original position
+        unique.set(key, link)
+      }
+    }
+    return [...unique.values()]
+  }
+
   /**
    * Extracts links to all known feed types from the HTTP response containing
    * the HTML document.
    */
   function getLinksFromText(response: TextResponse): string[] {
     let names = Object.keys(loaders) as LoaderName[]
-    return names.reduce((links, name) => {
-      /* node:coverage ignore next 5 */
-      try {
-        return links.concat(loaders[name].getMineLinksFromText(response))
-      } catch {
-        return links
-      }
-    }, [])
+    return dedupeLinks(
+      names.reduce((links, name) => {
+        /* node:coverage ignore next 5 */
+        try {
+          return links.concat(loaders[name].getMineLinksFromText(response))
+        } catch {
+          return links
+        }
+      }, [])
+    )
   }
 
   /** Guess a list of default/fallback links for all feed types */
diff --git a/core/test/pages/add.test.ts b/core/test/pages/add.test.ts
index f478929d..ccd9cd66 100644
--- a/core/test/pages/add.test.ts
+++ b/core/test/pages/add.test.ts
@@ -613,3 +613,42 @@ test('supports links with query parameters', async () => {
   equal(popup.notFound, false)
   equal(popup.param, 'https://example.com/news?format=rss')
 })
+
+test('deduplicates feed links differing only by protocol', async () => {
+  let page = openPage({
+    params: {},
+    route: 'add'
+  })
+
+  keepMount(page.candidates)
+
+  expectRequest('http://example.com').andRespond(
+    200,
+    `
+
+
+      subscribe
+
+    `
+  )
+
+  let rss = 'Feed'
+  expectRequest('https://example.com/feed.atom').andRespond(
+    200,
+    rss,
+    'text/xml'
+  )
+
+  page.params.url.set('http://example.com')
+  await waitLoading(page.searching)
+
+  equal(page.candidates.get().length, 1)
+  equalWithText(page.candidates.get(), [
+    {
+      loader: loaders.atom,
+      name: 'atom',
+      title: 'Feed',
+      url: 'https://example.com/feed.atom'
+    }
+  ])
+})