-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcontent.js
More file actions
99 lines (83 loc) · 2.57 KB
/
content.js
File metadata and controls
99 lines (83 loc) · 2.57 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
// This script runs in the context of web pages
/**
 * Message bridge between the extension and this content script.
 *
 * Supported `request.action` values:
 *   - "getLinks"              -> replies with `{ links }` from getAllPageLinks()
 *   - "getLinksFromSelection" -> replies with `{ links }` from getLinksFromSelection()
 *
 * Both replies are sent synchronously from inside the listener.
 *
 * @param {{action?: string}} request - Message payload sent by the extension.
 * @param {object} _sender - Sender details (unused).
 * @param {function(object): void} sendResponse - Callback used to reply.
 * @returns {boolean} `true` when the message was handled, `false` otherwise.
 */
chrome.runtime.onMessage.addListener(function(request, _sender, sendResponse) {
  // Tolerate null / non-object payloads instead of throwing on `.action`.
  const action = request ? request.action : undefined;

  if (action === "getLinks") {
    sendResponse({ links: getAllPageLinks() });
    return true;
  }

  if (action === "getLinksFromSelection") {
    sendResponse({ links: getLinksFromSelection() });
    return true;
  }

  // Not our message. Returning true here would keep the message channel open
  // even though sendResponse is never called, leaving the sender waiting for a
  // reply that never arrives; returning false releases the channel right away.
  return false;
});
// Shared URL pattern for scanning plain text: matches "http://" or "https://"
// followed by one or more characters, stopping at the first whitespace, quote
// (" or '), ")", "<" or ">". The global flag makes String.prototype.match return
// every occurrence. Trailing punctuation is stripped separately by cleanupUrl.
const URL_REGEX = /(https?:\/\/[^\s\"\'\)\<\>]+)/g;
/**
 * Strips punctuation that commonly gets glued onto the end of a URL in prose.
 *
 * Removes every trailing ".", ",", ";", ")", double quote or single quote,
 * repeating until the last character is none of those.
 *
 * @param {string} raw - Candidate URL text.
 * @returns {string} `raw` without the trailing punctuation run (may be empty).
 */
function cleanupUrl(raw) {
  const trailingJunk = '.,;)"\'';

  // Walk backwards from the end to find where the punctuation run starts.
  let end = raw.length;
  while (end > 0 && trailingJunk.includes(raw.charAt(end - 1))) {
    end -= 1;
  }

  return raw.slice(0, end);
}
/**
 * Collects the distinct http(s) URLs found on the current page.
 *
 * Two sources are scanned, in this order:
 *   1. The `href` of every `<a>` element in the document.
 *   2. URL-looking substrings of the body's visible text (matched with
 *      URL_REGEX and trimmed with cleanupUrl), which picks up links that are
 *      not wrapped in an anchor.
 *
 * Each URL appears once, in first-seen order.
 *
 * @returns {string[]} Unique http/https URLs.
 */
function getAllPageLinks() {
  const links = [];
  const seenUrls = new Set(); // For tracking duplicates

  const isHttpUrl = (value) => typeof value === 'string' && /^https?:\/\//.test(value);

  const addUnique = (url) => {
    if (!seenUrls.has(url)) {
      seenUrls.add(url);
      links.push(url);
    }
  };

  document.querySelectorAll('a').forEach((link) => {
    // `href` is a string on HTML anchors, but on SVG <a> elements it is an
    // SVGAnimatedString object; calling string methods on it would throw and
    // abort the whole scan, so anything that is not an http(s) string is skipped.
    const href = link.href;
    if (isHttpUrl(href)) {
      addUnique(href);
    }
  });

  // Text scan is best-effort: a failure here must not lose the anchor results.
  try {
    // document.body can be missing (e.g. non-HTML documents).
    const bodyText = document.body ? document.body.innerText : '';
    const matches = (bodyText || '').match(URL_REGEX) || [];
    matches.forEach((url) => {
      // Remove trailing punctuation picked up from the surrounding sentence.
      const cleanUrl = cleanupUrl(url);
      if (isHttpUrl(cleanUrl)) {
        addUnique(cleanUrl);
      }
    });
  } catch (e) {
    console.error('Error extracting text links:', e);
  }

  return links;
}
/**
 * Extracts the distinct http(s) URLs contained in the user's current text
 * selection.
 *
 * The selected text is scanned with URL_REGEX and every match is passed
 * through cleanupUrl to drop trailing punctuation commonly swept up when
 * selecting text. URLs are returned once each, in first-seen order.
 *
 * @returns {string[]} Unique URLs found in the selection; empty when nothing
 *   is selected or no selection object is available.
 */
function getLinksFromSelection() {
  // getSelection() may return null (documented for hidden iframes in some
  // browsers); treat that the same as an empty selection instead of throwing.
  const selection = window.getSelection();
  const selectedText = selection ? selection.toString() : '';

  const matches = selectedText.match(URL_REGEX) || [];

  // A Set keeps insertion order, so this de-duplicates while preserving the
  // order in which the URLs first appear.
  const uniqueUrls = new Set(matches.map((url) => cleanupUrl(url)));
  return Array.from(uniqueUrls);
}