-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathget_prothsearch.py
More file actions
62 lines (57 loc) · 1.93 KB
/
get_prothsearch.py
File metadata and controls
62 lines (57 loc) · 1.93 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import re

import requests
from bs4 import BeautifulSoup
import pandas as pd
# Lista med URL:er
urls = [
"http://www.prothsearch.com/riesel1c.html",
"http://www.prothsearch.com/riesel1b.html",
"http://www.prothsearch.com/riesel1a.html",
"http://www.prothsearch.com/riesel1.html"
]
# Funktion för att extrahera Proth-primtal från en URL
def extract_proth_primes(url):
import re
response = requests.get(url)
soup = BeautifulSoup(response.content, "html.parser")
data = []
font_tags = soup.find_all('font', attrs={'color': 'Blue'})
k = None
for idx, tag in enumerate(font_tags):
k_tag = tag.find('b')
if k_tag and k_tag.text.strip().isdigit():
k = int(k_tag.text.strip())
else:
continue
# Collect n values from the next siblings until the next <font color="Blue">
current = tag.next_sibling
while current:
if getattr(current, 'name', None) == 'font' and current.get('color') == 'Blue':
break
if hasattr(current, 'get_text'):
text = current.get_text()
else:
text = str(current)
if '[' in text:
text = text.split('[')[0]
nums = re.findall(r'\d+', text)
for n in nums:
try:
n_int = int(n)
data.append({'k': k, 'n': n_int})
except Exception:
continue
current = current.next_sibling
return data
# Samla all data
all_data = []
for url in urls:
print(f"Hämtar data från: {url}")
all_data.extend(extract_proth_primes(url))
# Konvertera till DataFrame och spara
df = pd.DataFrame(all_data)
if not df.empty:
df = df[['k', 'n']]
df.to_csv("prothsearch_primtal.csv", index=False)
print(f"✅ Sparat {len(df)} rader till 'prothsearch_primtal.csv'")
else:
print("❌ Inga Proth-primaler hittades och sparades.")