-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmonitor.py
More file actions
100 lines (78 loc) · 3.75 KB
/
monitor.py
File metadata and controls
100 lines (78 loc) · 3.75 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import os, requests, html, re
from bs4 import BeautifulSoup
from datetime import datetime, timedelta, timezone # AMENDED
# === CONFIG ===
# Telegram credentials come from the environment; either one is None when
# its variable is not set.
TELEGRAM_TOKEN = os.environ.get("TELEGRAM_BOT_TOKEN")
CHAT_ID = os.environ.get("TELEGRAM_CHAT_ID")
# News listing page that is scraped for headlines.
QLD_URL = "https://www.qbcc.qld.gov.au/news-resources/news"
# Local time as a fixed UTC+11 offset, used to stamp the message date.
# NOTE(review): a fixed offset does not follow daylight-saving changes -
# confirm that +11 is intended all year round.
LOCAL_TZ = timezone(offset=timedelta(hours=11))
def send_telegram(text):
    """Post *text* to the configured Telegram chat through the Bot API.

    The message is sent with ``parse_mode="HTML"`` and link previews
    disabled. Nothing is sent when *text* is empty or ``None``.

    Args:
        text: Message body, already formatted as Telegram-flavoured HTML.

    Returns:
        None. An error response from the API is reported on stdout rather
        than raised; network-level exceptions from ``requests`` propagate
        to the caller.
    """
    if not text:
        return

    url = f"https://api.telegram.org/bot{TELEGRAM_TOKEN}/sendMessage"
    payload = {
        "chat_id": CHAT_ID,
        "text": text,
        "parse_mode": "HTML",
        "disable_web_page_preview": True,
    }
    response = requests.post(url, json=payload, timeout=30)

    # Surface rejected messages instead of dropping them silently. Only the
    # status code and response body are printed: the request URL embeds the
    # bot token and must not end up in logs.
    if not response.ok:
        print(f"Telegram API error {response.status_code}: {response.text[:300]}")
def get_qld_data():
    """Scrape the QBCC news page and return Telegram-ready headline entries.

    The site home page is requested first and the news listing second, both
    through one session that sends browser-like headers. Every element
    matching ``.views-row``, ``article`` or ``.news-item`` that contains a
    heading (h2/h3/h4) and a link yields one entry, de-duplicated by
    resolved URL.

    Returns:
        list[str]: HTML-formatted entries in page order. The list is empty
        when the request fails, the server answers with an error status, or
        nothing on the page matches.
    """
    # Local import: the file's top-level imports do not include urllib.parse.
    from urllib.parse import urljoin

    # NOTE(review): emoji inside the string literals of this function look
    # mis-decoded in this copy of the file; they are kept exactly as found.
    print("π Fetching QBCC Headlines (Stealth Mode)...")
    results = []
    seen_links = set()

    # Label fragments stripped from heading text; compiled once, outside
    # the per-article loop.
    title_noise = re.compile(
        r'(Article|News|Campaign|Media release|Read More|\| \d+ \w+ \d{4})',
        flags=re.IGNORECASE,
    )
    browser_headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.5",
        "Referer": "https://www.google.com/",
        "DNT": "1",
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
    }

    try:
        # The context manager closes the session's pooled connections.
        with requests.Session() as session:
            session.headers.update(browser_headers)
            # Warm-up hit on the home page before the listing request
            # (presumably to collect cookies - confirm it is still needed).
            session.get("https://www.qbcc.qld.gov.au/", timeout=15)
            r = session.get(QLD_URL, timeout=20)
            # Fail loudly on 4xx/5xx instead of parsing an error page.
            r.raise_for_status()

        soup = BeautifulSoup(r.content, "html.parser")
        for item in soup.select('.views-row, article, .news-item'):
            heading_tag = item.find(['h3', 'h2', 'h4'])
            link_tag = item.find('a', href=True)
            if not heading_tag or not link_tag:
                continue

            raw_title = heading_tag.get_text().strip()
            clean_title = title_noise.sub('', raw_title).strip()
            # Very short leftovers are treated as non-headlines.
            if len(clean_title) < 10:
                continue

            # Resolve against the final page URL so relative and
            # protocol-relative hrefs become valid absolute links.
            full_url = urljoin(r.url, link_tag['href'])
            if not full_url.startswith(("http://", "https://")):
                continue  # skip mailto:, javascript:, and similar hrefs
            if full_url in seen_links:
                continue

            seen_links.add(full_url)
            # Escape the URL too: the message is sent in HTML parse mode,
            # where a bare "&" or "<" is markup.
            safe_url = html.escape(full_url, quote=False)
            results.append(f"β’ <b>[π° QLD]</b> {html.escape(clean_title)}\nπ {safe_url}")
    except Exception as e:
        # Deliberate best-effort: any failure is logged and whatever was
        # collected so far is returned.
        print(f"Scraper error: {e}")
    return results
def main():
    """Build the QBCC digest message and send it to Telegram.

    Exits early, with a console note, when either Telegram credential is
    missing. Otherwise the message is a dated header, up to 15 scraped
    headlines (or a fallback notice when none were found), and a fixed
    footer of links to the QBCC registers.
    """
    if not TELEGRAM_TOKEN or not CHAT_ID:
        print("TELEGRAM_BOT_TOKEN or TELEGRAM_CHAT_ID is not set; nothing sent.")
        return

    headlines = get_qld_data()

    # Date stamp in the configured local zone rather than the host's zone.
    today_local = datetime.now(LOCAL_TZ).strftime('%d %b %Y')
    # NOTE(review): in this copy of the file the header literal was split
    # across two lines at a mis-decoded emoji; it is rejoined here with the
    # calendar emoji restored - confirm against the original file. The
    # other garbled emoji in this function are kept as found.
    header = f"βοΈ <b>QBCC Queensland Update</b>\n📅 {today_local}\n\n"

    if headlines:
        # Cap the digest at 15 entries.
        body = "\n\n".join(headlines[:15])
    else:
        body = "<i>No new QLD headings found today (Check Stealth/Session settings).</i>"

    footer = (
        "\n\n---\n"
        "<b>π QLD PERMANENT REGISTERS:</b>\n"
        "π‘ <a href='https://my.qbcc.qld.gov.au/myQBCC/s/suspended-registers'>Suspended Registers</a>\n"
        "π΄ <a href='https://my.qbcc.qld.gov.au/myQBCC/s/cancelled-registers'>Cancelled Registers</a>\n"
        "π« <a href='https://my.qbcc.qld.gov.au/myQBCC/s/excluded-individual-register'>Excluded Individuals (Banned)</a>\n"
        "π <a href='https://my.qbcc.qld.gov.au/myQBCC/s/adjudication-registry'>Adjudication Registry</a>\n"
        "π <a href='https://my.qbcc.qld.gov.au/myQBCC/s/building-certifier-licensee-register'>Building Certifiers</a>\n"
        "π <a href='https://my.qbcc.qld.gov.au/myQBCC/s/owner-builder-licensee-register'>Owner Builders</a>"
    )
    send_telegram(header + body + footer)
# Run the digest only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()