forked from santifer/career-ops
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcheck-liveness.mjs
More file actions
146 lines (120 loc) · 4.83 KB
/
check-liveness.mjs
File metadata and controls
146 lines (120 loc) · 4.83 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
#!/usr/bin/env node
/**
* check-liveness.mjs — Playwright job link liveness checker
*
* Tests whether job posting URLs are still active or have expired.
* Uses the same detection logic as scan.md step 7.5.
* Zero Claude API tokens — pure Playwright.
*
* Usage:
* node check-liveness.mjs <url1> [url2] ...
* node check-liveness.mjs --file urls.txt
*
* Exit code: 0 if all active, 1 if any expired or uncertain
*/
import { chromium } from 'playwright';
import { readFile } from 'fs/promises';
/**
 * Body-text patterns that mark a posting as closed/expired.
 *
 * Patterns are tried in array order and the first match is the one reported,
 * so keep the more specific phrases ahead of the generic ones.
 * Apostrophes are written as `.` so that both the straight (') and the
 * typographic (’) form match.
 */
const EXPIRED_PATTERNS = [
  /job (is )?no longer available/i,
  /job.*no longer open/i, // Greenhouse: "The job you are looking for is no longer open."
  /position has been filled/i,
  /this job has expired/i,
  /job posting has expired/i,
  /no longer accepting applications/i,
  /this (position|role|job) (is )?no longer/i,
  /this job (listing )?is closed/i,
  /job (listing )?not found/i,
  /the page you are looking for doesn.t exist/i, // Workday /job/ 404
  /\d+\s+jobs?\s+found/i, // Workday: landed on listing page ("663 JOBS FOUND") instead of a specific job
  /search for jobs page is loaded/i, // Workday SPA indicator for listing page
  /diese stelle (ist )?(nicht mehr|bereits) besetzt/i,
  /offre (expirée|n.est plus disponible)/i, // `.` covers both n'est and n’est
];
/**
 * Final-URL patterns showing that the ATS redirected away from the posting,
 * which is treated as closed/expired.
 */
const EXPIRED_URL_PATTERNS = [
  // Greenhouse sends closed jobs to a URL carrying error=true
  /[?&]error=true/i,
];

/**
 * Body-text patterns that indicate an apply call-to-action is present.
 */
const APPLY_PATTERNS = [
  // Whole word "apply": covers "Apply", "Apply Now", "Apply for this Job"
  /\bapply\b/i,
  /\bsolicitar\b/i,
  /\bbewerben\b/i,
  /\bpostuler\b/i,
  /submit application/i,
  /easy apply/i,
  // Ashby
  /start application/i,
  // German Greenhouse
  /ich bewerbe mich/i,
];

/**
 * Minimum amount of body text, in characters. Anything shorter is probably
 * just navigation/footer, i.e. a closed ATS page.
 */
const MIN_CONTENT_CHARS = 300;
/**
 * Reduce whatever was thrown to the first line of its message.
 * Falls back to String(err) when the value has no string `message`
 * (e.g. a thrown string or undefined), so the caller's catch cannot itself throw
 * on such values.
 */
function firstErrorLine(err) {
  const text = typeof err?.message === 'string' ? err.message : String(err);
  return text.split('\n')[0];
}

/**
 * Load one job-posting URL and classify it as active, expired or uncertain.
 *
 * Signals are evaluated in this order and the first one that fires wins:
 *   1. HTTP status 404 or 410                       -> expired
 *   2. final URL matches EXPIRED_URL_PATTERNS       -> expired
 *   3. body text matches any of APPLY_PATTERNS      -> active
 *   4. body text matches any of EXPIRED_PATTERNS    -> expired
 *   5. trimmed body shorter than MIN_CONTENT_CHARS  -> expired
 *   6. none of the above                            -> uncertain
 * Anything thrown along the way (navigation failure, timeout, ...) is reported
 * as expired with a "navigation error" reason instead of being propagated.
 *
 * @param {object} page - Page to drive; goto(), waitForTimeout(), url() and evaluate() are used.
 * @param {string} url - Address of the posting to check.
 * @returns {Promise<{result: string, reason: string}>} result is 'active', 'expired' or 'uncertain'.
 */
async function checkUrl(page, url) {
  try {
    const response = await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 15000 });

    // A missing response is treated as status 0 and falls through to the content checks.
    const status = response?.status() ?? 0;
    if (status === 404 || status === 410) {
      return { result: 'expired', reason: `HTTP ${status}` };
    }

    // Give SPAs (Ashby, Lever, Workday) time to hydrate
    await page.waitForTimeout(2000);

    // Check if the ATS redirected to an error/listing page (e.g. Greenhouse ?error=true)
    const finalUrl = page.url();
    if (EXPIRED_URL_PATTERNS.some((pattern) => pattern.test(finalUrl))) {
      return { result: 'expired', reason: `redirect to ${finalUrl}` };
    }

    const bodyText = await page.evaluate(() => document.body?.innerText ?? '');

    // Apply button is the strongest positive signal — check it first.
    // This short-circuits before expired patterns that can appear on active pages
    // (e.g. Workday's split-view layout shows "N JOBS FOUND" even on active job pages).
    if (APPLY_PATTERNS.some((pattern) => pattern.test(bodyText))) {
      return { result: 'active', reason: 'apply button detected' };
    }

    // First expired pattern in array order is the one reported.
    const expiredMatch = EXPIRED_PATTERNS.find((pattern) => pattern.test(bodyText));
    if (expiredMatch) {
      return { result: 'expired', reason: `pattern matched: ${expiredMatch.source}` };
    }

    if (bodyText.trim().length < MIN_CONTENT_CHARS) {
      return { result: 'expired', reason: 'insufficient content — likely nav/footer only' };
    }

    return { result: 'uncertain', reason: 'content present but no apply button found' };
  } catch (err) {
    return { result: 'expired', reason: `navigation error: ${firstErrorLine(err)}` };
  }
}
/**
 * CLI entry point.
 *
 * Reads the URL list either from the command-line arguments or, with
 * `--file <path>`, from a text file (one URL per line; blank lines and lines
 * starting with `#` are skipped). Each URL is checked in turn with a single
 * shared page, one result line is printed per URL, followed by a summary.
 *
 * Exits with status 1 when no arguments are given or when any URL is expired
 * or uncertain; otherwise returns normally.
 *
 * @returns {Promise<void>}
 * @throws {Error} when `--file` is given without a path; file-read and
 *   browser-launch failures also propagate to the caller.
 */
async function main() {
  const args = process.argv.slice(2);
  if (args.length === 0) {
    console.error('Usage: node check-liveness.mjs <url1> [url2] ...');
    console.error(' node check-liveness.mjs --file urls.txt');
    process.exit(1);
  }

  let urls;
  if (args[0] === '--file') {
    const listPath = args[1];
    // Fail with a clear message instead of readFile's opaque argument-type error.
    if (!listPath) {
      throw new Error('--file requires a path argument (e.g. --file urls.txt)');
    }
    const text = await readFile(listPath, 'utf-8');
    urls = text
      .split('\n')
      .map((line) => line.trim())
      .filter((line) => line && !line.startsWith('#'));
  } else {
    urls = args;
  }

  console.log(`Checking ${urls.length} URL(s)...\n`);

  let active = 0;
  let expired = 0;
  let uncertain = 0;

  const browser = await chromium.launch({ headless: true });
  try {
    const page = await browser.newPage();

    // Sequential — project rule: never Playwright in parallel
    for (const url of urls) {
      const { result, reason } = await checkUrl(page, url);
      const icon = { active: '✅', expired: '❌', uncertain: '⚠️' }[result];
      console.log(`${icon} ${result.padEnd(10)} ${url}`);
      if (result !== 'active') console.log(` ${reason}`);

      if (result === 'active') active++;
      else if (result === 'expired') expired++;
      else uncertain++;
    }
  } finally {
    // Always release the browser, even if page creation or a check throws.
    await browser.close();
  }

  console.log(`\nResults: ${active} active ${expired} expired ${uncertain} uncertain`);
  if (expired > 0 || uncertain > 0) process.exit(1);
}
// Script bootstrap: run the CLI and turn any rejection that escapes main()
// into a "Fatal:" line on stderr plus exit status 1.
const reportFatal = (error) => {
  console.error('Fatal:', error.message);
  process.exit(1);
};

main().catch(reportFatal);