forked from santifer/career-ops
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmerge-tracker.mjs
More file actions
331 lines (287 loc) · 11.5 KB
/
merge-tracker.mjs
File metadata and controls
331 lines (287 loc) · 11.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
#!/usr/bin/env node
/**
* merge-tracker.mjs — Merge batch tracker additions into applications.md
*
* Handles multiple TSV formats:
* - 9-col: num\tdate\tcompany\trole\tstatus\tscore\tpdf\treport\tnotes
* - 8-col: num\tdate\tcompany\trole\tstatus\tscore\tpdf\treport (no notes)
* - Pipe-delimited (markdown table row): | col | col | ... |
*
* Dedup (first hit wins): report number match, then entry number match, then normalized company + fuzzy role match
* If duplicate with higher score → update in-place, update report link
* Validates status against the built-in canonical list and alias table (unrecognized values log a warning and default to "Evaluated")
*
* Run: node career-ops/merge-tracker.mjs [--dry-run] [--verify]
*/
import { readFileSync, writeFileSync, readdirSync, mkdirSync, renameSync, existsSync } from 'fs';
import { join, basename, dirname } from 'path';
import { fileURLToPath } from 'url';
// Directory that contains this script; every other path below is resolved against it.
const CAREER_OPS = dirname(fileURLToPath(import.meta.url));

// Tracker file. Two layouts are supported: data/applications.md (boilerplate) wins
// when it exists, otherwise applications.md (original) next to this script is used.
const APPS_FILE = (() => {
  const boilerplatePath = join(CAREER_OPS, 'data/applications.md');
  if (existsSync(boilerplatePath)) {
    return boilerplatePath;
  }
  return join(CAREER_OPS, 'applications.md');
})();

// Pending addition files are read from here; processed ones are moved into merged/.
const ADDITIONS_DIR = join(CAREER_OPS, 'batch', 'tracker-additions');
const MERGED_DIR = join(ADDITIONS_DIR, 'merged');

// Command-line switches.
const [DRY_RUN, VERIFY] = ['--dry-run', '--verify'].map((flag) => process.argv.includes(flag));

// Canonical states (status aliases are resolved to one of these)
const CANONICAL_STATES = [
  'Evaluated',
  'Applied',
  'Responded',
  'Interview',
  'Offer',
  'Rejected',
  'Discarded',
  'SKIP',
];
// Lower-cased alias → canonical state. Kept in a Map (not an object literal) so that
// inherited property names such as "constructor" or "__proto__" can never be mistaken
// for an alias, and built once instead of on every call.
const STATUS_ALIASES = new Map([
  // Spanish → English
  ['evaluada', 'Evaluated'], ['condicional', 'Evaluated'], ['hold', 'Evaluated'], ['evaluar', 'Evaluated'], ['verificar', 'Evaluated'],
  ['aplicado', 'Applied'], ['enviada', 'Applied'], ['aplicada', 'Applied'], ['applied', 'Applied'], ['sent', 'Applied'],
  ['respondido', 'Responded'],
  ['entrevista', 'Interview'],
  ['oferta', 'Offer'],
  ['rechazado', 'Rejected'], ['rechazada', 'Rejected'],
  ['descartado', 'Discarded'], ['descartada', 'Discarded'], ['cerrada', 'Discarded'], ['cancelada', 'Discarded'],
  ['no aplicar', 'SKIP'], ['no_aplicar', 'SKIP'], ['skip', 'SKIP'], ['monitor', 'SKIP'],
  ['geo blocker', 'SKIP'],
]);

/**
 * Map a free-form status cell onto one of CANONICAL_STATES.
 *
 * Markdown bold markers (**) and anything from a trailing " YYYY-MM-DD" date onwards
 * are removed before matching. Matching is case-insensitive and tried in this order:
 * canonical names, the alias table, then the duplicado/dup/repost prefixes.
 *
 * @param {string} status - Raw status text from a tracker row.
 * @returns {string} A canonical state. Unrecognized input logs a warning and yields "Evaluated".
 */
function validateStatus(status) {
  const clean = status.replace(/\*\*/g, '').replace(/\s+\d{4}-\d{2}-\d{2}.*$/, '').trim();
  const lower = clean.toLowerCase();

  const canonical = CANONICAL_STATES.find((valid) => valid.toLowerCase() === lower);
  if (canonical !== undefined) return canonical;

  const alias = STATUS_ALIASES.get(lower);
  if (alias !== undefined) return alias;

  // DUPLICADO/Repost → Discarded
  if (/^(duplicado|dup|repost)/i.test(lower)) return 'Discarded';

  console.warn(`⚠️ Non-canonical status "${status}" → defaulting to "Evaluated"`);
  return 'Evaluated';
}
/**
 * Build the comparison key used to decide whether two company names are the same.
 *
 * The name is decomposed (NFKD) so accents can be dropped, lower-cased, and then
 * stripped of everything that is not a letter or digit in any script. ASCII names
 * produce the same key as a plain [a-z0-9] filter; accented names now match their
 * unaccented spelling, and non-Latin names no longer collapse to an empty key.
 *
 * @param {string} name - Company name as written in a tracker row.
 * @returns {string} Lower-case key containing only letters and digits.
 */
function normalizeCompany(name) {
  return name
    .normalize('NFKD')
    .replace(/\p{M}/gu, '') // drop the combining marks split off by NFKD
    .toLowerCase()
    .replace(/[^\p{L}\p{N}]/gu, '');
}
/**
 * Decide whether two role titles plausibly describe the same position.
 *
 * Titles that are identical after lower-casing and whitespace normalization always
 * match, so short titles such as "CTO" or "Developer" are recognized as duplicates.
 * Otherwise, words longer than 3 characters are compared and the titles match when
 * at least two words of `a` contain, or are contained in, some word of `b`.
 *
 * @param {string} a - First role title.
 * @param {string} b - Second role title.
 * @returns {boolean} True when the titles are considered the same role.
 */
function roleFuzzyMatch(a, b) {
  const normA = a.toLowerCase().trim().replace(/\s+/g, ' ');
  const normB = b.toLowerCase().trim().replace(/\s+/g, ' ');

  // Exact match: the word-overlap rule below cannot fire for titles with < 2 long words.
  if (normA !== '' && normA === normB) return true;

  const wordsA = normA.split(' ').filter((w) => w.length > 3);
  const wordsB = normB.split(' ').filter((w) => w.length > 3);
  const overlap = wordsA.filter((w) => wordsB.some((wb) => wb.includes(w) || w.includes(wb)));
  return overlap.length >= 2;
}
/**
 * Pull the report number out of a report cell: the first run of digits wrapped
 * in square brackets, e.g. the 12 in "[12](reports/012.md)".
 *
 * @param {string} reportStr - Contents of the report column.
 * @returns {number|null} The bracketed number, or null when there is none.
 */
function extractReportNum(reportStr) {
  const [, digits] = reportStr.match(/\[(\d+)\]/) ?? [];
  if (digits === undefined) {
    return null;
  }
  return Number.parseInt(digits, 10);
}
/**
 * Extract the numeric value from a score cell such as "4.5/5" or "**4.2/5**".
 *
 * The first decimal number in the text is used. The pattern requires at least one
 * digit, so stray dots ("approx. 4.2/5", "...") cannot be picked up and turned into
 * NaN; text without any number ("N/A", "DUP") yields 0.
 *
 * @param {string} s - Contents of the score column.
 * @returns {number} The parsed score, or 0 when the cell contains no number.
 */
function parseScore(s) {
  const m = s.replace(/\*\*/g, '').match(/\d*\.?\d+/);
  return m ? parseFloat(m[0]) : 0;
}
/**
 * Turn one markdown table row of applications.md into an entry object.
 *
 * The row is split on "|" and every cell is trimmed; cell 0 is the text before the
 * leading pipe. Column order: num, date, company, role, score, status, pdf, report, notes.
 *
 * @param {string} line - A single line of the tracker file.
 * @returns {object|null} The parsed entry (the untouched line is kept as `raw`), or
 *   null when the line has fewer than 9 split parts or its number cell is NaN or 0.
 */
function parseAppLine(line) {
  const cells = line.split('|').map((cell) => cell.trim());
  if (cells.length < 9) {
    return null;
  }

  const [, numCell, date, company, role, score, status, pdf, report, notesCell] = cells;
  const num = parseInt(numCell);
  if (Number.isNaN(num) || num === 0) {
    return null;
  }

  const notes = notesCell ? notesCell : '';
  return { num, date, company, role, score, status, pdf, report, notes, raw: line };
}
/**
 * Parse the content of one tracker-addition file into a structured addition object.
 *
 * Supported layouts:
 * - Pipe-delimited markdown row: | num | date | company | role | score | status | pdf | report | notes |
 * - Tab-separated, 8 or 9 columns: num, date, company, role, status, score, pdf, report[, notes]
 *   (a TSV with score and status swapped is detected and handled).
 *
 * Empty cells are kept in place in both layouts, so a blank pdf or notes column
 * does not shift the columns that follow it.
 *
 * @param {string} content - Raw file content.
 * @param {string} filename - File name, used only in warning messages.
 * @returns {object|null} `{ num, date, company, role, score, status, pdf, report, notes }`
 *   with `status` passed through validateStatus(), or null (after a warning) when the
 *   content is empty, has fewer than 8 fields, or has an invalid entry number.
 */
function parseTsvContent(content, filename) {
  const text = content.trim();
  if (!text) return null;

  let addition;
  // Detect pipe-delimited (markdown table row)
  if (text.startsWith('|')) {
    const parts = text.split('|').map((s) => s.trim());
    // Remove only the empty strings produced by the outer pipes; interior empty
    // cells are real (blank) columns and must stay where they are.
    parts.shift();
    if (parts.length > 0 && parts[parts.length - 1] === '') parts.pop();
    if (parts.length < 8) {
      console.warn(`⚠️ Skipping malformed pipe-delimited ${filename}: ${parts.length} fields`);
      return null;
    }
    // Format: num | date | company | role | score | status | pdf | report | notes
    addition = {
      num: Number.parseInt(parts[0], 10),
      date: parts[1],
      company: parts[2],
      role: parts[3],
      score: parts[4],
      status: validateStatus(parts[5]),
      pdf: parts[6],
      report: parts[7],
      notes: parts[8] || '',
    };
  } else {
    // Tab-separated
    const parts = text.split('\t');
    if (parts.length < 8) {
      console.warn(`⚠️ Skipping malformed TSV ${filename}: ${parts.length} fields`);
      return null;
    }
    // Detect column order: some TSVs have (status, score), others have (score, status).
    const looksLikeScore = (cell) => /^\d+\.?\d*\/5$/.test(cell) || cell === 'N/A' || cell === 'DUP';
    const looksLikeStatus = (cell) =>
      /^(evaluated|applied|responded|interview|offer|rejected|discarded|skip|evaluada|aplicado|respondido|entrevista|oferta|rechazado|descartado|no aplicar|cerrada|duplicado|repost|condicional|hold|monitor)/i.test(cell);
    const col4 = parts[4].trim();
    const col5 = parts[5].trim();
    // Only an unambiguous "score then status" pair counts as swapped; every other
    // combination falls back to the standard order (status before score).
    const isSwapped = looksLikeScore(col4) && looksLikeStatus(col5);
    const statusCol = isSwapped ? col5 : col4;
    const scoreCol = isSwapped ? col4 : col5;
    addition = {
      num: Number.parseInt(parts[0], 10),
      date: parts[1],
      company: parts[2],
      role: parts[3],
      status: validateStatus(statusCol),
      score: scoreCol,
      pdf: parts[6],
      report: parts[7],
      notes: parts[8] || '',
    };
  }

  if (Number.isNaN(addition.num) || addition.num === 0) {
    console.warn(`⚠️ Skipping ${filename}: invalid entry number`);
    return null;
  }
  return addition;
}
// ---- Main ----
// Flow: load the tracker table → parse every pending addition → update or skip
// duplicates, queue genuinely new rows → insert the queued rows right below the
// table header → write the file back and archive the processed TSVs.

// Read applications.md
if (!existsSync(APPS_FILE)) {
  console.log('No applications.md found. Nothing to merge into.');
  process.exit(0);
}
const appContent = readFileSync(APPS_FILE, 'utf-8');
const appLines = appContent.split('\n');
const existingApps = [];
let maxNum = 0;
for (const line of appLines) {
  // Table rows only. Separator rows (---) and any row mentioning "Empresa"
  // (presumably the Spanish header row) are ignored; parseAppLine() additionally
  // rejects rows whose first cell is not a non-zero number.
  if (line.startsWith('|') && !line.includes('---') && !line.includes('Empresa')) {
    const app = parseAppLine(line);
    if (app) {
      existingApps.push(app);
      if (app.num > maxNum) maxNum = app.num;
    }
  }
}
console.log(`📊 Existing: ${existingApps.length} entries, max #${maxNum}`);

// Read tracker additions
if (!existsSync(ADDITIONS_DIR)) {
  console.log('No tracker-additions directory found.');
  process.exit(0);
}
const tsvFiles = readdirSync(ADDITIONS_DIR).filter((f) => f.endsWith('.tsv'));
if (tsvFiles.length === 0) {
  console.log('✅ No pending additions to merge.');
  process.exit(0);
}

// Sort files numerically (by all digits in the file name) for deterministic processing
tsvFiles.sort((a, b) => {
  const numA = parseInt(a.replace(/\D/g, ''), 10) || 0;
  const numB = parseInt(b.replace(/\D/g, ''), 10) || 0;
  return numA - numB;
});
console.log(`📥 Found ${tsvFiles.length} pending additions`);

let added = 0;
let updated = 0;
let skipped = 0;
const newLines = [];

for (const file of tsvFiles) {
  const content = readFileSync(join(ADDITIONS_DIR, file), 'utf-8').trim();
  const addition = parseTsvContent(content, file);
  if (!addition) { skipped++; continue; }

  // Check for duplicate, first hit wins:
  // 1. Exact report number match
  // 2. Exact entry number match
  // 3. Company + role fuzzy match
  const reportNum = extractReportNum(addition.report);
  let duplicate = null;
  if (reportNum) {
    // Check if this report number already exists
    duplicate = existingApps.find((app) => extractReportNum(app.report) === reportNum);
  }
  if (!duplicate) {
    // Exact entry number match
    duplicate = existingApps.find((app) => app.num === addition.num);
  }
  if (!duplicate) {
    // Company + role fuzzy match
    const normCompany = normalizeCompany(addition.company);
    duplicate = existingApps.find((app) => {
      if (normalizeCompany(app.company) !== normCompany) return false;
      return roleFuzzyMatch(addition.role, app.role);
    });
  }

  if (duplicate) {
    const newScore = parseScore(addition.score);
    const oldScore = parseScore(duplicate.score);
    if (newScore > oldScore) {
      const lineIdx = appLines.indexOf(duplicate.raw);
      if (lineIdx >= 0) {
        console.log(`🔄 Update: #${duplicate.num} ${addition.company} — ${addition.role} (${oldScore}→${newScore})`);
        // The existing status and pdf are kept; everything else comes from the re-evaluation.
        const updatedLine = `| ${duplicate.num} | ${addition.date} | ${addition.company} | ${addition.role} | ${addition.score} | ${duplicate.status} | ${duplicate.pdf} | ${addition.report} | Re-eval ${addition.date} (${oldScore}→${newScore}). ${addition.notes} |`;
        appLines[lineIdx] = updatedLine;
        // Keep the in-memory entry in sync with the rewritten row. Without this, a
        // later addition for the same entry in this batch would compare against the
        // stale score and fail to find the row by its old text.
        Object.assign(duplicate, {
          date: addition.date,
          company: addition.company,
          role: addition.role,
          score: addition.score,
          report: addition.report,
          raw: updatedLine,
        });
        updated++;
      } else {
        console.warn(`⚠️ Could not locate row #${duplicate.num} in applications.md; update not applied`);
        skipped++;
      }
    } else {
      console.log(`⏭️ Skip: ${addition.company} — ${addition.role} (existing #${duplicate.num} ${oldScore} >= new ${newScore})`);
      skipped++;
    }
  } else {
    // New entry — use the number from the TSV when it is above every known number,
    // otherwise allocate the next free one.
    const entryNum = addition.num > maxNum ? addition.num : ++maxNum;
    if (addition.num > maxNum) maxNum = addition.num;
    const newLine = `| ${entryNum} | ${addition.date} | ${addition.company} | ${addition.role} | ${addition.score} | ${addition.status} | ${addition.pdf} | ${addition.report} | ${addition.notes} |`;
    newLines.push(newLine);
    added++;
    console.log(`➕ Add #${entryNum}: ${addition.company} — ${addition.role} (${addition.score})`);
  }
}

// Insert new lines after the header separator (|---|...), i.e. above the first data row
if (newLines.length > 0) {
  const separatorIdx = appLines.findIndex((line) => line.startsWith('|') && line.includes('---'));
  if (separatorIdx < 0) {
    // Without a table there is nowhere to put the rows. Stop before anything is
    // written or archived so the pending TSVs are not lost.
    console.error(`❌ No table separator row (|---|) found in ${APPS_FILE}; cannot insert ${newLines.length} new entries. Nothing was written.`);
    process.exit(1);
  }
  appLines.splice(separatorIdx + 1, 0, ...newLines);
}

// Write back
if (!DRY_RUN) {
  writeFileSync(APPS_FILE, appLines.join('\n'));
  // Move processed files to merged/
  if (!existsSync(MERGED_DIR)) mkdirSync(MERGED_DIR, { recursive: true });
  for (const file of tsvFiles) {
    renameSync(join(ADDITIONS_DIR, file), join(MERGED_DIR, file));
  }
  console.log(`\n✅ Moved ${tsvFiles.length} TSVs to merged/`);
}
console.log(`\n📊 Summary: +${added} added, 🔄${updated} updated, ⏭️${skipped} skipped`);
if (DRY_RUN) console.log('(dry-run — no changes written)');

// Optional verify
if (VERIFY && !DRY_RUN) {
  console.log('\n--- Running verification ---');
  const { execFileSync } = await import('child_process');
  try {
    // Run the script with the current Node binary and pass the path as an argument
    // (no shell), so directories containing spaces or shell metacharacters work.
    execFileSync(process.execPath, [join(CAREER_OPS, 'verify-pipeline.mjs')], { stdio: 'inherit' });
  } catch (err) {
    // The child's own output is already on the terminal (stdio is inherited).
    console.error(`❌ Verification failed: ${err.message}`);
    process.exit(1);
  }
}