Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,13 @@ modes/_profile.md

# Generated
.resolved-prompt-*
.batch-context-*.md
node_modules/
bun.lock

# Comp research cache (personal salary data, local only)
data/comp-cache.yml

# OS
.DS_Store
*.mov
Expand Down
466 changes: 255 additions & 211 deletions batch/batch-prompt.md

Large diffs are not rendered by default.

59 changes: 59 additions & 0 deletions batch/batch-runner.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ STATE_LOCK_DIR="$BATCH_DIR/.batch-state.lock"
STATE_LOCK_PID_FILE="$STATE_LOCK_DIR/pid"
STATE_LOCK_TIMEOUT_SECONDS=30
MAIN_PID="${BASHPID:-$$}"
BATCH_CONTEXT_FILE="$BATCH_DIR/.batch-context-$$.md"

# Defaults
PARALLEL=1
Expand Down Expand Up @@ -100,6 +101,7 @@ release_lock() {
return
fi
rm -f "$LOCK_FILE"
rm -f "$BATCH_CONTEXT_FILE"
}

trap release_lock EXIT
Expand All @@ -124,6 +126,42 @@ check_prerequisites() {
mkdir -p "$LOGS_DIR" "$TRACKER_DIR" "$REPORTS_DIR"
}

# Pre-build shared context (cv.md + profile.yml) once for all workers.
# Workers reference {{CONTEXT_PRELOADED}} which gets substituted with this content,
# so they don't need to issue separate Read tool calls for these static files.
build_context() {
# Candidate context files; both are optional.
local cv_file="$PROJECT_DIR/cv.md"
local profile_file="$PROJECT_DIR/config/profile.yml"
local cv_content=""
local profile_content=""

# Read each file only if it exists; a missing file leaves its variable empty.
[[ -f "$cv_file" ]] && cv_content=$(cat "$cv_file")
[[ -f "$profile_file" ]] && profile_content=$(cat "$profile_file")

if [[ -z "$cv_content" && -z "$profile_content" ]]; then
# No files to preload — substitute with empty string so placeholder is removed
# (note: echo "" still writes a single newline, so the file is not truly empty).
echo "" > "$BATCH_CONTEXT_FILE"
return
fi

# Unquoted heredoc: ${cv_content}/${profile_content} expand here, and the
# expanded text is NOT re-scanned by the shell, so file contents stay inert.
# The \`\`\` fences are backslash-escaped so they are literal markdown fences.
cat > "$BATCH_CONTEXT_FILE" <<CONTEXT_EOF

## CV y Perfil Pre-cargados

### cv.md
\`\`\`markdown
${cv_content}
\`\`\`

### config/profile.yml
\`\`\`yaml
${profile_content}
\`\`\`

CONTEXT_EOF
# ${#var} is character length in bash, so the reported sizes are chars, not bytes.
echo "📋 Pre-loaded context: cv.md (${#cv_content} chars) + profile.yml (${#profile_content} chars)"
}

# Initialize state file if it doesn't exist
init_state() {
if [[ ! -f "$STATE_FILE" ]]; then
Expand Down Expand Up @@ -339,6 +377,8 @@ process_offer() {
esc_report_num="${report_num//|/\\|}"
esc_date="${date//|/\\|}"
esc_id="${id//|/\\|}"
local context_content=""
[[ -f "$BATCH_CONTEXT_FILE" ]] && context_content=$(cat "$BATCH_CONTEXT_FILE")
sed \
-e "s|{{URL}}|${esc_url}|g" \
-e "s|{{JD_FILE}}|${esc_jd_file}|g" \
Expand All @@ -347,6 +387,24 @@ process_offer() {
-e "s|{{ID}}|${esc_id}|g" \
"$PROMPT_FILE" > "$resolved_prompt"

# Inline the pre-built context — sed can't handle large multi-line substitutions safely,
# so we do a two-pass approach: replace the placeholder line with the context file content.
if [[ -s "$BATCH_CONTEXT_FILE" ]]; then
local tmp_prompt="$BATCH_DIR/.resolved-prompt-${id}-ctx.md"
awk -v ctx_file="$BATCH_CONTEXT_FILE" '
/\{\{CONTEXT_PRELOADED\}\}/ {
while ((getline line < ctx_file) > 0) print line
close(ctx_file)
next
}
{ print }
' "$resolved_prompt" > "$tmp_prompt"
mv "$tmp_prompt" "$resolved_prompt"
else
# No context file — just remove the placeholder
sed -i 's/{{CONTEXT_PRELOADED}}//g' "$resolved_prompt"
fi

# Launch claude -p worker (uses default model from Claude Max subscription)
local exit_code=0
claude -p \
Expand Down Expand Up @@ -437,6 +495,7 @@ main() {
fi

init_state
build_context

# Count input offers (skip header, ignore blank lines)
local total_input
Expand Down
283 changes: 283 additions & 0 deletions comp-cache.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,283 @@
#!/usr/bin/env node
/**
* comp-cache.mjs — Compensation research cache for career-ops
*
* Caches salary data from WebSearch (Glassdoor, Levels.fyi, Blind) to avoid
* repeating the same searches across evaluations. Cache TTL is 60 days.
*
* Cache file: data/comp-cache.yml (gitignored, local only)
*
* CLI usage:
* node comp-cache.mjs lookup "senior-ai-engineer" "series-b" "remote"
* node comp-cache.mjs save "senior-ai-engineer" "series-b" "remote" '{"p25":180000,"p50":210000,"p75":260000,"sources":["glassdoor"]}'
* node comp-cache.mjs list
* node comp-cache.mjs purge # Remove expired entries
*
* Returns (stdout):
* lookup: JSON data if hit, "miss" if not found or expired
 * save: "saved:<key>" on success
* list: JSON array of all cache keys with status
* purge: "purged N entries"
*/

import { readFileSync, writeFileSync, existsSync, mkdirSync } from 'fs';
import { join, dirname } from 'path';
import { fileURLToPath } from 'url';

const __dirname = dirname(fileURLToPath(import.meta.url));
const CACHE_FILE = join(__dirname, 'data', 'comp-cache.yml');
const DEFAULT_TTL_DAYS = 60;

// ---------------------------------------------------------------------------
// YAML helpers — minimal serializer/parser for flat key-value YAML.
// No external dependency (avoids adding js-yaml to package.json).
// Format: only supports string, number, and string[] leaf values.
// ---------------------------------------------------------------------------

/**
 * Parse the custom comp-cache YAML format into a JS object.
 *
 * Format:
 *   entries:
 *     key-name:
 *       field: value
 *       sources: glassdoor,levels.fyi
 *
 * Numeric-looking field values are coerced to numbers; everything else
 * (including the empty string) stays a string. Surrounding double quotes are
 * stripped. Blank lines and `#` comment lines are ignored.
 *
 * @param {string} raw - Raw YAML text (may be empty or undefined).
 * @returns {{entries: Object<string, object>}} Parsed cache object.
 */
function parseCache(raw) {
  const result = { entries: {} };
  if (!raw || !raw.trim()) return result;

  const lines = raw.split('\n');
  let currentKey = null;

  for (const line of lines) {
    if (!line.trim() || line.trim().startsWith('#')) continue;

    // Top-level "entries:" declaration
    if (/^entries:\s*$/.test(line)) continue;

    // Second-level key (2-space indent). Anchor on a non-space first char so a
    // deeper-indented, value-less field line (e.g. "    note:") is never
    // misread as a new entry key.
    const keyMatch = line.match(/^  ([^\s:][^:]*):\s*$/);
    if (keyMatch) {
      currentKey = keyMatch[1].trim();
      result.entries[currentKey] = {};
      continue;
    }

    // Third-level field (4 spaces indent)
    if (currentKey) {
      const fieldMatch = line.match(/^    ([^:]+):\s*(.*)$/);
      if (fieldMatch) {
        const field = fieldMatch[1].trim();
        const value = fieldMatch[2].trim().replace(/^"(.*)"$/, '$1'); // strip quotes
        // Coerce numeric-looking values; empty string stays a string.
        const num = Number(value);
        result.entries[currentKey][field] = Number.isNaN(num) || value === '' ? value : num;
      }
    }
  }

  return result;
}

/**
 * Serialize the cache object back into the comp-cache YAML format.
 *
 * Strings that contain a space (and empty strings) are wrapped in double
 * quotes; every other value is written verbatim.
 *
 * @param {{entries: Object<string, object>}} cache - Cache object to render.
 * @returns {string} YAML text, always terminated by a trailing newline.
 */
function serializeCache(cache) {
  const out = ['entries:'];

  for (const [entryKey, fields] of Object.entries(cache.entries)) {
    out.push(`  ${entryKey}:`);

    for (const [fieldName, fieldValue] of Object.entries(fields)) {
      const needsQuotes =
        typeof fieldValue === 'string' && (fieldValue === '' || fieldValue.includes(' '));
      out.push(`    ${fieldName}: ${needsQuotes ? `"${fieldValue}"` : fieldValue}`);
    }
  }

  return `${out.join('\n')}\n`;
}

// ---------------------------------------------------------------------------
// Key building
// ---------------------------------------------------------------------------

/**
 * Build a normalized cache key from role, company stage, and location.
 * Empty components are dropped; the current quarter is always appended.
 * Format: {role}-{stage}-{location}-{YYYY-QN}
 *
 * @param {string} role
 * @param {string} stage
 * @param {string} location
 * @returns {string} Normalized, dash-joined cache key.
 */
function buildKey(role, stage, location) {
  // Lowercase, collapse every non-alphanumeric run to a dash, trim edge dashes.
  const slug = (text) =>
    text
      .toLowerCase()
      .replace(/[^a-z0-9]+/g, '-')
      .replace(/^-+|-+$/g, '');

  const today = new Date();
  const period = `${today.getFullYear()}-Q${Math.ceil((today.getMonth() + 1) / 3)}`;

  const parts = [slug(role), slug(stage), slug(location), period];
  return parts.filter(Boolean).join('-');
}

// ---------------------------------------------------------------------------
// TTL check
// ---------------------------------------------------------------------------

/**
 * Returns true if the cache entry is expired (past its TTL).
 *
 * An entry with no `fetched` date, or an unparseable one, is treated as
 * expired so it gets refreshed rather than trusted.
 *
 * @param {{fetched?: string, ttl_days?: number}} entry - Cache entry.
 * @returns {boolean} True when the entry should be considered stale.
 */
function isExpired(entry) {
  if (!entry.fetched) return true;
  // `??` (not `||`) so an explicit ttl_days of 0 means "expire immediately"
  // instead of silently falling back to the 60-day default.
  const ttlDays = entry.ttl_days ?? DEFAULT_TTL_DAYS;
  const fetched = new Date(entry.fetched);
  if (Number.isNaN(fetched.getTime())) return true;
  const ageDays = (Date.now() - fetched.getTime()) / (1000 * 60 * 60 * 24);
  return ageDays > ttlDays;
}

// ---------------------------------------------------------------------------
// Read / Write cache
// ---------------------------------------------------------------------------

/**
 * Load the cache from disk.
 * Returns an empty cache when the file is missing or unreadable.
 */
function readCache() {
  if (!existsSync(CACHE_FILE)) return { entries: {} };
  try {
    return parseCache(readFileSync(CACHE_FILE, 'utf-8'));
  } catch {
    // Unreadable file — start over with an empty cache rather than crash.
    return { entries: {} };
  }
}

/**
 * Persist the cache to disk, creating the data directory on first write.
 */
function writeCache(cache) {
  const cacheDir = dirname(CACHE_FILE);
  if (!existsSync(cacheDir)) {
    mkdirSync(cacheDir, { recursive: true });
  }
  writeFileSync(CACHE_FILE, serializeCache(cache), 'utf-8');
}

// ---------------------------------------------------------------------------
// Public API
// ---------------------------------------------------------------------------

/**
 * Look up comp data by key. Returns parsed entry object or null if miss/expired.
 *
 * @param {string} key - Cache key built by buildKey().
 * @returns {object|null} The entry when present and fresh, otherwise null.
 */
function lookup(key) {
  const hit = readCache().entries[key];
  return hit && !isExpired(hit) ? hit : null;
}

/**
 * Save comp data to cache under `key`, stamping today's date and the default
 * TTL so the entry can later be expired.
 *
 * @param {string} key - Cache key
 * @param {object} data - { p25, p50, p75, currency, sources: string[] }
 */
function save(key, data) {
  const cache = readCache();
  const stamp = new Date().toISOString().split('T')[0]; // YYYY-MM-DD

  // Sources are stored as a comma-joined string in the flat YAML format.
  let sourceList = data.sources ?? '';
  if (Array.isArray(sourceList)) sourceList = sourceList.join(',');

  cache.entries[key] = {
    p25: data.p25 ?? null,
    p50: data.p50 ?? null,
    p75: data.p75 ?? null,
    currency: data.currency ?? 'USD',
    sources: sourceList,
    fetched: stamp,
    ttl_days: DEFAULT_TTL_DAYS,
  };

  writeCache(cache);
}

/**
 * List all cache keys with their status (fresh/expired).
 *
 * @returns {Array<{key: string, fetched: *, expired: boolean, p50: *, currency: *}>}
 */
function listEntries() {
  const summaries = [];
  for (const [key, entry] of Object.entries(readCache().entries)) {
    summaries.push({
      key,
      fetched: entry.fetched,
      expired: isExpired(entry),
      p50: entry.p50,
      currency: entry.currency,
    });
  }
  return summaries;
}

/**
 * Remove expired entries from cache. The file is only rewritten when at
 * least one entry was actually removed.
 *
 * @returns {number} Count of purged entries
 */
function purge() {
  const cache = readCache();
  const staleKeys = Object.keys(cache.entries).filter((k) => isExpired(cache.entries[k]));

  for (const k of staleKeys) {
    delete cache.entries[k];
  }
  if (staleKeys.length > 0) writeCache(cache);

  return staleKeys.length;
}

export { buildKey, lookup, save, listEntries, purge, isExpired, parseCache, serializeCache };

// ---------------------------------------------------------------------------
// CLI
// ---------------------------------------------------------------------------

if (process.argv[1] === fileURLToPath(import.meta.url)) {
  // Invoked directly from the command line (not imported as a module).
  const [, , command, ...args] = process.argv;

  if (command === 'lookup') {
    if (args.length < 3) {
      console.error('Usage: comp-cache.mjs lookup <role> <stage> <location>');
      process.exit(1);
    }
    const key = buildKey(args[0], args[1], args[2]);
    const entry = lookup(key);
    // Print the hit as one-line JSON, or the literal string "miss".
    console.log(entry ? JSON.stringify({ hit: true, key, ...entry }) : 'miss');
  } else if (command === 'save') {
    if (args.length < 4) {
      console.error('Usage: comp-cache.mjs save <role> <stage> <location> <json-data>');
      process.exit(1);
    }
    const key = buildKey(args[0], args[1], args[2]);
    let data;
    try {
      data = JSON.parse(args[3]);
    } catch {
      console.error('Invalid JSON data');
      process.exit(1);
    }
    save(key, data);
    console.log(`saved:${key}`);
  } else if (command === 'list') {
    console.log(JSON.stringify(listEntries(), null, 2));
  } else if (command === 'purge') {
    console.log(`purged ${purge()} entries`);
  } else {
    console.error('Commands: lookup, save, list, purge');
    process.exit(1);
  }
}
Loading