resume-classifier/server.js at main · hellogustav/resume-classifier · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
import 'dotenv/config';
import express from 'express';
import multer from 'multer';
import path from 'path';
import { fileURLToPath } from 'url';

// ES modules do not define __filename/__dirname, so derive them from this
// module's own URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

const app = express();
// Use PORT from the environment when it is set (and non-empty), otherwise 3000.
const PORT = process.env.PORT || 3000;

// Configure multer for file uploads: uploaded files are held in memory
// (exposed to handlers as buffers) and each file is capped at 10MB.
const upload = multer({
  storage: multer.memoryStorage(),
  limits: { fileSize: 10 * 1024 * 1024 } // 10MB limit
});

// Middleware
app.use(express.json());
// Serve static assets from the "public" folder that sits next to this file.
// An absolute path is used because a bare relative path would be resolved
// against the directory the process was launched from, which breaks the
// static site whenever the server is started from a different directory.
app.use(express.static(path.join(__dirname, 'public')));

// Classification prompt.
// Sent verbatim as the `instructions` field of the OpenAI request built in
// classifyWithOpenAI. It asks the model to answer with exactly one JSON object,
// {"isResume": true} or {"isResume": false}, and to answer true only when the
// document is clearly a resume/CV. The text below is runtime data: changing any
// character changes what the model is told.
const classificationPrompt = `You are a binary document classifier for an applicant tracking system (ATS).
Your ONLY valid output is a single JSON object with exactly one boolean field:
{"isResume": true} or {"isResume": false}
Do not output explanations, extra fields, code fences, or any other text.

Primary objective (risk-averse):
• Return true ONLY when the content is clearly a candidate resume/CV.
• If uncertain in any way, return false.

Positive evidence (need multiple signals unless overwhelmingly obvious):
• Section headers (any language): experience/work history/employment; education/formation/ausbildung; skills/competences; summary/profile; certifications; languages; projects; publications; references.
• Repeated employment date spans across roles (e.g., "2019–2023", "03/2020 - 07/2023", "Mar 2019 – Present").
• Person's name with contact info (email/phone) near the top.
• ROLE — COMPANY lines with bullet points describing responsibilities.
• Markdown headings (#, ##) and bullets that structure those sections.

Strong NOT-resume archetypes (return false even if a name/email appears):
• Portfolios, personal sites, GitHub/Behance/Dribbble pages, project case studies, design write-ups.
• Job descriptions / postings ("we are hiring", "apply now", "your responsibilities", "requirements", "benefits", "salary/compensation", "equal opportunity", "about the company", careers links).
• Identification or civil docs: passports, national IDs, driver's licenses, visas, SSNs.
• Single certificates or training confirmations (e.g., AWS/PMP certificate), badges, course completions.
• Academic records: transcripts, diplomas, grade reports.
• Letters: cover letters, recommendations, offer letters, contracts/NDAs.
• Admin/HR: timesheets, payslips, background checks, onboarding forms.
• Candidate marketing sheets, multi-candidate lists, skills matrices, intake forms.
• Blog posts, bios, LinkedIn "About", news articles, legal docs, code files.

Short text rule:
• If visible content < 200 characters, return false unless it clearly states CV/Resume (e.g., "Curriculum Vitae"/"Lebenslauf"/"Резюме"/"履歴書") AND also includes contact info. When in doubt, return false.

General instructions:
• Treat Markdown symbols (#, -, *) as structure cues; ignore filenames/metadata/links when deciding.
• Non-text or empty → false.
• Output format is STRICT: return ONLY {"isResume": true} or {"isResume": false} with no surrounding whitespace.`;

// Structured-output contract for the model reply: a strict JSON schema
// describing an object whose single required property is the boolean `isResume`.
const responseSchema = {
  name: 'resume_verdict',
  strict: true,
  schema: {
    type: 'object',
    properties: {
      isResume: {
        type: 'boolean',
        description: 'True if the input is a resume, otherwise false.',
      },
    },
    required: ['isResume'],
    additionalProperties: false,
  },
};

// OpenAI API client

/**
 * Pulls the boolean verdict out of a Responses API payload.
 *
 * Looks for the first output item of type "message", then the first content
 * part of type "output_text", and parses that text as JSON. Anything other
 * than a literal `isResume: true` (missing output, unparsable text, a
 * non-boolean value) yields false, matching the risk-averse default.
 *
 * @param {object} data - Parsed JSON body of the API response.
 * @returns {boolean} True only when the model explicitly answered true.
 */
function extractIsResume(data) {
  const message = data.output?.find((item) => item.type === 'message');
  const textContent = message?.content?.find((part) => part.type === 'output_text');
  if (!textContent?.text) {
    return false;
  }

  try {
    // Strict comparison guarantees callers always receive a real boolean.
    return JSON.parse(textContent.text)?.isResume === true;
  } catch (e) {
    console.error('Error parsing response text:', e);
    return false;
  }
}

/**
 * Computes the cost of one request from its token counts.
 *
 * Cached input tokens are billed at the cached rate; the remaining input
 * tokens at the regular input rate.
 *
 * @param {number} inputTokens - Total input tokens (cached ones included).
 * @param {number} cachedTokens - Portion of the input tokens served from cache.
 * @param {number} outputTokens - Output tokens.
 * @returns {number} Total cost for the request.
 */
function estimateCost(inputTokens, cachedTokens, outputTokens) {
  // Pricing per 1M tokens
  const INPUT_PRICE_PER_1M = 0.05;
  const CACHED_PRICE_PER_1M = 0.005;
  const OUTPUT_PRICE_PER_1M = 0.40;

  const inputCost = ((inputTokens - cachedTokens) * INPUT_PRICE_PER_1M) / 1000000;
  const cachedCost = (cachedTokens * CACHED_PRICE_PER_1M) / 1000000;
  const outputCost = (outputTokens * OUTPUT_PRICE_PER_1M) / 1000000;
  return inputCost + cachedCost + outputCost;
}

/**
 * Classifies a piece of text as resume / not-resume via the OpenAI Responses API.
 *
 * Request failures (network errors, non-2xx responses, malformed payloads) are
 * not thrown: they are reported through the `error` field of the resolved
 * object so one bad document does not reject a whole batch.
 *
 * @param {string} text - Document text to classify.
 * @returns {Promise<object>} On success:
 *   `{ isResume, duration, inputTokens, outputTokens, cost, rawResponse }`
 *   where `duration` is in seconds. On failure: `{ error, duration }`.
 * @throws {Error} (as a rejection) when OPENAI_API_KEY is not set.
 */
async function classifyWithOpenAI(text) {
  const OPENAI_API_KEY = process.env.OPENAI_API_KEY;

  if (!OPENAI_API_KEY) {
    throw new Error('OPENAI_API_KEY environment variable is not set');
  }

  const startTime = Date.now();

  try {
    const response = await fetch('https://api.openai.com/v1/responses', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Authorization': `Bearer ${OPENAI_API_KEY}`
      },
      body: JSON.stringify({
        model: 'gpt-5-nano',
        input: text,
        instructions: classificationPrompt,
        reasoning: {
          effort: 'minimal'
        },
        text: {
          verbosity: 'low',
          format: {
            type: 'json_schema',
            name: responseSchema.name,
            schema: responseSchema.schema,
            strict: responseSchema.strict
          }
        }
      })
    });

    // Measured as soon as the response headers arrive, before the body is read.
    const duration = (Date.now() - startTime) / 1000; // Convert to seconds

    if (!response.ok) {
      const errorText = await response.text();
      throw new Error(`OpenAI API error: ${response.status} ${errorText}`);
    }

    const data = await response.json();

    // Extract the classification result from the response
    const isResume = extractIsResume(data);

    // Extract token usage and calculate cost
    const usage = data.usage || {};
    const inputTokens = usage.input_tokens || 0;
    const cachedTokens = usage.input_tokens_details?.cached_tokens || 0;
    const outputTokens = usage.output_tokens || 0;

    return {
      isResume,
      duration,
      inputTokens,
      outputTokens,
      cost: estimateCost(inputTokens, cachedTokens, outputTokens),
      rawResponse: data
    };
  } catch (error) {
    return {
      error: error.message,
      duration: (Date.now() - startTime) / 1000 // Convert to seconds
    };
  }
}

// API endpoints

// POST /api/classify
// Accepts multipart uploads in the "files" field, decodes every file as UTF-8
// text, classifies all of them concurrently and replies with one result entry
// per file. Replies 400 when no file was sent and 500 when processing rejects.
app.post('/api/classify', upload.array('files'), async (req, res) => {
  try {
    const uploads = req.files;
    const hasUploads = Boolean(uploads) && uploads.length !== 0;
    if (!hasUploads) {
      return res.status(400).json({ error: 'No files uploaded' });
    }

    // Turns one uploaded file into its entry of the response payload.
    const classifyUpload = async (file) => {
      const outcome = await classifyWithOpenAI(file.buffer.toString('utf-8'));
      const { isResume, duration, inputTokens, outputTokens, cost, error } = outcome;

      return {
        filename: file.originalname,
        isResume,
        duration,
        inputTokens,
        outputTokens,
        cost,
        error,
        size: file.size
      };
    };

    // All files are classified in parallel; result order follows upload order.
    const pending = uploads.map((file) => classifyUpload(file));
    const results = await Promise.all(pending);

    res.json({ results });
  } catch (err) {
    console.error('Error processing files:', err);
    res.status(500).json({ error: err.message });
  }
});

// GET /api/health — health check that always answers with a fixed OK payload
app.get('/api/health', (_req, res) => {
  const payload = { status: 'ok' };
  res.json(payload);
});

// Start listening on PORT and print the startup hints once the server is up
app.listen(PORT, () => {
  const startupLines = [
    `Server running on http://localhost:${PORT}`,
    'Make sure to set OPENAI_API_KEY environment variable'
  ];
  for (const line of startupLines) {
    console.log(line);
  }
});