-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
104 lines (82 loc) · 2.71 KB
/
main.py
File metadata and controls
104 lines (82 loc) · 2.71 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
from pathlib import Path
from flask import Flask, request, render_template, send_from_directory
import os
from pdfminer.high_level import extract_text
import subprocess
import json
import sys
import fitz
import re
def normalize_text(text):
    """Lower-case *text*, drop punctuation, and return its words as a list.

    Every character that is neither a word character (``\\w``, which includes
    the underscore) nor whitespace is removed; the remainder is split on
    runs of whitespace.
    """
    lowered = text.lower()
    without_punctuation = re.sub(r'[^\w\s]', '', lowered)
    return without_punctuation.split()
def fuzzy_match(a, b, threshold=0.1):
    """Report whether two texts share enough vocabulary to count as a match.

    Both texts are tokenised with ``normalize_text``. The score is the number
    of distinct words the texts have in common divided by the size of the
    larger distinct-word set.

    Args:
        a: First text.
        b: Second text.
        threshold: Minimum score (inclusive) for the texts to match.

    Returns:
        True when the score reaches *threshold*; False when it does not or
        when either text contains no words after normalisation.
    """
    a_vocab = set(normalize_text(a))
    b_vocab = set(normalize_text(b))
    if not a_vocab or not b_vocab:
        return False
    # Numerator and denominator are both counted in distinct words. Dividing
    # the distinct-word overlap by the raw token count meant that a text which
    # merely repeated its words scored lower than the same text without the
    # repetition.
    shared = len(a_vocab & b_vocab)
    ratio = shared / max(len(a_vocab), len(b_vocab))
    return ratio >= threshold
def get_color(labels):
    """Pick a colour according to how many tracked labels are present.

    The tracked labels are 'Metrics', 'Actionable' and 'Domain-Specific'.
    Membership is tested with ``in``, so *labels* may be any container that
    supports it.

    Returns:
        A 3-tuple of colour components: red for no tracked label, orange for
        one, yellow for two and green for all three.
    """
    palette = (
        (1, 0, 0),    # Red
        (1, 0.5, 0),  # Orange
        (1, 1, 0),    # Yellow
        (0, 1, 0),    # Green
    )
    tracked = ('Metrics', 'Actionable', 'Domain-Specific')
    hits = sum(1 for tag in tracked if tag in labels)
    return palette[hits]
# Flask application object. TEMP_FOLDER is where uploaded PDFs are saved and
# the directory the /temp/<filename> route serves from.
app = Flask(__name__)
app.config.update(TEMP_FOLDER='temp')
# Create the folder at import time so request handlers can assume it exists.
os.makedirs(app.config['TEMP_FOLDER'], exist_ok=True)
def return_text(file_path):
    """Return the text that pdfminer's ``extract_text`` yields for *file_path*."""
    extracted = extract_text(file_path)
    return extracted
@app.route('/')
def index():
    """Render the landing page from the ``home.html`` template."""
    page = render_template('home.html')
    return page
@app.route('/about')
def about():
    """Render the ``about.html`` template."""
    page = render_template('about.html')
    return page
@app.route('/upload')
def upload():
    """Empty the temp folder, then render the upload form.

    Every entry directly inside ``TEMP_FOLDER`` is deleted except real
    sub-directories, which are left in place.

    NOTE(review): the folder is shared by all requests, so opening this page
    also deletes files belonging to an upload that is still being viewed.
    """
    for child in Path(app.config['TEMP_FOLDER']).iterdir():
        # os.remove() raises on a directory, which used to turn this page
        # into a 500. Symlinks are still removed, as before.
        if child.is_dir() and not child.is_symlink():
            continue
        try:
            os.remove(child)
        except FileNotFoundError:
            # A concurrent request already cleaned this entry up.
            pass
    return render_template('upload.html')
@app.route('/upload', methods=['POST'])
def upload_file():
    """Accept an uploaded PDF, extract its text, run the scanner, show results.

    The form field ``pdf_file`` is saved into ``TEMP_FOLDER``, its text is
    extracted with ``return_text``, ``scan.py`` is run with the current
    interpreter, and its standard output is parsed as JSON. The number of
    ``page_*.png`` files in ``TEMP_FOLDER`` is passed to the template as the
    page count.

    NOTE(review): ``scan.py`` is started without arguments and with a relative
    path; presumably it locates the saved PDF itself and the server is run
    from the project directory — confirm.

    Returns:
        The rendered ``result.html`` page on success; a short plain-text
        message when the upload is missing, unnamed or not a ``.pdf``; a
        plain-text message with status 500 when the scanner output is not
        valid JSON.
    """
    if 'pdf_file' not in request.files:
        return 'No file part'
    file = request.files['pdf_file']
    if file.filename == '':
        return 'No selected file'

    # The filename comes from the client. Keep only its last path component
    # (treating backslashes as separators too) so a name such as
    # "../../app/main.pdf" cannot be written outside TEMP_FOLDER.
    raw_name = file.filename or ''
    safe_name = os.path.basename(raw_name.replace('\\', '/'))
    if not safe_name.lower().endswith('.pdf'):
        return 'Invalid file type'

    temp_dir = app.config['TEMP_FOLDER']
    temp_path = os.path.join(temp_dir, safe_name)
    file.save(temp_path)
    resume_text = return_text(temp_path)

    result = subprocess.run(
        [sys.executable, 'scan.py'],
        capture_output=True,
        text=True,
    )
    try:
        scanned_result = json.loads(result.stdout.strip())
    except json.JSONDecodeError:
        # A crashed or silent scanner leaves empty or partial output. Log the
        # details server-side instead of surfacing a raw traceback.
        app.logger.error(
            'scan.py produced no valid JSON (exit code %s): %s',
            result.returncode,
            result.stderr,
        )
        return 'Resume scan failed', 500

    # Count the page images in the configured temp folder rather than a
    # second hard-coded 'temp' literal.
    num_pages = sum(
        1
        for name in os.listdir(temp_dir)
        if name.startswith("page_") and name.endswith(".png")
    )
    return render_template(
        'result.html',
        pdf_filename=safe_name,
        resume_text=resume_text,
        scanned_result=scanned_result,
        num_pages=num_pages,
    )
@app.route('/temp/<path:filename>')
def serve_temp_file(filename):
    """Serve *filename* from the configured temp folder via ``send_from_directory``."""
    temp_dir = app.config['TEMP_FOLDER']
    return send_from_directory(temp_dir, filename)
if __name__ == '__main__':
    # SECURITY: debug mode enables the interactive Werkzeug debugger, which
    # lets anyone who can reach the server execute arbitrary Python. It stays
    # on by default to keep the existing development workflow, but can now be
    # switched off without editing the file by setting FLASK_DEBUG to
    # 0, false or no.
    debug_enabled = os.environ.get('FLASK_DEBUG', '1').lower() not in ('0', 'false', 'no')
    app.run(debug=debug_enabled)