-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain_logic.py
More file actions
90 lines (67 loc) · 2.33 KB
/
main_logic.py
File metadata and controls
90 lines (67 loc) · 2.33 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import pymupdf
import re
import os
import locale
import tempfile
from pypdf import PdfReader, PdfWriter
def merge_pdfs(pdf_files):
    """Concatenate the given PDFs into one temporary PDF file.

    Every page of every input file is appended, in the order the paths
    are given, to a single writer. The result is written to a named
    temporary file (suffix ``_merged.pdf``) that is NOT deleted
    automatically; the caller is responsible for removing it.

    Args:
        pdf_files: Iterable of paths of the PDF files to merge.

    Returns:
        The filesystem path of the temporary merged PDF.
    """
    combined = PdfWriter()

    # The caller supplies the exact files to merge; no directory scanning.
    for source_path in pdf_files:
        source = PdfReader(source_path)
        for sheet in source.pages:
            combined.add_page(sheet)

    # delete=False keeps the file on disk after the handle is closed so the
    # path can be handed back to the caller.
    with tempfile.NamedTemporaryFile(delete=False, suffix="_merged.pdf") as merged_file:
        merged_path = merged_file.name
        combined.write(merged_file)

    return merged_path
def find_name(text):
    """Extract the value located between the two field labels in a page's text.

    The value is the text that follows the label
    ``"\\nSoyadı\\xa0/\\xa0Ünvanı\\n:\\n"`` and precedes the next
    ``"\\nAdı\\n:\\n"`` label.

    Args:
        text: Extracted text of a single page.

    Returns:
        The text between the two labels, or the string ``"Null"`` when
        ``text`` is not a string or either label cannot be found.
    """
    start_marker = "\nSoyadı\xa0/\xa0Ünvanı\n:\n"
    end_marker = "\nAdı\n:\n"

    if not isinstance(text, str):
        return "Null"

    # str.find signals "not found" with -1 instead of raising, so it has to
    # be checked explicitly; otherwise a meaningless slice would be returned.
    marker_pos = text.find(start_marker)
    if marker_pos == -1:
        return "Null"
    value_start = marker_pos + len(start_marker)

    # Only look for the closing label after the opening one. The search
    # starts one character early because the newline that ends the opening
    # label may also be the newline that begins the closing label (empty
    # value), in which case the slice below is simply "".
    value_end = text.find(end_marker, value_start - 1)
    if value_end == -1:
        return "Null"

    return text[value_start:value_end]
def page_traverse(pdf_path):
    """Read every page of a PDF and record its index, text and receiver name.

    The module-level ``pages`` list is replaced with one dict per page,
    in document order, with the keys ``"page_number"`` (0-based index),
    ``"page_text"`` (plain-text extraction of the page) and ``"receiver"``
    (the result of ``find_name`` on that text).

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The list of page dicts (the same object stored in the global
        ``pages``).
    """
    global pages
    pages = []

    # The context manager closes the document when done, so the underlying
    # file is released (an open handle can prevent deleting the file later).
    with pymupdf.open(pdf_path) as doc:
        for page_number, pdf_page in enumerate(doc):
            # Extract the text once and reuse it for both fields.
            page_text = pdf_page.get_text("text")
            pages.append({
                "page_number": page_number,
                "page_text": page_text,
                "receiver": find_name(page_text),
            })

    return pages
def sort_pages(pages):
    """Return a new list of page dicts ordered by receiver name.

    Ordering uses ``locale.strxfrm`` on each entry's ``'receiver'`` value,
    so it follows the collation rules of the currently active locale.
    The input is not modified.

    Args:
        pages: Iterable of dicts, each having a ``'receiver'`` string.

    Returns:
        A new list with the same dicts in collation order.
    """
    def collation_key(entry):
        return locale.strxfrm(entry['receiver'])

    ordered = list(pages)
    ordered.sort(key=collation_key)
    return ordered
def sort_and_save(merged_pdf_path, output_path):
    """Write the merged PDF's pages to ``output_path`` in receiver order.

    The order is obtained by passing the module-level ``pages`` list
    through ``sort_pages`` and taking each entry's ``'page_number'``.

    Args:
        merged_pdf_path: Path of the merged PDF to read pages from.
        output_path: Path of the PDF file to create.
    """
    # Indices into the merged PDF, arranged in the final output order.
    ordered_indices = [entry['page_number'] for entry in sort_pages(pages)]

    source = PdfReader(merged_pdf_path)
    result = PdfWriter()
    for index in ordered_indices:
        result.add_page(source.pages[index])

    with open(output_path, "wb") as out_file:
        result.write(out_file)
def main(pdf_files, output_path):
    """Merge the given PDFs, sort the pages by receiver name and save them.

    Steps: set the collation locale to ``tr_TR.UTF-8``, merge the inputs
    into a temporary PDF, collect per-page data, write the sorted result
    to ``output_path``, then remove the temporary file.

    Args:
        pdf_files: Paths of the PDF files to merge.
        output_path: Path where the sorted PDF is written.

    Returns:
        ``output_path``.

    Raises:
        locale.Error: If the ``tr_TR.UTF-8`` locale cannot be set.
    """
    # NOTE: this changes the process-wide collation locale and does not
    # restore the previous setting.
    locale.setlocale(locale.LC_COLLATE, 'tr_TR.UTF-8')

    # Merge the input PDFs into a temporary merged file
    merged_pdf_path = merge_pdfs(pdf_files)
    try:
        # Process the merged PDF
        page_traverse(merged_pdf_path)
        # Sort the pages and save to the output path
        sort_and_save(merged_pdf_path, output_path)
    finally:
        # Clean up the temporary merged PDF even when processing failed, so
        # that errors do not leave stray files behind. Deletion is
        # best-effort: a failure is reported but never masks the result.
        try:
            os.unlink(merged_pdf_path)
        except OSError as e:
            print(f"Could not delete temporary merged file {merged_pdf_path}: {e}")

    return output_path