-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathalcreatures.py
More file actions
155 lines (140 loc) · 5.41 KB
/
alcreatures.py
File metadata and controls
155 lines (140 loc) · 5.41 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
from email.mime import image
from genericpath import isfile
from pdfminer.high_level import extract_pages
from pdfminer.layout import LTTextContainer, LTChar
from pathlib import Path
import requests
import string
import re
import sys
import hashlib
from openai import OpenAI
import os
def download_image(image_url, local_filename):
response = requests.get(image_url)
if response.status_code == 200:
with open(local_filename, 'wb') as file:
file.write(response.content)
else:
print(f"Failed to download image. Status code: {response.status_code}")
size_radius = {
'tiny': 0.5,
'small': 1,
'medium': 1,
'large': 2,
'huge': 3,
'gargantuan': 4
}
def find_count_before_creature(text, creature):
count_values = {
'a': 1,
'an': 1,
'one': 1,
'two': 2,
'three': 3,
'four': 4,
'five': 5,
'six': 6,
'seven': 7,
'eight': 8,
'nine': 9,
'ten': 10
}
pattern = r'\b(a|an|one|two|three|four|five|six|seven|eight|nine|ten)\s+' + re.escape(creature) + r's?\b'
match = re.search(pattern, text, re.IGNORECASE)
if match:
return count_values[match.group(1).lower()]
else:
return None
def normalize_and_hash(text):
normalized_text = re.sub(r'\s+', ' ', text).strip().lower()
return hashlib.md5(normalized_text.encode('utf-8')).hexdigest()
def is_header(s: str) -> bool:
lines = s.split('\n')
return lines[0] == lines[1]
def size_category(s: str) -> str:
pattern = r'\b(Tiny|Large|Medium|Small|Gargantuan|Huge)\b'
match = re.search(pattern, s, re.IGNORECASE)
return match.group().lower() if match else ''
def type_category(input_string):
cleaned_input = input_string.translate(str.maketrans('', '', string.punctuation))
words = cleaned_input.lower().split()
return words[1] if len(words) > 1 else ""
def count_creatures(pdf_path, creatures):
creature_names = [c['name'].lower() for c in creatures if 'name' in c]
seen_hashes = set()
e = 0
for page_layout in extract_pages(pdf_path):
for element in page_layout:
e += 1
if isinstance(element, LTTextContainer):
text = element.get_text().strip().lower()
# skip if we've seen this line before
text_hash = normalize_and_hash(text)
if text_hash in seen_hashes:
continue
seen_hashes.add(text_hash)
for creature in creatures:
name = creature['name'].lower()
if name in text:
if "adjustment" in text:
continue
creature_count = find_count_before_creature(text, name)
if creature_count:
creature['count'] += creature_count
return creatures
def extract_creatures(pdf_path):
creatures = []
pages = extract_pages(pdf_path)
for page_layout in pages:
# Convert page_layout to an iterator to manually control iteration
elements = iter(page_layout)
try:
current_element = next(elements)
while True:
next_element = next(elements)
if isinstance(current_element, LTTextContainer):
text = current_element.get_text()
if is_header(text):
# Creature statblocks begin with two lines of the form:
# <creature name header>
# <size> <type>, <alignment>
if isinstance(next_element, LTTextContainer):
next_text = next_element.get_text()
size = size_category(next_text)
if size:
creature = text.split('\n')[0]
s = size.lower()
filename = f'{creature.lower().replace(" ", "_")}_{s}_{size_radius[s]}in'
type = type_category(next_text)
creatures.append({"name": creature,
"size": size,
"type": type,
"filename": filename,
"count": 1})
else:
break
current_element = next_element
except StopIteration:
pass
return creatures
# Usage
if len(sys.argv) < 2:
print("Usage: python script_name.py <pdf_file_path>")
sys.exit(1)
pdf_path = sys.argv[1]
print('Extracting creatures...')
creatures = extract_creatures(pdf_path)
print(f'Found {len(creatures)} creatures.')
print('Adjusting creature counts...')
creatures = count_creatures(pdf_path, creatures)
client = OpenAI()
for creature in creatures:
creature['filename'] += f'_{creature["count"]}ct.webp'
if os.path.isfile(creature["filename"]):
print(f'Found {creature["filename"]}.')
continue
print(f'Generating {creature["filename"]}...')
image = client.images.generate(prompt=f'a {creature["size"]} {creature["type"]}, similar to a {creature["name"]}, in a gritty fantasy style. Dramatic full-body shot.', model='dall-e-3')
download_image(image.data[0].url, creature["filename"])
print('Done.')