Skip to content

Commit 028b539

Browse files
committed
Improved Comic Vine matching.
1 parent e63923b commit 028b539

1 file changed

Lines changed: 55 additions & 10 deletions

File tree

comics/utils/comicimporter.py

Lines changed: 55 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,18 @@ def __init__(self):
3939
self.series_fields = 'api_detail_url,deck,description,id,name,publisher,site_detail_url,start_year'
4040
self.team_fields = 'characters,deck,description,id,image,name,site_detail_url'
4141

42+
# International reprint publishers
43+
# Ordered by # of issues (est.) for quick matching.
44+
self.int_pubs = [
45+
2350, # Panini (21.5k)
46+
2812, # Marvel UK (4.2k)
47+
2094, # Abril (2.1k)
48+
2319, # Planeta DeAgostini (2.1k)
49+
2903, # Ediciones Zinco (0.7k)
50+
1133, # Semic As (0.3k)
51+
2961, # Marvel Italia (0.04k)
52+
]
53+
4254
#==================================================================================================
4355

4456
def import_comic_files(self):
@@ -158,6 +170,7 @@ def _find_issue_match(self, filename):
158170
item_year = datetime.date.today().year
159171
item_number = 1
160172
item_name = ''
173+
item_pub_id = ''
161174

162175
if 'cover_date' in issue:
163176
if issue['cover_date']:
@@ -171,16 +184,48 @@ def _find_issue_match(self, filename):
171184
item_name = issue['volume']['name']
172185
item_name = utils.remove_special_characters(item_name)
173186

174-
if series_name and issue_number:
175-
score = (fuzz.ratio(item_name.lower(), series_name.lower()) + fuzz.partial_ratio(item_name.lower(), series_name.lower())) / 2
176-
if score >= 90:
177-
if item_number == issue_number:
178-
if item_year == issue_year:
179-
best_option_list.insert(0, issue)
180-
break
181-
best_option_list.insert(0, issue)
182-
183-
found_issue = best_option_list[0] if best_option_list else None
187+
# Get publisher ID
188+
pub_check_params = self.base_params
189+
pub_check_params['field_list'] = 'publisher'
190+
pub_check_response = requests.get(
191+
self.baseurl + 'volume/4050-' + str(issue['volume']['id']),
192+
params=pub_check_params,
193+
headers=self.headers,
194+
).json()
195+
196+
if 'publisher' in pub_check_response['results']:
197+
if pub_check_response['results']['publisher'] is not None:
198+
item_pub_id = pub_check_response['results']['publisher']['id']
199+
200+
# Get the match score (0-5)
201+
if series_name:
202+
# Fuzzy match the series name.
203+
# Gives a score between 0 and 2.
204+
score = (fuzz.ratio(item_name.lower(), series_name.lower()) + fuzz.partial_ratio(item_name.lower(), series_name.lower())) / 100
205+
206+
# If the issue number is the same, add 1 point.
207+
if item_number == issue_number:
208+
score += 1
209+
210+
# If the year is the same, add 2 points.
211+
if issue_year != '':
212+
if item_year == issue_year:
213+
score += 2
214+
215+
# If the publisher is an international reprint, subtract a point (never going below 0).
216+
if item_pub_id != '':
217+
if item_pub_id in self.int_pubs:
218+
score = score - 1 if score > 1 else 0
219+
220+
# Add the issue and its score to the list.
221+
best_option_list.insert(0, {
222+
'score': score,
223+
'issue': issue,
224+
})
225+
226+
# Sort the list by score, and pick the top scoring issue.
227+
best_option_list.sort(key=lambda x: x['score'], reverse=True)
228+
found_issue = best_option_list[0]['issue'] if best_option_list else None
184229

185230
cvid = found_issue['id'] if found_issue else ''
186231

0 commit comments

Comments
 (0)