@@ -39,6 +39,18 @@ def __init__(self):
3939 self .series_fields = 'api_detail_url,deck,description,id,name,publisher,site_detail_url,start_year'
4040 self .team_fields = 'characters,deck,description,id,image,name,site_detail_url'
4141
42+ # International reprint publishers
43+ # Ordered by # of issues (est.) for quick matching.
44+ self .int_pubs = [
45+ 2350 , # Panini (21.5k)
46+ 2812 , # Marvel UK (4.2k)
47+ 2094 , # Abril (2.1k)
48+ 2319 , # Planeta DeAgostini (2.1k)
49+ 2903 , # Ediciones Zinco (0.7k)
50+ 1133 , # Semic As (0.3k)
51+ 2961 , # Marvel Italia (0.04k)
52+ ]
53+
4254 #==================================================================================================
4355
4456 def import_comic_files (self ):
@@ -158,6 +170,7 @@ def _find_issue_match(self, filename):
158170 item_year = datetime .date .today ().year
159171 item_number = 1
160172 item_name = ''
173+ item_pub_id = ''
161174
162175 if 'cover_date' in issue :
163176 if issue ['cover_date' ]:
@@ -171,16 +184,48 @@ def _find_issue_match(self, filename):
171184 item_name = issue ['volume' ]['name' ]
172185 item_name = utils .remove_special_characters (item_name )
173186
174- if series_name and issue_number :
175- score = (fuzz .ratio (item_name .lower (), series_name .lower ()) + fuzz .partial_ratio (item_name .lower (), series_name .lower ())) / 2
176- if score >= 90 :
177- if item_number == issue_number :
178- if item_year == issue_year :
179- best_option_list .insert (0 , issue )
180- break
181- best_option_list .insert (0 , issue )
182-
183- found_issue = best_option_list [0 ] if best_option_list else None
187+ # Get publisher ID
188+ pub_check_params = self .base_params
189+ pub_check_params ['field_list' ] = 'publisher'
190+ pub_check_response = requests .get (
191+ self .baseurl + 'volume/4050-' + str (issue ['volume' ]['id' ]),
192+ params = pub_check_params ,
193+ headers = self .headers ,
194+ ).json ()
195+
196+ if 'publisher' in pub_check_response ['results' ]:
197+ if pub_check_response ['results' ]['publisher' ] is not None :
198+ item_pub_id = pub_check_response ['results' ]['publisher' ]['id' ]
199+
200+ # Get the match score (0-5)
201+ if series_name :
202+ # Fuzzy match the series name.
203+ # Gives a score between 0 and 2.
204+ score = (fuzz .ratio (item_name .lower (), series_name .lower ()) + fuzz .partial_ratio (item_name .lower (), series_name .lower ())) / 100
205+
206+ # If the issue number is the same, add 1 point.
207+ if item_number == issue_number :
208+ score += 1
209+
210+ # If the year is the same, add 2 points.
211+ if issue_year != '' :
212+ if item_year == issue_year :
213+ score += 2
214+
215+ # If the publisher is an international reprint, subtract a point (scores of 1 or less become 0).
216+ if item_pub_id != '' :
217+ if item_pub_id in self .int_pubs :
218+ score = score - 1 if score > 1 else 0
219+
220+ # Add the issue and its score to the list.
221+ best_option_list .insert (0 , {
222+ 'score' : score ,
223+ 'issue' : issue ,
224+ })
225+
226+ # Sort the list by score, and pick the top scoring issue.
227+ best_option_list .sort (key = lambda x : x ['score' ], reverse = True )
228+ found_issue = best_option_list [0 ]['issue' ] if best_option_list else None
184229
185230 cvid = found_issue ['id' ] if found_issue else ''
186231
0 commit comments