import urllib
import json


def compute_integer_scores_for_vector_search(items):
    """Turn float similarity scores into distinct, descending integer scores.

    Each entry's ``'score'`` (a float, typically between 0 and 1) is scaled
    by 100 and rounded. Entries are ranked best-first, and any entry whose
    rounded score would tie with (or exceed) the entry ranked before it is
    pushed down to one less than that entry, so the ranking stays strict.

    Args:
        items: iterable of dicts, each holding a numeric ``'score'`` key.

    Returns:
        A new list of shallow copies of the entries, best match first, with
        ``'score'`` replaced by an integer in the range 0..100. The input
        dicts are left untouched.

    Raises:
        KeyError / TypeError: if an entry has no usable ``'score'``.
    """
    ranked = sorted(items, key=lambda item: item['score'], reverse=True)

    results = []
    previous_score = 101  # One above the cap so the best entry can reach 100
    for item in ranked:
        score = int(round(item['score'] * 100))

        if score >= previous_score:
            # Reduce down to previous score less 1 if collision or higher
            score = previous_score - 1

        # Very long result lists or negative similarities would otherwise
        # produce negative scores; floor them at 0 (ties at 0 are accepted).
        score = max(score, 0)

        scored = dict(item)  # Copy so the caller's objects are not mutated
        scored['score'] = score
        results.append(scored)
        previous_score = score

    return results


def get_results_from_vector_search(orig_title):
    """Query the configured vector search API for ``orig_title``.

    Args:
        orig_title: title to look up (byte string or unicode).

    Returns:
        - a list of result dicts with integer scores, best match first, or
        - the decoded payload itself when the API answers with a dict that
          contains an ``"error"`` key, so the caller can report it, or
        - ``None`` when the title is empty, the request or decoding fails,
          or the payload has an unexpected shape.
    """
    if not orig_title:
        # Nothing to search for; signal "no result" so the caller falls back
        return None

    try:
        # urllib.quote() raises KeyError for non-ASCII unicode objects in
        # Python 2, so always hand it a UTF-8 byte string (bytes is str there).
        if not isinstance(orig_title, bytes):
            orig_title = orig_title.encode('utf-8')
        api_url = "%s?name=%s" % (Prefs["vector_search_api"], urllib.quote(orig_title))

        response = HTTP.Request(api_url, cacheTime=CACHE_1DAY).content
        payload = json.loads(response)

        if isinstance(payload, dict):
            if "error" in payload:
                # Pass error payloads through untouched; scoring them would
                # raise and the error message would be lost.
                return payload
            Log.Debug("Got unexpected response from vector search API: " + str(payload))
            return None

        return compute_integer_scores_for_vector_search(payload)
    except Exception as e:
        # Best effort: any failure means "no result" and the normal search runs
        Log.Debug("Got issue querying vector search API: " + str(e))
        return None
methods and use vector search if enabled + if Prefs["vector_search_enabled"] and Prefs["vector_search_api"] is not None: + Log.Debug("Searching for '%s' using vector search API" % (orig_title)) + + response_content = AniDBVectorSearch.get_results_from_vector_search(orig_title) + if response_content is None: + Log.Debug("Got no result from vector search API.") + elif "error" in response_content: + Log.Debug("Got error result from vector search API: %s" % (response_content["error"])) + else: + for response_entry in response_content: + name = "%s [%s]" % (response_entry["name"], response_entry["id"]) + Log.Debug("Got result from vector search API: %s" % name) + results.Append(MetadataSearchResult(id=response_entry["id"], + name=name, + year=media.year, + lang=lang, + score=response_entry["score"])) + Log.Close() + return + + # Continue with normal search as we failed to use vector search (or it's not enabled) ### Check if a guid is specified "Show name [anidb-id]" ### Log.Info('--- force id ---'.ljust(157, '-')) if orig_title and orig_title.isdigit(): orig_title = "xxx [tvdb-{}]".format(orig_title) #Support tvdbid as title, allow to support Xattr from FileBot with tvdbid filled in diff --git a/Contents/DefaultPrefs.json b/Contents/DefaultPrefs.json index 9e8e3cd6..373b00f1 100644 --- a/Contents/DefaultPrefs.json +++ b/Contents/DefaultPrefs.json @@ -1,47 +1,283 @@ [ - { "id": "reset_to_defaults", "label": "Reset to Defaults", "type": "bool", "default": "false" }, - { "id": "SerieLanguagePriority", "label": "Serie Language Priority", "type": "text", "default": "main, en, ja" }, - { "id": "EpisodeLanguagePriority", "label": "Episode Language Priority", "type": "text", "default": "main, en, ja" }, - { "id": "PosterLanguagePriority", "label": "TheTVDB Poster Language Priority","type": "text", "default": "en" }, - { "id": "season_poster_failover", "label": "Season Poster failover", "type": "enum", "default": "None", "values" : ["None","series","series different poster"] }, 
- { "id": "load_all_poster_sources", "label": "Load all poster metadata sources","type": "bool", "default": "false" }, - { "id": "AnidbGenresAddWeights", "label": "AniDB include weighted genres", "type": "bool", "default": "false" }, - { "id": "MinimumWeight", "label": "AniDB genre minimum weight", "type": "enum", "default": "400", "values" : ["600","500","400","300","200","100","0"] }, - { "id": "adult", "label": "Include adult content", "type": "bool", "default": "false" }, - { "id": "OMDbApiKey", "label": "OMDb Api Key", "type": "text", "default": "None", "option": "hidden", "secure": "true" }, - { "id": "title", "label": "T-EM 'title'", "type": "text", "default": "AniDB, TheTVDB | TheTVDB, AniDB" }, - { "id": "title_sort", "label": "T-EM 'title_sort'", "type": "text", "default": "AniDB,TheTVDB" }, - { "id": "originally_available_at", "label": "T-EM 'originally_available_at'", "type": "text", "default": "AniDB,TheTVDB" }, - { "id": "content_rating", "label": "T-EM 'content_rating'", "type": "text", "default": "AniDB,TheTVDB" }, - { "id": "content_rating_age", "label": "--E- 'content_rating_age'", "type": "text", "default": "None" }, - { "id": "original_title", "label": "T-EM 'original_title'", "type": "text", "default": "AniDB,TheTVDB" }, - { "id": "studio", "label": "T--M 'studio'", "type": "text", "default": "AnimeLists, AniDB, TheMovieDb" }, - { "id": "tagline", "label": "T--M 'tagline'", "type": "text", "default": "TheMovieDb" }, - { "id": "summary", "label": "TSEM 'summary'", "type": "text", "default": "TheTVDB, AniDB" }, - { "id": "directors", "label": "--EM 'directors'", "type": "text", "default": "AniDB,TheTVDB" }, - { "id": "producers", "label": "--EM 'producers'", "type": "text", "default": "AniDB,TheTVDB" }, - { "id": "countries", "label": "T--M 'countries", "type": "text", "default": "TheMovieDb" }, - { "id": "genres", "label": "T--M 'genres'", "type": "text", "default": "TheTVDB, AniDB, MyAnimeList, TheMovieDb, OMDb" }, - { "id": "tags", "label": 
"T--M 'tags'", "type": "text", "default": "MyAnimeList" }, - { "id": "writers", "label": "T-EM 'writers'", "type": "text", "default": "AniDB,TheTVDB" }, - { "id": "collections", "label": "T--M 'collections'", "type": "text", "default": "Local, AniDB, TheMovieDb, AnimeLists" }, - { "id": "duration", "label": "T--M 'duration'", "type": "text", "default": "TheTVDB, AniDB, MyAnimeList, TheMovieDb, OMDb" }, - { "id": "roles", "label": "T--M 'roles'", "type": "text", "default": "AniDB, TheTVDB" }, - { "id": "year", "label": "---M 'year'", "type": "text", "default": "AniDB" }, - { "id": "trivia", "label": "---M 'trivia'", "type": "text", "default": "None" }, - { "id": "quotes", "label": "---M 'quotes'", "type": "text", "default": "None" }, - { "id": "themes", "label": "T--M 'themes'", "type": "text", "default": "TVTunes, Plex" }, - { "id": "rating", "label": "T-EM 'rating'", "type": "text", "default": "AniDB, TheTVDB, MyAnimeList, TheMovieDb, OMDb | TheTVDB, AniDB" }, - { "id": "rating_image", "label": "T--M 'rating_image'", "type": "text", "default": "None" }, - { "id": "audience_rating", "label": "T--M 'audience_rating'", "type": "text", "default": "None" }, - { "id": "audience_rating_image", "label": "T--M 'audience_rating_image'", "type": "text", "default": "None" }, - { "id": "guest_stars", "label": "T--M 'guest_stars'", "type": "text", "default": "TheTVDB" }, - { "id": "posters", "label": "TS-M 'poster'", "type": "text", "default": "tvdb4, TheMovieDb, TheTVDB, AniList, FanartTV, AniDB" }, - { "id": "art", "label": "T--M 'art'", "type": "text", "default": "TheTVDB, TheMovieDb, FanartTV" }, - { "id": "banners", "label": "TS-- 'banners'", "type": "text", "default": "TheTVDB, AniList" }, - { "id": "thumbs", "label": "--E- 'thumbs'", "type": "text", "default": "TheTVDB" }, - { "id": "reviews", "label": "T--M 'reviews'", "type": "text", "default": "None" }, - { "id": "extras", "label": "T--M 'extras'", "type": "text", "default": "None" }, - { "id": "rating_count", 
"label": "T--M 'rating_count'", "type": "text", "default": "None" }, - { "id": "absolute_index", "label": "--E- 'absolute_index'", "type": "text", "default": "TheTVDB" } + { + "id": "reset_to_defaults", + "label": "Reset to Defaults", + "type": "bool", + "default": "false" + }, + { + "id": "SerieLanguagePriority", + "label": "Serie Language Priority", + "type": "text", + "default": "main, en, ja" + }, + { + "id": "EpisodeLanguagePriority", + "label": "Episode Language Priority", + "type": "text", + "default": "main, en, ja" + }, + { + "id": "PosterLanguagePriority", + "label": "TheTVDB Poster Language Priority", + "type": "text", + "default": "en" + }, + { + "id": "season_poster_failover", + "label": "Season Poster failover", + "type": "enum", + "default": "None", + "values": ["None", "series", "series different poster"] + }, + { + "id": "load_all_poster_sources", + "label": "Load all poster metadata sources", + "type": "bool", + "default": "false" + }, + { + "id": "AnidbGenresAddWeights", + "label": "AniDB include weighted genres", + "type": "bool", + "default": "false" + }, + { + "id": "MinimumWeight", + "label": "AniDB genre minimum weight", + "type": "enum", + "default": "400", + "values": ["600", "500", "400", "300", "200", "100", "0"] + }, + { + "id": "adult", + "label": "Include adult content", + "type": "bool", + "default": "false" + }, + { + "id": "OMDbApiKey", + "label": "OMDb Api Key", + "type": "text", + "default": "None", + "option": "hidden", + "secure": "true" + }, + { + "id": "title", + "label": "T-EM 'title'", + "type": "text", + "default": "AniDB, TheTVDB | TheTVDB, AniDB" + }, + { + "id": "title_sort", + "label": "T-EM 'title_sort'", + "type": "text", + "default": "AniDB,TheTVDB" + }, + { + "id": "originally_available_at", + "label": "T-EM 'originally_available_at'", + "type": "text", + "default": "AniDB,TheTVDB" + }, + { + "id": "content_rating", + "label": "T-EM 'content_rating'", + "type": "text", + "default": "AniDB,TheTVDB" + }, + { + "id": 
"content_rating_age", + "label": "--E- 'content_rating_age'", + "type": "text", + "default": "None" + }, + { + "id": "original_title", + "label": "T-EM 'original_title'", + "type": "text", + "default": "AniDB,TheTVDB" + }, + { + "id": "studio", + "label": "T--M 'studio'", + "type": "text", + "default": "AnimeLists, AniDB, TheMovieDb" + }, + { + "id": "tagline", + "label": "T--M 'tagline'", + "type": "text", + "default": "TheMovieDb" + }, + { + "id": "summary", + "label": "TSEM 'summary'", + "type": "text", + "default": "TheTVDB, AniDB" + }, + { + "id": "directors", + "label": "--EM 'directors'", + "type": "text", + "default": "AniDB,TheTVDB" + }, + { + "id": "producers", + "label": "--EM 'producers'", + "type": "text", + "default": "AniDB,TheTVDB" + }, + { + "id": "countries", + "label": "T--M 'countries", + "type": "text", + "default": "TheMovieDb" + }, + { + "id": "genres", + "label": "T--M 'genres'", + "type": "text", + "default": "TheTVDB, AniDB, MyAnimeList, TheMovieDb, OMDb" + }, + { + "id": "tags", + "label": "T--M 'tags'", + "type": "text", + "default": "MyAnimeList" + }, + { + "id": "writers", + "label": "T-EM 'writers'", + "type": "text", + "default": "AniDB,TheTVDB" + }, + { + "id": "collections", + "label": "T--M 'collections'", + "type": "text", + "default": "Local, AniDB, TheMovieDb, AnimeLists" + }, + { + "id": "duration", + "label": "T--M 'duration'", + "type": "text", + "default": "TheTVDB, AniDB, MyAnimeList, TheMovieDb, OMDb" + }, + { + "id": "roles", + "label": "T--M 'roles'", + "type": "text", + "default": "AniDB, TheTVDB" + }, + { "id": "year", "label": "---M 'year'", "type": "text", "default": "AniDB" }, + { + "id": "trivia", + "label": "---M 'trivia'", + "type": "text", + "default": "None" + }, + { + "id": "quotes", + "label": "---M 'quotes'", + "type": "text", + "default": "None" + }, + { + "id": "themes", + "label": "T--M 'themes'", + "type": "text", + "default": "TVTunes, Plex" + }, + { + "id": "rating", + "label": "T-EM 'rating'", + 
"type": "text", + "default": "AniDB, TheTVDB, MyAnimeList, TheMovieDb, OMDb | TheTVDB, AniDB" + }, + { + "id": "rating_image", + "label": "T--M 'rating_image'", + "type": "text", + "default": "None" + }, + { + "id": "audience_rating", + "label": "T--M 'audience_rating'", + "type": "text", + "default": "None" + }, + { + "id": "audience_rating_image", + "label": "T--M 'audience_rating_image'", + "type": "text", + "default": "None" + }, + { + "id": "guest_stars", + "label": "T--M 'guest_stars'", + "type": "text", + "default": "TheTVDB" + }, + { + "id": "posters", + "label": "TS-M 'poster'", + "type": "text", + "default": "tvdb4, TheMovieDb, TheTVDB, AniList, FanartTV, AniDB" + }, + { + "id": "art", + "label": "T--M 'art'", + "type": "text", + "default": "TheTVDB, TheMovieDb, FanartTV" + }, + { + "id": "banners", + "label": "TS-- 'banners'", + "type": "text", + "default": "TheTVDB, AniList" + }, + { + "id": "thumbs", + "label": "--E- 'thumbs'", + "type": "text", + "default": "TheTVDB" + }, + { + "id": "reviews", + "label": "T--M 'reviews'", + "type": "text", + "default": "None" + }, + { + "id": "extras", + "label": "T--M 'extras'", + "type": "text", + "default": "None" + }, + { + "id": "rating_count", + "label": "T--M 'rating_count'", + "type": "text", + "default": "None" + }, + { + "id": "absolute_index", + "label": "--E- 'absolute_index'", + "type": "text", + "default": "TheTVDB" + }, + { + "id": "vector_search_enabled", + "label": "Use unofficial AniDB vector search API to match anime series/movies and circumvent other searching methods.", + "type": "bool", + "default": "false" + }, + { + "id": "vector_search_api", + "label": "URL of the AniDB vector search endpoint API. Visit https://github.com/khell/anidb-semantic-search-api to optionally host it yourself.\n\nIf you choose to use the default hosted API here, no warranties or guarantees are implied of any kind, and all queries will be transiently logged on the API server (until the next server reboot). 
It is strongly recommended to host the API yourself.", + "type": "text", + "default": "https://anidb.khell.net/api/anidb/id" + } ] diff --git a/README.md b/README.md index b7cfddda..dc255dc9 100644 --- a/README.md +++ b/README.md @@ -119,6 +119,99 @@ You can use anidb.id file in series or Series/Extras folder or in the serie name Agents' update() method is called only when adding new items to your library or when doing a "Force Refresh" or a "Fix Incorrect Match". +AniDB Vector Search +============= + +You can use Khell's [AniDB vector search](https://github.com/khell/anidb-semantic-search-api) as an alternative to standard matching. To opt in, scroll down to the bottom of the agent configuration to find the settings to enable AniDB vector search. This will send your names from the scanner to the configured AniDB vector search endpoint to match them against an AniDB id. Using this means that you do not need to follow any particular naming rules for your folders, as it will use a machine-learning semantic search model to match your series name to the closest AniDB match. For example, I like to name my directories with both English and Japanese names, like so: + +``` +Sanzoku no Musume Ronja「山賊の娘ローニャ」 +``` + +Previously, this sort of naming would require the suffixing of a source id to the end of the folder name, such as: + +``` +Sanzoku no Musume Ronja「山賊の娘ローニャ」[anidb-10421] +``` + +Without this suffix, the automatic matching tends to break and requires a manual Fix Match unless you follow standard naming conventions. 
Using this vector search, such issues are no longer a problem, as the API will return to the agent (again for the given example): + +``` +[ + { + "id": "anidb-10421", + "language": "ja", + "name": "山賊の娘ローニャ", + "score": 0.8278497457504272 + }, + { + "id": "anidb-10421", + "language": "x-jat", + "name": "Sanzoku no Musume Ronja", + "score": 0.7398622035980225 + }, + { + "id": "anidb-5467", + "language": "ja", + "name": "山姫の実", + "score": 0.7317830324172974 + }, + { + "id": "anidb-2309", + "language": "ja", + "name": "山ねずみ ロッキーチャック", + "score": 0.7189916968345642 + }, + { + "id": "anidb-16528", + "language": "ja", + "name": "狂気山脈 ネイキッド・ピーク", + "score": 0.7166963815689087 + } +] +``` + +The ML model currently running has additionally been trained on a large corpus of English data, which means (among other things) even partial matches or related words can work. For example, the query `Raeliana noble` will return: + +``` +[ + { + "id": "anidb-17498", + "language": "en", + "name": "Why Raeliana Ended Up at the Duke`s Mansion", + "score": 0.5921987891197205 + }, + { + "id": "anidb-17498", + "language": "en", + "name": "The Reason Why Raeliana Ended Up at the Duke`s Mansion", + "score": 0.5916491746902466 + }, + { + "id": "anidb-17498", + "language": "de", + "name": "Raeliana – Warum sie die Verlobte des Dukes wurde", + "score": 0.565862238407135 + }, + { + "id": "anidb-6384", + "language": "ja", + "name": "Rispara", + "score": 0.5636906623840332 + }, + { + "id": "anidb-10421", + "language": "fi", + "name": "Ronja ryövärintytär", + "score": 0.5635910034179688 + } +] +``` + +This works because a `duke` is a type of `noble`. + +Please note that if you choose to use the default hosted API by Khell, no warranties or guarantees are implied of any kind, and your queries will be transiently logged on the API server (until the next server reboot). It is strongly recommended you host the API yourself. + Configuration =============