diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 9f49b06..5ec2950 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -43,17 +43,17 @@ jobs: - name: Test changes with full tests 1/3 run: | bash run_tests.sh full --test-group-count 3 --test-group=1 --reruns 3 --reruns-delay 15 - if: github.event.pull_request.base.ref == 'main' + if: ${{ always() && github.event.pull_request.base.ref == 'main'}} continue-on-error: true - name: Test changes with full tests 2/3 run: | bash run_tests.sh full --test-group-count 3 --test-group=2 --reruns 3 --reruns-delay 15 - if: github.event.pull_request.base.ref == 'main' + if: ${{ always() && github.event.pull_request.base.ref == 'main'}} continue-on-error: true - name: Test changes with full tests 3/3 run: | bash run_tests.sh full --test-group-count 3 --test-group=3 --reruns 3 --reruns-delay 15 - if: github.event.pull_request.base.ref == 'main' + if: ${{ always() && github.event.pull_request.base.ref == 'main'}} continue-on-error: true - name: Upload test results uses: actions/upload-artifact@v4 diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst index 16b77d7..239a80e 100644 --- a/docs/source/changelog.rst +++ b/docs/source/changelog.rst @@ -8,6 +8,24 @@ The page attempt to keep a clear list of breaking/non-breaking changes and new f :local: :backlinks: none +v0.15.0 +---------- +Bug Fixes +########### +* Missing Rank information in page profiles no longer causes an exception during parsing +* Already following a page no longer causes an exception during parsing +* + +New Features +############## +* Page name and url parsing is now more reliable +* More pages now have the `url` attribute +* Added `get_tags` to `Article` +* Added `get_tags` to `Job` +* New enum `PlatformCategory` +* New `Client` methods `Client.upload_addon`, `Client.get_mutable_addon`, `Client.edit_addon` + + v0.14.0 ----------- Bug Fixes diff --git 
a/docs/source/mutables.rst b/docs/source/mutables.rst new file mode 100644 index 0000000..5e70744 --- /dev/null +++ b/docs/source/mutables.rst @@ -0,0 +1,26 @@ +.. currentmodule:: moddb.mutables + +Mutables +============ +Mutables are versions of Moddb models which you can change and then send back to +edit an entity on the website. + +.. contents:: Table of Contents + :local: + :backlinks: none + + +MutableAddon +------------ +.. autoclass:: moddb.mutables.MutableAddon + :members: + :inherited-members: + + +MutableFile +----------- +.. autoclass:: moddb.mutables.MutableFile + :members: + :inherited-members: + + \ No newline at end of file diff --git a/moddb/__init__.py b/moddb/__init__.py index 2a925c4..5d59b6c 100644 --- a/moddb/__init__.py +++ b/moddb/__init__.py @@ -4,6 +4,7 @@ from .base import front_page, login, logout, parse_page, parse_results, rss, search, search_tags from .client import Client, TwoFactorAuthClient, Thread from .enums import * +from .mutables import MutableAddon, MutableFile from .pages import * from .utils import BASE_URL, LOGGER, Object, get_page, request, soup @@ -31,4 +32,6 @@ "get_page", "request", "soup", + "MutableAddon", + "MutableFile", ] diff --git a/moddb/boxes.py b/moddb/boxes.py index 8b49c40..293d944 100644 --- a/moddb/boxes.py +++ b/moddb/boxes.py @@ -2,6 +2,7 @@ import collections import datetime +import json import logging import re import sys @@ -118,8 +119,14 @@ def __init__(self, html: BeautifulSoup): self.visits, self.today = get_views(visits) rank = normalize(html.find("h5", string="Rank").parent.a.string).split("of") - self.rank = int(rank[0].replace(",", "")) - self.total = int(rank[1].replace(",", "")) + + try: + self.rank = int(rank[0].replace(",", "")) + self.total = int(rank[1].replace(",", "")) + except ValueError: + LOGGER.info("No rank detected") + self.rank = 0 + self.total = 0 try: self.updated = get_date(html.find("time", itemprop="dateModified")["datetime"]) @@ -208,8 +215,7 @@ def __init__(self, html: BeautifulSoup): 
"div", class_="table tablemenu" ) self.contact = join(html.find("h5", string="Contact").parent.span.a["href"]) - - self.follow = join(html.find("a", title="Follow")["href"]) + self.follow = join(html.find("a", title=("Follow", "Unfollow"))["href"]) try: share = profile_raw.find("h5", string="Share").parent.span.find_all("a") @@ -792,6 +798,8 @@ class MemberProfile: ----------- name : str Name of the member + url : str + Link to the member level : int Current level progress : float @@ -815,12 +823,14 @@ class MemberProfile: """ def __init__(self, html: BeautifulSoup): + breadcrumbs = json.loads(html.find("script", type="application/ld+json").string) + self.name = breadcrumbs["itemListElement"][-1]["Item"]["name"] + self.url = breadcrumbs["itemListElement"][-1]["Item"]["@id"] + profile_raw = html.find("span", string="Profile").parent.parent.parent.find( "div", class_="table tablemenu" ) level_raw = profile_raw.find("h5", string="Level").parent.span.div - self.name = html.find("meta", property="og:title")["content"] - self.level = int(level_raw.find("span", class_="level").string) self.progress = float( "0." 
+ level_raw.find("span", class_="info").strong.string.replace("%", "") @@ -863,7 +873,7 @@ def __init__(self, html: BeautifulSoup): ) try: - self.follow = join(html.find("a", title="Follow")["href"]) + self.follow = join(html.find("a", title=("Follow", "Unfollow"))["href"]) except TypeError: LOGGER.info( "Can't watch yourself, narcissist...", exc_info=LOGGER.level >= logging.DEBUG diff --git a/moddb/client.py b/moddb/client.py index f29efb4..b608de1 100644 --- a/moddb/client.py +++ b/moddb/client.py @@ -14,13 +14,16 @@ from .boxes import ResultList, Thumbnail, _parse_results from .enums import Status, ThumbnailType from .errors import ModdbException -from .pages import Member +from .mutables import MutableAddon +from .pages import Member, Addon from .utils import ( BASE_URL, COMMENT_LIMITER, GLOBAL_LIMITER, GLOBAL_THROTLE, LOGGER, + HasUrl, + Object, concat_docs, create_login_payload, generate_hash, @@ -329,7 +332,12 @@ def _request(self, method, url, **kwargs): } req = requests.Request( - method, url, headers=headers, cookies=cookies, data=kwargs.pop("data", {}) + method, + url, + headers=headers, + cookies=cookies, + data=kwargs.pop("data", {}), + files=kwargs.pop("files", {}), ) prepped = self._session.prepare_request(req) LOGGER.info("Request: %s", prepped.url) @@ -1179,6 +1187,152 @@ def downvote_tag(self, tag: Tag) -> bool: """ return self._vote_tag(tag, 1) + def upload_addon(self, addon: MutableAddon) -> Addon: + """Upload a new addon + + Parameters + ------------ + addon: MutableAddon + The addon to upload + + Returns + -------- + Addon + The uploaded addon + """ + upload_url = join("/addons/add") + html = soup(self._request("GET", upload_url).text) + + formhash = html.find("input", {"name": "formhash"})["value"] + file_name = self._upload_file(formhash, addon) + + logo_file = {"logo": (addon.thumbnail.filename, addon.thumbnail.fp)} + + data = { + "formhash": formhash, + "legacy": 0, + "platformstemp": 1, + "filedataUp": file_name, + "category": 
addon.category.value, + "licence": addon.licence.value, + "credit": addon.credits if addon.credits is not None else "", + "tags": ",".join(addon.tags), + "name": addon.name, + "summary": addon.summary, + "description": addon.description if addon.description is not None else "", + "links[]": [platform.value for platform in addon.platforms] + + [f"{parent.name}|{parent.entity_type}s{parent.id}" for parent in addon.links], + "downloads": "Please wait uploading file", + } + + resp = self._request("POST", upload_url, data=data, files=logo_file) + self._validate_post_response(resp.text) + addon.thumbnail.fp.close() + + return Addon(soup(resp.text)) + + def get_mutable_addon(self, addon: Union[Addon, Object[HasUrl]]) -> MutableAddon: + """Get the mutable version of an addon for editing purpose. + + Parameters + ----------- + addon: Union[Addon,Object[HasUrl]] + The addon or an object with an url attribute to retrieve + + Returns + -------- + MutableAddon + The mutable addon retrieved + """ + edit_url = f"{addon.url}/edit" + html = soup(self._request("GET", edit_url).text) + + if not html.find("input", {"name": "formhash"}): + raise ModdbException("You do not have permission to edit the requested addon") + + return MutableAddon._from_html(html) + + def edit_addon(self, addon: MutableAddon): + """Edit an existing addon. 
The MutableAddon passed to this + function should be retrieved through `Client.get_mutable_addon` + + Parameters + ----------- + addon: MutableAddon + The mutable addon to edit + """ + logo_file = {} + data = { + "formhash": addon._form_hash, + "legacy": 0, + "platformstemp": 1, + "category": addon.category.value, + "licence": addon.licence.value, + "credit": addon.credits if addon.credits is not None else "", + "tags": ",".join(addon.tags), + "name": addon.name, + "nameid": addon.name_id, + "summary": addon.summary, + "description": addon.description if addon.description is not None else "", + "links[]": [platform.value for platform in addon.platforms] + + [f"{parent.name}|{parent.entity_type}s{parent.id}" for parent in addon.links], + "downloads": "Please wait uploading file", + } + + if addon.file_file is not None or addon.file_url is not None: + file_name = self._upload_file(addon._form_hash, addon) + data["filedataUp"] = file_name + + if addon.thumbnail is not None: + logo_file["logo"] = (addon.thumbnail.filename, addon.thumbnail.fp) + + resp = self._request("POST", addon.url, data=data, files=logo_file) + self._validate_post_response(resp.text) + + if addon.thumbnail is not None: + addon.thumbnail.fp.close() + + def _upload_file(self, hash: str, addon: MutableAddon): + url = f"https://upload.moddb.com/downloads/ajax/upload/{hash}" + resp = None + + if addon.file_file is not None: + resp = self._request( + "POST", + url, + data={"filename": addon.file_file.filename}, + files={"filedata": addon.file_file.fp}, + ) + addon.file_file.fp.close() + + if addon.file_url is not None: + resp = self._request("POST", url, json={"wget": "t", "filedataWget": addon.file_url}) + + if resp is not None: + error = resp.json()["error"] + if error: + raise ModdbException( + f"An error occurred while trying to upload the add-on: {error}" + ) + + return resp.json()["text"] + + def _validate_post_response(self, html_str: str): + soup_obj = soup(html_str) + if soup_obj.find("a", 
id="downloadmirrorstoggle"): + return # Upload successful + + # We are still on the upload form + error_tooltip = soup_obj.find("div", class_="tooltip errortooltip clear") + if error_tooltip: + if error_tooltip.ul: + error_list = error_tooltip.ul.find_all("li", recursive=False) + errors = "\n".join([f"- {error.text}" for error in error_list]) + else: + # p-tag contains a space at the beginning and a new line at the end + errors = f"- {error_tooltip.p.text.strip()}" + raise ModdbException(f"Please correct the following: \n{errors}") + class TwoFactorAuthClient(Client): """A subclass of client to be used when facing 2FA requirements.""" diff --git a/moddb/enums.py b/moddb/enums.py index 4f59f44..da3863a 100644 --- a/moddb/enums.py +++ b/moddb/enums.py @@ -170,6 +170,44 @@ class Month(enum.Enum): december = "12" +class PlatformCategory(enum.Enum): + """The category of the platform""" + + windows = "Windows|platforms1" + mac = "Mac|platforms8" + linux = "Linux|platforms7" + vr = "VR|platforms35" + ar = "AR|platforms36" + web = "Web|platforms24" + rtx = "RTX|platforms40" + flash = "Flash|platforms23" + dos = "DOS|platforms19" + steamdeck = "SteamDeck|platforms41" + ios = "iOS|platforms20" + android = "Android|platforms22" + metro = "Metro|platforms25" + xsx = "XSX|platforms39" + xone = "XONE|platforms34" + x360 = "X360|platforms2" + xbox = "XBOX|platforms18" + ps5 = "PS5|platforms38" + ps4 = "PS4|platforms32" + ps3 = "PS3|platforms4" + ps2 = "PS2|platforms17" + ps1 = "PS1|platforms16" + vita = "VITA|platforms28" + psp = "PSP|platforms5" + switch = "Switch|platforms37" + wiiu = "WiiU|platforms31" + wii = "Wii|platforms3" + gcn = "GCN|platforms15" + n64 = "N64|platforms14" + snes = "SNES|platforms13" + nes = "NES|platforms12" + ds = "DS|platforms6" + gba = "GBA|platforms11" + + # BELOW THIS LINE ENUMS ARE GENERATED AUTOMATICALLY # PR changes to scripts/generate_enums.py if you want to # change something diff --git a/moddb/errors.py b/moddb/errors.py index 5682492..bb39f2c 
100644 --- a/moddb/errors.py +++ b/moddb/errors.py @@ -28,3 +28,9 @@ class AuthError(ModdbException): """ pass + + +class ValidationError(ModdbException): + """A client side validation has failed.""" + + pass diff --git a/moddb/mutables.py b/moddb/mutables.py new file mode 100644 index 0000000..42e4138 --- /dev/null +++ b/moddb/mutables.py @@ -0,0 +1,465 @@ +import io +import json +import os +import re +from typing import Any, Optional, Union + +import bs4 +from typing_extensions import Self + +from .enums import AddonCategory, Licence, PlatformCategory +from .errors import ValidationError +from .pages import Game, Group, Mod +from .utils import NamedEntity, Object + +file_extensions = ( + ".psd", + ".avi", + ".asf", + ".mpg", + ".mpeg", + ".mov", + ".mp4", + ".wmv", + ".iso", + ".divx", + ".webm", + ".f4v", + ".flv", + ".m4v", + ".mp3", + ".wma", + ".mid", + ".midi", + ".pk4", + ".wav", + ".doc", + ".exe", + ".gz", + ".dmg", + ".bin", + ".mpkg", + ".pkg", + ".deb", + ".jar", + ".ace", + ".msi", + ".bz2", + ".flmod", + ".torrent", + ".jam", + ".run", + ".bin", + ".pdf", + ".eps", + ".ai", + ".xls", + ".ppt", + ".rpm", + ".tgz", + ".tar", + ".bz2", + ".xml", + ".conf", + ".zip", + ".rar", + ".apk", + ".7z", + ".love", + ".odt", + ".ods", + ".odp", + ".odg", + ".kfm", + ".hqx", +) + +thumbnail_extensions = (".gif", ".jpg", ".jpeg", ".png") + + +class MutableFile: + """An object used for passing files to other functions of the library. This + file can only be used once, after that it will need to be recreated. + + Parameters + ----------- + fp: Union[os.PathLike, io.BufferedIOBase] + A file-like object opened in binary mode and read mode + or a fiepath representing a file in the hard drive to + open. 
+ filename: Optional[str] + Optional filename, must pass if you are passing a file-like + """ + + def __init__( + self, + fp: Union[str, bytes, os.PathLike[Any], io.BufferedIOBase], + filename: Optional[str] = None, + ): + if isinstance(fp, io.IOBase): + if not (fp.seekable() and fp.readable()): + raise ValidationError(f"File buffer {fp!r} must be seekable and readable") + self.fp: io.BufferedIOBase = fp + else: + self.fp = open(fp, "rb") + + self.filename = filename + if filename is None: + if isinstance(fp, str): + _, self.filename = os.path.split(fp) + else: + self.filename = getattr(fp, "name", None) + if self.filename is None: + raise ValidationError("No filename found for the passed file") + + +class MutableAddon: + """This represents an addon that can be edited. You should + use the various methods to set up the addon to look as you + wish. + """ + + def __init__(self, **kwargs): + self.name = kwargs.get("name") + self.summary = kwargs.get("summary") + self.description = kwargs.get("description") + + self.tags = kwargs.get("tags", []) + + self.thumbnail = None + + self.file_file = None + self.file_url = None + + self.licence = kwargs.get("licence", Licence.proprietary) + self.credits = kwargs.get("credits") + self.platforms = kwargs.get("platforms", []) + self.links = kwargs.get("links", []) + self.category = kwargs.get("category") + + # these are attributes only available when we edit + self.name_id = kwargs.get("name_id") + self._form_hash = kwargs.get("form_hash") + self.url = kwargs.get("url") + + def __repr__(self): + return f"< MutableAddon name={self.name} >" + + @classmethod + def _from_html(cls, html: bs4.BeautifulSoup): + category = AddonCategory( + int( + html.find("select", id="downloadscategory").find_all( + "option", {"selected": "selected"} + )[0]["value"] + ) + ) + name = html.find("input", id="downloadsname")["value"] + summary = html.find("textarea", id="downloadssummary").text + description = html.find("textarea", id="downloadsdescription").text 
+ licence = Licence( + int( + html.find("select", id="downloadslicence").find_all( + "option", {"selected": "selected"} + )[0]["value"] + ) + ) + credits = html.find("input", id="downloadscredit")["value"] + name_id = html.find("input", id="downloadsnameid")["value"] + tags = html.find("input", id="downloadstags")["value"].split(",") + formhash = html.find("input", {"name": "formhash"})["value"] + + breadcrumbs = json.loads(html.find("script", type="application/ld+json").string) + url = breadcrumbs["itemListElement"][-1]["Item"]["@id"] + + platforms = [ + PlatformCategory(platform["value"]) + for platform in html.find(id="downloadsplatforms").find_all( + "option", selected="selected" + ) + ] + links = [] + for link in html.find("select", {"name": "links[]", "class": "right select"}).find_all( + "option", selected="selected" + ): + link_name, link_type, link_id = re.match( + r"(.+)\|([a-z]+)([0-9]+)$", link["value"] + ).groups() + links.append(Object(name=link_name, id=link_id, entity_type=link_type.removesuffix("s"))) + + return cls( + category=category, + name=name, + summary=summary, + description=description, + platforms=platforms, + licence=licence, + credits=credits, + tags=tags, + name_id=name_id, + form_hash=formhash, + url=url, + links=links, + ) + + def set_name(self, value: str) -> Self: + """Set the name of the addon, must be between 1 and 80 characters long. + + Parameters + ----------- + value: str + The value to set as the name of the addon + + Returns + -------- + MutableAddon + Returns itself for easy chaining + """ + name = str(value) + if not 0 < len(name) <= 80: + raise ValidationError("Addon name must be between 1 and 80 characters long") + + self.name = name + + return self + + def set_summary(self, value: str) -> Self: + """Set the summary of the addon, must be between 50 and 1,000 characters. 
+ + Parameters + ---------- + value: str + The value to set as the summary of the addon + + Returns + ------- + MutableAddon + Returns itself for easy chaining + """ + summary = str(value) + if not 50 <= len(summary) <= 1000: + raise ValidationError("Addon summary must be between 50 and 1,000 characters") + + self.summary = summary + + return self + + def set_description(self, value: Union[str, None]) -> Self: + """Set the description of the addon, must be at least 100 characters long. + + This is an optional field. + + Parameters + ---------- + value: Union[str,None] + The value to set as the description of the addon, or none to clear existing description + + Returns + ------- + MutableAddon + Returns itself for easy chaining + """ + description = None + if value is not None: + description = str(value) + if 100 > len(description): + raise ValidationError("Addon description must be more than 100 characters") + + self.description = description + + return self + + def set_tags(self, values: Union[list[str], None]) -> Self: + """Set the tags of the addon, sum of all tag lengths cannot be more than 400 + + This is an optional field. + + Parameters + ----------- + values: Union[list[str],None] + The values to set as the tags of the addon, or none to clear existing tags + + Returns + ------- + MutableAddon + Returns itself for easy chaining + """ + tags = [] + if values is not None: + if len("".join([x.strip() for x in values])) > 400: + raise ValidationError("Sum of all tag lengths must be less than 400") + + tags = values + + self.tags = tags + + return self + + def set_thumbnail(self, file: MutableFile) -> Self: + """Set the thumbnail of the addon. You can pass either a file path or a file-like object. 
+ + Parameters + ----------- + file: MutableFile + A file-like of the thumbnail + + Returns + -------- + MutableAddon + Returns itself for easy chaining + """ + if file is not None: + if not file.filename.endswith(thumbnail_extensions): + raise ValidationError( + f"File extensions must be one of {', '.join(thumbnail_extensions)}" + ) + + self.thumbnail = file + + return self + + def set_file(self, *, file: MutableFile = None, url: str = None) -> Self: + """Set the file of the addon. You can pass either a file path, a url or a file-like object. + + Parameters + ----------- + file: Optional[MutableFile] + A file-like of the file + url: Optional[str] + A url to the file to transfer + + Returns + -------- + MutableAddon + Returns itself for easy chaining + """ + if sum([bool(file), bool(url)]) != 1: + raise ValidationError("Must specify exactly one of file or url") + + if file is not None: + if not file.filename.endswith(file_extensions): + raise ValidationError( + f"File extensions must be one of {', '.join(file_extensions)}" + ) + + self.file_file = file + self.file_url = url + + return self + + def set_category(self, value: AddonCategory) -> Self: + """Set the addon category for the addon. + + Parameters + ----------- + value: AddonCategory + The category to set + + Returns + -------- + MutableAddon + Returns itself for easy chaining + """ + self.category = value + + return self + + def set_licence(self, value: Licence) -> Self: + """Set the addon licence for the addon. + + This is Licence.proprietary by default + + Parameters + ----------- + value: Licence + The licence to set + + Returns + -------- + MutableAddon + Returns itself for easy chaining + """ + self.licence = value + + return self + + def set_credits(self, value: str) -> Self: + """Set the credits of those who helped you for the addon + + This is an optional field. 
+ + Parameters + ---------- + value: Optional[str] + The credits to set, or none to clear existing credits + + Returns + -------- + MutableAddon + Returns itself for easy chaining + """ + credits = None if value is None else str(value) + if credits is not None and not 0 < len(credits) <= 400: + raise ValidationError("Credits must be between 1 and 400 characters") + + self.credits = credits + + return self + + def set_platform(self, values: list[PlatformCategory]) -> Self: + """Set all the platforms this addon is for + + Parameters + ---------- + values: list[PlatformCategory] + The platforms to set + + Returns + -------- + MutableAddon + Returns itself for easy chaining + """ + self.platforms = values + + return self + + def set_links(self, values: list[Union[Group, Mod, Game, Object[NamedEntity]]]) -> Self: + """Set the entity this addon is for + + Parameters + ----------- + values: list[Group, Mod, Game] + The links to set + + Returns + -------- + MutableAddon + Returns itself for easy chaining + """ + self.links = values + + return self + + def set_name_id(self, value: str) -> Self: + """Set the name id for this addon. Update only when necessary as this will + break existing links on blogs and search engines to this content. 
+ + This method is only useful when editing an addon + + Parameters + ----------- + value: str + The name id to set + + Returns + -------- + MutableAddon + Returns itself for easy chaining + """ + name_id = str(value) + if not 0 < len(name_id) <= 80: + raise ValidationError("Addon name id must be between 1 and 80 characters long") + + self.name_id = name_id + + return self diff --git a/moddb/pages/article.py b/moddb/pages/article.py index 1a7bda2..5d16f41 100644 --- a/moddb/pages/article.py +++ b/moddb/pages/article.py @@ -1,15 +1,17 @@ +import json import logging import bs4 from ..boxes import PartialTag, Profile, Thumbnail from ..enums import ArticleCategory, Difficulty, ThumbnailType, TutorialCategory +from ..pages.mixins import GetTagsMixin from ..utils import LOGGER, concat_docs, get_date, get_views, join from .base import BaseMetaClass @concat_docs -class Article(BaseMetaClass): +class Article(BaseMetaClass, GetTagsMixin): """This object represents an news article, a tutorial or a feature. Parameters @@ -46,6 +48,8 @@ class Article(BaseMetaClass): Whether this article is a news article, a tutorial or a feature name : str The name of the article + url : str + Link to the article profile : Profile The profile object of the moddb model the article is for (engine, game, mod...). Can be none if it is not rattached to anything, such as for site news. @@ -73,11 +77,12 @@ class Article(BaseMetaClass): If the article category is tutorial, this represents how hard the tutorial is. 
""" + entity_type: str = "article" + def __init__(self, html: bs4.BeautifulSoup): - try: - self.name = html.find("span", itemprop="headline").string - except AttributeError: - self.name = html.find("span", itemprop="heading").string + breadcrumbs = json.loads(html.find("script", type="application/ld+json").string) + self.name = breadcrumbs["itemListElement"][-1]["Item"]["name"] + self.url = breadcrumbs["itemListElement"][-1]["Item"]["@id"] super().__init__(html) @@ -167,6 +172,8 @@ class Blog(BaseMetaClass): """ + entity_type: str = "blog" + def __init__(self, *, heading, text): author = heading.find("span", class_="subheading").a self.author = Thumbnail(url=author["href"], name=author.string, type=ThumbnailType.member) diff --git a/moddb/pages/base.py b/moddb/pages/base.py index e5ea180..d8568d7 100644 --- a/moddb/pages/base.py +++ b/moddb/pages/base.py @@ -1,3 +1,4 @@ +import json import logging import re from typing import List @@ -45,12 +46,13 @@ class BaseMetaClass: URL to report the page """ + entity_type: str = None + def __init__(self, html: BeautifulSoup): if not getattr(self, "name", None): - try: - self.name = html.find("a", itemprop="mainEntityOfPage").string - except AttributeError: - self.name = html.find("meta", property="og:title")["content"] + breadcrumbs = json.loads(html.find("script", type="application/ld+json").string) + self.name = breadcrumbs["itemListElement"][-1]["Item"]["name"] + self.url = breadcrumbs["itemListElement"][-1]["Item"]["@id"] for index, func in enumerate( [ @@ -75,11 +77,6 @@ def __init__(self, html: BeautifulSoup): else: raise AttributeError(f"Failed to get id from member {self.name}") - try: - self.url = html.find("meta", property="og:url")["content"] - except TypeError: - self.url = join(html.find("a", string=self.name)["href"]) - self.name_id = self.url.split("/")[-1] try: @@ -412,8 +409,6 @@ def __init__(self, html: BeautifulSoup, page_type: SearchCategory): exc_info=LOGGER.level >= logging.DEBUG, ) - self.description = 
str(html.find("div", id="profiledescription")) - try: self.description = str(html.find("div", id="profiledescription")) self.plaintext = html.find("div", id="profiledescription").text @@ -682,7 +677,7 @@ def __init__(self, html: BeautifulSoup): self.medias = self._get_media(1, html=html) try: - t = ThumbnailType[self.__class__.__name__.lower()] + t = ThumbnailType[self.entity_type] suggestions = html.find( "span", string="You may also like" ).parent.parent.parent.find_all("a", class_="image") diff --git a/moddb/pages/engine.py b/moddb/pages/engine.py index 36298e7..bb7269c 100644 --- a/moddb/pages/engine.py +++ b/moddb/pages/engine.py @@ -42,6 +42,8 @@ class Engine(PageMetaClass, GetGamesMixin): A list of games suggested on the engine main page. """ + entity_type: str = "engine" + def __init__(self, html: bs4.BeautifulSoup): super().__init__(html, SearchCategory.engines) delattr(self, "files") diff --git a/moddb/pages/entity.py b/moddb/pages/entity.py index e1a89b6..90883cd 100644 --- a/moddb/pages/entity.py +++ b/moddb/pages/entity.py @@ -1,5 +1,6 @@ from __future__ import annotations +import json import logging from typing import TYPE_CHECKING, List, Tuple @@ -55,6 +56,8 @@ class Group(PageMetaClass, GetAddonsMixin): ----------- name : str The name of the group + url : str + Link to the group private : bool Whether or not the group is private profile : Profile @@ -78,10 +81,14 @@ class Group(PageMetaClass, GetAddonsMixin): The plaintext description of the group """ + entity_type: str = "group" get_reviews = None def __init__(self, html: bs4.BeautifulSoup): - self.name = html.find("div", class_="title").h2.a.string + breadcrumbs = json.loads(html.find("script", type="application/ld+json").string) + self.name = breadcrumbs["itemListElement"][-1]["Item"]["name"] + self.url = breadcrumbs["itemListElement"][-1]["Item"]["@id"] + BaseMetaClass.__init__(self, html) self.private = False @@ -255,6 +262,8 @@ class Team(Group, GetEnginesMixin, GetGamesMixin, GetModsMixin, 
GetWaresMixin): """ + entity_type: str = "team" + def __init__(self, html: bs4.BeautifulSoup): super().__init__(html) try: @@ -329,6 +338,8 @@ class Member(PageMetaClass, GetGamesMixin, GetModsMixin, GetAddonsMixin): page """ + entity_type: str = "member" + def __init__(self, html: bs4.BeautifulSoup): super().__init__(html, SearchCategory.members) try: diff --git a/moddb/pages/file.py b/moddb/pages/file.py index d0e23d5..b6e236e 100644 --- a/moddb/pages/file.py +++ b/moddb/pages/file.py @@ -1,4 +1,6 @@ import datetime +import json +import logging import re import sys from typing import BinaryIO, List @@ -6,10 +8,11 @@ import bs4 import requests -from ..boxes import Mirror, Thumbnail -from ..enums import AddonCategory, FileCategory, MediaCategory, ThumbnailType +from ..boxes import Mirror, PartialTag, Thumbnail +from ..enums import AddonCategory, FileCategory, Licence, MediaCategory, ThumbnailType from ..utils import ( BASE_URL, + LOGGER, concat_docs, get_date, get_page, @@ -104,18 +107,22 @@ class File(BaseMetaClass): directly attached to this file. 
""" + entity_type: str = "file" + def __init__(self, html: bs4.BeautifulSoup): if html.find("span", string="File Deleted", class_="heading"): raise ValueError("This file has been removed") + breadcrumbs = json.loads(html.find("script", type="application/ld+json").string) + self.name = breadcrumbs["itemListElement"][-1]["Item"]["name"] + self.url = breadcrumbs["itemListElement"][-1]["Item"]["@id"] + info = html.find("div", class_="table tablemenu") file = { x.string.lower(): x.parent.span.string.strip() for x in info.find_all("h5", string=("Filename", "Size", "MD5 Hash")) } - self.name = ( - html.find("a", title="Report").parent.parent.find("span", class_="heading").string - ) + self.filename = file["filename"] super().__init__(html) @@ -144,12 +151,29 @@ def __init__(self, html: bs4.BeautifulSoup): self.button = info.find("h5", string="Embed Button").parent.span.input["value"] self.widget = info.find("h5", string="Embed Widget").parent.span.input["value"] - self.description = html.find("p", id="downloadsummary").string + self.summary = html.find("p", id="downloadsummary").string + self.description = html.find("p", id="downloaddescription").string self.preview = html.find_all("img", src=True)[0]["src"] self.location = parse_location(html) + try: + raw_tags = html.find("form", attrs={"name": "tagsform"}).find_all("a") + self.tags = [ + PartialTag(x.string, join(x["href"]), x["href"].split("/")[-1]) + for x in raw_tags + if x.string is not None + ] + except AttributeError: + self.tags = [] + LOGGER.info( + "'%s' '%s' has no tags", + self.__class__.__name__, + self.name, + exc_info=LOGGER.level >= logging.DEBUG, + ) + def __repr__(self): return f"<{self.__class__.__name__} name={self.name} type={self.category.name}>" @@ -256,10 +280,28 @@ class Addon(File): * **licence** - order based on licence * **date** - order by upload date, asc is most recent first, desc is oldest first - """ - pass + entity_type: str = "addon" + + def __init__(self, html): + 
super().__init__(html) + + info = html.find("div", class_="table tablemenu") + self.licence = Licence( + int(info.find("h5", string="Licence").parent.a["href"].split("=")[-1]) + ) + + try: + self.credits = info.find("h5", string="Credits").parent.span.string + except AttributeError: + LOGGER.info( + "'%s' '%s' has no credits", + self.__class__.__name__, + self.name, + exc_info=LOGGER.level >= logging.DEBUG, + ) + self.credits = None @concat_docs @@ -312,11 +354,12 @@ class Media(BaseMetaClass): The description of the file as given by the file uploader. """ + entity_type: str = "media" + def __init__(self, html: bs4.BeautifulSoup): - try: - self.name = html.find("meta", itemprop="name")["content"] - except TypeError: - self.name = html.find("img", id="mediaimage")["title"] + breadcrumbs = json.loads(html.find("script", type="application/ld+json").string) + self.name = breadcrumbs["itemListElement"][-1]["Item"]["name"] + self.url = breadcrumbs["itemListElement"][-1]["Item"]["@id"] super().__init__(html) medias = html.find_all("h5", string=("Date", "By", "Duration", "Size", "Views", "Filename")) diff --git a/moddb/pages/game.py b/moddb/pages/game.py index 747a429..3bd03cd 100644 --- a/moddb/pages/game.py +++ b/moddb/pages/game.py @@ -42,5 +42,7 @@ class Game(PageMetaClass, GetModsMixin, GetAddonsMixin): * **dateup** - order by latest update, asc is most recent update first, desc is oldest update first """ + entity_type: str = "game" + def __init__(self, html: bs4.BeautifulSoup): super().__init__(html, SearchCategory.games) diff --git a/moddb/pages/job.py b/moddb/pages/job.py index 4e3f5c0..2429190 100644 --- a/moddb/pages/job.py +++ b/moddb/pages/job.py @@ -6,10 +6,11 @@ from ..boxes import PartialTag, Thumbnail from ..enums import JobSkill, ThumbnailType +from ..pages.mixins import GetTagsMixin from ..utils import LOGGER, join -class Job: +class Job(GetTagsMixin): """Model representing a job proposed on ModDB Parameters @@ -57,6 +58,8 @@ class Job: """ + entity_type: 
str = "job" + def __init__(self, html: bs4.BeautifulSoup): breadcrumb = json.loads(html.find("script", type="application/ld+json").string)[ "itemListElement" diff --git a/moddb/pages/mod.py b/moddb/pages/mod.py index 88d8b9b..be3ecc6 100644 --- a/moddb/pages/mod.py +++ b/moddb/pages/mod.py @@ -49,5 +49,7 @@ class Mod(PageMetaClass, GetAddonsMixin): """ + entity_type: str = "mod" + def __init__(self, html: BeautifulSoup): super().__init__(html, SearchCategory.mods) diff --git a/moddb/pages/opinion.py b/moddb/pages/opinion.py index 18206c4..0b67ae7 100644 --- a/moddb/pages/opinion.py +++ b/moddb/pages/opinion.py @@ -89,6 +89,8 @@ class Review: Link to disagree with the review """ + entity_type: str = "review" + def __init__(self, **attrs): text = attrs.get("text") if text: @@ -167,6 +169,8 @@ class Poll(BaseMetaClass): The list of available options for the poll """ + entity_type: str = "poll" + def __init__(self, html: BeautifulSoup): poll = html.find("div", class_="poll") self.question = ( diff --git a/moddb/pages/platform.py b/moddb/pages/platform.py index 82a3aa6..711093b 100644 --- a/moddb/pages/platform.py +++ b/moddb/pages/platform.py @@ -1,11 +1,12 @@ from __future__ import annotations +import json import logging from typing import TYPE_CHECKING from ..boxes import PlatformStatistics, Thumbnail from ..enums import ThumbnailType -from ..utils import LOGGER, concat_docs, get_date, join +from ..utils import LOGGER, concat_docs, get_date from .base import BaseMetaClass from .mixins import GetEnginesMixin, GetGamesMixin, GetModsMixin, GetWaresMixin @@ -78,11 +79,14 @@ class Platform( A list of mods suggested on the platform main page. 
""" + entity_type: str = "platform" + def __init__(self, html: BeautifulSoup): - self.name = html.find("a", itemprop="mainEntityOfPage").string - self.id = None + breadcrumbs = json.loads(html.find("script", type="application/ld+json").string) + self.name = breadcrumbs["itemListElement"][-1]["Item"]["name"] + self.url = breadcrumbs["itemListElement"][-1]["Item"]["@id"] - self.url = join(html.find("a", itemprop="mainEntityOfPage")["href"]) + self.id = None self.name_id = self.url.split("/")[0] try: self.description = html.find("div", id="profiledescription").p.string diff --git a/moddb/pages/ware.py b/moddb/pages/ware.py index a2eef50..a729be6 100644 --- a/moddb/pages/ware.py +++ b/moddb/pages/ware.py @@ -56,6 +56,8 @@ class Hardware(HardwareSoftwareMetaClass, GetGamesMixin, GetWaresMixin): A list of recommended hardwares. """ + entity_type: str = "hardware" + def __init__(self, html: BeautifulSoup): super().__init__(html) self._type = SearchCategory.hardwares @@ -195,6 +197,8 @@ class Software(HardwareSoftwareMetaClass): """ + entity_type: str = "software" + def __init__(self, html: BeautifulSoup): super().__init__(html) self._type = SearchCategory.softwares diff --git a/moddb/utils.py b/moddb/utils.py index 001540b..83a8c49 100644 --- a/moddb/utils.py +++ b/moddb/utils.py @@ -7,8 +7,9 @@ import ssl import sys import time +from typing_extensions import Protocol import uuid -from typing import Optional, Sequence, Tuple, TypeVar +from typing import Generic, Optional, Sequence, Tuple, TypeVar from urllib.parse import urljoin import bs4 @@ -480,12 +481,31 @@ def get_list_stats(result_box: bs4.BeautifulSoup, per_page: int = 30) -> Tuple[i return current_page, max_page, all_results -class Object: +U = TypeVar("U") + + +class HasIdAndEntityType(Protocol): + id: str + entity_type: str + + +class NamedEntity(HasIdAndEntityType, Protocol): + name: str + + +class HasUrl(Protocol): + url: str + + +class Object(Generic[U]): """A dud objects that will transform every kwarg given into an
attribute""" def __init__(self, **kwargs): self.__dict__.update(kwargs) + def __repr__(self): + return "< Object >" + D = TypeVar("D") diff --git a/p95v3019b20.win64.zip b/p95v3019b20.win64.zip new file mode 100644 index 0000000..da98fbf Binary files /dev/null and b/p95v3019b20.win64.zip differ diff --git a/preview.jpg b/preview.jpg new file mode 100644 index 0000000..0abb494 Binary files /dev/null and b/preview.jpg differ diff --git a/scripts/data/enums_base.py b/scripts/data/enums_base.py index 6976cd3..4910a53 100644 --- a/scripts/data/enums_base.py +++ b/scripts/data/enums_base.py @@ -169,6 +169,43 @@ class Month(enum.Enum): december = "12" +class PlatformCategory(enum.Enum): + """The category of the platform""" + + windows = "Windows|platforms1" + mac = "Mac|platforms8" + linux = "Linux|platforms7" + vr = "VR|platforms35" + ar = "AR|platforms36" + web = "Web|platforms24" + rtx = "RTX|platforms40" + flash = "Flash|platforms23" + dos = "DOS|platforms19" + steamdeck = "SteamDeck|platforms41" + ios = "iOS|platforms20" + android = "Android|platforms22" + metro = "Metro|platforms25" + xsx = "XSX|platforms39" + xone = "XONE|platforms34" + x360 = "X360|platforms2" + xbox = "XBOX|platforms18" + ps5 = "PS5|platforms38" + ps4 = "PS4|platforms32" + ps3 = "PS3|platforms4" + ps2 = "PS2|platforms17" + ps1 = "PS1|platforms16" + vita = "VITA|platforms28" + psp = "PSP|platforms5" + switch = "Switch|platforms37" + wiiu = "WiiU|platforms31" + wii = "Wii|platforms3" + gcn = "GCN|platforms15" + n64 = "N64|platforms14" + snes = "SNES|platforms13" + nes = "NES|platforms12" + ds = "DS|platforms6" + gba = "GBA|platforms11" + # BELOW THIS LINE ENUMS ARE GENERATED AUTOMATICALLY # PR changes to scripts/generate_enums.py if you want to # change something