From ac2a2aaeffaa4b8f90698daae2621090e2af7a17 Mon Sep 17 00:00:00 2001 From: Clement Julia Date: Mon, 19 May 2025 00:38:55 +0200 Subject: [PATCH 1/7] file-location adding file locations --- moddb/pages/base.py | 20 ++++++++++++-------- moddb/pages/file.py | 19 +++++++++++++++++++ moddb/utils.py | 2 ++ 3 files changed, 33 insertions(+), 8 deletions(-) diff --git a/moddb/pages/base.py b/moddb/pages/base.py index 21facad..89aa9b0 100644 --- a/moddb/pages/base.py +++ b/moddb/pages/base.py @@ -52,16 +52,20 @@ def __init__(self, html: BeautifulSoup): except AttributeError: self.name = html.find("meta", property="og:title")["content"] - try: - self.id = int( - re.search(r"siteareaid=(\d*)", html.find("a", class_=["reporticon"])["href"])[1] - ) - except TypeError: + + for index, func in enumerate([ + lambda: int(re.search(r"siteareaid=(\d*)", html.find("a", class_=["reporticon"])["href"])[1]), + lambda: int(html.find("input", attrs={"name": "siteareaid"})["value"]), + lambda: int(html.find("meta", property="og:image")["content"].split("/")[-2]), + lambda: re.match(r"https:\/\/www\.moddb\.com\/html\/scripts\/autocomplete\.php\?a=mentions&p=home&l=6&u=(\d*)", str(html)).group(1) + ]): try: - self.id = int(html.find("input", attrs={"name": "siteareaid"})["value"]) + self.id = func() + break except (AttributeError, TypeError): - # really scratching the bottom here but a lot of "official" groups don't have the regular ID - self.id = int(html.find("meta", property="og:image")["content"].split("/")[-2]) + LOGGER.warning("Failed to get id from method %s for member %s", index, self.name) + else: + raise AttributeError(f"Failed to get id from member {self.name}") try: self.url = html.find("meta", property="og:url")["content"] diff --git a/moddb/pages/file.py b/moddb/pages/file.py index eba1e81..11eb0f4 100644 --- a/moddb/pages/file.py +++ b/moddb/pages/file.py @@ -19,6 +19,19 @@ ) from .base import BaseMetaClass +def parse_location(html) -> list[Thumbnail] | None: + location = html.find("h5", string="Location").parent.find_all('a') + if location is None: + return None + + return [ + Thumbnail( + type=ThumbnailType[location[x].string.lower()[:-1]], + url=location[x+1]["href"], + name=location[x+1].string, + ) + for x in range(0, len(location)-1, 2) + ] @concat_docs class File(BaseMetaClass): @@ -83,6 +96,10 @@ class File(BaseMetaClass): Description of the file, as written by the author preview : str URL of the preview image for the file + location: list[Thumbnail] + An ordered list detailing the hierarchy of entities the + file or addon sits under. The last one being the entity + directly attached to this file. """ def __init__(self, html: bs4.BeautifulSoup): @@ -129,6 +146,8 @@ def __init__(self, html: bs4.BeautifulSoup): self.preview = html.find_all("img", src=True)[0]["src"] + self.location = parse_location(html) + def __repr__(self): return f"<{self.__class__.__name__} name={self.name} type={self.category.name}>" diff --git a/moddb/utils.py b/moddb/utils.py index db35210..004ec71 100644 --- a/moddb/utils.py +++ b/moddb/utils.py @@ -218,6 +218,8 @@ def generate_login_cookies(username: str, password: str, session: requests.Sessi req = requests.Request("GET", f"{BASE_URL}/members/login") resp = session.send(prepare_request(req, session)) + resp.raise_for_status() + html = soup(resp.text) form = html.find("form", attrs={"name": "membersform"}) From 9e34382eb07183e9fc9dcc6402bf7942f8c650ba Mon Sep 17 00:00:00 2001 From: Clement Julia Date: Mon, 19 May 2025 08:42:14 +0200 Subject: [PATCH 2/7] file-location changing parameter name --- docs/source/changelog.rst | 11 +++++++++++ moddb/base.py | 8 ++++---- moddb/pages/base.py | 2 +- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst index 3dbdc49..19a67f2 100644 --- a/docs/source/changelog.rst +++ b/docs/source/changelog.rst @@ -8,6 +8,17 @@ The page attempt to keep a clear list of breaking/non-breaking changes and new f :local: :backlinks: none +v0.13.0 +----------- +Bug Fixes +########### +* Renamed `base.search` `category` parameter to `search_category` to avoid conflict with filters +* Fixed the way IDs are retrieved + +New Features +############## +* `File` and `Addon` now have a `location` value that contains the location list of the entity + v0.12.0 ------- Bug Fixes diff --git a/moddb/base.py b/moddb/base.py index ecc3066..5af5155 100644 --- a/moddb/base.py +++ b/moddb/base.py @@ -17,7 +17,7 @@ def search( - category: SearchCategory, + search_category: SearchCategory, *, query: str = None, sort: Tuple[str, str] = None, @@ -35,7 +35,7 @@ def search( Parameters ------------ - category : SearchCategory + search_category : SearchCategory The model type that you want to search query : str String to search for in the model title @@ -58,7 +58,7 @@ def search( game = filters.get("game", None) game = game.id if game else None - url = f"{BASE_URL}/{category.name}/page/{page}" + url = f"{BASE_URL}/{search_category.name}/page/{page}" filter_parsed = {key: value.value for key, value in filters.items() if hasattr(value, "value")} params = { @@ -82,7 +82,7 @@ def search( total_pages=total_pages, current_page=current_page, params=params, - url=f"{BASE_URL}/{category.name}", + url=f"{BASE_URL}/{search_category.name}", total_results=total_results, ) diff --git a/moddb/pages/base.py b/moddb/pages/base.py index 89aa9b0..2ba7b4f 100644 --- a/moddb/pages/base.py +++ b/moddb/pages/base.py @@ -76,7 +76,7 @@ def __init__(self, html: BeautifulSoup): try: self.report = join(html.find("a", string="Report")["href"]) - except TypeError: + except (TypeError, AttributeError): self.report = None LOGGER.info( "'%s' '%s' cannot be reported", From bf88a3bcd86d0bbcbba5e83f2cf5c06d3bcdf02a Mon Sep 17 00:00:00 2001 From: Clement Julia Date: Tue, 20 May 2025 09:28:56 +0200 Subject: [PATCH 3/7] file-location fixed chunking --- docs/source/changelog.rst | 2 ++ moddb/pages/file.py | 30 ++++++++++++++++++------------ 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst index 19a67f2..8ec4907 100644 --- a/docs/source/changelog.rst +++ b/docs/source/changelog.rst @@ -14,10 +14,12 @@ Bug Fixes ########### * Renamed `base.search` `category` parameter to `search_category` to avoid conflict with filters * Fixed the way IDs are retrieved +* Fixed a bug where saving files and medias would not stream the response New Features ############## * `File` and `Addon` now have a `location` value that contains the location list of the entity +* `File.save`, `Addon.save` and `Media.save` can now take `chunk_size` as a keyword parameter to define the size of the chunks to stream in v0.12.0 ------- diff --git a/moddb/pages/file.py b/moddb/pages/file.py index 11eb0f4..5b62b04 100644 --- a/moddb/pages/file.py +++ b/moddb/pages/file.py @@ -1,6 +1,7 @@ import datetime import re import sys +from typing import BinaryIO, List import bs4 import requests @@ -151,20 +152,22 @@ def __init__(self, html: bs4.BeautifulSoup): def __repr__(self): return f"<{self.__class__.__name__} name={self.name} type={self.category.name}>" - def save(self, file_obj, *, mirror=None): + def save(self, file_obj: BinaryIO, *, mirror=None, chunk_size: int = 10_000_000): """Save the file to an object. This functions makes two requests. If you pass a valid mirror it will make only one request. Parameters ----------- - file_obj : typing.BinaryIO + file_obj : BinaryIO The file obj to save the file to. The binary data will be streamed to that object. mirror : Optional[Mirror] An optional mirror object to download the file from a specific moddb mirror - + chunk_size: int + The size of the chunks to stream the response + back in. 10MB by default """ if mirror is None: download = get_page(f"{BASE_URL}/downloads/start/{self.id}") @@ -172,14 +175,14 @@ def save(self, file_obj, *, mirror=None): else: url = mirror._url - SESSION = sys.modules["moddb"].SESSION + SESSION: requests.Session = sys.modules["moddb"].SESSION prepped = prepare_request(requests.Request("GET", join(url)), SESSION) with SESSION.send(prepped, stream=True) as r: - raise_for_status(r) - for chunk in r.iter_content(chunk_size=8192): + r.raise_for_status() + for chunk in r.iter_content(chunk_size=chunk_size): file_obj.write(chunk) - def get_mirrors(self): + def get_mirrors(self) -> List[Mirror]: """Get all the mirrors from which a file can be downloaded. This can then be passed to File.save to download from a specific mirror. @@ -361,20 +364,23 @@ def __init__(self, html: bs4.BeautifulSoup): def __repr__(self): return f"" - def save(self, file_obj): + def save(self, file_obj: BinaryIO, *, chunk_size: int = 10_000_000): """Save the media to an object. Parameters ----------- - file_obj : typing.BinaryIO + file_obj : BinaryIO The file obj to save the file to. The binary data will be streamed to that object. + chunk_size: int + The size of the chunks to stream the response + back in. 10MB by default """ - SESSION = sys.modules["moddb"].SESSION + SESSION: requests.Session = sys.modules["moddb"].SESSION prepped = prepare_request(requests.Request("GET", self.fileurl), SESSION) with SESSION.send(prepped, stream=True) as r: - raise_for_status(r) - for chunk in r.iter_content(chunk_size=8192): + r.raise_for_status() + for chunk in r.iter_content(chunk_size=chunk_size): file_obj.write(chunk) From 04e49993dddfc7b47475b9331f9ab39b6c3e7e32 Mon Sep 17 00:00:00 2001 From: Clement Julia Date: Tue, 20 May 2025 09:45:00 +0200 Subject: [PATCH 4/7] file-location black --- moddb/pages/base.py | 20 +++++++++++++------- moddb/pages/file.py | 21 +++++++++++---------- 2 files changed, 24 insertions(+), 17 deletions(-) diff --git a/moddb/pages/base.py b/moddb/pages/base.py index 2ba7b4f..68f696f 100644 --- a/moddb/pages/base.py +++ b/moddb/pages/base.py @@ -52,13 +52,19 @@ def __init__(self, html: BeautifulSoup): except AttributeError: self.name = html.find("meta", property="og:title")["content"] - - for index, func in enumerate([ - lambda: int(re.search(r"siteareaid=(\d*)", html.find("a", class_=["reporticon"])["href"])[1]), - lambda: int(html.find("input", attrs={"name": "siteareaid"})["value"]), - lambda: int(html.find("meta", property="og:image")["content"].split("/")[-2]), - lambda: re.match(r"https:\/\/www\.moddb\.com\/html\/scripts\/autocomplete\.php\?a=mentions&p=home&l=6&u=(\d*)", str(html)).group(1) - ]): + for index, func in enumerate( + [ + lambda: int( + re.search(r"siteareaid=(\d*)", html.find("a", class_=["reporticon"])["href"])[1] + ), + lambda: int(html.find("input", attrs={"name": "siteareaid"})["value"]), + lambda: int(html.find("meta", property="og:image")["content"].split("/")[-2]), + lambda: re.match( + r"https:\/\/www\.moddb\.com\/html\/scripts\/autocomplete\.php\?a=mentions&p=home&l=6&u=(\d*)", + str(html), + ).group(1), + ] + ): try: self.id = func() break diff --git a/moddb/pages/file.py b/moddb/pages/file.py index 5b62b04..d0e23d5 100644 --- a/moddb/pages/file.py +++ b/moddb/pages/file.py @@ -16,24 +16,25 @@ get_views, join, prepare_request, - raise_for_status, ) from .base import BaseMetaClass + def parse_location(html) -> list[Thumbnail] | None: - location = html.find("h5", string="Location").parent.find_all('a') + location = html.find("h5", string="Location").parent.find_all("a") if location is None: return None - + return [ Thumbnail( - type=ThumbnailType[location[x].string.lower()[:-1]], - url=location[x+1]["href"], - name=location[x+1].string, + type=ThumbnailType[location[x].string.lower()[:-1]], + url=location[x + 1]["href"], + name=location[x + 1].string, ) - for x in range(0, len(location)-1, 2) + for x in range(0, len(location) - 1, 2) ] + @concat_docs class File(BaseMetaClass): """An oject representing a file on ModDB, a file is something posted by the page owner which is directly linked @@ -100,7 +101,7 @@ class File(BaseMetaClass): location: list[Thumbnail] An ordered list detailing the hierarchy of entities the file or addon sits under. The last one being the entity - directly attached to this file. + directly attached to this file. """ def __init__(self, html: bs4.BeautifulSoup): @@ -166,7 +167,7 @@ def save(self, file_obj: BinaryIO, *, mirror=None, chunk_size: int = 10_000_000) An optional mirror object to download the file from a specific moddb mirror chunk_size: int - The size of the chunks to stream the response + The size of the chunks to stream the response back in. 10MB by default """ if mirror is None: @@ -373,7 +374,7 @@ def save(self, file_obj: BinaryIO, *, chunk_size: int = 10_000_000): The file obj to save the file to. The binary data will be streamed to that object. chunk_size: int - The size of the chunks to stream the response + The size of the chunks to stream the response back in. 10MB by default """ From de5698c7f30a276c9b3918a542352b69720425cf Mon Sep 17 00:00:00 2001 From: Clement Julia Date: Tue, 20 May 2025 10:45:48 +0200 Subject: [PATCH 5/7] file-location front page fix --- docs/source/changelog.rst | 1 + moddb/pages/fp.py | 12 ++++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst index 8ec4907..7658439 100644 --- a/docs/source/changelog.rst +++ b/docs/source/changelog.rst @@ -15,6 +15,7 @@ Bug Fixes * Renamed `base.search` `category` parameter to `search_category` to avoid conflict with filters * Fixed the way IDs are retrieved * Fixed a bug where saving files and medias would not stream the response +* Fixed issue where the front page would sometimes fail to parse New Features ############## diff --git a/moddb/pages/fp.py b/moddb/pages/fp.py index b59fd46..92a6964 100644 --- a/moddb/pages/fp.py +++ b/moddb/pages/fp.py @@ -4,7 +4,7 @@ from ..boxes import Thumbnail from ..enums import ThumbnailType -from ..utils import get_page, get_page_type +from ..utils import LOGGER, get_page, get_page_type from . import opinion @@ -52,12 +52,20 @@ def __init__(self, html: bs4.BeautifulSoup): elif "data-bg" in x.div: image = x.div["data-bg"] + try: + page_type = get_page_type(x.a["href"]) + except IndexError: + LOGGER.warning( + "Unabled to get page type from %s for front page, skipping", x.a["href"] + ) + continue + thumbnail = Thumbnail( name=name.string if name else None, url=x.a["href"], summary=summary.string if summary else None, image=image, - type=get_page_type(x.a["href"]), + type=page_type, ) self.slider.append(thumbnail) From c62cafb5fdd4a6d2ce1a8bdef674f4cc6138ea5f Mon Sep 17 00:00:00 2001 From: Clement Julia Date: Tue, 20 May 2025 10:50:32 +0200 Subject: [PATCH 6/7] file-location adding delay between main tests --- tests/test_main.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tests/test_main.py b/tests/test_main.py index fd7319d..a782811 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -1,4 +1,5 @@ import logging +import time import pytest from tests import ( @@ -30,6 +31,9 @@ handler.setFormatter(logging.Formatter("%(asctime)s:%(levelname)s:%(name)s: %(message)s")) logger.addHandler(handler) +@pytest.fixture(scope='class') +def delay(): + time.sleep(60) @pytest.mark.vcr class TestMods(test_mod.TestMod): @@ -39,6 +43,7 @@ def _get_object(self, request): @pytest.mark.vcr +@pytest.mark.usefixtures("delay") class TestGames(test_game.TestGame): @pytest.fixture(params=utils.game_urls, autouse=True) def _get_object(self, request): @@ -46,6 +51,7 @@ def _get_object(self, request): @pytest.mark.vcr +@pytest.mark.usefixtures("delay") class TestEngines(test_engine.TestEngine): @pytest.fixture(params=utils.engine_urls, autouse=True) def _get_object(self, request): @@ -53,6 +59,7 @@ def _get_object(self, request): @pytest.mark.vcr +@pytest.mark.usefixtures("delay") class TestFiles(test_file.TestFile): @pytest.fixture(params=utils.file_urls, autouse=True) def _get_object(self, request): @@ -60,6 +67,7 @@ def _get_object(self, request): @pytest.mark.vcr +@pytest.mark.usefixtures("delay") class TestAddons(test_addon.TestAddon): @pytest.fixture(params=utils.addon_urls, autouse=True) def _get_object(self, request): @@ -67,6 +75,7 @@ def _get_object(self, request): @pytest.mark.vcr +@pytest.mark.usefixtures("delay") class TestMedias(test_media.TestMedia): @pytest.fixture(params=utils.media_urls, autouse=True) def _get_object(self, request): @@ -74,6 +83,7 @@ def _get_object(self, request): @pytest.mark.vcr +@pytest.mark.usefixtures("delay") class TestArticles(test_article.TestArticle): @pytest.fixture(params=utils.article_urls, autouse=True) def _get_object(self, request): @@ -81,6 +91,7 @@ def _get_object(self, request): @pytest.mark.vcr +@pytest.mark.usefixtures("delay") class TestGroups(test_group.TestGroup): @pytest.fixture(params=utils.group_urls, autouse=True) def _get_object(self, request): @@ -88,6 +99,7 @@ def _get_object(self, request): @pytest.mark.vcr +@pytest.mark.usefixtures("delay") class TestTeams(test_team.TestTeam): @pytest.fixture(params=utils.team_urls, autouse=True) def _get_object(self, request): @@ -95,6 +107,7 @@ def _get_object(self, request): @pytest.mark.vcr +@pytest.mark.usefixtures("delay") class TestJobs(test_job.TestJob): @pytest.fixture(params=utils.job_urls, autouse=True) def _get_object(self, request): @@ -102,6 +115,7 @@ def _get_object(self, request): @pytest.mark.vcr +@pytest.mark.usefixtures("delay") class TestMembers(test_member.TestMember): @pytest.fixture(params=utils.member_urls, autouse=True) def _get_object(self, request): @@ -109,6 +123,7 @@ def _get_object(self, request): @pytest.mark.vcr +@pytest.mark.usefixtures("delay") class TestPlatforms(test_platform.TestPlatform): @pytest.fixture(params=utils.platform_urls, autouse=True) def _get_object(self, request): @@ -116,6 +131,7 @@ def _get_object(self, request): @pytest.mark.vcr +@pytest.mark.usefixtures("delay") class TestHardwares(test_hardware.TestHardware): @pytest.fixture(params=utils.hardware_urls, autouse=True) def _get_object(self, request): @@ -123,6 +139,7 @@ def _get_object(self, request): @pytest.mark.vcr +@pytest.mark.usefixtures("delay") class TestSoftwares(test_software.TestSoftware): @pytest.fixture(params=utils.software_urls, autouse=True) def _get_object(self, request): @@ -130,6 +147,7 @@ def _get_object(self, request): @pytest.mark.vcr +@pytest.mark.usefixtures("delay") class TestPolls(test_poll.TestPoll): @pytest.fixture(params=utils.poll_urls, autouse=True) def _get_object(self, request): @@ -137,20 +155,24 @@ def _get_object(self, request): @pytest.mark.vcr +@pytest.mark.usefixtures("delay") class TestSearches(test_base.TestSearch): @pytest.fixture(params=utils.queries, autouse=True) def _get_object(self, request): self.search = moddb.search(request.param[1], query=request.param[0]) +@pytest.mark.usefixtures("delay") class TestLogin(test_base.TestLogin): pass @pytest.mark.vcr +@pytest.mark.usefixtures("delay") class TestFrontPage(test_base.TestFrontPage): pass +@pytest.mark.usefixtures("delay") class TestClient(test_client.TestClient): pass From b3a84363c4257414d46dc605d981636eea0f199e Mon Sep 17 00:00:00 2001 From: Clement Julia Date: Tue, 20 May 2025 17:35:22 +0200 Subject: [PATCH 7/7] file-location typing --- moddb/boxes.py | 50 ++++++++++++++++++++++++++------------------------ 1 file changed, 26 insertions(+), 24 deletions(-) diff --git a/moddb/boxes.py b/moddb/boxes.py index 8ccd22b..ce005f3 100644 --- a/moddb/boxes.py +++ b/moddb/boxes.py @@ -5,8 +5,9 @@ import logging import re import sys -from typing import TYPE_CHECKING, Any, List, Tuple, Union +from typing import TYPE_CHECKING, Any, Generic, List, Tuple, TypeVar +from typing_extensions import Self from bs4 import BeautifulSoup from .enums import ( @@ -1072,8 +1073,9 @@ def __init__(self, **kwargs): def __repr__(self): return f"