Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions docs/source/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,20 @@ The page attempt to keep a clear list of breaking/non-breaking changes and new f
:local:
:backlinks: none

v0.13.0
-----------
Bug Fixes
###########
* Renamed `base.search` `category` parameter to `search_category` to avoid conflict with filters
* Fixed the way IDs are retrieved
* Fixed a bug where saving files and medias would not stream the response
* Fixed issue where the front page would sometimes fail to parse

New Features
##############
* `File` and `Addon` now have a `location` value that contains the location list of the entity
* `File.save`, `Addon.save` and `Media.save` can now take `chunk_size` as a keyword parameter to define the size of the chunks to stream in

v0.12.0
-------
Bug Fixes
Expand Down
8 changes: 4 additions & 4 deletions moddb/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@


def search(
category: SearchCategory,
search_category: SearchCategory,
*,
query: str = None,
sort: Tuple[str, str] = None,
Expand All @@ -35,7 +35,7 @@ def search(

Parameters
------------
category : SearchCategory
search_category : SearchCategory
The model type that you want to search
query : str
String to search for in the model title
Expand All @@ -58,7 +58,7 @@ def search(
game = filters.get("game", None)
game = game.id if game else None

url = f"{BASE_URL}/{category.name}/page/{page}"
url = f"{BASE_URL}/{search_category.name}/page/{page}"
filter_parsed = {key: value.value for key, value in filters.items() if hasattr(value, "value")}

params = {
Expand All @@ -82,7 +82,7 @@ def search(
total_pages=total_pages,
current_page=current_page,
params=params,
url=f"{BASE_URL}/{category.name}",
url=f"{BASE_URL}/{search_category.name}",
total_results=total_results,
)

Expand Down
50 changes: 26 additions & 24 deletions moddb/boxes.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@
import logging
import re
import sys
from typing import TYPE_CHECKING, Any, List, Tuple, Union
from typing import TYPE_CHECKING, Any, Generic, List, Tuple, TypeVar

from typing_extensions import Self
from bs4 import BeautifulSoup

from .enums import (
Expand Down Expand Up @@ -1072,8 +1073,9 @@ def __init__(self, **kwargs):
def __repr__(self):
return f"<Option text={self.text}>"

T = TypeVar('T')

class ModDBList(collections.abc.MutableSequence):
class ModDBList(collections.abc.MutableSequence[T], Generic[T]):
"""Base List type for the lib

Attributes
Expand All @@ -1087,8 +1089,8 @@ class ModDBList(collections.abc.MutableSequence):
"""

def __init__(self, **kwargs):
self._results = kwargs.pop("results")
self._params = kwargs.pop("params", {})
self._results: List[T] = kwargs.pop("results")
self._params: dict = kwargs.pop("params", {})
self._url = kwargs.pop("url")
self.total_pages = kwargs.pop("total_pages")
self.current_page = kwargs.pop("current_page")
Expand All @@ -1113,13 +1115,13 @@ def _do_request(self, **kwargs):
total_results=total_results,
)

def next_page(self) -> Union["ResultList", "CommentList"]:
def next_page(self) -> Self:
"""Returns the next page of results as either a CommentList if you are retriving comments or
as a ResultList if it's literally anything else.

Returns
--------
Union[ResultList, CommentList]
Self
The new search objects containing a new set of results.

Raises
Expand All @@ -1132,13 +1134,13 @@ def next_page(self) -> Union["ResultList", "CommentList"]:

return self.to_page(self.current_page + 1)

def previous_page(self) -> Union["ResultList", "CommentList"]:
def previous_page(self) -> Self:
"""Returns the previous page of results as either a CommentList if you are retriving comments or
as a ResultList if it's literally anything else.

Returns
--------
Union[ResultList, CommentList]
Self
The new list-like object of results.

Raises
Expand All @@ -1151,7 +1153,7 @@ def previous_page(self) -> Union["ResultList", "CommentList"]:

return self.to_page(self.current_page - 1)

def to_page(self, page: int) -> Union["ResultList", "CommentList"]:
def to_page(self, page: int) -> Self:
"""Returns the desired page of results as either a CommentList if you are retriving comments or
as a ResultList if it's literally anything else.

Expand All @@ -1162,7 +1164,7 @@ def to_page(self, page: int) -> Union["ResultList", "CommentList"]:

Returns
--------
Union[ResultList, CommentList]
Self
The new list-like object of results.

Raises
Expand All @@ -1175,7 +1177,7 @@ def to_page(self, page: int) -> Union["ResultList", "CommentList"]:

return self._do_request(page=page)

def get_all_results(self):
def get_all_results(self) -> Self:
"""An expensive methods that iterates over every page of the result query and returns all
the results. This may return more results than you expected if new page have fit the criteria
while iterating.
Expand Down Expand Up @@ -1206,26 +1208,26 @@ def key(element):
search._results = list({key(e): e for e in results}.values())
return search

def __repr__(self):
def __repr__(self) -> str:
return f"<{self.__class__.__name__} pages={self.current_page}/{self.total_pages}, results={self._results}>"

def __getitem__(self, element):
return self._results.__getitem__(element)
def __getitem__(self, index: int) -> T:
return self._results.__getitem__(index)

def __delitem__(self, element):
self._results.__delitem__(element)
def __delitem__(self, index: int):
self._results.__delitem__(index)

def __len__(self):
def __len__(self) -> int:
return self._results.__len__()

def __setitem__(self, key, value):
def __setitem__(self, key: int, value: T):
self._results.__setitem__(key, value)

def insert(self, index, value):
def insert(self, index: int, value: T):
self._results.insert(index, value)


class ResultList(ModDBList):
class ResultList(ModDBList[Thumbnail]):
"""Represents a list of result gotten from one of the many get methods the library uses. This is returned
over a regular list because it has additional methods that allow for easily go through all the results. In
the same way that the moddb site works, you don't have to re-run the query manually to get the next page,
Expand All @@ -1246,7 +1248,7 @@ class ResultList(ModDBList):
def _parse_method(self, html: BeautifulSoup):
return _parse_results(html)

def resort(self, new_sort: Tuple[str, str]) -> "ResultList":
def resort(self, new_sort: Tuple[str, str]) -> Self:
"""Allows you to sort the whole search by a new sorting parameters. Returns a new search object.

Parameters
Expand All @@ -1261,11 +1263,11 @@ def resort(self, new_sort: Tuple[str, str]) -> "ResultList":
"""
return self._do_request(sort=f"{new_sort[0]}-{new_sort[1]}")

def __contains__(self, element):
def __contains__(self, element: Thumbnail) -> bool:
return get(self._results, name=element.name) is not None


class CommentList(ModDBList):
class CommentList(ModDBList[Comment]):
"""Represents a list of comments. This emulates a list and will behave like one, so you
can use any of the regular list operators in addition to the methods defined below.

Expand All @@ -1282,7 +1284,7 @@ class CommentList(ModDBList):
def _parse_method(self, html: BeautifulSoup):
return _parse_comments(html)

def __contains__(self, element):
def __contains__(self, element: Comment) -> bool:
return get(self._results, name=element.name) is not None

def flatten(self) -> List[Comment]:
Expand Down
28 changes: 19 additions & 9 deletions moddb/pages/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,16 +52,26 @@ def __init__(self, html: BeautifulSoup):
except AttributeError:
self.name = html.find("meta", property="og:title")["content"]

try:
self.id = int(
re.search(r"siteareaid=(\d*)", html.find("a", class_=["reporticon"])["href"])[1]
)
except TypeError:
for index, func in enumerate(
[
lambda: int(
re.search(r"siteareaid=(\d*)", html.find("a", class_=["reporticon"])["href"])[1]
),
lambda: int(html.find("input", attrs={"name": "siteareaid"})["value"]),
lambda: int(html.find("meta", property="og:image")["content"].split("/")[-2]),
lambda: re.match(
r"https:\/\/www\.moddb\.com\/html\/scripts\/autocomplete\.php\?a=mentions&p=home&l=6&u=(\d*)",
str(html),
).group(1),
]
):
try:
self.id = int(html.find("input", attrs={"name": "siteareaid"})["value"])
self.id = func()
break
except (AttributeError, TypeError):
# really scratching the bottom here but a lot of "official" groups don't have the regular ID
self.id = int(html.find("meta", property="og:image")["content"].split("/")[-2])
LOGGER.warning("Failed to get id from method %s for member %s", index, self.name)
else:
raise AttributeError(f"Failed to get id from member {self.name}")

try:
self.url = html.find("meta", property="og:url")["content"]
Expand All @@ -72,7 +82,7 @@ def __init__(self, html: BeautifulSoup):

try:
self.report = join(html.find("a", string="Report")["href"])
except TypeError:
except (TypeError, AttributeError):
self.report = None
LOGGER.info(
"'%s' '%s' cannot be reported",
Expand Down
52 changes: 39 additions & 13 deletions moddb/pages/file.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import datetime
import re
import sys
from typing import BinaryIO, List

import bs4
import requests
Expand All @@ -15,11 +16,25 @@
get_views,
join,
prepare_request,
raise_for_status,
)
from .base import BaseMetaClass


def parse_location(html) -> list[Thumbnail] | None:
    """Parse the 'Location' section of a file/addon page into a thumbnail list.

    Parameters
    -----------
    html : bs4.BeautifulSoup
        The parsed page html

    Returns
    --------
    list[Thumbnail] | None
        Ordered list of entity thumbnails making up the location hierarchy,
        or None if the page has no 'Location' section.
    """
    header = html.find("h5", string="Location")
    # find() returns None when the section is absent; the previous version
    # chained .parent off it directly (AttributeError), and its None check
    # was dead because find_all() returns [] rather than None.
    if header is None:
        return None

    location = header.parent.find_all("a")
    # Links come in (label, target) pairs: the label's text names the
    # thumbnail type (last char sliced off — presumably a trailing ':',
    # TODO confirm), the following link is the entity itself.
    return [
        Thumbnail(
            type=ThumbnailType[location[x].string.lower()[:-1]],
            url=location[x + 1]["href"],
            name=location[x + 1].string,
        )
        for x in range(0, len(location) - 1, 2)
    ]


@concat_docs
class File(BaseMetaClass):
"""An oject representing a file on ModDB, a file is something posted by the page owner which is directly linked
Expand Down Expand Up @@ -83,6 +98,10 @@ class File(BaseMetaClass):
Description of the file, as written by the author
preview : str
URL of the preview image for the file
location: list[Thumbnail]
An ordered list detailing the hierarchy of entities the
file or addon sits under. The last one being the entity
directly attached to this file.
"""

def __init__(self, html: bs4.BeautifulSoup):
Expand Down Expand Up @@ -129,38 +148,42 @@ def __init__(self, html: bs4.BeautifulSoup):

self.preview = html.find_all("img", src=True)[0]["src"]

self.location = parse_location(html)

def __repr__(self):
return f"<{self.__class__.__name__} name={self.name} type={self.category.name}>"

def save(self, file_obj, *, mirror=None):
def save(self, file_obj: BinaryIO, *, mirror=None, chunk_size: int = 10_000_000):
"""Save the file to an object. This functions makes
two requests. If you pass a valid mirror it will
make only one request.

Parameters
-----------
file_obj : typing.BinaryIO
file_obj : BinaryIO
The file obj to save the file to. The binary data
will be streamed to that object.
mirror : Optional[Mirror]
An optional mirror object to download the
file from a specific moddb mirror

chunk_size: int
The size of the chunks to stream the response
back in. 10MB by default
"""
if mirror is None:
download = get_page(f"{BASE_URL}/downloads/start/{self.id}")
url = download.find("a", string=f"download {self.filename}")["href"]
else:
url = mirror._url

SESSION = sys.modules["moddb"].SESSION
SESSION: requests.Session = sys.modules["moddb"].SESSION
prepped = prepare_request(requests.Request("GET", join(url)), SESSION)
with SESSION.send(prepped, stream=True) as r:
raise_for_status(r)
for chunk in r.iter_content(chunk_size=8192):
r.raise_for_status()
for chunk in r.iter_content(chunk_size=chunk_size):
file_obj.write(chunk)

def get_mirrors(self):
def get_mirrors(self) -> List[Mirror]:
"""Get all the mirrors from which a file can be downloaded. This
can then be passed to File.save to download from a specific mirror.

Expand Down Expand Up @@ -342,20 +365,23 @@ def __init__(self, html: bs4.BeautifulSoup):
def __repr__(self):
return f"<Media name={self.name} type={self.category.name}>"

def save(self, file_obj):
def save(self, file_obj: BinaryIO, *, chunk_size: int = 10_000_000):
"""Save the media to an object.

Parameters
-----------
file_obj : typing.BinaryIO
file_obj : BinaryIO
The file obj to save the file to. The binary data
will be streamed to that object.
chunk_size: int
The size of the chunks to stream the response
back in. 10MB by default

"""
SESSION = sys.modules["moddb"].SESSION
SESSION: requests.Session = sys.modules["moddb"].SESSION
prepped = prepare_request(requests.Request("GET", self.fileurl), SESSION)

with SESSION.send(prepped, stream=True) as r:
raise_for_status(r)
for chunk in r.iter_content(chunk_size=8192):
r.raise_for_status()
for chunk in r.iter_content(chunk_size=chunk_size):
file_obj.write(chunk)
Loading
Loading