Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
207 changes: 118 additions & 89 deletions plugins/EHentai Downloader/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,101 +96,128 @@ def download_query(item, is_exhentai):
(though only once, meaning, no handler will be called upon again with the exact same URL during a single session)
"""

gid, gtoken = parse_url(item.url)
download_requests = []

#===============================================================================
# get ehentai login
login_site = URLS['eh'] if is_exhentai else URLS['ex']
login_site = URLS['ex'] if is_exhentai else URLS['eh']
login_status = hpx.command.GetLoginStatus(login_site)
login_session = None
if login_status:
login_session = hpx.command.GetLoginSession(login_site)

gid, gtoken = parse_url(item.url)

download_requests = []

thumbnail_req = False
archive_req = False

if login_session:
log.info("logged in, attempting to download archive")
# get the archiver key
log.info("getting archiver key")
# prepare request
eh_data = {
'method': 'gdata',
'gidlist': [[gid, gtoken]],
}
req_props = hpx.command.RequestProperties(
headers=HEADERS,
json=eh_data,
session=login_session
)
api_url = URLS['ex_api' if is_exhentai else 'e_api']
log.info(f"requesting with api url {api_url}")
r = hpx.command.SinglePOSTRequest().request(api_url, req_props)

if r.ok:
try:
try:
response = r.json
except json.JSONDecodeError:
response = None
log.info("got empty response when trying to retrieve archiver key, this usually means that user has no access to exhentai")
if response and not 'error' in response:
for gdata in response['gmetadata']:
if 'archiver_key' in gdata:
if 'title' in gdata:
item.name = gdata['title']
if 'thumb' in gdata:
download_requests.append(
DownloadRequest(
downloaditem=item,
url=gdata['thumb'],
is_thumbnail=True,
properties=hpx.command.RequestProperties(method=hpx.Method.GET, headers=HEADERS, session=login_session), # we need to use the same session
))
thumbnail_req = True

log.info(f"found archiver key for gallery {(gid, gtoken)}")
a_key = gdata['archiver_key']
a_url = URLS['ex_archiver' if is_exhentai else 'e_archiver'].format(gallery_id=gid, gallery_token=gtoken, archiver_key=a_key)
# prepare request
# get the download url
form_data = {
"dltype": "org",
"dlcheck": "Download Original Archive"
}
req_props = hpx.command.RequestProperties(
headers=HEADERS,
data=form_data,
session=login_session
)
r = hpx.command.SinglePOSTRequest().request(a_url, req_props)
if r.ok and "Key missing, or incorrect key provided" not in r.text:
soup = BeautifulSoup(r.text, "html.parser")
dp_url = soup.find("p", id="continue")
if dp_url and dp_url.a: # finally
download_requests.append(
DownloadRequest(
downloaditem=item,
url=dp_url.a['href'] + '?start=1',
properties=hpx.command.RequestProperties(method=hpx.Method.GET, headers=HEADERS, session=login_session), # we need to use the same session
filename=item.name.strip()+'.zip'))
archive_req = True
else:
log.warning(f"got invalid key page or bad status: {r.status_code}")

else:
log.warning(f"didn't find archiver key for data: {eh_data}")
except Exception as e:
log.debug(f"got an error, last request content: \n\t {r.text}")
raise

if not archive_req:
pass
# TODO: download individual images instead
if not login_session:
log.warning("unable to get a login session for querying gallery data")
return ()
log.info("logged in, attempting to download archive")

#===============================================================================
# get the gallery metadata which should have the archive key
# https://ehwiki.org/wiki/API#Gallery_Metadata
log.info("getting archiver key")
eh_data = {
'method': 'gdata',
'gidlist': [[gid, gtoken]],
}
req_props = hpx.command.RequestProperties(
headers=HEADERS,
json=eh_data,
session=login_session
)
api_url = URLS['ex_api' if is_exhentai else 'e_api']
log.info(f"requesting with api url {api_url}")
r = hpx.command.SinglePOSTRequest().request(api_url, req_props)
if not r.ok:
log.warning(f"got invalid metadata page or bad status: {r.status_code}")
log.debug(r.text)
return ()
try:
response = r.json
except json.JSONDecodeError:
response = None
log.info("got empty response when trying to retrieve archiver key, this usually means that user has no access to exhentai")
return ()
if not response or 'error' in response:
log.warning("response has an error of some sort, and so we have no archive key to use")
log.debug(r.text)
return ()

#===============================================================================
# Read the metadata of the gallery to fill out the download queue item
#
# While in theory we should only ever have a single entry in the response, best keep it more general just in case this gets retrofitted to handle multiple urls at once
# The information we reliably get is the thumbnail url and the title of the gallery
# We seem to also always get an archive key, but the key is not always valid, and so the archive url request can fail
#
# Yes, there is an expunged flag in the metadata, but it is always false when the gallery/archive is not available
# It is also true sometimes even though the gallery/archive is totally accessible, so the flag is meaningless
for gdata in response['gmetadata']:
archive_req = False
try:
if 'title' in gdata:
item.name = gdata['title']
if 'thumb' in gdata:
download_requests.append(
DownloadRequest(
downloaditem=item,
url=gdata['thumb'],
is_thumbnail=True,
properties=hpx.command.RequestProperties(method=hpx.Method.GET, headers=HEADERS, session=login_session), # we need to use the same session
))
if 'archiver_key' in gdata:
log.info(f"found archiver key for gallery {(gid, gtoken)}")
a_key = gdata['archiver_key']
a_url = URLS['ex_archiver' if is_exhentai else 'e_archiver'].format(gallery_id=gid, gallery_token=gtoken, archiver_key=a_key)
form_data = {
"dltype": "org", #original quality, instead of a resampled version
"dlcheck": "Download Original Archive"
}
req_props = hpx.command.RequestProperties(
headers=HEADERS,
data=form_data,
session=login_session
)
r = hpx.command.SinglePOSTRequest().request(a_url, req_props)
if r.ok:
if "Insufficient funds" in r.text:
log.info("Unable to grab gallery archive due to insufficient funds (GP) on the account")
item.name = "(Insufficient GP) "+item.name
elif "Key missing, or incorrect key provided" not in r.text:
soup = BeautifulSoup(r.text, "html.parser")
dp_url = soup.find("p", id="continue")
if dp_url and dp_url.a: # finally
download_requests.append(
DownloadRequest(
downloaditem=item,
url=dp_url.a['href'] + '?start=1',
properties=hpx.command.RequestProperties(method=hpx.Method.GET, headers=HEADERS, session=login_session), # we need to use the same session
filename=item.name.strip()+'.zip'))
archive_req = True
log.debug(f"adding the archive url {download_requests[-1].url}")
if not archive_req:
log.info("Something went wrong and we did not actually find a URL")
#TODO Actually better handle the various cases of why we do not have a url
else:
log.warning(f"got invalid key page or bad status: {r.status_code}")
if r.status_code == 404 and "This gallery is currently unavailable" in r.text:
#We know that there is a valid key for us to get here, so the gallery existed at some point in the past
#This seems like it is most of the time a copyright takedown, but I have no idea why this is not marked as expunged
item.name = "(Gallery Unavailable) "+item.name
else:
log.warning(f"didn't find archiver key for data: {eh_data}")
item.name = "(Archive Unavailable) "+item.name
except Exception as e:
log.debug(f"got an error, last request content: \n\t {r.text}")
raise

if not archive_req:
pass
# TODO: download individual images instead

if download_requests:
log.info(f"was able to prepare {len(download_requests)} requests")
else:
log.info("unable to prepare any URLs to download")
return tuple(download_requests)

@hpx.attach("Download.done", trigger=[EX_IDENTIFIER, EH_IDENTIFIER])
Expand All @@ -204,8 +231,10 @@ def download_done(result):
should return:
the same :class:`DownloadResult` that was provided to the handler, potentially modified on the 'path' or `status` and `reason` properties
"""
# there's nothing special to post-process in the case of nhentai downloader, so just return the result as is
# there's nothing special to post-process in the case of e(x)hentai downloader, so just return the result as is
log.info(f"download of archive was successful for {result.downloaditem.name}")
#TODO Mark it as a failure if there was only a thumbnail to download
#TODO Archive the individual images together into a cbz or something if we grabbed individual images
return result

def parse_url(url):
Expand All @@ -219,4 +248,4 @@ def parse_url(url):
gallery_id, gallery_token = gallery_id_token.split('/')
else:
log.warning("Error extracting g_id and g_token from url: {}".format(url))
return int(gallery_id), gallery_token
return int(gallery_id), gallery_token