Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
207 changes: 118 additions & 89 deletions plugins/EHentai Downloader/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,101 +96,128 @@ def download_query(item, is_exhentai):
(though only once, meaning, no handler will be called upon again with the exact same URL during a single session)
"""

gid, gtoken = parse_url(item.url)
download_requests = []

#===============================================================================
# get ehentai login
login_site = URLS['eh'] if is_exhentai else URLS['ex']
login_site = URLS['ex'] if is_exhentai else URLS['eh']
login_status = hpx.command.GetLoginStatus(login_site)
login_session = None
if login_status:
login_session = hpx.command.GetLoginSession(login_site)

gid, gtoken = parse_url(item.url)

download_requests = []

thumbnail_req = False
archive_req = False

if login_session:
log.info("logged in, attempting to download archive")
# get the archiver key
log.info("getting archiver key")
# prepare request
eh_data = {
'method': 'gdata',
'gidlist': [[gid, gtoken]],
}
req_props = hpx.command.RequestProperties(
headers=HEADERS,
json=eh_data,
session=login_session
)
api_url = URLS['ex_api' if is_exhentai else 'e_api']
log.info(f"requesting with api url {api_url}")
r = hpx.command.SinglePOSTRequest().request(api_url, req_props)

if r.ok:
try:
try:
response = r.json
except json.JSONDecodeError:
response = None
log.info("got empty response when trying to retrieve archiver key, this usually means that user has no access to exhentai")
if response and not 'error' in response:
for gdata in response['gmetadata']:
if 'archiver_key' in gdata:
if 'title' in gdata:
item.name = gdata['title']
if 'thumb' in gdata:
download_requests.append(
DownloadRequest(
downloaditem=item,
url=gdata['thumb'],
is_thumbnail=True,
properties=hpx.command.RequestProperties(method=hpx.Method.GET, headers=HEADERS, session=login_session), # we need to use the same session
))
thumbnail_req = True

log.info(f"found archiver key for gallery {(gid, gtoken)}")
a_key = gdata['archiver_key']
a_url = URLS['ex_archiver' if is_exhentai else 'e_archiver'].format(gallery_id=gid, gallery_token=gtoken, archiver_key=a_key)
# prepare request
# get the download url
form_data = {
"dltype": "org",
"dlcheck": "Download Original Archive"
}
req_props = hpx.command.RequestProperties(
headers=HEADERS,
data=form_data,
session=login_session
)
r = hpx.command.SinglePOSTRequest().request(a_url, req_props)
if r.ok and "Key missing, or incorrect key provided" not in r.text:
soup = BeautifulSoup(r.text, "html.parser")
dp_url = soup.find("p", id="continue")
if dp_url and dp_url.a: # finally
download_requests.append(
DownloadRequest(
downloaditem=item,
url=dp_url.a['href'] + '?start=1',
properties=hpx.command.RequestProperties(method=hpx.Method.GET, headers=HEADERS, session=login_session), # we need to use the same session
filename=item.name.strip()+'.zip'))
archive_req = True
else:
log.warning(f"got invalid key page or bad status: {r.status_code}")

else:
log.warning(f"didn't find archiver key for data: {eh_data}")
except Exception as e:
log.debug(f"got an error, last request content: \n\t {r.text}")
raise

if not archive_req:
pass
# TODO: download individual images instead
if not login_session:
log.warning("unable to get a login session for querying gallery data")
return ()
log.info("logged in, attempting to download archive")

#===============================================================================
# get the gallery metadata which should have the archive key
# https://ehwiki.org/wiki/API#Gallery_Metadata
log.info("getting archiver key")
eh_data = {
'method': 'gdata',
'gidlist': [[gid, gtoken]],
}
req_props = hpx.command.RequestProperties(
headers=HEADERS,
json=eh_data,
session=login_session
)
api_url = URLS['ex_api' if is_exhentai else 'e_api']
log.info(f"requesting with api url {api_url}")
r = hpx.command.SinglePOSTRequest().request(api_url, req_props)
if not r.ok:
log.warning(f"got invalid metadata page or bad status: {r.status_code}")
log.debug(r.text)
return ()
try:
response = r.json
except json.JSONDecodeError:
response = None
log.info("got empty response when trying to retrieve archiver key, this usually means that user has no access to exhentai")
return ()
if not response or 'error' in response:
log.warning("response has an error of some sort, and so we have no archive key to use")
log.debug(r.text)
return ()

#===============================================================================
# Read the metadata of the gallery to fill out the download queue item
#
# While in theory we should only ever have a single entry in the response, best keep it more general just in case this gets retrofitted to handle multiple urls at once
# The information we reliably get is the thumbnail url and the title of the gallery
# We seem to also always get an archive key, but the key is not always valid, and so the archive url request can fail
#
# Yes, there is an expunged flag in the metadata, but it is always false when the gallery/archive is not available
# It is also true sometimes even though the gallery/archive is totally accessible, so the flag is meaningless
for gdata in response['gmetadata']:
archive_req = False
try:
if 'title' in gdata:
item.name = gdata['title']
if 'thumb' in gdata:
download_requests.append(
DownloadRequest(
downloaditem=item,
url=gdata['thumb'],
is_thumbnail=True,
properties=hpx.command.RequestProperties(method=hpx.Method.GET, headers=HEADERS, session=login_session), # we need to use the same session
))
if 'archiver_key' in gdata:
log.info(f"found archiver key for gallery {(gid, gtoken)}")
a_key = gdata['archiver_key']
a_url = URLS['ex_archiver' if is_exhentai else 'e_archiver'].format(gallery_id=gid, gallery_token=gtoken, archiver_key=a_key)
form_data = {
"dltype": "org", #original quality, instead of a resampled version
"dlcheck": "Download Original Archive"
}
req_props = hpx.command.RequestProperties(
headers=HEADERS,
data=form_data,
session=login_session
)
r = hpx.command.SinglePOSTRequest().request(a_url, req_props)
if r.ok:
if "Insufficient funds" in r.text:
log.info("Unable to grab gallery archive due to insufficient funds (GP) on the account")
item.name = "(Insufficient GP) "+item.name
elif "Key missing, or incorrect key provided" not in r.text:
soup = BeautifulSoup(r.text, "html.parser")
dp_url = soup.find("p", id="continue")
if dp_url and dp_url.a: # finally
download_requests.append(
DownloadRequest(
downloaditem=item,
url=dp_url.a['href'] + '?start=1',
properties=hpx.command.RequestProperties(method=hpx.Method.GET, headers=HEADERS, session=login_session), # we need to use the same session
filename=item.name.strip()+'.zip'))
archive_req = True
log.debug(f"adding the archive url {download_requests[-1].url}")
if not archive_req:
log.info("Something went wrong and we did not actually find a URL")
#TODO Actually better handle the various cases of why we do not have a url
else:
log.warning(f"got invalid key page or bad status: {r.status_code}")
if r.status_code == 404 and "This gallery is currently unavailable" in r.text:
#We know that there is a valid key for us to get here, so the gallery existed at some point in the past
#This seems like it is most of the time a copyright takedown, but I have no idea why this is not marked as expunged
item.name = "(Gallery Unavailable) "+item.name
else:
log.warning(f"didn't find archiver key for data: {eh_data}")
item.name = "(Archive Unavailable) "+item.name
except Exception as e:
log.debug(f"got an error, last request content: \n\t {r.text}")
raise

if not archive_req:
pass
# TODO: download individual images instead

if download_requests:
log.info(f"was able to prepare {len(download_requests)} requests")
else:
log.info("unable to prepare any URLs to download")
return tuple(download_requests)

@hpx.attach("Download.done", trigger=[EX_IDENTIFIER, EH_IDENTIFIER])
Expand All @@ -204,8 +231,10 @@ def download_done(result):
should return:
the same :class:`DownloadResult` that was provided to the handler, potentially modified on the 'path' or `status` and `reason` properties
"""
# there's nothing special to post-process in the case of nhentai downloader, so just return the result as is
# there's nothing special to post-process in the case of e(x)hentai downloader, so just return the result as is
log.info(f"download of archive was successful for {result.downloaditem.name}")
#TODO Mark it as a failure if there was only a thumbnail to download
#TODO Archive the individual images together into a cbz or something if we grabbed individual images
return result

def parse_url(url):
Expand All @@ -219,4 +248,4 @@ def parse_url(url):
gallery_id, gallery_token = gallery_id_token.split('/')
else:
log.warning("Error extracting g_id and g_token from url: {}".format(url))
return int(gallery_id), gallery_token
return int(gallery_id), gallery_token