From b98b86f4bc690e86a87e6290814e3a1d93331614 Mon Sep 17 00:00:00 2001 From: SilhouetteTR Date: Wed, 18 Feb 2015 22:07:58 +0200 Subject: [PATCH] Added login cookie support and paging fixes for FlickrRipper.java. Just get cookies from browser after logging in to Flickr and put one line in the config. After 1st time, cookies will be Base64 encoded in the config for some very simple security measure. -- How to get cookies easily (for end users) -- Chrome Browser: - Login to Flickr - Hit F12 for Developer Tools in browser - Go to Resources tab - Expand Cookies on the left - Select www.flickr.com - Get the values for these 3 cookies: current_identity, cookie_accid and cookie_epass - Put these in the "rip.properties" file like this. That's it. (Replace ### with the values) flickr.cookies2encode = current_identity=###; cookie_accid=###; cookie_epass=###; * Added clearConfigProperty(...) to Utils.java * Modified AbstractHTMLRipper.java so that "no images found" IOException is thrown only for the 1st page. The rest will just log and break out of the while loop. * Added UsenetHub ripper. (http://adult.usenethub.com) * Added Picasa Web Albums ripper. 
(http://picasaweb.google.com) --- .../ripme/ripper/AbstractHTMLRipper.java | 14 +- .../ripme/ripper/rippers/FlickrRipper.java | 68 ++++++++- .../ripme/ripper/rippers/PicasaRipper.java | 124 ++++++++++++++++ .../ripme/ripper/rippers/UsenethubRipper.java | 133 ++++++++++++++++++ .../java/com/rarchives/ripme/utils/Utils.java | 5 + 5 files changed, 336 insertions(+), 8 deletions(-) create mode 100644 src/main/java/com/rarchives/ripme/ripper/rippers/PicasaRipper.java create mode 100644 src/main/java/com/rarchives/ripme/ripper/rippers/UsenethubRipper.java diff --git a/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java b/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java index c9bb0259b..ad1390235 100644 --- a/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java @@ -64,8 +64,11 @@ public void rip() throws IOException { sendUpdate(STATUS.LOADING_RESOURCE, this.url.toExternalForm()); Document doc = getFirstPage(); + boolean first = true; + while (doc != null) { List imageURLs = getURLsFromPage(doc); + // Remove all but 1 image if (isThisATest()) { while (imageURLs.size() > 1) { @@ -73,8 +76,15 @@ public void rip() throws IOException { } } + //if (imageURLs.size() == 0) { if (imageURLs.size() == 0) { - throw new IOException("No images found at " + doc.location()); + if (first) { + throw new IOException("No images found at " + doc.location()); + } + else { + logger.info("No images in page..."); + break; + } } for (String imageURL : imageURLs) { @@ -115,6 +125,8 @@ public void rip() throws IOException { logger.info("Can't get next page: " + e.getMessage()); break; } + + first = false; } // If they're using a thread pool, wait for it. 
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/FlickrRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/FlickrRipper.java index 71d35da10..e876e5eb4 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/FlickrRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/FlickrRipper.java @@ -31,6 +31,9 @@ public class FlickrRipper extends AbstractHTMLRipper { private Set attempted = new HashSet(); private Document albumDoc = null; private DownloadThreadPool flickrThreadPool; + private Map _cookies; + private String _cookieString; + @Override public DownloadThreadPool getThreadPool() { return flickrThreadPool; @@ -39,6 +42,25 @@ public DownloadThreadPool getThreadPool() { public FlickrRipper(URL url) throws IOException { super(url); flickrThreadPool = new DownloadThreadPool(); + _cookies = new HashMap(); + + // check for "flickr.cookies2encode" string and encode into "flickr.cookies" + String flickrEncode = Utils.getConfigString("flickr.cookies2encode", null); + if (flickrEncode != null && flickrEncode.length() > 0) + { + _cookieString = Base64.encode(flickrEncode.getBytes()); + Utils.setConfigString("flickr.cookies", _cookieString); + Utils.clearConfigProperty("flickr.cookies2encode"); + Utils.saveConfig(); + } + else + { + // get encoded "flickr.cookies" + _cookieString = Utils.getConfigString("flickr.cookies", null); + if (_cookieString == null) { + System.err.println("Could not find flickr cookies in configuration. 
Won't be able to rip pages requiring a login!"); + } + } } @Override @@ -122,9 +144,29 @@ public String getGID(URL url) throws MalformedURLException { @Override public Document getFirstPage() throws IOException { - if (albumDoc == null) { - albumDoc = Http.url(url).get(); + if (albumDoc == null) + { + try { + String decodedCookieString = new String(Base64.decode(_cookieString)); + + String[] cks = decodedCookieString.split(";"); + for (String s : cks) + { + int idx = s.indexOf("="); + if (idx == -1) + continue; + + String key = s.substring(0, idx); + String value = s.substring(idx+1); + + _cookies.put(key, value); + } + } catch (Exception e) { + } + + albumDoc = Http.url(url).cookies(_cookies).get(); } + return albumDoc; } @@ -134,13 +176,23 @@ public Document getNextPage(Document doc) throws IOException { return null; } // Find how many pages there are - int lastPage = 0; + /* for (Element apage : doc.select("a[data-track^=page-]")) { String lastPageStr = apage.attr("data-track").replace("page-", ""); lastPage = Integer.parseInt(lastPageStr); } + */ + + int nextPage = 0; + + try { + String nextPageStr = doc.select("span.this-page").first().html(); + nextPage = Integer.parseInt(nextPageStr); + } catch (Exception e) { + } + // If we're at the last page, stop. 
- if (page >= lastPage) { + if (page >= nextPage) { throw new IOException("No more pages"); } // Load the next page @@ -157,7 +209,7 @@ public Document getNextPage(Document doc) throws IOException { } catch (InterruptedException e) { throw new IOException("Interrupted while waiting to load next page " + nextURL); } - return Http.url(nextURL).get(); + return Http.url(nextURL).cookies(_cookies).get(); } @Override @@ -192,6 +244,7 @@ public List getURLsFromPage(Document page) { break; } } + return imageURLs; } @@ -270,7 +323,8 @@ public void run() { private Document getLargestImagePageDocument(URL url) throws IOException { // Get current page - Document doc = Http.url(url).get(); + Document doc = Http.url(url).cookies(_cookies).get(); + // Look for larger image page String largestImagePage = this.url.toExternalForm(); for (Element olSize : doc.select("ol.sizes-list > li > ol > li")) { @@ -288,7 +342,7 @@ private Document getLargestImagePageDocument(URL url) throws IOException { } if (!largestImagePage.equals(this.url.toExternalForm())) { // Found larger image page, get it. 
- doc = Http.url(largestImagePage).get(); + doc = Http.url(largestImagePage).cookies(_cookies).get(); } return doc; } diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/PicasaRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/PicasaRipper.java new file mode 100644 index 000000000..212058930 --- /dev/null +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/PicasaRipper.java @@ -0,0 +1,124 @@ +package com.rarchives.ripme.ripper.rippers; + +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; + +import com.rarchives.ripme.ripper.AbstractHTMLRipper; +import com.rarchives.ripme.utils.Http; + +public class PicasaRipper extends AbstractHTMLRipper { + + private Document albumDoc = null; + + public PicasaRipper(URL url) throws IOException { + super(url); + } + + @Override + public String getHost() { + return "picasa"; + } + @Override + public String getDomain() { + return "picasaweb.google.com"; + } + + @Override + public Document getFirstPage() throws IOException { + if (albumDoc == null) { + albumDoc = Http.url(url).get(); + } + return albumDoc; + } + + @Override + public Document getNextPage(Document doc) throws IOException { + return null; + } + + @Override + public List getURLsFromPage(Document doc) { + List imageURLs = new ArrayList(); + //for (Element thumb : doc.select("#lhid_content img")) + for (Element thumb : doc.select("img")) + { + if (!thumb.hasAttr("src")) { + continue; + } + + if (thumb.hasAttr("id") || thumb.hasAttr("width") || thumb.hasAttr("height")) + continue; + + /* + String cls = thumb.attr("class"); + if (cls == null || !cls.equals("goog-icon-list-icon-img")) + continue; + */ + + String image = thumb.attr("src"); + image = image.replaceAll( + "/s128/", + "/d/"); + imageURLs.add(image); + } + return 
imageURLs; + } + + @Override + public void downloadURL(URL url, int index) { + addURLToDownload(url, getPrefix(index)); + } + + @Override + public String getGID(URL url) throws MalformedURLException { + + Pattern p; Matcher m; + + p = Pattern.compile("^.*picasaweb.google.com/([0-9]+).*$"); + m = p.matcher(url.toExternalForm()); + if (m.matches()) { + return m.group(1); + } + + throw new MalformedURLException( + "Expected picasaweb.google.com gallery formats: " + + "picasaweb.google.com//... " + + " Got: " + url); + } + + @Override + public String getAlbumTitle(URL url) throws MalformedURLException { + + try { + String inUrl = url.toExternalForm(); + String sUrl; + + if (inUrl.endsWith("/")) + sUrl = inUrl.substring(0, inUrl.length()-1); + else + sUrl = inUrl; + + String id = sUrl.substring(sUrl.lastIndexOf('/') + 1); + id = id.replaceAll("noredirect=1", ""); + + if (id.endsWith("?")) + id = id.substring(0, id.length()-1); + + return getHost() + "_" + getGID(url) + "_" + id; + + } catch (Exception e) { + // Fall back to default album naming convention + } + + return super.getAlbumTitle(url); + } + +} \ No newline at end of file diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/UsenethubRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/UsenethubRipper.java new file mode 100644 index 000000000..df842150f --- /dev/null +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/UsenethubRipper.java @@ -0,0 +1,133 @@ +package com.rarchives.ripme.ripper.rippers; + +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; + +import com.rarchives.ripme.ripper.AbstractHTMLRipper; +import com.rarchives.ripme.utils.Http; + +public class UsenethubRipper extends AbstractHTMLRipper { + + private Document albumDoc = null; + + public 
UsenethubRipper(URL url) throws IOException { + super(url); + } + + @Override + public String getHost() { + return "usenethub"; + } + @Override + public String getDomain() { + return "adult.usenethub.com"; + } + + @Override + public Document getFirstPage() throws IOException { + if (albumDoc == null) { + albumDoc = Http.url(url).get(); + } + return albumDoc; + } + + @Override + public Document getNextPage(Document doc) throws IOException { + String nextURL = null; + for (Element a : doc.select("a.paging_next")) { + if (a.text().contains("→")) { + nextURL = "http://adult.usenethub.com" + a.attr("href"); + break; + } + } + if (nextURL == null) { + throw new IOException("No next page found"); + } + sleep(1000); + return Http.url(nextURL).get(); + } + + @Override + public List getURLsFromPage(Document doc) { + List imageURLs = new ArrayList(); + for (Element thumb : doc.select("#classic img")) { + + if (!thumb.hasAttr("src") || !thumb.hasAttr("alt")) { + continue; + } + + if (thumb.attr("alt").length() == 0 && thumb.hasAttr("width") && thumb.hasAttr("height")) { + continue; + } + + String image = thumb.attr("src"); + image = image.replaceAll( + "http://usebin.org/image/", + "http://usebin.org/source/"); + imageURLs.add(image); + } + return imageURLs; + } + + @Override + public void downloadURL(URL url, int index) { + addURLToDownload(url, getPrefix(index)); + } + + /* + @Override + public String getGID(URL url) throws MalformedURLException { + // TODO Auto-generated method stub + return null; + } + */ + + @Override + public String getGID(URL url) throws MalformedURLException { + + String inUrl = url.toExternalForm(); + String sUrl; + + if (inUrl.endsWith("/")) + sUrl = inUrl.substring(0, inUrl.length()-1); + else + sUrl = inUrl; + + String id = sUrl.substring(sUrl.lastIndexOf('/') + 1); + + if (id != null && id.length() > 0) + return id; + + throw new MalformedURLException( + "Expected usenethub.com gallery formats: " + + "imagefap.com/gallery.php?gid=####... 
or " + + "imagefap.com/pictures/####..." + + " Got: " + url); + } + + + @Override + public String getAlbumTitle(URL url) throws MalformedURLException { + try { + // Attempt to use album title as GID + String title = getFirstPage().title(); + Pattern p = Pattern.compile("^(.*) \\(Usenet Download\\)$"); + Matcher m = p.matcher(title); + if (m.matches()) { + return getHost() + "_" + m.group(1); + } + } catch (IOException e) { + // Fall back to default album naming convention + } + return super.getAlbumTitle(url); + } + +} \ No newline at end of file diff --git a/src/main/java/com/rarchives/ripme/utils/Utils.java b/src/main/java/com/rarchives/ripme/utils/Utils.java index aaf96402d..7f5c2f639 100644 --- a/src/main/java/com/rarchives/ripme/utils/Utils.java +++ b/src/main/java/com/rarchives/ripme/utils/Utils.java @@ -100,6 +100,11 @@ public static void setConfigList(String key, List list) { config.clearProperty(key); config.addProperty(key, list); } + + public static void clearConfigProperty(String key) + { + config.clearProperty(key); + } public static void saveConfig() { try {