diff --git a/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java b/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java
index c9bb0259b..ad1390235 100644
--- a/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java
@@ -64,8 +64,12 @@ public void rip() throws IOException {
         sendUpdate(STATUS.LOADING_RESOURCE, this.url.toExternalForm());
         Document doc = getFirstPage();
 
+        // True until the first page has been processed: only that page must contain images.
+        boolean first = true;
+
         while (doc != null) {
             List imageURLs = getURLsFromPage(doc);
+
             // Remove all but 1 image
             if (isThisATest()) {
                 while (imageURLs.size() > 1) {
@@ -73,8 +77,14 @@ public void rip() throws IOException {
                 }
             }
 
+            // An empty first page is an error; an empty later page simply ends the paging loop.
             if (imageURLs.size() == 0) {
-                throw new IOException("No images found at " + doc.location());
+                if (first) {
+                    throw new IOException("No images found at " + doc.location());
+                } else {
+                    logger.info("No images in page...");
+                    break;
+                }
             }
 
             for (String imageURL : imageURLs) {
@@ -115,6 +125,8 @@ public void rip() throws IOException {
                 logger.info("Can't get next page: " + e.getMessage());
                 break;
             }
+
+            first = false;
         }
 
         // If they're using a thread pool, wait for it.
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/FlickrRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/FlickrRipper.java
index 71d35da10..e876e5eb4 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/FlickrRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/FlickrRipper.java
@@ -31,6 +31,11 @@ public class FlickrRipper extends AbstractHTMLRipper {
     private Set attempted = new HashSet();
     private Document albumDoc = null;
     private DownloadThreadPool flickrThreadPool;
+    // Cookies passed to Http...cookies(...) when loading pages; parsed in getFirstPage().
+    private Map<String, String> _cookies;
+    // Base64-encoded cookie string from the "flickr.cookies" setting; null when not configured.
+    private String _cookieString;
+
     @Override
     public DownloadThreadPool getThreadPool() {
         return flickrThreadPool;
@@ -39,6 +44,23 @@ public DownloadThreadPool getThreadPool() {
     public FlickrRipper(URL url) throws IOException {
         super(url);
         flickrThreadPool = new DownloadThreadPool();
+        _cookies = new HashMap<String, String>();
+
+        // A "flickr.cookies2encode" entry is stored Base64-encoded as "flickr.cookies"
+        // and then removed from the configuration, which is saved right away.
+        String flickrEncode = Utils.getConfigString("flickr.cookies2encode", null);
+        if (flickrEncode != null && flickrEncode.length() > 0) {
+            _cookieString = Base64.encode(flickrEncode.getBytes());
+            Utils.setConfigString("flickr.cookies", _cookieString);
+            Utils.clearConfigProperty("flickr.cookies2encode");
+            Utils.saveConfig();
+        } else {
+            // Use the already encoded "flickr.cookies" entry, if there is one.
+            _cookieString = Utils.getConfigString("flickr.cookies", null);
+            if (_cookieString == null) {
+                System.err.println("Could not find flickr cookies in configuration. Won't be able to rip pages requiring a login!");
+            }
+        }
     }
 
     @Override
@@ -123,8 +145,28 @@ public String getGID(URL url) throws MalformedURLException {
     @Override
     public Document getFirstPage() throws IOException {
         if (albumDoc == null) {
-            albumDoc = Http.url(url).get();
+            // Cookies are optional: without them pages requiring a login cannot be ripped.
+            if (_cookieString != null) {
+                try {
+                    String decodedCookieString = new String(Base64.decode(_cookieString));
+                    // The decoded value is expected to look like "name=value;name2=value2".
+                    for (String s : decodedCookieString.split(";")) {
+                        int idx = s.indexOf("=");
+                        if (idx == -1) {
+                            continue;
+                        }
+                        // trim() the name: pairs separated by "; " would otherwise keep a leading space.
+                        _cookies.put(s.substring(0, idx).trim(), s.substring(idx + 1));
+                    }
+                } catch (Exception e) {
+                    // Best effort: a malformed cookie setting must not abort the rip.
+                    System.err.println("Could not parse flickr cookies: " + e.getMessage());
+                }
+            }
+
+            albumDoc = Http.url(url).cookies(_cookies).get();
         }
+
         return albumDoc;
     }
 
@@ -134,13 +176,16 @@ public Document getNextPage(Document doc) throws IOException {
             return null;
         }
-        // Find how many pages there are
-        int lastPage = 0;
-        for (Element apage : doc.select("a[data-track^=page-]")) {
-            String lastPageStr = apage.attr("data-track").replace("page-", "");
-            lastPage = Integer.parseInt(lastPageStr);
-        }
+        // Read the page number shown in the pager's "span.this-page" element.
+        int nextPage = 0;
+        try {
+            String nextPageStr = doc.select("span.this-page").first().html();
+            nextPage = Integer.parseInt(nextPageStr);
+        } catch (Exception e) {
+            // Missing or non-numeric "span.this-page": nextPage stays 0.
+        }
+
         // If we're at the last page, stop.
-        if (page >= lastPage) {
+        if (page >= nextPage) {
             throw new IOException("No more pages");
         }
         // Load the next page
@@ -157,7 +202,7 @@ public Document getNextPage(Document doc) throws IOException {
         } catch (InterruptedException e) {
             throw new IOException("Interrupted while waiting to load next page " + nextURL);
         }
-        return Http.url(nextURL).get();
+        return Http.url(nextURL).cookies(_cookies).get();
     }
 
     @Override
@@ -270,7 +315,7 @@ public void run() {
 
         private Document getLargestImagePageDocument(URL url) throws IOException {
             // Get current page
-            Document doc = Http.url(url).get();
+            Document doc = Http.url(url).cookies(_cookies).get();
             // Look for larger image page
             String largestImagePage = this.url.toExternalForm();
             for (Element olSize : doc.select("ol.sizes-list > li > ol > li")) {
@@ -288,7 +333,7 @@ private Document getLargestImagePageDocument(URL url) throws IOException {
             }
             if (!largestImagePage.equals(this.url.toExternalForm())) {
                 // Found larger image page, get it.
-                doc = Http.url(largestImagePage).get();
+                doc = Http.url(largestImagePage).cookies(_cookies).get();
             }
             return doc;
         }
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/PicasaRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/PicasaRipper.java
new file mode 100644
index 000000000..212058930
--- /dev/null
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/PicasaRipper.java
@@ -0,0 +1,112 @@
+package com.rarchives.ripme.ripper.rippers;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+
+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
+import com.rarchives.ripme.utils.Http;
+
+/**
+ * Ripper for picasaweb.google.com albums. Only the album page itself is read;
+ * there is no pagination.
+ */
+public class PicasaRipper extends AbstractHTMLRipper {
+
+    /** Matches album URLs and captures the numeric id that follows the domain. */
+    private static final Pattern GID_PATTERN =
+            Pattern.compile("^.*picasaweb\\.google\\.com/([0-9]+).*$");
+
+    private Document albumDoc = null;
+
+    public PicasaRipper(URL url) throws IOException {
+        super(url);
+    }
+
+    @Override
+    public String getHost() {
+        return "picasa";
+    }
+
+    @Override
+    public String getDomain() {
+        return "picasaweb.google.com";
+    }
+
+    @Override
+    public Document getFirstPage() throws IOException {
+        if (albumDoc == null) {
+            albumDoc = Http.url(url).get();
+        }
+        return albumDoc;
+    }
+
+    @Override
+    public Document getNextPage(Document doc) throws IOException {
+        // Albums are treated as a single page: null means "no further page".
+        return null;
+    }
+
+    @Override
+    public List<String> getURLsFromPage(Document doc) {
+        List<String> imageURLs = new ArrayList<String>();
+        for (Element thumb : doc.select("img")) {
+            if (!thumb.hasAttr("src")) {
+                continue;
+            }
+            // Images carrying an id, width or height attribute are skipped.
+            if (thumb.hasAttr("id") || thumb.hasAttr("width") || thumb.hasAttr("height")) {
+                continue;
+            }
+            // Swap the "/s128/" path segment for "/d/" (presumably thumbnail -> original).
+            imageURLs.add(thumb.attr("src").replace("/s128/", "/d/"));
+        }
+        return imageURLs;
+    }
+
+    @Override
+    public void downloadURL(URL url, int index) {
+        addURLToDownload(url, getPrefix(index));
+    }
+
+    @Override
+    public String getGID(URL url) throws MalformedURLException {
+        Matcher m = GID_PATTERN.matcher(url.toExternalForm());
+        if (m.matches()) {
+            return m.group(1);
+        }
+        throw new MalformedURLException(
+                "Expected picasaweb.google.com gallery formats: "
+                        + "picasaweb.google.com/####/... "
+                        + " Got: " + url);
+    }
+
+    @Override
+    public String getAlbumTitle(URL url) throws MalformedURLException {
+        try {
+            String sUrl = url.toExternalForm();
+            if (sUrl.endsWith("/")) {
+                sUrl = sUrl.substring(0, sUrl.length() - 1);
+            }
+
+            // Last path segment, with the "noredirect=1" parameter and a dangling "?" removed.
+            String id = sUrl.substring(sUrl.lastIndexOf('/') + 1);
+            id = id.replace("noredirect=1", "");
+            if (id.endsWith("?")) {
+                id = id.substring(0, id.length() - 1);
+            }
+
+            return getHost() + "_" + getGID(url) + "_" + id;
+        } catch (Exception e) {
+            // Fall back to default album naming convention
+        }
+        return super.getAlbumTitle(url);
+    }
+}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/UsenethubRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/UsenethubRipper.java
new file mode 100644
index 000000000..df842150f
--- /dev/null
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/UsenethubRipper.java
@@ -0,0 +1,122 @@
+package com.rarchives.ripme.ripper.rippers;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+
+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
+import com.rarchives.ripme.utils.Http;
+
+/**
+ * Ripper for adult.usenethub.com galleries; follows the "next" pager link from page to page.
+ */
+public class UsenethubRipper extends AbstractHTMLRipper {
+
+    /** Matches page titles of the form "TITLE (Usenet Download)"; group 1 is TITLE. */
+    private static final Pattern TITLE_PATTERN = Pattern.compile("^(.*) \\(Usenet Download\\)$");
+
+    private Document albumDoc = null;
+
+    public UsenethubRipper(URL url) throws IOException {
+        super(url);
+    }
+
+    @Override
+    public String getHost() {
+        return "usenethub";
+    }
+
+    @Override
+    public String getDomain() {
+        return "adult.usenethub.com";
+    }
+
+    @Override
+    public Document getFirstPage() throws IOException {
+        if (albumDoc == null) {
+            albumDoc = Http.url(url).get();
+        }
+        return albumDoc;
+    }
+
+    @Override
+    public Document getNextPage(Document doc) throws IOException {
+        String nextURL = null;
+        for (Element a : doc.select("a.paging_next")) {
+            if (a.text().contains("→")) {
+                nextURL = "http://adult.usenethub.com" + a.attr("href");
+                break;
+            }
+        }
+        if (nextURL == null) {
+            throw new IOException("No next page found");
+        }
+        // sleep(1000): presumably a one-second pause between page requests.
+        sleep(1000);
+        return Http.url(nextURL).get();
+    }
+
+    @Override
+    public List<String> getURLsFromPage(Document doc) {
+        List<String> imageURLs = new ArrayList<String>();
+        for (Element thumb : doc.select("#classic img")) {
+            if (!thumb.hasAttr("src") || !thumb.hasAttr("alt")) {
+                continue;
+            }
+            // Images with an empty alt text plus explicit width and height are skipped.
+            if (thumb.attr("alt").length() == 0 && thumb.hasAttr("width") && thumb.hasAttr("height")) {
+                continue;
+            }
+            // Point the URL at usebin's "source" path instead of its "image" path.
+            imageURLs.add(thumb.attr("src").replace(
+                    "http://usebin.org/image/",
+                    "http://usebin.org/source/"));
+        }
+        return imageURLs;
+    }
+
+    @Override
+    public void downloadURL(URL url, int index) {
+        addURLToDownload(url, getPrefix(index));
+    }
+
+    @Override
+    public String getGID(URL url) throws MalformedURLException {
+        String sUrl = url.toExternalForm();
+        if (sUrl.endsWith("/")) {
+            sUrl = sUrl.substring(0, sUrl.length() - 1);
+        }
+
+        // The gallery id is the last path segment of the URL.
+        String id = sUrl.substring(sUrl.lastIndexOf('/') + 1);
+        if (id.length() > 0) {
+            return id;
+        }
+
+        throw new MalformedURLException(
+                "Expected usenethub.com gallery format: "
+                        + "adult.usenethub.com/.../####"
+                        + " Got: " + url);
+    }
+
+    @Override
+    public String getAlbumTitle(URL url) throws MalformedURLException {
+        try {
+            // Attempt to use album title as GID
+            Matcher m = TITLE_PATTERN.matcher(getFirstPage().title());
+            if (m.matches()) {
+                return getHost() + "_" + m.group(1);
+            }
+        } catch (IOException e) {
+            // Fall back to default album naming convention
+        }
+        return super.getAlbumTitle(url);
+    }
+}
diff --git a/src/main/java/com/rarchives/ripme/utils/Utils.java b/src/main/java/com/rarchives/ripme/utils/Utils.java
index aaf96402d..7f5c2f639 100644
--- a/src/main/java/com/rarchives/ripme/utils/Utils.java
+++ b/src/main/java/com/rarchives/ripme/utils/Utils.java
@@ -100,6 +100,14 @@ public static void setConfigList(String key, List list) {
         config.clearProperty(key);
         config.addProperty(key, list);
     }
+
+    /**
+     * Removes the given key from the loaded configuration.
+     * This method does not save; callers that need the removal persisted call saveConfig().
+     */
+    public static void clearConfigProperty(String key) {
+        config.clearProperty(key);
+    }
 
     public static void saveConfig() {
         try {