From b98b86f4bc690e86a87e6290814e3a1d93331614 Mon Sep 17 00:00:00 2001 From: SilhouetteTR Date: Wed, 18 Feb 2015 22:07:58 +0200 Subject: [PATCH] Added login cookie support and paging fixes for FlickrRipper.java. Just get cookies from browser after logging in to Flickr and put one line in the config. After 1st time, cookies will be Base64 encoded in the config for some very simple security measure. -- How to get cookies easily (for end users) -- Chrome Browser: - Login to Flickr - Hit F12 for Developer Tools in browser - Go to Resources tab - Expand Cookies on the left - Select www.flickr.com - Get the values for these 3 cookies: current_identity, cookie_accid and cookie_epass - Put these in the "rip.properties" file like this. That's it. (Replace ### with the values) flickr.cookies2encode = current_identity=###; cookie_accid=###; cookie_epass=###; * Added clearConfigProperty(...) to Utils.java * Modified AbstractHTMLRipper.java so that "no images found" IOException is thrown only for the 1st page. The rest will just log and break out of the while loop. * Added UsenetHub ripper. (http://adult.usenethub.com) * Added Picasa Web Albums ripper. 
(http://picasaweb.google.com) --- .../ripme/ripper/AbstractHTMLRipper.java | 14 +- .../ripme/ripper/rippers/FlickrRipper.java | 68 ++++++++- .../ripme/ripper/rippers/PicasaRipper.java | 124 ++++++++++++++++ .../ripme/ripper/rippers/UsenethubRipper.java | 133 ++++++++++++++++++ .../java/com/rarchives/ripme/utils/Utils.java | 5 + 5 files changed, 336 insertions(+), 8 deletions(-) create mode 100644 src/main/java/com/rarchives/ripme/ripper/rippers/PicasaRipper.java create mode 100644 src/main/java/com/rarchives/ripme/ripper/rippers/UsenethubRipper.java diff --git a/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java b/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java index c9bb0259b..ad1390235 100644 --- a/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java @@ -64,8 +64,11 @@ public void rip() throws IOException { sendUpdate(STATUS.LOADING_RESOURCE, this.url.toExternalForm()); Document doc = getFirstPage(); + boolean first = true; + while (doc != null) { List imageURLs = getURLsFromPage(doc); + // Remove all but 1 image if (isThisATest()) { while (imageURLs.size() > 1) { @@ -73,8 +76,15 @@ public void rip() throws IOException { } } + //if (imageURLs.size() == 0) { if (imageURLs.size() == 0) { - throw new IOException("No images found at " + doc.location()); + if (first) { + throw new IOException("No images found at " + doc.location()); + } + else { + logger.info("No images in page..."); + break; + } } for (String imageURL : imageURLs) { @@ -115,6 +125,8 @@ public void rip() throws IOException { logger.info("Can't get next page: " + e.getMessage()); break; } + + first = false; } // If they're using a thread pool, wait for it. 
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/FlickrRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/FlickrRipper.java index 71d35da10..e876e5eb4 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/FlickrRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/FlickrRipper.java @@ -31,6 +31,9 @@ public class FlickrRipper extends AbstractHTMLRipper { private Set attempted = new HashSet(); private Document albumDoc = null; private DownloadThreadPool flickrThreadPool; + private Map _cookies; + private String _cookieString; + @Override public DownloadThreadPool getThreadPool() { return flickrThreadPool; @@ -39,6 +42,25 @@ public DownloadThreadPool getThreadPool() { public FlickrRipper(URL url) throws IOException { super(url); flickrThreadPool = new DownloadThreadPool(); + _cookies = new HashMap(); + + // check for "flickr.cookies2encode" string and encode into "flickr.cookies" + String flickrEncode = Utils.getConfigString("flickr.cookies2encode", null); + if (flickrEncode != null && flickrEncode.length() > 0) + { + _cookieString = Base64.encode(flickrEncode.getBytes()); + Utils.setConfigString("flickr.cookies", _cookieString); + Utils.clearConfigProperty("flickr.cookies2encode"); + Utils.saveConfig(); + } + else + { + // get encoded "flickr.cookies" + _cookieString = Utils.getConfigString("flickr.cookies", null); + if (_cookieString == null) { + System.err.println("Could not find flickr cookies in configuration. 
Won't be able to rip pages requiring a login!"); + } + } } @Override @@ -122,9 +144,29 @@ public String getGID(URL url) throws MalformedURLException { @Override public Document getFirstPage() throws IOException { - if (albumDoc == null) { - albumDoc = Http.url(url).get(); + if (albumDoc == null) + { + try { + String decodedCookieString = new String(Base64.decode(_cookieString)); + + String[] cks = decodedCookieString.split(";"); + for (String s : cks) + { + int idx = s.indexOf("="); + if (idx == -1) + continue; + + String key = s.substring(0, idx); + String value = s.substring(idx+1); + + _cookies.put(key, value); + } + } catch (Exception e) { + } + + albumDoc = Http.url(url).cookies(_cookies).get(); } + return albumDoc; } @@ -134,13 +176,23 @@ public Document getNextPage(Document doc) throws IOException { return null; } // Find how many pages there are - int lastPage = 0; + /* for (Element apage : doc.select("a[data-track^=page-]")) { String lastPageStr = apage.attr("data-track").replace("page-", ""); lastPage = Integer.parseInt(lastPageStr); } + */ + + int nextPage = 0; + + try { + String nextPageStr = doc.select("span.this-page").first().html(); + nextPage = Integer.parseInt(nextPageStr); + } catch (Exception e) { + } + // If we're at the last page, stop. 
- if (page >= lastPage) { + if (page >= nextPage) { throw new IOException("No more pages"); } // Load the next page @@ -157,7 +209,7 @@ public Document getNextPage(Document doc) throws IOException { } catch (InterruptedException e) { throw new IOException("Interrupted while waiting to load next page " + nextURL); } - return Http.url(nextURL).get(); + return Http.url(nextURL).cookies(_cookies).get(); } @Override @@ -192,6 +244,7 @@ public List getURLsFromPage(Document page) { break; } } + return imageURLs; } @@ -270,7 +323,8 @@ public void run() { private Document getLargestImagePageDocument(URL url) throws IOException { // Get current page - Document doc = Http.url(url).get(); + Document doc = Http.url(url).cookies(_cookies).get(); + // Look for larger image page String largestImagePage = this.url.toExternalForm(); for (Element olSize : doc.select("ol.sizes-list > li > ol > li")) { @@ -288,7 +342,7 @@ private Document getLargestImagePageDocument(URL url) throws IOException { } if (!largestImagePage.equals(this.url.toExternalForm())) { // Found larger image page, get it. 
- doc = Http.url(largestImagePage).get(); + doc = Http.url(largestImagePage).cookies(_cookies).get(); } return doc; } diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/PicasaRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/PicasaRipper.java new file mode 100644 index 000000000..212058930 --- /dev/null +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/PicasaRipper.java @@ -0,0 +1,124 @@ +package com.rarchives.ripme.ripper.rippers; + +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; + +import com.rarchives.ripme.ripper.AbstractHTMLRipper; +import com.rarchives.ripme.utils.Http; + +public class PicasaRipper extends AbstractHTMLRipper { + + private Document albumDoc = null; + + public PicasaRipper(URL url) throws IOException { + super(url); + } + + @Override + public String getHost() { + return "picasa"; + } + @Override + public String getDomain() { + return "picasaweb.google.com"; + } + + @Override + public Document getFirstPage() throws IOException { + if (albumDoc == null) { + albumDoc = Http.url(url).get(); + } + return albumDoc; + } + + @Override + public Document getNextPage(Document doc) throws IOException { + return null; + } + + @Override + public List getURLsFromPage(Document doc) { + List imageURLs = new ArrayList(); + //for (Element thumb : doc.select("#lhid_content img")) + for (Element thumb : doc.select("img")) + { + if (!thumb.hasAttr("src")) { + continue; + } + + if (thumb.hasAttr("id") || thumb.hasAttr("width") || thumb.hasAttr("height")) + continue; + + /* + String cls = thumb.attr("class"); + if (cls == null || !cls.equals("goog-icon-list-icon-img")) + continue; + */ + + String image = thumb.attr("src"); + image = image.replaceAll( + "/s128/", + "/d/"); + imageURLs.add(image); + } + return 
imageURLs; + } + + @Override + public void downloadURL(URL url, int index) { + addURLToDownload(url, getPrefix(index)); + } + + @Override + public String getGID(URL url) throws MalformedURLException { + + Pattern p; Matcher m; + + p = Pattern.compile("^.*picasaweb.google.com/([0-9]+).*$"); + m = p.matcher(url.toExternalForm()); + if (m.matches()) { + return m.group(1); + } + + throw new MalformedURLException( + "Expected picasaweb.google.com gallery formats: " + + "picasaweb.google.com//... " + + " Got: " + url); + } + + @Override + public String getAlbumTitle(URL url) throws MalformedURLException { + + try { + String inUrl = url.toExternalForm(); + String sUrl; + + if (inUrl.endsWith("/")) + sUrl = inUrl.substring(0, inUrl.length()-1); + else + sUrl = inUrl; + + String id = sUrl.substring(sUrl.lastIndexOf('/') + 1); + id = id.replaceAll("noredirect=1", ""); + + if (id.endsWith("?")) + id = id.substring(0, id.length()-1); + + return getHost() + "_" + getGID(url) + "_" + id; + + } catch (Exception e) { + // Fall back to default album naming convention + } + + return super.getAlbumTitle(url); + } + +} \ No newline at end of file diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/UsenethubRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/UsenethubRipper.java new file mode 100644 index 000000000..df842150f --- /dev/null +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/UsenethubRipper.java @@ -0,0 +1,133 @@ +package com.rarchives.ripme.ripper.rippers; + +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; + +import com.rarchives.ripme.ripper.AbstractHTMLRipper; +import com.rarchives.ripme.utils.Http; + +public class UsenethubRipper extends AbstractHTMLRipper { + + private Document albumDoc = null; + + public 
UsenethubRipper(URL url) throws IOException { + super(url); + } + + @Override + public String getHost() { + return "usenethub"; + } + @Override + public String getDomain() { + return "adult.usenethub.com"; + } + + @Override + public Document getFirstPage() throws IOException { + if (albumDoc == null) { + albumDoc = Http.url(url).get(); + } + return albumDoc; + } + + @Override + public Document getNextPage(Document doc) throws IOException { + String nextURL = null; + for (Element a : doc.select("a.paging_next")) { + if (a.text().contains("→")) { + nextURL = "http://adult.usenethub.com" + a.attr("href"); + break; + } + } + if (nextURL == null) { + throw new IOException("No next page found"); + } + sleep(1000); + return Http.url(nextURL).get(); + } + + @Override + public List getURLsFromPage(Document doc) { + List imageURLs = new ArrayList(); + for (Element thumb : doc.select("#classic img")) { + + if (!thumb.hasAttr("src") || !thumb.hasAttr("alt")) { + continue; + } + + if (thumb.attr("alt").length() == 0 && thumb.hasAttr("width") && thumb.hasAttr("height")) { + continue; + } + + String image = thumb.attr("src"); + image = image.replaceAll( + "http://usebin.org/image/", + "http://usebin.org/source/"); + imageURLs.add(image); + } + return imageURLs; + } + + @Override + public void downloadURL(URL url, int index) { + addURLToDownload(url, getPrefix(index)); + } + + /* + @Override + public String getGID(URL url) throws MalformedURLException { + // TODO Auto-generated method stub + return null; + } + */ + + @Override + public String getGID(URL url) throws MalformedURLException { + + String inUrl = url.toExternalForm(); + String sUrl; + + if (inUrl.endsWith("/")) + sUrl = inUrl.substring(0, inUrl.length()-1); + else + sUrl = inUrl; + + String id = sUrl.substring(sUrl.lastIndexOf('/') + 1); + + if (id != null && id.length() > 0) + return id; + + throw new MalformedURLException( + "Expected usenethub.com gallery formats: " + + "imagefap.com/gallery.php?gid=####... 
or " + + "imagefap.com/pictures/####..." + + " Got: " + url); + } + + + @Override + public String getAlbumTitle(URL url) throws MalformedURLException { + try { + // Attempt to use album title as GID + String title = getFirstPage().title(); + Pattern p = Pattern.compile("^(.*) \\(Usenet Download\\)$"); + Matcher m = p.matcher(title); + if (m.matches()) { + return getHost() + "_" + m.group(1); + } + } catch (IOException e) { + // Fall back to default album naming convention + } + return super.getAlbumTitle(url); + } + +} \ No newline at end of file diff --git a/src/main/java/com/rarchives/ripme/utils/Utils.java b/src/main/java/com/rarchives/ripme/utils/Utils.java index aaf96402d..7f5c2f639 100644 --- a/src/main/java/com/rarchives/ripme/utils/Utils.java +++ b/src/main/java/com/rarchives/ripme/utils/Utils.java @@ -100,6 +100,11 @@ public static void setConfigList(String key, List list) { config.clearProperty(key); config.addProperty(key, list); } + + public static void clearConfigProperty(String key) + { + config.clearProperty(key); + } public static void saveConfig() { try {