Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java
Original file line number Diff line number Diff line change
Expand Up @@ -64,17 +64,27 @@ public void rip() throws IOException {
sendUpdate(STATUS.LOADING_RESOURCE, this.url.toExternalForm());
Document doc = getFirstPage();

boolean first = true;

while (doc != null) {
List<String> imageURLs = getURLsFromPage(doc);

// Remove all but 1 image
if (isThisATest()) {
while (imageURLs.size() > 1) {
imageURLs.remove(1);
}
}

//if (imageURLs.size() == 0) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please remove redundant code comment.

if (imageURLs.size() == 0) {
throw new IOException("No images found at " + doc.location());
if (first) {
throw new IOException("No images found at " + doc.location());
}
else {
logger.info("No images in page...");
break;
}
}

for (String imageURL : imageURLs) {
Expand Down Expand Up @@ -115,6 +125,8 @@ public void rip() throws IOException {
logger.info("Can't get next page: " + e.getMessage());
break;
}

first = false;
}

// If they're using a thread pool, wait for it.
Expand Down
68 changes: 61 additions & 7 deletions src/main/java/com/rarchives/ripme/ripper/rippers/FlickrRipper.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ public class FlickrRipper extends AbstractHTMLRipper {
private Set<String> attempted = new HashSet<String>();
private Document albumDoc = null;
private DownloadThreadPool flickrThreadPool;
private Map<String, String> _cookies;
private String _cookieString;

@Override
public DownloadThreadPool getThreadPool() {
return flickrThreadPool;
Expand All @@ -39,6 +42,25 @@ public DownloadThreadPool getThreadPool() {
public FlickrRipper(URL url) throws IOException {
super(url);
flickrThreadPool = new DownloadThreadPool();
_cookies = new HashMap<String, String>();

// check for "flickr.cookies2encode" string and encode into "flickr.cookies"
String flickrEncode = Utils.getConfigString("flickr.cookies2encode", null);
if (flickrEncode != null && flickrEncode.length() > 0)
{
_cookieString = Base64.encode(flickrEncode.getBytes());
Utils.setConfigString("flickr.cookies", _cookieString);
Utils.clearConfigProperty("flickr.cookies2encode");
Utils.saveConfig();
}
else
{
// get encoded "flickr.cookies"
_cookieString = Utils.getConfigString("flickr.cookies", null);
if (_cookieString == null) {
System.err.println("Could not find flickr cookies in configuration. Won't be able to rip pages requiring a login!");
}
}
}

@Override
Expand Down Expand Up @@ -122,9 +144,29 @@ public String getGID(URL url) throws MalformedURLException {

@Override
public Document getFirstPage() throws IOException {
if (albumDoc == null) {
albumDoc = Http.url(url).get();
if (albumDoc == null)
{
try {
String decodedCookieString = new String(Base64.decode(_cookieString));

String[] cks = decodedCookieString.split(";");
for (String s : cks)
{
int idx = s.indexOf("=");
if (idx == -1)
continue;

String key = s.substring(0, idx);
String value = s.substring(idx+1);

_cookies.put(key, value);
}
} catch (Exception e) {
}

albumDoc = Http.url(url).cookies(_cookies).get();
}

return albumDoc;
}

Expand All @@ -134,13 +176,23 @@ public Document getNextPage(Document doc) throws IOException {
return null;
}
// Find how many pages there are
int lastPage = 0;
/*
for (Element apage : doc.select("a[data-track^=page-]")) {
String lastPageStr = apage.attr("data-track").replace("page-", "");
lastPage = Integer.parseInt(lastPageStr);
}
*/

int nextPage = 0;

try {
String nextPageStr = doc.select("span.this-page").first().html();
nextPage = Integer.parseInt(nextPageStr);
} catch (Exception e) {
}

// If we're at the last page, stop.
if (page >= lastPage) {
if (page >= nextPage) {
throw new IOException("No more pages");
}
// Load the next page
Expand All @@ -157,7 +209,7 @@ public Document getNextPage(Document doc) throws IOException {
} catch (InterruptedException e) {
throw new IOException("Interrupted while waiting to load next page " + nextURL);
}
return Http.url(nextURL).get();
return Http.url(nextURL).cookies(_cookies).get();
}

@Override
Expand Down Expand Up @@ -192,6 +244,7 @@ public List<String> getURLsFromPage(Document page) {
break;
}
}

return imageURLs;
}

Expand Down Expand Up @@ -270,7 +323,8 @@ public void run() {

private Document getLargestImagePageDocument(URL url) throws IOException {
// Get current page
Document doc = Http.url(url).get();
Document doc = Http.url(url).cookies(_cookies).get();

// Look for larger image page
String largestImagePage = this.url.toExternalForm();
for (Element olSize : doc.select("ol.sizes-list > li > ol > li")) {
Expand All @@ -288,7 +342,7 @@ private Document getLargestImagePageDocument(URL url) throws IOException {
}
if (!largestImagePage.equals(this.url.toExternalForm())) {
// Found larger image page, get it.
doc = Http.url(largestImagePage).get();
doc = Http.url(largestImagePage).cookies(_cookies).get();
}
return doc;
}
Expand Down
124 changes: 124 additions & 0 deletions src/main/java/com/rarchives/ripme/ripper/rippers/PicasaRipper.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
package com.rarchives.ripme.ripper.rippers;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;

import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;

public class PicasaRipper extends AbstractHTMLRipper {

    /**
     * Matches picasaweb.google.com gallery URLs whose path begins with a
     * numeric user ID, capturing that ID. Compiled once — regex compilation
     * is comparatively expensive and the pattern never changes.
     */
    private static final Pattern GID_PATTERN =
            Pattern.compile("^.*picasaweb\\.google\\.com/([0-9]+).*$");

    // Cached album landing page so repeated getFirstPage() calls don't re-fetch.
    private Document albumDoc = null;

    public PicasaRipper(URL url) throws IOException {
        super(url);
    }

    @Override
    public String getHost() {
        return "picasa";
    }

    @Override
    public String getDomain() {
        return "picasaweb.google.com";
    }

    /**
     * Fetches the album's landing page, caching it for subsequent calls.
     *
     * @return the parsed album page
     * @throws IOException if the page cannot be retrieved
     */
    @Override
    public Document getFirstPage() throws IOException {
        if (albumDoc == null) {
            albumDoc = Http.url(url).get();
        }
        return albumDoc;
    }

    /**
     * Picasa albums are ripped from a single page; there is never a next page.
     *
     * @return always {@code null}, signalling the end of pagination
     */
    @Override
    public Document getNextPage(Document doc) throws IOException {
        return null;
    }

    /**
     * Collects full-size image URLs from the album page.
     *
     * Every {@code <img>} with a {@code src} attribute is considered; images
     * carrying an {@code id}, {@code width}, or {@code height} attribute are
     * skipped — in this page layout those are page chrome (icons, logos)
     * rather than album thumbnails. The 128px thumbnail path segment is
     * rewritten to the full-size ("/d/") variant.
     *
     * @param doc the album page to scan
     * @return full-size image URLs in page order (possibly empty)
     */
    @Override
    public List<String> getURLsFromPage(Document doc) {
        List<String> imageURLs = new ArrayList<String>();
        for (Element thumb : doc.select("img")) {
            if (!thumb.hasAttr("src")) {
                continue;
            }
            if (thumb.hasAttr("id") || thumb.hasAttr("width") || thumb.hasAttr("height")) {
                continue;
            }
            // "/s128/" and "/d/" are literal path segments, so plain
            // replace() is correct here — no regex semantics needed.
            String image = thumb.attr("src").replace("/s128/", "/d/");
            imageURLs.add(image);
        }
        return imageURLs;
    }

    @Override
    public void downloadURL(URL url, int index) {
        addURLToDownload(url, getPrefix(index));
    }

    /**
     * Extracts the numeric gallery ID from a picasaweb.google.com URL.
     *
     * @param url the album URL
     * @return the numeric user/gallery ID
     * @throws MalformedURLException if the URL does not match the expected format
     */
    @Override
    public String getGID(URL url) throws MalformedURLException {
        Matcher m = GID_PATTERN.matcher(url.toExternalForm());
        if (m.matches()) {
            return m.group(1);
        }
        throw new MalformedURLException(
                "Expected picasaweb.google.com gallery formats: "
                + "picasaweb.google.com/<ID>/... "
                + " Got: " + url);
    }

    /**
     * Builds an album title of the form {@code picasa_<GID>_<lastPathSegment>},
     * stripping any trailing slash and a {@code noredirect=1} query fragment.
     * Falls back to the superclass naming convention if anything goes wrong.
     *
     * @param url the album URL
     * @return the derived album title, or the superclass default on failure
     * @throws MalformedURLException propagated from the superclass fallback
     */
    @Override
    public String getAlbumTitle(URL url) throws MalformedURLException {
        try {
            String inUrl = url.toExternalForm();
            String sUrl;

            if (inUrl.endsWith("/")) {
                sUrl = inUrl.substring(0, inUrl.length() - 1);
            } else {
                sUrl = inUrl;
            }

            String id = sUrl.substring(sUrl.lastIndexOf('/') + 1);
            // Literal query fragment, not a regex — plain replace() suffices.
            id = id.replace("noredirect=1", "");

            if (id.endsWith("?")) {
                id = id.substring(0, id.length() - 1);
            }

            return getHost() + "_" + getGID(url) + "_" + id;
        } catch (Exception ignored) {
            // Best-effort title derivation: any failure (odd URL shape,
            // GID mismatch) falls back to the default naming convention.
        }

        return super.getAlbumTitle(url);
    }

}
Loading