From edb114355d1c139de0b51580cd929a20f4edd56b Mon Sep 17 00:00:00 2001
From: Vaclav Klecanda
Date: Thu, 17 Dec 2015 16:12:44 +0100
Subject: [PATCH 1/4] settable divider

---
 lib/crawlme.js | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/lib/crawlme.js b/lib/crawlme.js
index 7441bab..5183424 100644
--- a/lib/crawlme.js
+++ b/lib/crawlme.js
@@ -8,9 +8,10 @@ var async = require('async');
 
 exports = module.exports = function(options) {
   // regex for stripping html of script tags. Borrowed from jQuery
   var stripScriptTags = /<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi;
-
+  // Default options
   options = options || {};
+  options.divider = options.divider || '#!';
   options.waitFor = options.waitFor || 1000;
   options.protocol = options.protocol || 'http';
   options.cacheSize = options.cacheSize || 1*1024*1024;
@@ -42,7 +43,7 @@
   // Get the URL to the AJAX version of this page
   function getAjaxUrl(req) {
     var urlParts = req.url.split('?_escaped_fragment_=');
-
+
     // If no fragment in URL this is not a request for an HTML snapshot
     // of an AJAX page.
     if (urlParts.length !== 2) return undefined;
@@ -58,7 +59,7 @@ exports = module.exports = function(options) {
      // We are dealing with crawlable an ajax page without a hash fragment
      url += path; // No hashbang or fragment
    } else {
-      url += path + '#!' + decodeURIComponent(fragment);
+      url += path + options.divider + decodeURIComponent(fragment);
    }
 
    return url;

From e6dac340958b745536f5333f622292dcbbf99c8c Mon Sep 17 00:00:00 2001
From: Vaclav Klecanda
Date: Wed, 26 Oct 2016 20:55:12 +0200
Subject: [PATCH 2/4] ignore

---
 .gitignore | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)
 create mode 100644 .gitignore

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..22990c0
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,16 @@
+lib-cov
+*.seed
+*.log
+*.csv
+*.dat
+*.out
+*.pid
+*.gz
+.*
+*.sqlite
+pids
+logs
+results
+
+node_modules
+npm-debug.log

From b14b2646d76b4d105e894671f73036d637f0b54c Mon Sep 17 00:00:00 2001
From: Vaclav Klecanda
Date: Wed, 26 Oct 2016 20:55:55 +0200
Subject: [PATCH 3/4] new zombie and adaptation

---
 lib/crawlme.js | 14 +++++++-------
 package.json   | 13 ++++++++++---
 2 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/lib/crawlme.js b/lib/crawlme.js
index 5183424..caf9de4 100644
--- a/lib/crawlme.js
+++ b/lib/crawlme.js
@@ -12,7 +12,7 @@ exports = module.exports = function(options) {
   // Default options
   options = options || {};
   options.divider = options.divider || '#!';
-  options.waitFor = options.waitFor || 1000;
+  options.waitFor = options.waitFor || 2000;
   options.protocol = options.protocol || 'http';
   options.cacheSize = options.cacheSize || 1*1024*1024;
   options.cacheRefresh = options.cacheRefresh || 15*60*1000; //15 minutes
@@ -51,7 +51,7 @@
 
     // Express adds a protocol property to the req object.
     var protocol = req.protocol || options.protocol;
-    var url = protocol + '://' + req.headers.host;
+    var url = process.env.CRAWLME_HOST || protocol + '://' + req.headers.host;
     var path = urlParts[0];
     var fragment = urlParts[1];
 
@@ -76,10 +76,10 @@ exports = module.exports = function(options) {
       if(cached) return cb(null, cached);
     }
 
-    Browser.visit(url, {waitFor: options.waitFor},
-      function(err, browser, status) {
-        if(err) return cb(err);
+    var browser = new Browser();
+    browser.visit(url, function() {
+      browser.wait(options.waitFor, function () {
 
         // links
         var links = browser.queryAll('a');
         links.forEach(function(link) {
@@ -93,8 +93,8 @@
         var snapshot = stripScripts(browser.html());
         cache.set(url, snapshot);
         cb(null, snapshot);
-      }
-    );
+      });
+    });
   }
 
   // Start the cache refresh timer

diff --git a/package.json b/package.json
index cf0f37b..67c6cd2 100644
--- a/package.json
+++ b/package.json
@@ -2,16 +2,23 @@
   "author": "Aron Kornhall (http://optimalbits.com)",
   "name": "crawlme",
   "description": "Makes your ajax web application indexable by search engines by generating html snapshots on the fly. Caches results for blazing fast responses and better page ranking.",
-  "keywords": ["ajax", "crawling", "google", "indexing", "SEO", "Search Engine Optimization"],
+  "keywords": [
+    "ajax",
+    "crawling",
+    "google",
+    "indexing",
+    "SEO",
+    "Search Engine Optimization"
+  ],
   "version": "0.0.7",
   "main": "./index.js",
   "engines": {
     "node": ">=0.6.10"
   },
   "dependencies": {
-    "zombie": "2.x.x",
+    "async": "0.2.6",
     "lru-cache": "2.3.x",
-    "async": "0.2.6"
+    "zombie": "^4.3.0"
   },
   "devDependencies": {
     "connect": "2.x.x",

From e80091eedc85d9bebb1f87b3cdc34c22fbc8efb6 Mon Sep 17 00:00:00 2001
From: vencax
Date: Tue, 13 Mar 2018 06:58:24 +0100
Subject: [PATCH 4/4] custom urlgetfunc

---
 lib/crawlme.js | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lib/crawlme.js b/lib/crawlme.js
index caf9de4..f7b0ce0 100644
--- a/lib/crawlme.js
+++ b/lib/crawlme.js
@@ -106,12 +106,13 @@ exports = module.exports = function(options) {
     if ('GET' !== req.method) return next();
 
     // Try to extract the ajax URL from the request
-    var url = getAjaxUrl(req);
+    var url = options.getUrl ? options.getUrl(req) : getAjaxUrl(req);
 
     // If we aren't being crawled continue to next middleware
     if (!url) return next();
 
     // Generate the snapshot
+    console.log('Zombie wants to eat: ' + url);
     getHTMLSnapshot(url, function(err, snapshot) {
       if (err) {
         console.log('Zombie reported an error: ' + err);
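
Taken together, the four patches make the hash divider (options.divider, patch 1), the snapshot host (the CRAWLME_HOST environment variable, patch 3) and the URL extraction (options.getUrl, patch 4) configurable. Below is a minimal usage sketch of how the patched middleware could be mounted; the Express app, the port, and the getUrl mapping are illustrative assumptions, not part of the patches themselves.

// app.js -- minimal sketch for the patched crawlme middleware.
// Assumes Express and the patched crawlme are installed; the port and
// the getUrl mapping are hypothetical examples.
var express = require('express');
var crawlme = require('crawlme');

var app = express();

app.use(crawlme({
  divider: '#!',   // fragment divider from patch 1 ('#!' is the default)
  waitFor: 2000,   // ms zombie waits before snapshotting (patch 3 default)
  // Optional hook from patch 4: derive the snapshot URL yourself.
  // Returning a falsy value hands the request to the next middleware,
  // exactly as the built-in getAjaxUrl does.
  getUrl: function(req) {
    var parts = req.url.split('?_escaped_fragment_=');
    if (parts.length !== 2) return undefined;
    return 'http://localhost:3000' + parts[0] + '#!' +
      decodeURIComponent(parts[1]);
  }
}));

app.use(express.static(__dirname + '/public'));

app.listen(3000);

// Patch 3 also reads CRAWLME_HOST, so the host used for snapshots can be
// overridden without code changes:
//   CRAWLME_HOST=https://example.com node app.js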