diff --git a/.gitignore b/.gitignore
index 91dfed8..0aa35d8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,3 @@
.DS_Store
-node_modules
\ No newline at end of file
+node_modules
+.idea
diff --git a/README.md b/README.md
index 3527914..2d9f731 100644
--- a/README.md
+++ b/README.md
@@ -1,28 +1,36 @@
-# node-wget
+# wget-improved
-A download tool, now supporting http/https resource and http/https proxy, written in nodejs.
+wget-improved simplifies retrieving files from any http or https URL.
+
+Improvements over [wuchengwei/node-wget](https://github.com/wuchengwei/node-wget)
+- Handles 301, 302, and 307 redirects (and detects infinite redirect loops)
+- Passes URL parameters
+- Better error reporting
+- Does not open the output file in append mode (uses w+, matching wget's behavior)
+- Handles gzip compression, allowing you to automatically gunzip the stream
+
+## Install
-# Installing
```
-npm install wget
+npm install wget-improved --save
```
-# Usage
-
-
## download(src, output, options)
```js
-var wget = require('wget');
-var src = 'https://raw.github.com/Fyrd/caniuse/master/data.json';
-var output = '/tmp/data.json';
+var wget = require('wget-improved');
+var src = 'http://nodejs.org/images/logo.svg';
+var output = '/tmp/logo.svg';
var options = {
- proxy: 'http://host:port'
+ // see options below
};
var download = wget.download(src, output, options);
download.on('error', function(err) {
console.log(err);
});
+download.on('start', function(fileSize) {
+ console.log(fileSize);
+});
download.on('end', function(output) {
console.log(output);
});
@@ -31,7 +39,6 @@ download.on('progress', function(progress) {
});
```
-
## request(options, callback)
```js
@@ -64,4 +71,33 @@ req.end();
req.on('error', function(err) {
console.log(err);
});
-```
\ No newline at end of file
+```
+
+## options
+
+```js
+var options = {
+    // Set to true to have any gzip stream automatically decompressed before saving
+    gunzip: false,
+    proxy: {
+        protocol: 'http',
+        host: 'someproxy.org',
+        port: 1337,
+        proxyAuth: '{basic auth}',
+        headers: {'User-Agent': 'Node'}
+    }
+};
+```
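+
+For example, downloading a gzip-encoded file and decompressing it on the fly might look like the sketch below (the URL and output path are illustrative):
+
+```js
+var wget = require('wget-improved');
+
+var options = {
+    gunzip: true // decompress a gzip-encoded response before it is written to disk
+};
+
+var download = wget.download('http://example.com/data.json', '/tmp/data.json', options);
+download.on('start', function(fileSize) {
+    console.log('Expecting ' + fileSize + ' bytes');
+});
+download.on('progress', function(progress) {
+    // progress is a ratio of downloaded bytes to total bytes
+    console.log(Math.round(progress * 100) + '%');
+});
+download.on('end', function(message) {
+    console.log(message);
+});
+download.on('error', function(err) {
+    console.error(err);
+});
+```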
+
+## CLI
+
+```bash
+# If installed globally
+nwget https://raw.github.com/Fyrd/caniuse/master/data.json -O /tmp/data.json
+
+# If not installed globally
+./node_modules/.bin/nwget https://raw.github.com/Fyrd/caniuse/master/data.json -O /tmp/data.json
+```
+
+## Todo
+
+- Enable gzip decompression when using the request method
diff --git a/bin/nwget b/bin/nwget
new file mode 100755
index 0000000..8881d62
--- /dev/null
+++ b/bin/nwget
@@ -0,0 +1,42 @@
+#!/usr/bin/env node
+
+/**
+
+Usage:
+nwget [URL] [OPTIONS]
+
+Example:
+nwget https://raw.github.com/Fyrd/caniuse/master/data.json -O /tmp/data.json
+
+**/
+
+var wget = require('../lib/wget');
+var path = require('path');
+
+var args = require('minimist')(process.argv);
+var url = args._[2];
+var output = args.O;
+
+if (args.help) return console.log(`Usage: nwget [URL] [OPTIONS]
+
+Download:
+ -O, --output-document=FILE write documents to FILE
+`)
+
+if (!url) return console.error('The first argument must be a URL to a downloadable resource (e.g. nwget https://raw.github.com/Fyrd/caniuse/master/data.json)')
+
+if (!output) return console.error('The second argument must be a file path for the downloaded resource (e.g. nwget https://raw.github.com/Fyrd/caniuse/master/data.json -O /tmp/data.json)')
+
+var download = wget.download(url, path.resolve(output));
+
+download.on('error', function(err) {
+ console.error(err);
+});
+download.on('end', function(output) {
+ console.log(output);
+});
+download.on('progress', function(progress) {
+ console.log(progress);
+});
diff --git a/bin/nwget.js b/bin/nwget.js
deleted file mode 100644
index e69de29..0000000
diff --git a/lib/wget.js b/lib/wget.js
index 1b55d3e..0934644 100644
--- a/lib/wget.js
+++ b/lib/wget.js
@@ -4,18 +4,34 @@ var http = require('http');
var https = require('https');
var tunnel = require('tunnel');
var url = require('url');
-var util = require('util');
+var zlib = require('zlib');
var fs = require('fs');
var EventEmitter = require('events').EventEmitter;
-function download(src, output, options) {
- var downloader = new EventEmitter(),
+/**
+ * Downloads a file using an http or https GET request
+ * @param {string} src - The http URL to download from
+ * @param {string} output - The filepath to save to
+ * @param {object} options - Options object
+ * @param {object} _parentEvent - Used when a redirect occurs so state is carried over to the follow-up request
+ * @param {number} redirects - The number of redirects, used to prevent infinite loops
+ * @returns {*|EventEmitter}
+ */
+function download(src, output, options, _parentEvent, redirects) {
+ if(typeof redirects === "undefined") {
+ redirects = 0;
+ }
+ var downloader = _parentEvent || new EventEmitter(),
srcUrl,
tunnelAgent,
req;
- if (options) {
+ if (options) {
options = parseOptions('download', options);
+ } else {
+ options = {
+ gunzip: false
+ };
}
srcUrl = url.parse(src);
srcUrl.protocol = cleanProtocol(srcUrl.protocol);
@@ -24,16 +40,27 @@ function download(src, output, options) {
protocol: srcUrl.protocol,
host: srcUrl.hostname,
port: srcUrl.port,
- path: srcUrl.pathname,
+ path: srcUrl.pathname + (srcUrl.search || ""),
proxy: options?options.proxy:undefined,
method: 'GET'
}, function(res) {
var fileSize, writeStream, downloadedSize;
+ var gunzip = zlib.createGunzip();
+
+            // Handle redirects (301, 302, 307) and stop on infinite redirect loops
+            if(res.statusCode === 301 || res.statusCode === 302 || res.statusCode === 307) {
+                redirects++;
+                if(redirects >= 10) {
+                    downloader.emit('error', 'Infinite redirect loop detected');
+                    return;
+                }
+                download(res.headers.location, output, options, downloader, redirects);
+                return;
+            }
+
if (res.statusCode === 200) {
downloadedSize = 0;
fileSize = res.headers['content-length'];
writeStream = fs.createWriteStream(output, {
- flags: 'a',
+ flags: 'w+',
encoding: 'binary'
});
@@ -41,63 +68,98 @@ function download(src, output, options) {
writeStream.end();
downloader.emit('error', err);
});
+
+ var encoding = "";
+ if(typeof res.headers['content-encoding'] === "string") {
+ encoding = res.headers['content-encoding'];
+ }
+
+ // If the user has specified to unzip, and the file is gzip encoded, pipe to gunzip
+ if(options.gunzip === true && encoding === "gzip") {
+ res.pipe(gunzip);
+ } else {
+ res.pipe(writeStream);
+ }
+
+            // Emit a start event so the consumer knows the file size to expect
+ downloader.emit('start', fileSize);
+
+ // Data handlers
res.on('data', function(chunk) {
downloadedSize += chunk.length;
downloader.emit('progress', downloadedSize/fileSize);
+            });
+            gunzip.on('data', function(chunk) {
                 writeStream.write(chunk);
             });
+            // End the write stream once decompression finishes so 'end' fires for gunzipped downloads
+            gunzip.on('end', function() {
+                writeStream.end();
+            });
- res.on('end', function() {
+
+ writeStream.on('finish', function() {
-                writeStream.end();
+                downloader.emit('end', "Finished writing to disk");
});
- writeStream.on('close', function(){
- downloader.emit('end', output);
- });
- } else {
- downloader.emit('error', 'Server respond ' + res.statusCode);
+ } else if(res.statusCode !== 200 && res.statusCode !== 301 && res.statusCode !== 302) {
+ downloader.emit('error', 'Server responded with unhandled status: ' + res.statusCode);
}
});
- req.end();
+ req.end('done');
req.on('error', function(err) {
downloader.emit('error', err);
});
+ // Attach request to our EventEmitter for backwards compatibility, enables actions such as
+ // req.abort();
+ downloader.req = req;
return downloader;
}
function request(options, callback) {
+ var newOptions = {}, newProxy = {}, key;
options = parseOptions('request', options);
if (options.protocol === 'http') {
if (options.proxy) {
+ for (key in options.proxy) {
+ if (key !== 'protocol') {
+ newProxy[key] = options.proxy[key];
+ }
+ }
if (options.proxy.protocol === 'http') {
- delete options.proxy.protocol; // delete self-defined arg
- options.agent = tunnel.httpOverHttp({proxy: options.proxy});
+ options.agent = tunnel.httpOverHttp({proxy: newProxy});
} else if (options.proxy.protocol === 'https') {
- delete options.proxy.protocol; // delete self-defined arg
- options.agent = tunnel.httpOverHttps({proxy: options.proxy});
+ options.agent = tunnel.httpOverHttps({proxy: newProxy});
} else {
throw options.proxy.protocol + ' proxy is not supported!';
}
}
- delete options.protocol; // delete self-defined arg
- delete options.proxy; // delete self-defined arg
- return http.request(options, callback);
+ for (key in options) {
+ if (key !== 'protocol' && key !== 'proxy') {
+ newOptions[key] = options[key];
+ }
+ }
+ return http.request(newOptions, callback);
}
if (options.protocol === 'https') {
if (options.proxy) {
+ for (key in options.proxy) {
+ if (key !== 'protocol') {
+ newProxy[key] = options.proxy[key];
+ }
+ }
if (options.proxy.protocol === 'http') {
- delete options.proxy.protocol; // delete self-defined arg
- options.agent = tunnel.httpsOverHttp({proxy: options.proxy});
+ options.agent = tunnel.httpsOverHttp({proxy: newProxy});
} else if (options.proxy.protocol === 'https') {
- delete options.proxy.protocol; // delete self-defined arg
- options.agent = tunnel.httpsOverHttps({proxy: options.proxy});
+ options.agent = tunnel.httpsOverHttps({proxy: newProxy});
} else {
throw options.proxy.protocol + ' proxy is not supported!';
}
}
- delete options.protocol; // delete self-defined arg
- delete options.proxy; // delete self-defined arg
- return https.request(options, callback);
+ for (key in options) {
+ if (key !== 'protocol' && key !== 'proxy') {
+ newOptions[key] = options[key];
+ }
+ }
+ return https.request(newOptions, callback);
}
throw 'only allow http or https request!';
}
@@ -135,6 +197,8 @@ function parseOptions(type, options) {
options.proxy.headers = {'User-Agent': 'Node'};
}
}
+
+ options.gunzip = options.gunzip || false;
return options;
}
}
@@ -144,4 +208,4 @@ function cleanProtocol(str) {
}
exports.download = download;
-exports.request = request;
\ No newline at end of file
+exports.request = request;
diff --git a/package.json b/package.json
index 934883b..61754cc 100644
--- a/package.json
+++ b/package.json
@@ -1,18 +1,24 @@
{
- "name": "wget",
- "version": "0.0.1",
- "description": "wget in nodejs.",
- "keywords": ["download", "http", "https", "ftp", "proxy"],
- "author": "Chengwei Wu ",
+ "name": "wget-improved",
+ "version": "1.3.0",
+ "description": "wget in nodejs, forked from wuchengwei/node-wget to add improvements and help maintain the project",
+ "keywords": ["download", "http", "https", "ftp", "proxy", "wget"],
+ "author": "Michael Barajas ",
"repository":{
"type": "git",
- "url": "git://github.com/wuchengwei/node-wget.git"
+ "url": "git://github.com/bearjaws/node-wget.git"
},
- "main": "./index.js",
+ "contributors": [
+ {
+ "name": "Michael Barajas"
+ }
+ ],
+ "homepage": "https://github.com/bearjaws/node-wget",
"bin": {
- "nwget": "./bin/nwget.js"
+ "nwget": "./bin/nwget"
},
"dependencies": {
+ "minimist": "^1.2.0",
"tunnel": "0.0.2"
},
"engines": { "node": ">= 0.6.18" }
diff --git a/test/test.js b/test/test.js
index 9e5dbae..409d80d 100644
--- a/test/test.js
+++ b/test/test.js
@@ -1,13 +1,17 @@
var wget = require('../lib/wget');
-var download = wget.download('https://raw.github.com/Fyrd/caniuse/master/data.json', '/tmp/README.md');
+var download = wget.download('https://www.npmjs.com/static/images/npm-logo.svg', '/tmp/README.md');
// with a proxy:
// var download = wget.download('https://raw.github.com/Fyrd/caniuse/master/data.json', '/tmp/README.md', {proxy: 'http://proxyhost:port'});
download.on('error', function(err) {
console.log(err);
});
+download.on('start', function(fileSize) {
+ console.log(fileSize);
+});
download.on('end', function(output) {
console.log(output);
+ process.exit();
});
download.on('progress', function(progress) {
console.log(progress);