diff --git a/.gitignore b/.gitignore
index 91dfed8..0aa35d8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,3 @@
 .DS_Store
-node_modules
\ No newline at end of file
+node_modules
+.idea
diff --git a/README.md b/README.md
index 3527914..2d9f731 100644
--- a/README.md
+++ b/README.md
@@ -1,28 +1,36 @@
-# node-wget
+# wget-improved
 
-A download tool, now supporting http/https resource and http/https proxy, written in nodejs.
+wget-improved simplifies retrieving files from any URL
+
+Improvements over [wuchengwei/node-wget](https://github.com/wuchengwei/node-wget)
+- Handles 302 redirects (including infinite redirect loops)
+- Passes URL parameters
+- Better error reporting
+- Does not write using append (uses w+, identical to wget)
+- Handles gzip compression, allowing you to automatically gunzip the stream
+
+## Install
 
-# Installing
 ```
-npm install wget
+npm install wget-improved --save
 ```
 
-# Usage
-
-
 ## download(src, output, options)
 
 ```js
-var wget = require('wget');
-var src = 'https://raw.github.com/Fyrd/caniuse/master/data.json';
-var output = '/tmp/data.json';
+var wget = require('wget-improved');
+var src = 'http://nodejs.org/images/logo.svg';
+var output = '/tmp/logo.svg';
 var options = {
-    proxy: 'http://host:port'
+    // see options below
 };
 var download = wget.download(src, output, options);
 download.on('error', function(err) {
     console.log(err);
 });
+download.on('start', function(fileSize) {
+    console.log(fileSize);
+});
 download.on('end', function(output) {
     console.log(output);
 });
@@ -31,7 +39,6 @@ download.on('progress', function(progress) {
 });
 ```
 
-
 ## request(options, callback)
 
 ```js
@@ -64,4 +71,33 @@ req.end();
 req.on('error', function(err) {
     console.log(err);
 });
-```
\ No newline at end of file
+```
+
+## options
+
+```js
+
+var options = {};
+// Set to true to have any gzip stream automatically decompressed before saving
+options.gunzip = false;
+options.proxy = {};
+options.proxy.protocol = 'http';
+options.proxy.host = 'someproxy.org';
+options.proxy.port = 1337;
+options.proxy.proxyAuth = '{basic auth}';
+options.proxy.headers = {'User-Agent': 'Node'};
+```
+
+## CLI
+
+```bash
+# If installed globally
+nwget https://raw.github.com/Fyrd/caniuse/master/data.json -O /tmp/data.json
+
+# If not installed globally
+./node_modules/.bin/nwget https://raw.github.com/Fyrd/caniuse/master/data.json -O /tmp/data.json
+```
+
+## Todo
+
+- Enable gzip when using request method
diff --git a/bin/nwget b/bin/nwget
new file mode 100755
index 0000000..8881d62
--- /dev/null
+++ b/bin/nwget
@@ -0,0 +1,42 @@
+#!/usr/bin/env node
+
+/**
+
+Usage:
+nwget [URL] [OPTIONS]
+
+Example:
+nwget https://raw.github.com/Fyrd/caniuse/master/data.json -O /tmp/data.json
+
+**/
+
+var wget = require('../lib/wget');
+var path = require('path');
+
+var args = require('minimist')(process.argv);
+var url = args._[2];
+var output = args.O;
+
+if (args.help) return console.log(`Usage: nwget [URL] [OPTIONS]
+
+Download:
+  -O,  --output-document=FILE    write documents to FILE
+`)
+
+if (!url) return console.error('The first argument must be a URL to a downloadable resource (e.g. nwget https://raw.github.com/Fyrd/caniuse/master/data.json)')
+
+if (!output) return console.error('The second argument must be a file path for the downloaded resource (e.g. nwget https://raw.github.com/Fyrd/caniuse/master/data.json -O /tmp/data.json)')
+
+// console.log(url, output);
+
+var download = wget.download(url, path.resolve(output));
+
+download.on('error', function(err) {
+    console.error(err);
+});
+download.on('end', function(output) {
+    console.log(output);
+});
+download.on('progress', function(progress) {
+    console.log(progress);
+});
diff --git a/bin/nwget.js b/bin/nwget.js
deleted file mode 100644
index e69de29..0000000
diff --git a/lib/wget.js b/lib/wget.js
index 1b55d3e..0934644 100644
--- a/lib/wget.js
+++ b/lib/wget.js
@@ -4,18 +4,34 @@
 var http = require('http');
 var https = require('https');
 var tunnel = require('tunnel');
 var url = require('url');
-var util = require('util');
+var zlib = require('zlib');
 var fs = require('fs');
 var EventEmitter = require('events').EventEmitter;
 
-function download(src, output, options) {
-    var downloader = new EventEmitter(),
+/**
+ * Downloads a file using http get and request
+ * @param {string} src - The http URL to download from
+ * @param {string} output - The filepath to save to
+ * @param {object} options - Options object
+ * @param {object} _parentEvent - Used when there is a 302 redirect and we need to maintain state across the new request
+ * @param {number} redirects - The number of redirects, used to prevent infinite loops
+ * @returns {*|EventEmitter}
+ */
+function download(src, output, options, _parentEvent, redirects) {
+    if(typeof redirects === "undefined") {
+        redirects = 0;
+    }
+    var downloader = _parentEvent || new EventEmitter(),
         srcUrl,
         tunnelAgent,
         req;
-    if (options) {
+    if (options) {
         options = parseOptions('download', options);
+    } else {
+        options = {
+            gunzip: false
+        };
     }
     srcUrl = url.parse(src);
     srcUrl.protocol = cleanProtocol(srcUrl.protocol);
@@ -24,16 +40,27 @@ function download(src, output, options) {
         protocol: srcUrl.protocol,
         host: srcUrl.hostname,
         port: srcUrl.port,
-        path: srcUrl.pathname,
+        path: srcUrl.pathname + (srcUrl.search || ""),
         proxy: options?options.proxy:undefined,
         method: 'GET'
     }, function(res) {
         var fileSize, writeStream, downloadedSize;
 
+        var gunzip = zlib.createGunzip();
+
+        // Handle redirects (301, 302, 307)
+        if(res.statusCode === 301 || res.statusCode === 302 || res.statusCode === 307) {
+            redirects++;
+            if(redirects >= 10) {
+                downloader.emit('error', 'Infinite redirect loop detected');
+            }
+            download(res.headers.location, output, options, downloader, redirects);
+        }
+
         if (res.statusCode === 200) {
             downloadedSize = 0;
             fileSize = res.headers['content-length'];
             writeStream = fs.createWriteStream(output, {
-                flags: 'a',
+                flags: 'w+',
                 encoding: 'binary'
             });
@@ -41,63 +68,98 @@ function download(src, output, options) {
                 writeStream.end();
                 downloader.emit('error', err);
             });
+
+            var encoding = "";
+            if(typeof res.headers['content-encoding'] === "string") {
+                encoding = res.headers['content-encoding'];
+            }
+
+            // If the user has requested gunzip and the file is gzip encoded, pipe it through gunzip
+            if(options.gunzip === true && encoding === "gzip") {
+                res.pipe(gunzip);
+            } else {
+                res.pipe(writeStream);
+            }
+
+            // Emit a start event so the user knows the file size they are going to receive
+            downloader.emit('start', fileSize);
+
+            // Data handlers
             res.on('data', function(chunk) {
                 downloadedSize += chunk.length;
                 downloader.emit('progress', downloadedSize/fileSize);
+            });
+            gunzip.on('data', function(chunk) {
                 writeStream.write(chunk);
             });
-            res.on('end', function() {
+
+            writeStream.on('finish', function() {
                 writeStream.end();
+                downloader.emit('end', "Finished writing to disk");
+                req.end('finished');
             });
-            writeStream.on('close', function(){
-                downloader.emit('end', output);
-            });
-        } else {
-            downloader.emit('error', 'Server respond ' + res.statusCode);
+        } else if(res.statusCode !== 200 && res.statusCode !== 301 && res.statusCode !== 302) {
+            downloader.emit('error', 'Server responded with unhandled status: ' + res.statusCode);
         }
     });
-    req.end();
+    req.end('done');
     req.on('error', function(err) {
         downloader.emit('error', err);
     });
+    // Attach request to our EventEmitter for backwards compatibility, enables actions such as
+    // req.abort();
+    downloader.req = req;
     return downloader;
 }
 
 function request(options, callback) {
+    var newOptions = {}, newProxy = {}, key;
     options = parseOptions('request', options);
     if (options.protocol === 'http') {
         if (options.proxy) {
+            for (key in options.proxy) {
+                if (key !== 'protocol') {
+                    newProxy[key] = options.proxy[key];
+                }
+            }
             if (options.proxy.protocol === 'http') {
-                delete options.proxy.protocol; // delete self-defined arg
-                options.agent = tunnel.httpOverHttp({proxy: options.proxy});
+                options.agent = tunnel.httpOverHttp({proxy: newProxy});
             } else if (options.proxy.protocol === 'https') {
-                delete options.proxy.protocol; // delete self-defined arg
-                options.agent = tunnel.httpOverHttps({proxy: options.proxy});
+                options.agent = tunnel.httpOverHttps({proxy: newProxy});
             } else {
                 throw options.proxy.protocol + ' proxy is not supported!';
             }
         }
-        delete options.protocol; // delete self-defined arg
-        delete options.proxy; // delete self-defined arg
-        return http.request(options, callback);
+        for (key in options) {
+            if (key !== 'protocol' && key !== 'proxy') {
+                newOptions[key] = options[key];
+            }
+        }
+        return http.request(newOptions, callback);
     }
     if (options.protocol === 'https') {
         if (options.proxy) {
+            for (key in options.proxy) {
+                if (key !== 'protocol') {
+                    newProxy[key] = options.proxy[key];
+                }
+            }
             if (options.proxy.protocol === 'http') {
-                delete options.proxy.protocol; // delete self-defined arg
-                options.agent = tunnel.httpsOverHttp({proxy: options.proxy});
+                options.agent = tunnel.httpsOverHttp({proxy: newProxy});
             } else if (options.proxy.protocol === 'https') {
-                delete options.proxy.protocol; // delete self-defined arg
-                options.agent = tunnel.httpsOverHttps({proxy: options.proxy});
+                options.agent = tunnel.httpsOverHttps({proxy: newProxy});
             } else {
                 throw options.proxy.protocol + ' proxy is not supported!';
            }
        }
-        delete options.protocol; // delete self-defined arg
-        delete options.proxy; // delete self-defined arg
-        return https.request(options, callback);
+        for (key in options) {
+            if (key !== 'protocol' && key !== 'proxy') {
+                newOptions[key] = options[key];
+            }
+        }
+        return https.request(newOptions, callback);
     }
     throw 'only allow http or https request!';
 }
@@ -135,6 +197,8 @@
                 options.proxy.headers = {'User-Agent': 'Node'};
             }
         }
+
+        options.gunzip = options.gunzip || false;
         return options;
     }
 }
@@ -144,4 +208,4 @@ function cleanProtocol(str) {
     return str.trim().toLowerCase().replace(/:$/, '');
 }
 exports.download = download;
-exports.request = request;
\ No newline at end of file
+exports.request = request;
diff --git a/package.json b/package.json
index 934883b..61754cc 100644
--- a/package.json
+++ b/package.json
@@ -1,18 +1,24 @@
 {
-  "name": "wget",
-  "version": "0.0.1",
-  "description": "wget in nodejs.",
-  "keywords": ["download", "http", "https", "ftp", "proxy"],
-  "author": "Chengwei Wu ",
+  "name": "wget-improved",
+  "version": "1.3.0",
+  "description": "wget in nodejs, forked from wuchengwei/node-wget to add improvements and help maintain the project",
+  "keywords": ["download", "http", "https", "ftp", "proxy", "wget"],
+  "author": "Michael Barajas ",
   "repository":{
     "type": "git",
-    "url": "git://github.com/wuchengwei/node-wget.git"
+    "url": "git://github.com/bearjaws/node-wget.git"
   },
-  "main": "./index.js",
+  "contributors": [
+    {
+      "name": "Michael Barajas"
+    }
+  ],
+  "homepage": "https://github.com/bearjaws/node-wget",
   "bin": {
-    "nwget": "./bin/nwget.js"
+    "nwget": "./bin/nwget"
   },
   "dependencies": {
+    "minimist": "^1.2.0",
     "tunnel": "0.0.2"
   },
   "engines": { "node": ">= 0.6.18" }
diff --git a/test/test.js b/test/test.js
index 9e5dbae..409d80d 100644
--- a/test/test.js
+++ b/test/test.js
@@ -1,13 +1,17 @@
 var wget = require('../lib/wget');
-var download = wget.download('https://raw.github.com/Fyrd/caniuse/master/data.json', '/tmp/README.md');
+var download = wget.download('https://www.npmjs.com/static/images/npm-logo.svg', '/tmp/README.md');
 // with a proxy:
 // var download = wget.download('https://raw.github.com/Fyrd/caniuse/master/data.json', '/tmp/README.md', {proxy: 'http://proxyhost:port'});
 download.on('error', function(err) {
     console.log(err);
 });
+download.on('start', function(fileSize) {
+    console.log(fileSize);
+});
 download.on('end', function(output) {
     console.log(output);
+    process.exit();
 });
 download.on('progress', function(progress) {
     console.log(progress);