From 8e9745fc7dde562a21d93d340e971206a443199f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 27 Aug 2011 15:01:27 +0200
Subject: [PATCH 001/450] Restructured code, improved readability, deleted junk

---
 CHANGELOG                  |  38 --
 json2.js                   | 482 ---------------------
 lib/DefaultHandler.js      | 135 ++++++
 lib/DomUtils.js            |  94 +++++
 lib/ElementType.js         |   9 +
 lib/Parser.js              | 397 ++++++++++++++++++
 lib/RssHandler.js          | 112 +++++
 lib/htmlparser.js          | 827 +------------------------------------
 lib/htmlparser.min.js      |  22 -
 lib/node-htmlparser.js     |   6 -
 lib/node-htmlparser.min.js |   6 -
 profile.js                 |  63 ---
 runtests.min.html          | 108 -----
 runtests.min.js            |  75 ----
 snippet.js                 |  15 -
 utils_example.js           |  35 --
 16 files changed, 752 insertions(+), 1672 deletions(-)
 delete mode 100644 CHANGELOG
 delete mode 100644 json2.js
 create mode 100644 lib/DefaultHandler.js
 create mode 100644 lib/DomUtils.js
 create mode 100644 lib/ElementType.js
 create mode 100644 lib/Parser.js
 create mode 100644 lib/RssHandler.js
 delete mode 100644 lib/htmlparser.min.js
 delete mode 100644 lib/node-htmlparser.js
 delete mode 100644 lib/node-htmlparser.min.js
 delete mode 100644 profile.js
 delete mode 100644 runtests.min.html
 delete mode 100644 runtests.min.js
 delete mode 100644 snippet.js
 delete mode 100644 utils_example.js

diff --git a/CHANGELOG b/CHANGELOG
deleted file mode 100644
index c262712..0000000
--- a/CHANGELOG
+++ /dev/null
@@ -1,38 +0,0 @@
-v1.8.0
-	*
-
-v1.7.3
-	* Renamed node-htmlparser.* to htmlparser.* and created shims for people still expecting node-htmlparser.*
-
-v1.7.2
-	* Document position feature fixed to work correctly with chunked parsing
-
-v1.7.1
-	* Document position feature disabled until it works correctly with chunked parsing
-
-v1.7.0
-	* Empty tag checking switch to being case insensitive [fgnass]
-	* Added feature to include document position (row, col) in element data [fgnass]
-	* Added parser option "includeLocation" to enable document position data
-
-v1.6.4
-	* Fixed 'prevElement' error [Swizec]
-
-v1.6.3
-	* Updated to support being an npm package
-	* Fixed DomUtils.testElement()
-
-v1.6.1
-	* Optimized DomUtils by up to 2-3x 
-
-v1.6.0
-	* Added support for RSS/Atom feeds
-
-v1.5.0
-	* Added DefaultHandler option "enforceEmptyTags" so that XML can be parsed correctly
-
-v1.4.2
-	* Added tests for parsing XML with namespaces
-
-v1.4.1
-	* Added minified version
diff --git a/json2.js b/json2.js
deleted file mode 100644
index a1a3b17..0000000
--- a/json2.js
+++ /dev/null
@@ -1,482 +0,0 @@
-/*
-    http://www.JSON.org/json2.js
-    2010-03-20
-
-    Public Domain.
-
-    NO WARRANTY EXPRESSED OR IMPLIED. USE AT YOUR OWN RISK.
-
-    See http://www.JSON.org/js.html
-
-
-    This code should be minified before deployment.
-    See http://javascript.crockford.com/jsmin.html
-
-    USE YOUR OWN COPY. IT IS EXTREMELY UNWISE TO LOAD CODE FROM SERVERS YOU DO
-    NOT CONTROL.
-
-
-    This file creates a global JSON object containing two methods: stringify
-    and parse.
-
-        JSON.stringify(value, replacer, space)
-            value       any JavaScript value, usually an object or array.
-
-            replacer    an optional parameter that determines how object
-                        values are stringified for objects. It can be a
-                        function or an array of strings.
-
-            space       an optional parameter that specifies the indentation
-                        of nested structures. If it is omitted, the text will
-                        be packed without extra whitespace. If it is a number,
-                        it will specify the number of spaces to indent at each
-                        level. If it is a string (such as '\t' or '&nbsp;'),
-                        it contains the characters used to indent at each level.
-
-            This method produces a JSON text from a JavaScript value.
-
-            When an object value is found, if the object contains a toJSON
-            method, its toJSON method will be called and the result will be
-            stringified. A toJSON method does not serialize: it returns the
-            value represented by the name/value pair that should be serialized,
-            or undefined if nothing should be serialized. The toJSON method
-            will be passed the key associated with the value, and this will be
-            bound to the value
-
-            For example, this would serialize Dates as ISO strings.
-
-                Date.prototype.toJSON = function (key) {
-                    function f(n) {
-                        // Format integers to have at least two digits.
-                        return n < 10 ? '0' + n : n;
-                    }
-
-                    return this.getUTCFullYear()   + '-' +
-                         f(this.getUTCMonth() + 1) + '-' +
-                         f(this.getUTCDate())      + 'T' +
-                         f(this.getUTCHours())     + ':' +
-                         f(this.getUTCMinutes())   + ':' +
-                         f(this.getUTCSeconds())   + 'Z';
-                };
-
-            You can provide an optional replacer method. It will be passed the
-            key and value of each member, with this bound to the containing
-            object. The value that is returned from your method will be
-            serialized. If your method returns undefined, then the member will
-            be excluded from the serialization.
-
-            If the replacer parameter is an array of strings, then it will be
-            used to select the members to be serialized. It filters the results
-            such that only members with keys listed in the replacer array are
-            stringified.
-
-            Values that do not have JSON representations, such as undefined or
-            functions, will not be serialized. Such values in objects will be
-            dropped; in arrays they will be replaced with null. You can use
-            a replacer function to replace those with JSON values.
-            JSON.stringify(undefined) returns undefined.
-
-            The optional space parameter produces a stringification of the
-            value that is filled with line breaks and indentation to make it
-            easier to read.
-
-            If the space parameter is a non-empty string, then that string will
-            be used for indentation. If the space parameter is a number, then
-            the indentation will be that many spaces.
-
-            Example:
-
-            text = JSON.stringify(['e', {pluribus: 'unum'}]);
-            // text is '["e",{"pluribus":"unum"}]'
-
-
-            text = JSON.stringify(['e', {pluribus: 'unum'}], null, '\t');
-            // text is '[\n\t"e",\n\t{\n\t\t"pluribus": "unum"\n\t}\n]'
-
-            text = JSON.stringify([new Date()], function (key, value) {
-                return this[key] instanceof Date ?
-                    'Date(' + this[key] + ')' : value;
-            });
-            // text is '["Date(---current time---)"]'
-
-
-        JSON.parse(text, reviver)
-            This method parses a JSON text to produce an object or array.
-            It can throw a SyntaxError exception.
-
-            The optional reviver parameter is a function that can filter and
-            transform the results. It receives each of the keys and values,
-            and its return value is used instead of the original value.
-            If it returns what it received, then the structure is not modified.
-            If it returns undefined then the member is deleted.
-
-            Example:
-
-            // Parse the text. Values that look like ISO date strings will
-            // be converted to Date objects.
-
-            myData = JSON.parse(text, function (key, value) {
-                var a;
-                if (typeof value === 'string') {
-                    a =
-/^(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2}(?:\.\d*)?)Z$/.exec(value);
-                    if (a) {
-                        return new Date(Date.UTC(+a[1], +a[2] - 1, +a[3], +a[4],
-                            +a[5], +a[6]));
-                    }
-                }
-                return value;
-            });
-
-            myData = JSON.parse('["Date(09/09/2001)"]', function (key, value) {
-                var d;
-                if (typeof value === 'string' &&
-                        value.slice(0, 5) === 'Date(' &&
-                        value.slice(-1) === ')') {
-                    d = new Date(value.slice(5, -1));
-                    if (d) {
-                        return d;
-                    }
-                }
-                return value;
-            });
-
-
-    This is a reference implementation. You are free to copy, modify, or
-    redistribute.
-*/
-
-/*jslint evil: true, strict: false */
-
-/*members "", "\b", "\t", "\n", "\f", "\r", "\"", JSON, "\\", apply,
-    call, charCodeAt, getUTCDate, getUTCFullYear, getUTCHours,
-    getUTCMinutes, getUTCMonth, getUTCSeconds, hasOwnProperty, join,
-    lastIndex, length, parse, prototype, push, replace, slice, stringify,
-    test, toJSON, toString, valueOf
-*/
-
-
-// Create a JSON object only if one does not already exist. We create the
-// methods in a closure to avoid creating global variables.
-
-if (!this.JSON) {
-    this.JSON = {};
-}
-
-(function () {
-
-    function f(n) {
-        // Format integers to have at least two digits.
-        return n < 10 ? '0' + n : n;
-    }
-
-    if (typeof Date.prototype.toJSON !== 'function') {
-
-        Date.prototype.toJSON = function (key) {
-
-            return isFinite(this.valueOf()) ?
-                   this.getUTCFullYear()   + '-' +
-                 f(this.getUTCMonth() + 1) + '-' +
-                 f(this.getUTCDate())      + 'T' +
-                 f(this.getUTCHours())     + ':' +
-                 f(this.getUTCMinutes())   + ':' +
-                 f(this.getUTCSeconds())   + 'Z' : null;
-        };
-
-        String.prototype.toJSON =
-        Number.prototype.toJSON =
-        Boolean.prototype.toJSON = function (key) {
-            return this.valueOf();
-        };
-    }
-
-    var cx = /[\u0000\u00ad\u0600-\u0604\u070f\u17b4\u17b5\u200c-\u200f\u2028-\u202f\u2060-\u206f\ufeff\ufff0-\uffff]/g,
-        escapable = /[\\\"\x00-\x1f\x7f-\x9f\u00ad\u0600-\u0604\u070f\u17b4\u17b5\u200c-\u200f\u2028-\u202f\u2060-\u206f\ufeff\ufff0-\uffff]/g,
-        gap,
-        indent,
-        meta = {    // table of character substitutions
-            '\b': '\\b',
-            '\t': '\\t',
-            '\n': '\\n',
-            '\f': '\\f',
-            '\r': '\\r',
-            '"' : '\\"',
-            '\\': '\\\\'
-        },
-        rep;
-
-
-    function quote(string) {
-
-// If the string contains no control characters, no quote characters, and no
-// backslash characters, then we can safely slap some quotes around it.
-// Otherwise we must also replace the offending characters with safe escape
-// sequences.
-
-        escapable.lastIndex = 0;
-        return escapable.test(string) ?
-            '"' + string.replace(escapable, function (a) {
-                var c = meta[a];
-                return typeof c === 'string' ? c :
-                    '\\u' + ('0000' + a.charCodeAt(0).toString(16)).slice(-4);
-            }) + '"' :
-            '"' + string + '"';
-    }
-
-
-    function str(key, holder) {
-
-// Produce a string from holder[key].
-
-        var i,          // The loop counter.
-            k,          // The member key.
-            v,          // The member value.
-            length,
-            mind = gap,
-            partial,
-            value = holder[key];
-
-// If the value has a toJSON method, call it to obtain a replacement value.
-
-        if (value && typeof value === 'object' &&
-                typeof value.toJSON === 'function') {
-            value = value.toJSON(key);
-        }
-
-// If we were called with a replacer function, then call the replacer to
-// obtain a replacement value.
-
-        if (typeof rep === 'function') {
-            value = rep.call(holder, key, value);
-        }
-
-// What happens next depends on the value's type.
-
-        switch (typeof value) {
-        case 'string':
-            return quote(value);
-
-        case 'number':
-
-// JSON numbers must be finite. Encode non-finite numbers as null.
-
-            return isFinite(value) ? String(value) : 'null';
-
-        case 'boolean':
-        case 'null':
-
-// If the value is a boolean or null, convert it to a string. Note:
-// typeof null does not produce 'null'. The case is included here in
-// the remote chance that this gets fixed someday.
-
-            return String(value);
-
-// If the type is 'object', we might be dealing with an object or an array or
-// null.
-
-        case 'object':
-
-// Due to a specification blunder in ECMAScript, typeof null is 'object',
-// so watch out for that case.
-
-            if (!value) {
-                return 'null';
-            }
-
-// Make an array to hold the partial results of stringifying this object value.
-
-            gap += indent;
-            partial = [];
-
-// Is the value an array?
-
-            if (Object.prototype.toString.apply(value) === '[object Array]') {
-
-// The value is an array. Stringify every element. Use null as a placeholder
-// for non-JSON values.
-
-                length = value.length;
-                for (i = 0; i < length; i += 1) {
-                    partial[i] = str(i, value) || 'null';
-                }
-
-// Join all of the elements together, separated with commas, and wrap them in
-// brackets.
-
-                v = partial.length === 0 ? '[]' :
-                    gap ? '[\n' + gap +
-                            partial.join(',\n' + gap) + '\n' +
-                                mind + ']' :
-                          '[' + partial.join(',') + ']';
-                gap = mind;
-                return v;
-            }
-
-// If the replacer is an array, use it to select the members to be stringified.
-
-            if (rep && typeof rep === 'object') {
-                length = rep.length;
-                for (i = 0; i < length; i += 1) {
-                    k = rep[i];
-                    if (typeof k === 'string') {
-                        v = str(k, value);
-                        if (v) {
-                            partial.push(quote(k) + (gap ? ': ' : ':') + v);
-                        }
-                    }
-                }
-            } else {
-
-// Otherwise, iterate through all of the keys in the object.
-
-                for (k in value) {
-                    if (Object.hasOwnProperty.call(value, k)) {
-                        v = str(k, value);
-                        if (v) {
-                            partial.push(quote(k) + (gap ? ': ' : ':') + v);
-                        }
-                    }
-                }
-            }
-
-// Join all of the member texts together, separated with commas,
-// and wrap them in braces.
-
-            v = partial.length === 0 ? '{}' :
-                gap ? '{\n' + gap + partial.join(',\n' + gap) + '\n' +
-                        mind + '}' : '{' + partial.join(',') + '}';
-            gap = mind;
-            return v;
-        }
-    }
-
-// If the JSON object does not yet have a stringify method, give it one.
-
-    if (typeof JSON.stringify !== 'function') {
-        JSON.stringify = function (value, replacer, space) {
-
-// The stringify method takes a value and an optional replacer, and an optional
-// space parameter, and returns a JSON text. The replacer can be a function
-// that can replace values, or an array of strings that will select the keys.
-// A default replacer method can be provided. Use of the space parameter can
-// produce text that is more easily readable.
-
-            var i;
-            gap = '';
-            indent = '';
-
-// If the space parameter is a number, make an indent string containing that
-// many spaces.
-
-            if (typeof space === 'number') {
-                for (i = 0; i < space; i += 1) {
-                    indent += ' ';
-                }
-
-// If the space parameter is a string, it will be used as the indent string.
-
-            } else if (typeof space === 'string') {
-                indent = space;
-            }
-
-// If there is a replacer, it must be a function or an array.
-// Otherwise, throw an error.
-
-            rep = replacer;
-            if (replacer && typeof replacer !== 'function' &&
-                    (typeof replacer !== 'object' ||
-                     typeof replacer.length !== 'number')) {
-                throw new Error('JSON.stringify');
-            }
-
-// Make a fake root object containing our value under the key of ''.
-// Return the result of stringifying the value.
-
-            return str('', {'': value});
-        };
-    }
-
-
-// If the JSON object does not yet have a parse method, give it one.
-
-    if (typeof JSON.parse !== 'function') {
-        JSON.parse = function (text, reviver) {
-
-// The parse method takes a text and an optional reviver function, and returns
-// a JavaScript value if the text is a valid JSON text.
-
-            var j;
-
-            function walk(holder, key) {
-
-// The walk method is used to recursively walk the resulting structure so
-// that modifications can be made.
-
-                var k, v, value = holder[key];
-                if (value && typeof value === 'object') {
-                    for (k in value) {
-                        if (Object.hasOwnProperty.call(value, k)) {
-                            v = walk(value, k);
-                            if (v !== undefined) {
-                                value[k] = v;
-                            } else {
-                                delete value[k];
-                            }
-                        }
-                    }
-                }
-                return reviver.call(holder, key, value);
-            }
-
-
-// Parsing happens in four stages. In the first stage, we replace certain
-// Unicode characters with escape sequences. JavaScript handles many characters
-// incorrectly, either silently deleting them, or treating them as line endings.
-
-            text = String(text);
-            cx.lastIndex = 0;
-            if (cx.test(text)) {
-                text = text.replace(cx, function (a) {
-                    return '\\u' +
-                        ('0000' + a.charCodeAt(0).toString(16)).slice(-4);
-                });
-            }
-
-// In the second stage, we run the text against regular expressions that look
-// for non-JSON patterns. We are especially concerned with '()' and 'new'
-// because they can cause invocation, and '=' because it can cause mutation.
-// But just to be safe, we want to reject all unexpected forms.
-
-// We split the second stage into 4 regexp operations in order to work around
-// crippling inefficiencies in IE's and Safari's regexp engines. First we
-// replace the JSON backslash pairs with '@' (a non-JSON character). Second, we
-// replace all simple value tokens with ']' characters. Third, we delete all
-// open brackets that follow a colon or comma or that begin the text. Finally,
-// we look to see that the remaining characters are only whitespace or ']' or
-// ',' or ':' or '{' or '}'. If that is so, then the text is safe for eval.
-
-            if (/^[\],:{}\s]*$/.
-test(text.replace(/\\(?:["\\\/bfnrt]|u[0-9a-fA-F]{4})/g, '@').
-replace(/"[^"\\\n\r]*"|true|false|null|-?\d+(?:\.\d*)?(?:[eE][+\-]?\d+)?/g, ']').
-replace(/(?:^|:|,)(?:\s*\[)+/g, ''))) {
-
-// In the third stage we use the eval function to compile the text into a
-// JavaScript structure. The '{' operator is subject to a syntactic ambiguity
-// in JavaScript: it can begin a block or an object literal. We wrap the text
-// in parens to eliminate the ambiguity.
-
-                j = eval('(' + text + ')');
-
-// In the optional fourth stage, we recursively walk the new structure, passing
-// each name/value pair to a reviver function for possible transformation.
-
-                return typeof reviver === 'function' ?
-                    walk({'': j}, '') : j;
-            }
-
-// If the text is not JSON parseable, then a SyntaxError is thrown.
-
-            throw new SyntaxError('JSON.parse');
-        };
-    }
-}());
diff --git a/lib/DefaultHandler.js b/lib/DefaultHandler.js
new file mode 100644
index 0000000..1536f6e
--- /dev/null
+++ b/lib/DefaultHandler.js
@@ -0,0 +1,135 @@
+var ElementType = require("./ElementType.js");
+
+function DefaultHandler (callback, options) {
+	this.dom = [];
+	this._done = false;
+	this._tagStack = [];
+	this._options = options ? options : { };
+	if (this._options.ignoreWhitespace === undefined)
+		this._options.ignoreWhitespace = false; //Keep whitespace-only text nodes
+	if (this._options.verbose === undefined)
+		this._options.verbose = true; //Keep data property for tags and raw property for all
+	if (this._options.enforceEmptyTags === undefined)
+		this._options.enforceEmptyTags = true; //Don't allow children for HTML tags defined as empty in spec
+	if ((typeof callback) === "function")
+		this._callback = callback;
+}
+
+//**"Static"**//
+//HTML Tags that shouldn't contain child nodes
+DefaultHandler._emptyTags = {
+		area: 1
+	, base: 1
+	, basefont: 1
+	, br: 1
+	, col: 1
+	, frame: 1
+	, hr: 1
+	, img: 1
+	, input: 1
+	, isindex: 1
+	, link: 1
+	, meta: 1
+	, param: 1
+	, embed: 1
+};
+//Regex to detect whitespace only text nodes
+DefaultHandler.reWhitespace = /^\s*$/;
+
+//**Public**//
+//Methods//
+//Resets the handler back to starting state
+DefaultHandler.prototype.reset = function() {
+	this.dom = [];
+	this._done = false;
+	this._tagStack = [];
+	this._tagStack.last = function() {
+		return(this.length ? this[this.length - 1] : null);
+	};
+};
+//Signals the handler that parsing is done
+DefaultHandler.prototype.done = function() {
+	this._done = true;
+	this.handleCallback(null);
+};
+DefaultHandler.prototype.writeText = function(element) {
+	if (this._options.ignoreWhitespace)
+		if (DefaultHandler.reWhitespace.test(element.data))
+			return;
+	this.handleElement(element);
+};
+
+//Methods//
+DefaultHandler.prototype.error =
+DefaultHandler.prototype.handleCallback = function(error) {
+		if ((typeof this._callback) !== "function")
+			if (error)
+				throw error;
+			else
+				return;
+		this._callback(error, this.dom);
+};
+
+DefaultHandler.prototype.isEmptyTag = function(element) {
+	var name = element.name.toLowerCase();
+	if (name.charAt(0) === '/') {
+		name = name.substring(1);
+	}
+	return this._options.enforceEmptyTags && !!DefaultHandler._emptyTags[name];
+};
+
+DefaultHandler.prototype.writeTag = DefaultHandler.prototype.writeDirective = DefaultHandler.prototype.writeComment =
+DefaultHandler.prototype.handleElement = function(element) {
+	if (this._done)
+		this.handleCallback(new Error("Writing to the handler after done() called is not allowed without a reset()"));
+	if (!this._options.verbose) {
+		//element.raw = null; //FIXME: Not clean
+		//FIXME: Serious performance problem using delete
+		delete element.raw;
+		if (element.type === "tag" || element.type === "script" || element.type === "style")
+			delete element.data;
+	}
+	if (!this._tagStack.last()) { //There are no parent elements
+		//If the element can be a container, add it to the tag stack and the top level list
+		if (element.type !== ElementType.Text && element.type !== ElementType.Comment && element.type !== ElementType.Directive) {
+			if (element.name.charAt(0) !== "/") { //Ignore closing tags that obviously don't have an opening tag
+				this.dom.push(element);
+				if (!this.isEmptyTag(element)) { //Don't add tags to the tag stack that can't have children
+					this._tagStack.push(element);
+				}
+			}
+		}
+		else //Otherwise just add to the top level list
+			this.dom.push(element);
+	}
+	else { //There are parent elements
+		//If the element can be a container, add it as a child of the element
+		//on top of the tag stack and then add it to the tag stack
+		if (element.type !== ElementType.Text && element.type !== ElementType.Comment && element.type !== ElementType.Directive) {
+			if (element.name.charAt(0) === "/") {
+				//This is a closing tag, scan the tagStack to find the matching opening tag
+				//and pop the stack up to the opening tag's parent
+				var baseName = element.name.substring(1);
+				if (!this.isEmptyTag(element)) {
+					var pos = this._tagStack.length - 1;
+					while (pos > -1 && this._tagStack[pos--].name !== baseName) { }
+					if (pos > -1 || this._tagStack[0].name === baseName)
+						while (pos < this._tagStack.length - 1)
+							this._tagStack.pop();
+				}
+			}
+			else { //This is not a closing tag
+				if (!this._tagStack.last().children)
+					this._tagStack.last().children = [];
+				this._tagStack.last().children.push(element);
+				if (!this.isEmptyTag(element)) //Don't add tags to the tag stack that can't have children
+					this._tagStack.push(element);
+			}
+		}
+		else { //This is not a container element
+			if (!this._tagStack.last().children)
+				this._tagStack.last().children = [];
+			this._tagStack.last().children.push(element);
+		}
+	}
+};
\ No newline at end of file
diff --git a/lib/DomUtils.js b/lib/DomUtils.js
new file mode 100644
index 0000000..f930ba0
--- /dev/null
+++ b/lib/DomUtils.js
@@ -0,0 +1,94 @@
+var DomUtils = {
+		testElement: function(options, element) {
+		if (!element) {
+			return false;
+		}
+
+		for (var key in options) {
+			if (key === "tag_name") {
+				if (element.type !== "tag" && element.type !== "script" && element.type !== "style") {
+					return false;
+				}
+				if (!options.tag_name(element.name)) {
+					return false;
+				}
+			} else if (key === "tag_type") {
+				if (!options.tag_type(element.type)) {
+					return false;
+				}
+			} else if (key === "tag_contains") {
+				if (element.type !== "text" && element.type !== "comment" && element.type !== "directive") {
+					return false;
+				}
+				if (!options.tag_contains(element.data)) {
+					return false;
+				}
+			} else {
+				if (!element.attribs || !options[key](element.attribs[key])) {
+					return false;
+				}
+			}
+		}
+	
+		return true;
+	}
+
+	, getElements: function(options, currentElement, recurse, limit) {
+		recurse = (recurse === undefined || recurse === null) || !!recurse;
+		limit = isNaN(parseInt(limit, 10)) ? -1 : parseInt(limit, 10);
+
+		if (!currentElement) {
+			return([]);
+		}
+
+		var found = [];
+		var elementList;
+
+		function getTest (checkVal) {
+			return(function (value) { return(value === checkVal); });
+		}
+		for (var key in options) {
+			if ((typeof options[key]) !== "function") {
+				options[key] = getTest(options[key]);
+			}
+		}
+
+		if (DomUtils.testElement(options, currentElement)) {
+			found.push(currentElement);
+		}
+
+		if (limit >= 0 && found.length >= limit) {
+			return(found);
+		}
+
+		if (recurse && currentElement.children) {
+			elementList = currentElement.children;
+		} else if (currentElement instanceof Array) {
+			elementList = currentElement;
+		} else {
+			return(found);
+		}
+
+		for (var i = 0; i < elementList.length; i++) {
+			found = found.concat(DomUtils.getElements(options, elementList[i], recurse, limit));
+			if (limit >= 0 && found.length >= limit) {
+				break;
+			}
+		}
+
+		return(found);
+	}
+	
+	, getElementById: function(id, currentElement, recurse) {
+		var result = DomUtils.getElements({ id: id }, currentElement, recurse, 1);
+		return(result.length ? result[0] : null);
+	}
+	
+	, getElementsByTagName: function(name, currentElement, recurse, limit) {
+		return(DomUtils.getElements({ tag_name: name }, currentElement, recurse, limit));
+	}
+	
+	, getElementsByTagType: function(type, currentElement, recurse, limit) {
+		return(DomUtils.getElements({ tag_type: type }, currentElement, recurse, limit));
+	}
+};
diff --git a/lib/ElementType.js b/lib/ElementType.js
new file mode 100644
index 0000000..09d3d9f
--- /dev/null
+++ b/lib/ElementType.js
@@ -0,0 +1,9 @@
+//Types of elements found in the DOM
+var ElementType = {
+	Text: "text" //Plain text
+	, Directive: "directive" //Special tag <!...>
+	, Comment: "comment" //Special tag <!--...-->
+	, Script: "script" //Special tag <script>...</script>
+	, Style: "style" //Special tag <style>...</style>
+	, Tag: "tag" //Any tag that isn't special
+};
\ No newline at end of file
diff --git a/lib/Parser.js b/lib/Parser.js
new file mode 100644
index 0000000..ecb7be3
--- /dev/null
+++ b/lib/Parser.js
@@ -0,0 +1,397 @@
+var ElementType = require("./ElementType.js");
+
+function Parser (handler, options) {
+	this._options = options ? options : { };
+	if (this._options.includeLocation === undefined) {
+		this._options.includeLocation = false; //Do not track element position in document by default
+	}
+
+	this.validateHandler(handler);
+	this._handler = handler;
+	
+	this._buffer = "";
+	this._done = false;
+	this._elements = [];
+	this._elementsCurrent = 0;
+	this._current = 0;
+	this._next = 0;
+	this._location = {
+		 row: 0
+		, col: 0
+		, charOffset: 0
+		, inBuffer: 0
+	};
+	this._parseState = ElementType.Text;
+	this._prevTagSep = '';
+	this._tagStack = [];
+}
+
+//**"Static"**//
+//Regular expressions used for cleaning up and parsing (stateless)
+Parser._reTrim = /(^\s+|\s+$)/g; //Trim leading/trailing whitespace
+Parser._reTrimComment = /(^\!--|--$)/g; //Remove comment tag markup from comment contents
+Parser._reWhitespace = /\s/g; //Used to find any whitespace to split on
+Parser._reTagName = /^\s*(\/?)\s*([^\s\/]+)/; //Used to find the tag name for an element
+
+//Regular expressions used for parsing (stateful)
+Parser._reAttrib = //Find attributes in a tag
+	/([^=<>\"\'\s]+)\s*=\s*"([^"]*)"|([^=<>\"\'\s]+)\s*=\s*'([^']*)'|([^=<>\"\'\s]+)\s*=\s*([^'"\s]+)|([^=<>\"\'\s\/]+)/g;
+Parser._reTags = /[<\>]/g; //Find tag markers
+
+//**Public**//
+//Methods//
+//Parses a complete HTML and pushes it to the handler
+Parser.prototype.parseComplete = function(data) {
+	this.reset();
+	this.parseChunk(data);
+	this.done();
+};
+
+//Parses a piece of an HTML document
+Parser.prototype.parseChunk = function(data) {
+	if (this._done)
+		this.handleError(new Error("Attempted to parse chunk after parsing already done"));
+	this._buffer += data; //FIXME: this can be a bottleneck
+	this.parseTags();
+};
+
+//Tells the parser that the HTML being parsed is complete
+Parser.prototype.done = function() {
+	if (this._done)
+		return;
+	this._done = true;
+
+	//Push any unparsed text into a final element in the element list
+	if (this._buffer.length) {
+		var rawData = this._buffer;
+		this._buffer = "";
+		var element = {
+				raw: rawData
+			, data: (this._parseState === ElementType.Text) ? rawData : rawData.replace(Parser._reTrim, "")
+			, type: this._parseState
+			};
+		if (this._parseState === ElementType.Tag || this._parseState === ElementType.Script || this._parseState === ElementType.Style)
+			element.name = this.parseTagName(element.data);
+		this.parseAttribs(element);
+		this._elements.push(element);
+	}
+
+	this.writeHandler();
+	this._handler.done();
+};
+
+//Resets the parser to a blank state, ready to parse a new HTML document
+Parser.prototype.reset = function() {
+	this._buffer = "";
+	this._done = false;
+	this._elements = [];
+	this._elementsCurrent = 0;
+	this._current = 0;
+	this._next = 0;
+	this._location = {
+		 row: 0
+		, col: 0
+		, charOffset: 0
+		, inBuffer: 0
+	};
+	this._parseState = ElementType.Text;
+	this._prevTagSep = '';
+	this._tagStack = [];
+	this._handler.reset();
+};
+
+//**Private**//
+//Methods//
+//Takes an array of elements and parses any found attributes
+Parser.prototype.parseTagAttribs = function(elements) {
+	var idxEnd = elements.length;
+	var idx = 0;
+
+	while (idx < idxEnd) {
+		var element = elements[idx++];
+		if (element.type === ElementType.Tag || element.type === ElementType.Script || element.type === ElementType.style)
+			this.parseAttribs(element);
+	}
+
+	return(elements);
+};
+
+//Takes an element and adds an "attribs" property for any element attributes found 
+Parser.prototype.parseAttribs = function(element) {
+	//Only parse attributes for tags
+	if (element.type !== ElementType.Script && element.type !== ElementType.Style && element.type !== ElementType.Tag)
+		return;
+
+	var tagName = element.data.split(Parser._reWhitespace, 1)[0];
+	var attribRaw = element.data.substring(tagName.length);
+	if (attribRaw.length < 1)
+		return;
+
+	var match;
+	Parser._reAttrib.lastIndex = 0;
+	while (match = Parser._reAttrib.exec(attribRaw)) {
+		if (element.attribs === undefined)
+			element.attribs = {};
+
+		if (typeof match[1] === "string" && match[1].length) {
+			element.attribs[match[1]] = match[2];
+		} else if (typeof match[3] === "string" && match[3].length) {
+			element.attribs[match[3].toString()] = match[4].toString();
+		} else if (typeof match[5] === "string" && match[5].length) {
+			element.attribs[match[5]] = match[6];
+		} else if (typeof match[7] === "string" && match[7].length) {
+			element.attribs[match[7]] = match[7];
+		}
+	}
+};
+
+//Extracts the base tag name from the data value of an element
+Parser.prototype.parseTagName = function(data) {
+	if (data === null || data === "")
+		return("");
+	var match = Parser._reTagName.exec(data);
+	if (!match)
+		return("");
+	return((match[1] ? "/" : "") + match[2]);
+};
+
+//Parses through HTML text and returns an array of found elements
+//I admit, this function is rather large but splitting up had an noticeable impact on speed
+Parser.prototype.parseTags = function() {
+	var bufferEnd = this._buffer.length - 1;
+	while (Parser._reTags.test(this._buffer)) {
+		this._next = Parser._reTags.lastIndex - 1;
+		var tagSep = this._buffer.charAt(this._next); //The currently found tag marker
+		var rawData = this._buffer.substring(this._current, this._next); //The next chunk of data to parse
+
+		//A new element to eventually be appended to the element list
+		var element = {
+				raw: rawData
+			, data: (this._parseState === ElementType.Text) ? rawData : rawData.replace(Parser._reTrim, "")
+			, type: this._parseState
+		};
+
+		var elementName = this.parseTagName(element.data), prevElement, rawLen;
+
+		//This section inspects the current tag stack and modifies the current
+		//element if we're actually parsing a special area (script/comment/style tag)
+		if (this._tagStack.length) { //We're parsing inside a script/comment/style tag
+			if (this._tagStack[this._tagStack.length - 1] === ElementType.Script) { //We're currently in a script tag
+				if (elementName === "/script") //Actually, we're no longer in a script tag, so pop it off the stack
+					this._tagStack.pop();
+				else { //Not a closing script tag
+					if (element.raw.indexOf("!--") !== 0) { //Make sure we're not in a comment
+						//All data from here to script close is now a text element
+						element.type = ElementType.Text;
+						//If the previous element is text, append the current text to it
+						if (this._elements.length && this._elements[this._elements.length - 1].type === ElementType.Text) {
+							prevElement = this._elements[this._elements.length - 1];
+							prevElement.raw = prevElement.data = prevElement.raw + this._prevTagSep + element.raw;
+							element.raw = element.data = ""; //This causes the current element to not be added to the element list
+						}
+					}
+				}
+			}
+			else if (this._tagStack[this._tagStack.length - 1] === ElementType.Style) { //We're currently in a style tag
+				if (elementName === "/style") //Actually, we're no longer in a style tag, so pop it off the stack
+					this._tagStack.pop();
+				else {
+					if (element.raw.indexOf("!--") !== 0) { //Make sure we're not in a comment
+						//All data from here to style close is now a text element
+						element.type = ElementType.Text;
+						//If the previous element is text, append the current text to it
+						if (this._elements.length && this._elements[this._elements.length - 1].type === ElementType.Text) {
+							prevElement = this._elements[this._elements.length - 1];
+							if (element.raw !== "") {
+								prevElement.raw = prevElement.data = prevElement.raw + this._prevTagSep + element.raw;
+								element.raw = element.data = ""; //This causes the current element to not be added to the element list
+							} else { //Element is empty, so just append the last tag marker found
+								prevElement.raw = prevElement.data = prevElement.raw + this._prevTagSep;
+							}
+						} else { //The previous element was not text
+							if (element.raw !== "") {
+								element.raw = element.data = element.raw;
+							}
+						}
+					}
+				}
+			}
+			else if (this._tagStack[this._tagStack.length - 1] === ElementType.Comment) { //We're currently in a comment tag
+				rawLen = element.raw.length;
+				if (element.raw.charAt(rawLen - 2) === "-" && element.raw.charAt(rawLen - 1) === "-" && tagSep === ">") {
+					//Actually, we're no longer in a style tag, so pop it off the stack
+					this._tagStack.pop();
+					//If the previous element is a comment, append the current text to it
+					if (this._elements.length && this._elements[this._elements.length - 1].type === ElementType.Comment) {
+						prevElement = this._elements[this._elements.length - 1];
+						prevElement.raw = prevElement.data = (prevElement.raw + element.raw).replace(Parser._reTrimComment, "");
+						element.raw = element.data = ""; //This causes the current element to not be added to the element list
+						element.type = ElementType.Text;
+					}
+					else //Previous element not a comment
+						element.type = ElementType.Comment; //Change the current element's type to a comment
+				}
+				else { //Still in a comment tag
+					element.type = ElementType.Comment;
+					//If the previous element is a comment, append the current text to it
+					if (this._elements.length && this._elements[this._elements.length - 1].type === ElementType.Comment) {
+						prevElement = this._elements[this._elements.length - 1];
+						prevElement.raw = prevElement.data = prevElement.raw + element.raw + tagSep;
+						element.raw = element.data = ""; //This causes the current element to not be added to the element list
+						element.type = ElementType.Text;
+					}
+					else
+						element.raw = element.data = element.raw + tagSep;
+				}
+			}
+		}
+
+		//Processing of non-special tags
+		if (element.type === ElementType.Tag) {
+			element.name = elementName;
+			
+			if (element.raw.indexOf("!--") === 0) { //This tag is really comment
+				element.type = ElementType.Comment;
+				delete element.name;
+				rawLen = element.raw.length;
+				//Check if the comment is terminated in the current element
+				if (element.raw.charAt(rawLen - 1) === "-" && element.raw.charAt(rawLen - 2) === "-" && tagSep === ">")
+					element.raw = element.data = element.raw.replace(Parser._reTrimComment, "");
+				else { //It's not so push the comment onto the tag stack
+					element.raw += tagSep;
+					this._tagStack.push(ElementType.Comment);
+				}
+			}
+			else if (element.raw.indexOf("!") === 0 || element.raw.indexOf("?") === 0) {
+				element.type = ElementType.Directive;
+				//TODO: what about CDATA?
+			}
+			else if (element.name === "script") {
+				element.type = ElementType.Script;
+				//Special tag, push onto the tag stack if not terminated
+				if (element.data.charAt(element.data.length - 1) !== "/")
+					this._tagStack.push(ElementType.Script);
+			}
+			else if (element.name === "/script")
+				element.type = ElementType.Script;
+			else if (element.name === "style") {
+				element.type = ElementType.Style;
+				//Special tag, push onto the tag stack if not terminated
+				if (element.data.charAt(element.data.length - 1) !== "/")
+					this._tagStack.push(ElementType.Style);
+			}
+			else if (element.name === "/style")
+				element.type = ElementType.Style;
+			if (element.name && element.name.charAt(0) === "/")
+				element.data = element.name;
+		}
+
+		//Add all tags and non-empty text elements to the element list
+		if (element.raw !== "" || element.type !== ElementType.Text) {
+			if (this._options.includeLocation && !element.location) {
+				element.location = this.getLocation(element.type === ElementType.Tag);
+			}
+			this.parseAttribs(element);
+			this._elements.push(element);
+			//If tag self-terminates, add an explicit, separate closing tag
+			if (
+				element.type !== ElementType.Text
+				&&
+				element.type !== ElementType.Comment
+				&&
+				element.type !== ElementType.Directive
+				&&
+				element.data.charAt(element.data.length - 1) === "/"
+				)
+				this._elements.push({
+						raw: "/" + element.name
+					, data: "/" + element.name
+					, name: "/" + element.name
+					, type: element.type
+				});
+		}
+		this._parseState = (tagSep === "<") ? ElementType.Tag : ElementType.Text;
+		this._current = this._next + 1;
+		this._prevTagSep = tagSep;
+	}
+
+	if (this._options.includeLocation) {
+		this.getLocation();
+		this._location.row += this._location.inBuffer;
+		this._location.inBuffer = 0;
+		this._location.charOffset = 0;
+	}
+	this._buffer = (this._current <= bufferEnd) ? this._buffer.substring(this._current) : "";
+	this._current = 0;
+
+	this.writeHandler();
+};
+
+Parser.prototype.getLocation = function(startTag) {
+	var c,
+		l = this._location,
+		end = this._current - (startTag ? 1 : 0),
+		chunk = startTag && l.charOffset === 0 && this._current === 0;
+	
+	for (; l.charOffset < end; l.charOffset++) {
+		c = this._buffer.charAt(l.charOffset);
+		if (c === '\n') {
+			l.inBuffer++;
+			l.col = 0;
+		} else if (c !== '\r') {
+			l.col++;
+		}
+	}
+	return {
+		 line: l.row + l.inBuffer + 1
+		, col: l.col + (chunk ? 0: 1)
+	};
+};
+
+//Checks the handler to make it is an object with the right "interface"
+Parser.prototype.validateHandler = function(handler) {
+	if ((typeof handler) !== "object")
+		throw new Error("Handler is not an object");
+	if ((typeof handler.reset) !== "function")
+		throw new Error("Handler method 'reset' is invalid");
+	if ((typeof handler.done) !== "function")
+		throw new Error("Handler method 'done' is invalid");
+	if ((typeof handler.writeTag) !== "function")
+		throw new Error("Handler method 'writeTag' is invalid");
+	if ((typeof handler.writeText) !== "function")
+		throw new Error("Handler method 'writeText' is invalid");
+	if ((typeof handler.writeComment) !== "function")
+		throw new Error("Handler method 'writeComment' is invalid");
+	if ((typeof handler.writeDirective) !== "function")
+		throw new Error("Handler method 'writeDirective' is invalid");
+};
+
+//Writes parsed elements out to the handler
+Parser.prototype.writeHandler = function(forceFlush) {
+	forceFlush = !!forceFlush;
+	if (this._tagStack.length && !forceFlush)
+		return;
+	while (this._elements.length) {
+		var element = this._elements.shift();
+		switch (element.type) {
+			case ElementType.Comment:
+				this._handler.writeComment(element);
+				break;
+			case ElementType.Directive:
+				this._handler.writeDirective(element);
+				break;
+			case ElementType.Text:
+				this._handler.writeText(element);
+				break;
+			default:
+				this._handler.writeTag(element);
+				break;
+		}
+	}
+};
+
+Parser.prototype.handleError = function(error) {
+	if ((typeof this._handler.error) === "function")
+		this._handler.error(error);
+	else throw error;
+};
\ No newline at end of file
diff --git a/lib/RssHandler.js b/lib/RssHandler.js
new file mode 100644
index 0000000..caf7f83
--- /dev/null
+++ b/lib/RssHandler.js
@@ -0,0 +1,112 @@
+var DefaultHandler = require("./DefaultHandler.js"),
+	DomUtils = require("./DomUtils.js");
+
+//TODO: make this a trully streamable handler
+function RssHandler (callback) {
+	RssHandler.super_.call(this, callback, { ignoreWhitespace: true, verbose: false, enforceEmptyTags: false });
+}
+
+function inherits (ctor, superCtor) {
+	var tempCtor = function(){};
+	tempCtor.prototype = superCtor.prototype;
+	ctor.super_ = superCtor;
+	ctor.prototype = new tempCtor();
+	ctor.prototype.constructor = ctor;
+}
+
+inherits(RssHandler, DefaultHandler);
+
+RssHandler.prototype.done = function() {
+	var feed = { };
+	var feedRoot;
+
+	var found = DomUtils.getElementsByTagName(function (value) { return(value === "rss" || value === "feed"); }, this.dom, false);
+	if (found.length) {
+		feedRoot = found[0];
+	}
+	if (feedRoot) {
+		if (feedRoot.name === "rss") {
+			feed.type = "rss";
+			feedRoot = feedRoot.children[0]; //<channel/>
+			feed.id = "";
+			try {
+				feed.title = DomUtils.getElementsByTagName("title", feedRoot.children, false)[0].children[0].data;
+			} catch (ex) { }
+			try {
+				feed.link = DomUtils.getElementsByTagName("link", feedRoot.children, false)[0].children[0].data;
+			} catch (ex) { }
+			try {
+				feed.description = DomUtils.getElementsByTagName("description", feedRoot.children, false)[0].children[0].data;
+			} catch (ex) { }
+			try {
+				feed.updated = new Date(DomUtils.getElementsByTagName("lastBuildDate", feedRoot.children, false)[0].children[0].data);
+			} catch (ex) { }
+			try {
+				feed.author = DomUtils.getElementsByTagName("managingEditor", feedRoot.children, false)[0].children[0].data;
+			} catch (ex) { }
+			feed.items = [];
+			DomUtils.getElementsByTagName("item", feedRoot.children).forEach(function (item, index, list) {
+				var entry = {};
+				try {
+					entry.id = DomUtils.getElementsByTagName("guid", item.children, false)[0].children[0].data;
+				} catch (ex) { }
+				try {
+					entry.title = DomUtils.getElementsByTagName("title", item.children, false)[0].children[0].data;
+				} catch (ex) { }
+				try {
+					entry.link = DomUtils.getElementsByTagName("link", item.children, false)[0].children[0].data;
+				} catch (ex) { }
+				try {
+					entry.description = DomUtils.getElementsByTagName("description", item.children, false)[0].children[0].data;
+				} catch (ex) { }
+				try {
+					entry.pubDate = new Date(DomUtils.getElementsByTagName("pubDate", item.children, false)[0].children[0].data);
+				} catch (ex) { }
+				feed.items.push(entry);
+			});
+		} else {
+			feed.type = "atom";
+			try {
+				feed.id = DomUtils.getElementsByTagName("id", feedRoot.children, false)[0].children[0].data;
+			} catch (ex) { }
+			try {
+				feed.title = DomUtils.getElementsByTagName("title", feedRoot.children, false)[0].children[0].data;
+			} catch (ex) { }
+			try {
+				feed.link = DomUtils.getElementsByTagName("link", feedRoot.children, false)[0].attribs.href;
+			} catch (ex) { }
+			try {
+				feed.description = DomUtils.getElementsByTagName("subtitle", feedRoot.children, false)[0].children[0].data;
+			} catch (ex) { }
+			try {
+				feed.updated = new Date(DomUtils.getElementsByTagName("updated", feedRoot.children, false)[0].children[0].data);
+			} catch (ex) { }
+			try {
+				feed.author = DomUtils.getElementsByTagName("email", feedRoot.children, true)[0].children[0].data;
+			} catch (ex) { }
+			feed.items = [];
+			DomUtils.getElementsByTagName("entry", feedRoot.children).forEach(function (item, index, list) {
+				var entry = {};
+				try {
+					entry.id = DomUtils.getElementsByTagName("id", item.children, false)[0].children[0].data;
+				} catch (ex) { }
+				try {
+					entry.title = DomUtils.getElementsByTagName("title", item.children, false)[0].children[0].data;
+				} catch (ex) { }
+				try {
+					entry.link = DomUtils.getElementsByTagName("link", item.children, false)[0].attribs.href;
+				} catch (ex) { }
+				try {
+					entry.description = DomUtils.getElementsByTagName("summary", item.children, false)[0].children[0].data;
+				} catch (ex) { }
+				try {
+					entry.pubDate = new Date(DomUtils.getElementsByTagName("updated", item.children, false)[0].children[0].data);
+				} catch (ex) { }
+				feed.items.push(entry);
+			});
+		}
+
+		this.dom = feed;
+	}
+	RssHandler.super_.prototype.done.call(this);
+};
\ No newline at end of file
diff --git a/lib/htmlparser.js b/lib/htmlparser.js
index c56928b..e14ae86 100644
--- a/lib/htmlparser.js
+++ b/lib/htmlparser.js
@@ -1,822 +1,5 @@
-/***********************************************
-Copyright 2010, 2011, Chris Winberry <chris@winberry.net>. All rights reserved.
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to
-deal in the Software without restriction, including without limitation the
-rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-sell copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-IN THE SOFTWARE.
-***********************************************/
-/* v1.8.0s */
-
-(function () {
-
-function runningInNode () {
-	return(
-		(typeof require) == "function"
-		&&
-		(typeof exports) == "object"
-		&&
-		(typeof module) == "object"
-		&&
-		(typeof __filename) == "string"
-		&&
-		(typeof __dirname) == "string"
-		);
-}
-
-if (!runningInNode()) {
-	if (!this.Tautologistics)
-		this.Tautologistics = {};
-	else if (this.Tautologistics.NodeHtmlParser)
-		return; //NodeHtmlParser already defined!
-	this.Tautologistics.NodeHtmlParser = {};
-	exports = this.Tautologistics.NodeHtmlParser;
-}
-
-//Types of elements found in the DOM
-var ElementType = {
-	  Text: "text" //Plain text
-	, Directive: "directive" //Special tag <!...>
-	, Comment: "comment" //Special tag <!--...-->
-	, Script: "script" //Special tag <script>...</script>
-	, Style: "style" //Special tag <style>...</style>
-	, Tag: "tag" //Any tag that isn't special
-}
-
-function Parser (handler, options) {
-	this._options = options ? options : { };
-	if (this._options.includeLocation == undefined) {
-		this._options.includeLocation = false; //Do not track element position in document by default
-	}
-
-	this.validateHandler(handler);
-	this._handler = handler;
-	this.reset();
-}
-
-	//**"Static"**//
-	//Regular expressions used for cleaning up and parsing (stateless)
-	Parser._reTrim = /(^\s+|\s+$)/g; //Trim leading/trailing whitespace
-	Parser._reTrimComment = /(^\!--|--$)/g; //Remove comment tag markup from comment contents
-	Parser._reWhitespace = /\s/g; //Used to find any whitespace to split on
-	Parser._reTagName = /^\s*(\/?)\s*([^\s\/]+)/; //Used to find the tag name for an element
-
-	//Regular expressions used for parsing (stateful)
-	Parser._reAttrib = //Find attributes in a tag
-		/([^=<>\"\'\s]+)\s*=\s*"([^"]*)"|([^=<>\"\'\s]+)\s*=\s*'([^']*)'|([^=<>\"\'\s]+)\s*=\s*([^'"\s]+)|([^=<>\"\'\s\/]+)/g;
-	Parser._reTags = /[\<\>]/g; //Find tag markers
-
-	//**Public**//
-	//Methods//
-	//Parses a complete HTML and pushes it to the handler
-	Parser.prototype.parseComplete = function Parser$parseComplete (data) {
-		this.reset();
-		this.parseChunk(data);
-		this.done();
-	}
-
-	//Parses a piece of an HTML document
-	Parser.prototype.parseChunk = function Parser$parseChunk (data) {
-		if (this._done)
-			this.handleError(new Error("Attempted to parse chunk after parsing already done"));
-		this._buffer += data; //FIXME: this can be a bottleneck
-		this.parseTags();
-	}
-
-	//Tells the parser that the HTML being parsed is complete
-	Parser.prototype.done = function Parser$done () {
-		if (this._done)
-			return;
-		this._done = true;
-	
-		//Push any unparsed text into a final element in the element list
-		if (this._buffer.length) {
-			var rawData = this._buffer;
-			this._buffer = "";
-			var element = {
-				  raw: rawData
-				, data: (this._parseState == ElementType.Text) ? rawData : rawData.replace(Parser._reTrim, "")
-				, type: this._parseState
-				};
-			if (this._parseState == ElementType.Tag || this._parseState == ElementType.Script || this._parseState == ElementType.Style)
-				element.name = this.parseTagName(element.data);
-			this.parseAttribs(element);
-			this._elements.push(element);
-		}
-	
-		this.writeHandler();
-		this._handler.done();
-	}
-
-	//Resets the parser to a blank state, ready to parse a new HTML document
-	Parser.prototype.reset = function Parser$reset () {
-		this._buffer = "";
-		this._done = false;
-		this._elements = [];
-		this._elementsCurrent = 0;
-		this._current = 0;
-		this._next = 0;
-		this._location = {
-			  row: 0
-			, col: 0
-			, charOffset: 0
-			, inBuffer: 0
-		};
-		this._parseState = ElementType.Text;
-		this._prevTagSep = '';
-		this._tagStack = [];
-		this._handler.reset();
-	}
-	
-	//**Private**//
-	//Properties//
-	Parser.prototype._options = null; //Parser options for how to behave
-	Parser.prototype._handler = null; //Handler for parsed elements
-	Parser.prototype._buffer = null; //Buffer of unparsed data
-	Parser.prototype._done = false; //Flag indicating whether parsing is done
-	Parser.prototype._elements =  null; //Array of parsed elements
-	Parser.prototype._elementsCurrent = 0; //Pointer to last element in _elements that has been processed
-	Parser.prototype._current = 0; //Position in data that has already been parsed
-	Parser.prototype._next = 0; //Position in data of the next tag marker (<>)
-	Parser.prototype._location = null; //Position tracking for elements in a stream
-	Parser.prototype._parseState = ElementType.Text; //Current type of element being parsed
-	Parser.prototype._prevTagSep = ''; //Previous tag marker found
-	//Stack of element types previously encountered; keeps track of when
-	//parsing occurs inside a script/comment/style tag
-	Parser.prototype._tagStack = null;
-
-	//Methods//
-	//Takes an array of elements and parses any found attributes
-	Parser.prototype.parseTagAttribs = function Parser$parseTagAttribs (elements) {
-		var idxEnd = elements.length;
-		var idx = 0;
-	
-		while (idx < idxEnd) {
-			var element = elements[idx++];
-			if (element.type == ElementType.Tag || element.type == ElementType.Script || element.type == ElementType.style)
-				this.parseAttribs(element);
-		}
-	
-		return(elements);
-	}
-
-	//Takes an element and adds an "attribs" property for any element attributes found 
-	Parser.prototype.parseAttribs = function Parser$parseAttribs (element) {
-		//Only parse attributes for tags
-		if (element.type != ElementType.Script && element.type != ElementType.Style && element.type != ElementType.Tag)
-			return;
-	
-		var tagName = element.data.split(Parser._reWhitespace, 1)[0];
-		var attribRaw = element.data.substring(tagName.length);
-		if (attribRaw.length < 1)
-			return;
-	
-		var match;
-		Parser._reAttrib.lastIndex = 0;
-		while (match = Parser._reAttrib.exec(attribRaw)) {
-			if (element.attribs == undefined)
-				element.attribs = {};
-	
-			if (typeof match[1] == "string" && match[1].length) {
-				element.attribs[match[1]] = match[2];
-			} else if (typeof match[3] == "string" && match[3].length) {
-				element.attribs[match[3].toString()] = match[4].toString();
-			} else if (typeof match[5] == "string" && match[5].length) {
-				element.attribs[match[5]] = match[6];
-			} else if (typeof match[7] == "string" && match[7].length) {
-				element.attribs[match[7]] = match[7];
-			}
-		}
-	}
-
-	//Extracts the base tag name from the data value of an element
-	Parser.prototype.parseTagName = function Parser$parseTagName (data) {
-		if (data == null || data == "")
-			return("");
-		var match = Parser._reTagName.exec(data);
-		if (!match)
-			return("");
-		return((match[1] ? "/" : "") + match[2]);
-	}
-
-	//Parses through HTML text and returns an array of found elements
-	//I admit, this function is rather large but splitting up had an noticeable impact on speed
-	Parser.prototype.parseTags = function Parser$parseTags () {
-		var bufferEnd = this._buffer.length - 1;
-		while (Parser._reTags.test(this._buffer)) {
-			this._next = Parser._reTags.lastIndex - 1;
-			var tagSep = this._buffer.charAt(this._next); //The currently found tag marker
-			var rawData = this._buffer.substring(this._current, this._next); //The next chunk of data to parse
-	
-			//A new element to eventually be appended to the element list
-			var element = {
-				  raw: rawData
-				, data: (this._parseState == ElementType.Text) ? rawData : rawData.replace(Parser._reTrim, "")
-				, type: this._parseState
-			};
-	
-			var elementName = this.parseTagName(element.data);
-	
-			//This section inspects the current tag stack and modifies the current
-			//element if we're actually parsing a special area (script/comment/style tag)
-			if (this._tagStack.length) { //We're parsing inside a script/comment/style tag
-				if (this._tagStack[this._tagStack.length - 1] == ElementType.Script) { //We're currently in a script tag
-					if (elementName == "/script") //Actually, we're no longer in a script tag, so pop it off the stack
-						this._tagStack.pop();
-					else { //Not a closing script tag
-						if (element.raw.indexOf("!--") != 0) { //Make sure we're not in a comment
-							//All data from here to script close is now a text element
-							element.type = ElementType.Text;
-							//If the previous element is text, append the current text to it
-							if (this._elements.length && this._elements[this._elements.length - 1].type == ElementType.Text) {
-								var prevElement = this._elements[this._elements.length - 1];
-								prevElement.raw = prevElement.data = prevElement.raw + this._prevTagSep + element.raw;
-								element.raw = element.data = ""; //This causes the current element to not be added to the element list
-							}
-						}
-					}
-				}
-				else if (this._tagStack[this._tagStack.length - 1] == ElementType.Style) { //We're currently in a style tag
-					if (elementName == "/style") //Actually, we're no longer in a style tag, so pop it off the stack
-						this._tagStack.pop();
-					else {
-						if (element.raw.indexOf("!--") != 0) { //Make sure we're not in a comment
-							//All data from here to style close is now a text element
-							element.type = ElementType.Text;
-							//If the previous element is text, append the current text to it
-							if (this._elements.length && this._elements[this._elements.length - 1].type == ElementType.Text) {
-								var prevElement = this._elements[this._elements.length - 1];
-								if (element.raw != "") {
-									prevElement.raw = prevElement.data = prevElement.raw + this._prevTagSep + element.raw;
-									element.raw = element.data = ""; //This causes the current element to not be added to the element list
-								} else { //Element is empty, so just append the last tag marker found
-									prevElement.raw = prevElement.data = prevElement.raw + this._prevTagSep;
-								}
-							} else { //The previous element was not text
-								if (element.raw != "") {
-									element.raw = element.data = element.raw;
-								}
-							}
-						}
-					}
-				}
-				else if (this._tagStack[this._tagStack.length - 1] == ElementType.Comment) { //We're currently in a comment tag
-					var rawLen = element.raw.length;
-					if (element.raw.charAt(rawLen - 2) == "-" && element.raw.charAt(rawLen - 1) == "-" && tagSep == ">") {
-						//Actually, we're no longer in a style tag, so pop it off the stack
-						this._tagStack.pop();
-						//If the previous element is a comment, append the current text to it
-						if (this._elements.length && this._elements[this._elements.length - 1].type == ElementType.Comment) {
-							var prevElement = this._elements[this._elements.length - 1];
-							prevElement.raw = prevElement.data = (prevElement.raw + element.raw).replace(Parser._reTrimComment, "");
-							element.raw = element.data = ""; //This causes the current element to not be added to the element list
-							element.type = ElementType.Text;
-						}
-						else //Previous element not a comment
-							element.type = ElementType.Comment; //Change the current element's type to a comment
-					}
-					else { //Still in a comment tag
-						element.type = ElementType.Comment;
-						//If the previous element is a comment, append the current text to it
-						if (this._elements.length && this._elements[this._elements.length - 1].type == ElementType.Comment) {
-							var prevElement = this._elements[this._elements.length - 1];
-							prevElement.raw = prevElement.data = prevElement.raw + element.raw + tagSep;
-							element.raw = element.data = ""; //This causes the current element to not be added to the element list
-							element.type = ElementType.Text;
-						}
-						else
-							element.raw = element.data = element.raw + tagSep;
-					}
-				}
-			}
-	
-			//Processing of non-special tags
-			if (element.type == ElementType.Tag) {
-				element.name = elementName;
-				
-				if (element.raw.indexOf("!--") == 0) { //This tag is really comment
-					element.type = ElementType.Comment;
-					delete element["name"];
-					var rawLen = element.raw.length;
-					//Check if the comment is terminated in the current element
-					if (element.raw.charAt(rawLen - 1) == "-" && element.raw.charAt(rawLen - 2) == "-" && tagSep == ">")
-						element.raw = element.data = element.raw.replace(Parser._reTrimComment, "");
-					else { //It's not so push the comment onto the tag stack
-						element.raw += tagSep;
-						this._tagStack.push(ElementType.Comment);
-					}
-				}
-				else if (element.raw.indexOf("!") == 0 || element.raw.indexOf("?") == 0) {
-					element.type = ElementType.Directive;
-					//TODO: what about CDATA?
-				}
-				else if (element.name == "script") {
-					element.type = ElementType.Script;
-					//Special tag, push onto the tag stack if not terminated
-					if (element.data.charAt(element.data.length - 1) != "/")
-						this._tagStack.push(ElementType.Script);
-				}
-				else if (element.name == "/script")
-					element.type = ElementType.Script;
-				else if (element.name == "style") {
-					element.type = ElementType.Style;
-					//Special tag, push onto the tag stack if not terminated
-					if (element.data.charAt(element.data.length - 1) != "/")
-						this._tagStack.push(ElementType.Style);
-				}
-				else if (element.name == "/style")
-					element.type = ElementType.Style;
-				if (element.name && element.name.charAt(0) == "/")
-					element.data = element.name;
-			}
-	
-			//Add all tags and non-empty text elements to the element list
-			if (element.raw != "" || element.type != ElementType.Text) {
-				if (this._options.includeLocation && !element.location) {
-					element.location = this.getLocation(element.type == ElementType.Tag);
-				}
-				this.parseAttribs(element);
-				this._elements.push(element);
-				//If tag self-terminates, add an explicit, separate closing tag
-				if (
-					element.type != ElementType.Text
-					&&
-					element.type != ElementType.Comment
-					&&
-					element.type != ElementType.Directive
-					&&
-					element.data.charAt(element.data.length - 1) == "/"
-					)
-					this._elements.push({
-						  raw: "/" + element.name
-						, data: "/" + element.name
-						, name: "/" + element.name
-						, type: element.type
-					});
-			}
-			this._parseState = (tagSep == "<") ? ElementType.Tag : ElementType.Text;
-			this._current = this._next + 1;
-			this._prevTagSep = tagSep;
-		}
-
-		if (this._options.includeLocation) {
-			this.getLocation();
-			this._location.row += this._location.inBuffer;
-			this._location.inBuffer = 0;
-			this._location.charOffset = 0;
-		}
-		this._buffer = (this._current <= bufferEnd) ? this._buffer.substring(this._current) : "";
-		this._current = 0;
-	
-		this.writeHandler();
-	}
-
-	Parser.prototype.getLocation = function Parser$getLocation (startTag) {
-		var c,
-			l = this._location,
-			end = this._current - (startTag ? 1 : 0),
-			chunk = startTag && l.charOffset == 0 && this._current == 0;
-		
-		for (; l.charOffset < end; l.charOffset++) {
-			c = this._buffer.charAt(l.charOffset);
-			if (c == '\n') {
-				l.inBuffer++;
-				l.col = 0;
-			} else if (c != '\r') {
-				l.col++;
-			}
-		}
-		return {
-			  line: l.row + l.inBuffer + 1
-			, col: l.col + (chunk ? 0: 1)
-		};
-	}
-
-	//Checks the handler to make it is an object with the right "interface"
-	Parser.prototype.validateHandler = function Parser$validateHandler (handler) {
-		if ((typeof handler) != "object")
-			throw new Error("Handler is not an object");
-		if ((typeof handler.reset) != "function")
-			throw new Error("Handler method 'reset' is invalid");
-		if ((typeof handler.done) != "function")
-			throw new Error("Handler method 'done' is invalid");
-		if ((typeof handler.writeTag) != "function")
-			throw new Error("Handler method 'writeTag' is invalid");
-		if ((typeof handler.writeText) != "function")
-			throw new Error("Handler method 'writeText' is invalid");
-		if ((typeof handler.writeComment) != "function")
-			throw new Error("Handler method 'writeComment' is invalid");
-		if ((typeof handler.writeDirective) != "function")
-			throw new Error("Handler method 'writeDirective' is invalid");
-	}
-
-	//Writes parsed elements out to the handler
-	Parser.prototype.writeHandler = function Parser$writeHandler (forceFlush) {
-		forceFlush = !!forceFlush;
-		if (this._tagStack.length && !forceFlush)
-			return;
-		while (this._elements.length) {
-			var element = this._elements.shift();
-			switch (element.type) {
-				case ElementType.Comment:
-					this._handler.writeComment(element);
-					break;
-				case ElementType.Directive:
-					this._handler.writeDirective(element);
-					break;
-				case ElementType.Text:
-					this._handler.writeText(element);
-					break;
-				default:
-					this._handler.writeTag(element);
-					break;
-			}
-		}
-	}
-
-	Parser.prototype.handleError = function Parser$handleError (error) {
-		if ((typeof this._handler.error) == "function")
-			this._handler.error(error);
-		else
-			throw error;
-	}
-
-//TODO: make this a trully streamable handler
-function RssHandler (callback) {
-	RssHandler.super_.call(this, callback, { ignoreWhitespace: true, verbose: false, enforceEmptyTags: false });
-}
-inherits(RssHandler, DefaultHandler);
-
-	RssHandler.prototype.done = function RssHandler$done () {
-		var feed = { };
-		var feedRoot;
-
-		var found = DomUtils.getElementsByTagName(function (value) { return(value == "rss" || value == "feed"); }, this.dom, false);
-		if (found.length) {
-			feedRoot = found[0];
-		}
-		if (feedRoot) {
-			if (feedRoot.name == "rss") {
-				feed.type = "rss";
-				feedRoot = feedRoot.children[0]; //<channel/>
-				feed.id = "";
-				try {
-					feed.title = DomUtils.getElementsByTagName("title", feedRoot.children, false)[0].children[0].data;
-				} catch (ex) { }
-				try {
-					feed.link = DomUtils.getElementsByTagName("link", feedRoot.children, false)[0].children[0].data;
-				} catch (ex) { }
-				try {
-					feed.description = DomUtils.getElementsByTagName("description", feedRoot.children, false)[0].children[0].data;
-				} catch (ex) { }
-				try {
-					feed.updated = new Date(DomUtils.getElementsByTagName("lastBuildDate", feedRoot.children, false)[0].children[0].data);
-				} catch (ex) { }
-				try {
-					feed.author = DomUtils.getElementsByTagName("managingEditor", feedRoot.children, false)[0].children[0].data;
-				} catch (ex) { }
-				feed.items = [];
-				DomUtils.getElementsByTagName("item", feedRoot.children).forEach(function (item, index, list) {
-					var entry = {};
-					try {
-						entry.id = DomUtils.getElementsByTagName("guid", item.children, false)[0].children[0].data;
-					} catch (ex) { }
-					try {
-						entry.title = DomUtils.getElementsByTagName("title", item.children, false)[0].children[0].data;
-					} catch (ex) { }
-					try {
-						entry.link = DomUtils.getElementsByTagName("link", item.children, false)[0].children[0].data;
-					} catch (ex) { }
-					try {
-						entry.description = DomUtils.getElementsByTagName("description", item.children, false)[0].children[0].data;
-					} catch (ex) { }
-					try {
-						entry.pubDate = new Date(DomUtils.getElementsByTagName("pubDate", item.children, false)[0].children[0].data);
-					} catch (ex) { }
-					feed.items.push(entry);
-				});
-			} else {
-				feed.type = "atom";
-				try {
-					feed.id = DomUtils.getElementsByTagName("id", feedRoot.children, false)[0].children[0].data;
-				} catch (ex) { }
-				try {
-					feed.title = DomUtils.getElementsByTagName("title", feedRoot.children, false)[0].children[0].data;
-				} catch (ex) { }
-				try {
-					feed.link = DomUtils.getElementsByTagName("link", feedRoot.children, false)[0].attribs.href;
-				} catch (ex) { }
-				try {
-					feed.description = DomUtils.getElementsByTagName("subtitle", feedRoot.children, false)[0].children[0].data;
-				} catch (ex) { }
-				try {
-					feed.updated = new Date(DomUtils.getElementsByTagName("updated", feedRoot.children, false)[0].children[0].data);
-				} catch (ex) { }
-				try {
-					feed.author = DomUtils.getElementsByTagName("email", feedRoot.children, true)[0].children[0].data;
-				} catch (ex) { }
-				feed.items = [];
-				DomUtils.getElementsByTagName("entry", feedRoot.children).forEach(function (item, index, list) {
-					var entry = {};
-					try {
-						entry.id = DomUtils.getElementsByTagName("id", item.children, false)[0].children[0].data;
-					} catch (ex) { }
-					try {
-						entry.title = DomUtils.getElementsByTagName("title", item.children, false)[0].children[0].data;
-					} catch (ex) { }
-					try {
-						entry.link = DomUtils.getElementsByTagName("link", item.children, false)[0].attribs.href;
-					} catch (ex) { }
-					try {
-						entry.description = DomUtils.getElementsByTagName("summary", item.children, false)[0].children[0].data;
-					} catch (ex) { }
-					try {
-						entry.pubDate = new Date(DomUtils.getElementsByTagName("updated", item.children, false)[0].children[0].data);
-					} catch (ex) { }
-					feed.items.push(entry);
-				});
-			}
-
-			this.dom = feed;
-		}
-		RssHandler.super_.prototype.done.call(this);
-	}
-
-///////////////////////////////////////////////////
-
-function DefaultHandler (callback, options) {
-	this.reset();
-	this._options = options ? options : { };
-	if (this._options.ignoreWhitespace == undefined)
-		this._options.ignoreWhitespace = false; //Keep whitespace-only text nodes
-	if (this._options.verbose == undefined)
-		this._options.verbose = true; //Keep data property for tags and raw property for all
-	if (this._options.enforceEmptyTags == undefined)
-		this._options.enforceEmptyTags = true; //Don't allow children for HTML tags defined as empty in spec
-	if ((typeof callback) == "function")
-		this._callback = callback;
-}
-
-	//**"Static"**//
-	//HTML Tags that shouldn't contain child nodes
-	DefaultHandler._emptyTags = {
-		  area: 1
-		, base: 1
-		, basefont: 1
-		, br: 1
-		, col: 1
-		, frame: 1
-		, hr: 1
-		, img: 1
-		, input: 1
-		, isindex: 1
-		, link: 1
-		, meta: 1
-		, param: 1
-		, embed: 1
-	}
-	//Regex to detect whitespace only text nodes
-	DefaultHandler.reWhitespace = /^\s*$/;
-
-	//**Public**//
-	//Properties//
-	DefaultHandler.prototype.dom = null; //The hierarchical object containing the parsed HTML
-	//Methods//
-	//Resets the handler back to starting state
-	DefaultHandler.prototype.reset = function DefaultHandler$reset() {
-		this.dom = [];
-		this._done = false;
-		this._tagStack = [];
-		this._tagStack.last = function DefaultHandler$_tagStack$last () {
-			return(this.length ? this[this.length - 1] : null);
-		}
-	}
-	//Signals the handler that parsing is done
-	DefaultHandler.prototype.done = function DefaultHandler$done () {
-		this._done = true;
-		this.handleCallback(null);
-	}
-	DefaultHandler.prototype.writeTag = function DefaultHandler$writeTag (element) {
-		this.handleElement(element);
-	} 
-	DefaultHandler.prototype.writeText = function DefaultHandler$writeText (element) {
-		if (this._options.ignoreWhitespace)
-			if (DefaultHandler.reWhitespace.test(element.data))
-				return;
-		this.handleElement(element);
-	} 
-	DefaultHandler.prototype.writeComment = function DefaultHandler$writeComment (element) {
-		this.handleElement(element);
-	} 
-	DefaultHandler.prototype.writeDirective = function DefaultHandler$writeDirective (element) {
-		this.handleElement(element);
-	}
-	DefaultHandler.prototype.error = function DefaultHandler$error (error) {
-		this.handleCallback(error);
-	}
-
-	//**Private**//
-	//Properties//
-	DefaultHandler.prototype._options = null; //Handler options for how to behave
-	DefaultHandler.prototype._callback = null; //Callback to respond to when parsing done
-	DefaultHandler.prototype._done = false; //Flag indicating whether handler has been notified of parsing completed
-	DefaultHandler.prototype._tagStack = null; //List of parents to the currently element being processed
-	//Methods//
-	DefaultHandler.prototype.handleCallback = function DefaultHandler$handleCallback (error) {
-			if ((typeof this._callback) != "function")
-				if (error)
-					throw error;
-				else
-					return;
-			this._callback(error, this.dom);
-	}
-	
-	DefaultHandler.prototype.isEmptyTag = function(element) {
-		var name = element.name.toLowerCase();
-		if (name.charAt(0) == '/') {
-			name = name.substring(1);
-		}
-		return this._options.enforceEmptyTags && !!DefaultHandler._emptyTags[name];
-	};
-	
-	DefaultHandler.prototype.handleElement = function DefaultHandler$handleElement (element) {
-		if (this._done)
-			this.handleCallback(new Error("Writing to the handler after done() called is not allowed without a reset()"));
-		if (!this._options.verbose) {
-//			element.raw = null; //FIXME: Not clean
-			//FIXME: Serious performance problem using delete
-			delete element.raw;
-			if (element.type == "tag" || element.type == "script" || element.type == "style")
-				delete element.data;
-		}
-		if (!this._tagStack.last()) { //There are no parent elements
-			//If the element can be a container, add it to the tag stack and the top level list
-			if (element.type != ElementType.Text && element.type != ElementType.Comment && element.type != ElementType.Directive) {
-				if (element.name.charAt(0) != "/") { //Ignore closing tags that obviously don't have an opening tag
-					this.dom.push(element);
-					if (!this.isEmptyTag(element)) { //Don't add tags to the tag stack that can't have children
-						this._tagStack.push(element);
-					}
-				}
-			}
-			else //Otherwise just add to the top level list
-				this.dom.push(element);
-		}
-		else { //There are parent elements
-			//If the element can be a container, add it as a child of the element
-			//on top of the tag stack and then add it to the tag stack
-			if (element.type != ElementType.Text && element.type != ElementType.Comment && element.type != ElementType.Directive) {
-				if (element.name.charAt(0) == "/") {
-					//This is a closing tag, scan the tagStack to find the matching opening tag
-					//and pop the stack up to the opening tag's parent
-					var baseName = element.name.substring(1);
-					if (!this.isEmptyTag(element)) {
-						var pos = this._tagStack.length - 1;
-						while (pos > -1 && this._tagStack[pos--].name != baseName) { }
-						if (pos > -1 || this._tagStack[0].name == baseName)
-							while (pos < this._tagStack.length - 1)
-								this._tagStack.pop();
-					}
-				}
-				else { //This is not a closing tag
-					if (!this._tagStack.last().children)
-						this._tagStack.last().children = [];
-					this._tagStack.last().children.push(element);
-					if (!this.isEmptyTag(element)) //Don't add tags to the tag stack that can't have children
-						this._tagStack.push(element);
-				}
-			}
-			else { //This is not a container element
-				if (!this._tagStack.last().children)
-					this._tagStack.last().children = [];
-				this._tagStack.last().children.push(element);
-			}
-		}
-	}
-
-	var DomUtils = {
-		  testElement: function DomUtils$testElement (options, element) {
-			if (!element) {
-				return false;
-			}
-	
-			for (var key in options) {
-				if (key == "tag_name") {
-					if (element.type != "tag" && element.type != "script" && element.type != "style") {
-						return false;
-					}
-					if (!options["tag_name"](element.name)) {
-						return false;
-					}
-				} else if (key == "tag_type") {
-					if (!options["tag_type"](element.type)) {
-						return false;
-					}
-				} else if (key == "tag_contains") {
-					if (element.type != "text" && element.type != "comment" && element.type != "directive") {
-						return false;
-					}
-					if (!options["tag_contains"](element.data)) {
-						return false;
-					}
-				} else {
-					if (!element.attribs || !options[key](element.attribs[key])) {
-						return false;
-					}
-				}
-			}
-		
-			return true;
-		}
-	
-		, getElements: function DomUtils$getElements (options, currentElement, recurse, limit) {
-			recurse = (recurse === undefined || recurse === null) || !!recurse;
-			limit = isNaN(parseInt(limit)) ? -1 : parseInt(limit);
-
-			if (!currentElement) {
-				return([]);
-			}
-	
-			var found = [];
-			var elementList;
-
-			function getTest (checkVal) {
-				return(function (value) { return(value == checkVal); });
-			}
-			for (var key in options) {
-				if ((typeof options[key]) != "function") {
-					options[key] = getTest(options[key]);
-				}
-			}
-	
-			if (DomUtils.testElement(options, currentElement)) {
-				found.push(currentElement);
-			}
-
-			if (limit >= 0 && found.length >= limit) {
-				return(found);
-			}
-
-			if (recurse && currentElement.children) {
-				elementList = currentElement.children;
-			} else if (currentElement instanceof Array) {
-				elementList = currentElement;
-			} else {
-				return(found);
-			}
-	
-			for (var i = 0; i < elementList.length; i++) {
-				found = found.concat(DomUtils.getElements(options, elementList[i], recurse, limit));
-				if (limit >= 0 && found.length >= limit) {
-					break;
-				}
-			}
-	
-			return(found);
-		}
-		
-		, getElementById: function DomUtils$getElementById (id, currentElement, recurse) {
-			var result = DomUtils.getElements({ id: id }, currentElement, recurse, 1);
-			return(result.length ? result[0] : null);
-		}
-		
-		, getElementsByTagName: function DomUtils$getElementsByTagName (name, currentElement, recurse, limit) {
-			return(DomUtils.getElements({ tag_name: name }, currentElement, recurse, limit));
-		}
-		
-		, getElementsByTagType: function DomUtils$getElementsByTagType (type, currentElement, recurse, limit) {
-			return(DomUtils.getElements({ tag_type: type }, currentElement, recurse, limit));
-		}
-	}
-
-	function inherits (ctor, superCtor) {
-		var tempCtor = function(){};
-		tempCtor.prototype = superCtor.prototype;
-		ctor.super_ = superCtor;
-		ctor.prototype = new tempCtor();
-		ctor.prototype.constructor = ctor;
-	}
-
-exports.Parser = Parser;
-
-exports.DefaultHandler = DefaultHandler;
-
-exports.RssHandler = RssHandler;
-
-exports.ElementType = ElementType;
-
-exports.DomUtils = DomUtils;
-
-})();
+exports.Parser = require("./Parser.js");
+exports.DefaultHandler = require("./DefaultHandler.js");
+exports.RssHandler = require("./RssHandler.js");
+exports.ElementType = require("./ElementType.js");
+exports.DomUtils = require("./DomUtils.js");
\ No newline at end of file
diff --git a/lib/htmlparser.min.js b/lib/htmlparser.min.js
deleted file mode 100644
index 2e09f29..0000000
--- a/lib/htmlparser.min.js
+++ /dev/null
@@ -1,22 +0,0 @@
-/***********************************************
-Copyright 2010, 2011, Chris Winberry <chris@winberry.net>. All rights reserved.
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to
-deal in the Software without restriction, including without limitation the
-rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-sell copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-IN THE SOFTWARE.
-***********************************************/
-/* v1.8.0 */
-(function(){function e(a,c){this._options=c?c:{};if(this._options.includeLocation==undefined)this._options.includeLocation=false;this.validateHandler(a);this._handler=a;this.reset()}function n(a){n.super_.call(this,a,{ignoreWhitespace:true,verbose:false,enforceEmptyTags:false})}function i(a,c){this.reset();this._options=c?c:{};if(this._options.ignoreWhitespace==undefined)this._options.ignoreWhitespace=false;if(this._options.verbose==undefined)this._options.verbose=true;if(this._options.enforceEmptyTags== undefined)this._options.enforceEmptyTags=true;if(typeof a=="function")this._callback=a}if(!(typeof require=="function"&&typeof exports=="object"&&typeof module=="object"&&typeof __filename=="string"&&typeof __dirname=="string")){if(this.Tautologistics){if(this.Tautologistics.NodeHtmlParser)return}else this.Tautologistics={};this.Tautologistics.NodeHtmlParser={};exports=this.Tautologistics.NodeHtmlParser}var d={Text:"text",Directive:"directive",Comment:"comment",Script:"script",Style:"style",Tag:"tag"}; e._reTrim=/(^\s+|\s+$)/g;e._reTrimComment=/(^\!--|--$)/g;e._reWhitespace=/\s/g;e._reTagName=/^\s*(\/?)\s*([^\s\/]+)/;e._reAttrib=/([^=<>\"\'\s]+)\s*=\s*"([^"]*)"|([^=<>\"\'\s]+)\s*=\s*'([^']*)'|([^=<>\"\'\s]+)\s*=\s*([^'"\s]+)|([^=<>\"\'\s\/]+)/g;e._reTags=/[\<\>]/g;e.prototype.parseComplete=function(a){this.reset();this.parseChunk(a);this.done()};e.prototype.parseChunk=function(a){this._done&&this.handleError(Error("Attempted to parse chunk after parsing already done"));this._buffer+=a;this.parseTags()}; e.prototype.done=function(){if(!this._done){this._done=true;if(this._buffer.length){var a=this._buffer;this._buffer="";a={raw:a,data:this._parseState==d.Text?a:a.replace(e._reTrim,""),type:this._parseState};if(this._parseState==d.Tag||this._parseState==d.Script||this._parseState==d.Style)a.name=this.parseTagName(a.data);this.parseAttribs(a);this._elements.push(a)}this.writeHandler();this._handler.done()}};e.prototype.reset=function(){this._buffer="";this._done=false;this._elements=[];this._next=this._current= this._elementsCurrent=0;this._location={row:0,col:0,charOffset:0,inBuffer:0};this._parseState=d.Text;this._prevTagSep="";this._tagStack=[];this._handler.reset()};e.prototype._options=null;e.prototype._handler=null;e.prototype._buffer=null;e.prototype._done=false;e.prototype._elements=null;e.prototype._elementsCurrent=0;e.prototype._current=0;e.prototype._next=0;e.prototype._location=null;e.prototype._parseState=d.Text;e.prototype._prevTagSep="";e.prototype._tagStack=null;e.prototype.parseTagAttribs= function(a){for(var c=a.length,b=0;b<c;){var h=a[b++];if(h.type==d.Tag||h.type==d.Script||h.type==d.style)this.parseAttribs(h)}return a};e.prototype.parseAttribs=function(a){if(!(a.type!=d.Script&&a.type!=d.Style&&a.type!=d.Tag)){var c=a.data.split(e._reWhitespace,1)[0];c=a.data.substring(c.length);if(!(c.length<1)){var b;for(e._reAttrib.lastIndex=0;b=e._reAttrib.exec(c);){if(a.attribs==undefined)a.attribs={};if(typeof b[1]=="string"&&b[1].length)a.attribs[b[1]]=b[2];else if(typeof b[3]=="string"&& b[3].length)a.attribs[b[3].toString()]=b[4].toString();else if(typeof b[5]=="string"&&b[5].length)a.attribs[b[5]]=b[6];else if(typeof b[7]=="string"&&b[7].length)a.attribs[b[7]]=b[7]}}}};e.prototype.parseTagName=function(a){if(a==null||a=="")return"";a=e._reTagName.exec(a);if(!a)return"";return(a[1]?"/":"")+a[2]};e.prototype.parseTags=function(){for(var a=this._buffer.length-1;e._reTags.test(this._buffer);){this._next=e._reTags.lastIndex-1;var c=this._buffer.charAt(this._next),b=this._buffer.substring(this._current, this._next);b={raw:b,data:this._parseState==d.Text?b:b.replace(e._reTrim,""),type:this._parseState};var h=this.parseTagName(b.data);if(this._tagStack.length)if(this._tagStack[this._tagStack.length-1]==d.Script)if(h=="/script")this._tagStack.pop();else{if(b.raw.indexOf("!--")!=0){b.type=d.Text;if(this._elements.length&&this._elements[this._elements.length-1].type==d.Text){var g=this._elements[this._elements.length-1];g.raw=g.data=g.raw+this._prevTagSep+b.raw;b.raw=b.data=""}}}else if(this._tagStack[this._tagStack.length- 1]==d.Style)if(h=="/style")this._tagStack.pop();else{if(b.raw.indexOf("!--")!=0){b.type=d.Text;if(this._elements.length&&this._elements[this._elements.length-1].type==d.Text){g=this._elements[this._elements.length-1];if(b.raw!=""){g.raw=g.data=g.raw+this._prevTagSep+b.raw;b.raw=b.data=""}else g.raw=g.data=g.raw+this._prevTagSep}else if(b.raw!="")b.raw=b.data=b.raw}}else if(this._tagStack[this._tagStack.length-1]==d.Comment){g=b.raw.length;if(b.raw.charAt(g-2)=="-"&&b.raw.charAt(g-1)=="-"&&c==">"){this._tagStack.pop(); if(this._elements.length&&this._elements[this._elements.length-1].type==d.Comment){g=this._elements[this._elements.length-1];g.raw=g.data=(g.raw+b.raw).replace(e._reTrimComment,"");b.raw=b.data="";b.type=d.Text}else b.type=d.Comment}else{b.type=d.Comment;if(this._elements.length&&this._elements[this._elements.length-1].type==d.Comment){g=this._elements[this._elements.length-1];g.raw=g.data=g.raw+b.raw+c;b.raw=b.data="";b.type=d.Text}else b.raw=b.data=b.raw+c}}if(b.type==d.Tag){b.name=h;if(b.raw.indexOf("!--")== 0){b.type=d.Comment;delete b.name;g=b.raw.length;if(b.raw.charAt(g-1)=="-"&&b.raw.charAt(g-2)=="-"&&c==">")b.raw=b.data=b.raw.replace(e._reTrimComment,"");else{b.raw+=c;this._tagStack.push(d.Comment)}}else if(b.raw.indexOf("!")==0||b.raw.indexOf("?")==0)b.type=d.Directive;else if(b.name=="script"){b.type=d.Script;b.data.charAt(b.data.length-1)!="/"&&this._tagStack.push(d.Script)}else if(b.name=="/script")b.type=d.Script;else if(b.name=="style"){b.type=d.Style;b.data.charAt(b.data.length-1)!="/"&& this._tagStack.push(d.Style)}else if(b.name=="/style")b.type=d.Style;if(b.name&&b.name.charAt(0)=="/")b.data=b.name}if(b.raw!=""||b.type!=d.Text){if(this._options.includeLocation&&!b.location)b.location=this.getLocation(b.type==d.Tag);this.parseAttribs(b);this._elements.push(b);b.type!=d.Text&&b.type!=d.Comment&&b.type!=d.Directive&&b.data.charAt(b.data.length-1)=="/"&&this._elements.push({raw:"/"+b.name,data:"/"+b.name,name:"/"+b.name,type:b.type})}this._parseState=c=="<"?d.Tag:d.Text;this._current= this._next+1;this._prevTagSep=c}if(this._options.includeLocation){this.getLocation();this._location.row+=this._location.inBuffer;this._location.inBuffer=0;this._location.charOffset=0}this._buffer=this._current<=a?this._buffer.substring(this._current):"";this._current=0;this.writeHandler()};e.prototype.getLocation=function(a){for(var c=this._location,b=this._current-(a?1:0),h=a&&c.charOffset==0&&this._current==0;c.charOffset<b;c.charOffset++){a=this._buffer.charAt(c.charOffset);if(a=="\n"){c.inBuffer++; c.col=0}else a!="\r"&&c.col++}return{line:c.row+c.inBuffer+1,col:c.col+(h?0:1)}};e.prototype.validateHandler=function(a){if(typeof a!="object")throw Error("Handler is not an object");if(typeof a.reset!="function")throw Error("Handler method 'reset' is invalid");if(typeof a.done!="function")throw Error("Handler method 'done' is invalid");if(typeof a.writeTag!="function")throw Error("Handler method 'writeTag' is invalid");if(typeof a.writeText!="function")throw Error("Handler method 'writeText' is invalid"); if(typeof a.writeComment!="function")throw Error("Handler method 'writeComment' is invalid");if(typeof a.writeDirective!="function")throw Error("Handler method 'writeDirective' is invalid");};e.prototype.writeHandler=function(a){a=!!a;if(!(this._tagStack.length&&!a))for(;this._elements.length;){a=this._elements.shift();switch(a.type){case d.Comment:this._handler.writeComment(a);break;case d.Directive:this._handler.writeDirective(a);break;case d.Text:this._handler.writeText(a);break;default:this._handler.writeTag(a)}}}; e.prototype.handleError=function(a){if(typeof this._handler.error=="function")this._handler.error(a);else throw a;};(function(a,c){var b=function(){};b.prototype=c.prototype;a.super_=c;a.prototype=new b;a.prototype.constructor=a})(n,i);n.prototype.done=function(){var a={},c,b=f.getElementsByTagName(function(j){return j=="rss"||j=="feed"},this.dom,false);if(b.length)c=b[0];if(c){if(c.name=="rss"){a.type="rss";c=c.children[0];a.id="";try{a.title=f.getElementsByTagName("title",c.children,false)[0].children[0].data}catch(h){}try{a.link= f.getElementsByTagName("link",c.children,false)[0].children[0].data}catch(g){}try{a.description=f.getElementsByTagName("description",c.children,false)[0].children[0].data}catch(l){}try{a.updated=new Date(f.getElementsByTagName("lastBuildDate",c.children,false)[0].children[0].data)}catch(m){}try{a.author=f.getElementsByTagName("managingEditor",c.children,false)[0].children[0].data}catch(o){}a.items=[];f.getElementsByTagName("item",c.children).forEach(function(j){var k={};try{k.id=f.getElementsByTagName("guid", j.children,false)[0].children[0].data}catch(q){}try{k.title=f.getElementsByTagName("title",j.children,false)[0].children[0].data}catch(r){}try{k.link=f.getElementsByTagName("link",j.children,false)[0].children[0].data}catch(s){}try{k.description=f.getElementsByTagName("description",j.children,false)[0].children[0].data}catch(t){}try{k.pubDate=new Date(f.getElementsByTagName("pubDate",j.children,false)[0].children[0].data)}catch(u){}a.items.push(k)})}else{a.type="atom";try{a.id=f.getElementsByTagName("id", c.children,false)[0].children[0].data}catch(p){}try{a.title=f.getElementsByTagName("title",c.children,false)[0].children[0].data}catch(v){}try{a.link=f.getElementsByTagName("link",c.children,false)[0].attribs.href}catch(w){}try{a.description=f.getElementsByTagName("subtitle",c.children,false)[0].children[0].data}catch(x){}try{a.updated=new Date(f.getElementsByTagName("updated",c.children,false)[0].children[0].data)}catch(y){}try{a.author=f.getElementsByTagName("email",c.children,true)[0].children[0].data}catch(z){}a.items= [];f.getElementsByTagName("entry",c.children).forEach(function(j){var k={};try{k.id=f.getElementsByTagName("id",j.children,false)[0].children[0].data}catch(q){}try{k.title=f.getElementsByTagName("title",j.children,false)[0].children[0].data}catch(r){}try{k.link=f.getElementsByTagName("link",j.children,false)[0].attribs.href}catch(s){}try{k.description=f.getElementsByTagName("summary",j.children,false)[0].children[0].data}catch(t){}try{k.pubDate=new Date(f.getElementsByTagName("updated",j.children, false)[0].children[0].data)}catch(u){}a.items.push(k)})}this.dom=a}n.super_.prototype.done.call(this)};i._emptyTags={area:1,base:1,basefont:1,br:1,col:1,frame:1,hr:1,img:1,input:1,isindex:1,link:1,meta:1,param:1,embed:1};i.reWhitespace=/^\s*$/;i.prototype.dom=null;i.prototype.reset=function(){this.dom=[];this._done=false;this._tagStack=[];this._tagStack.last=function(){return this.length?this[this.length-1]:null}};i.prototype.done=function(){this._done=true;this.handleCallback(null)};i.prototype.writeTag= function(a){this.handleElement(a)};i.prototype.writeText=function(a){if(this._options.ignoreWhitespace)if(i.reWhitespace.test(a.data))return;this.handleElement(a)};i.prototype.writeComment=function(a){this.handleElement(a)};i.prototype.writeDirective=function(a){this.handleElement(a)};i.prototype.error=function(a){this.handleCallback(a)};i.prototype._options=null;i.prototype._callback=null;i.prototype._done=false;i.prototype._tagStack=null;i.prototype.handleCallback=function(a){if(typeof this._callback!= "function")if(a)throw a;else return;this._callback(a,this.dom)};i.prototype.isEmptyTag=function(a){a=a.name.toLowerCase();if(a.charAt(0)=="/")a=a.substring(1);return this._options.enforceEmptyTags&&!!i._emptyTags[a]};i.prototype.handleElement=function(a){this._done&&this.handleCallback(Error("Writing to the handler after done() called is not allowed without a reset()"));if(!this._options.verbose){delete a.raw;if(a.type=="tag"||a.type=="script"||a.type=="style")delete a.data}if(this._tagStack.last())if(a.type!= d.Text&&a.type!=d.Comment&&a.type!=d.Directive)if(a.name.charAt(0)=="/"){var c=a.name.substring(1);if(!this.isEmptyTag(a)){for(a=this._tagStack.length-1;a>-1&&this._tagStack[a--].name!=c;);if(a>-1||this._tagStack[0].name==c)for(;a<this._tagStack.length-1;)this._tagStack.pop()}}else{if(!this._tagStack.last().children)this._tagStack.last().children=[];this._tagStack.last().children.push(a);this.isEmptyTag(a)||this._tagStack.push(a)}else{if(!this._tagStack.last().children)this._tagStack.last().children= [];this._tagStack.last().children.push(a)}else if(a.type!=d.Text&&a.type!=d.Comment&&a.type!=d.Directive){if(a.name.charAt(0)!="/"){this.dom.push(a);this.isEmptyTag(a)||this._tagStack.push(a)}}else this.dom.push(a)};var f={testElement:function(a,c){if(!c)return false;for(var b in a)if(b=="tag_name"){if(c.type!="tag"&&c.type!="script"&&c.type!="style")return false;if(!a.tag_name(c.name))return false}else if(b=="tag_type"){if(!a.tag_type(c.type))return false}else if(b=="tag_contains"){if(c.type!="text"&& c.type!="comment"&&c.type!="directive")return false;if(!a.tag_contains(c.data))return false}else if(!c.attribs||!a[b](c.attribs[b]))return false;return true},getElements:function(a,c,b,h){function g(o){return function(p){return p==o}}b=b===undefined||b===null||!!b;h=isNaN(parseInt(h))?-1:parseInt(h);if(!c)return[];var l=[],m;for(m in a)if(typeof a[m]!="function")a[m]=g(a[m]);f.testElement(a,c)&&l.push(c);if(h>=0&&l.length>=h)return l;if(b&&c.children)c=c.children;else if(c instanceof Array)c=c;else return l; for(m=0;m<c.length;m++){l=l.concat(f.getElements(a,c[m],b,h));if(h>=0&&l.length>=h)break}return l},getElementById:function(a,c,b){a=f.getElements({id:a},c,b,1);return a.length?a[0]:null},getElementsByTagName:function(a,c,b,h){return f.getElements({tag_name:a},c,b,h)},getElementsByTagType:function(a,c,b,h){return f.getElements({tag_type:a},c,b,h)}};exports.Parser=e;exports.DefaultHandler=i;exports.RssHandler=n;exports.ElementType=d;exports.DomUtils=f})();
\ No newline at end of file
diff --git a/lib/node-htmlparser.js b/lib/node-htmlparser.js
deleted file mode 100644
index 1fc03ea..0000000
--- a/lib/node-htmlparser.js
+++ /dev/null
@@ -1,6 +0,0 @@
-var htmlparser = require("./htmlparser");
-exports.Parser = htmlparser.Parser;
-exports.DefaultHandler = htmlparser.DefaultHandler;
-exports.RssHandler = htmlparser.RssHandler;
-exports.ElementType = htmlparser.ElementType;
-exports.DomUtils = htmlparser.DomUtils;
diff --git a/lib/node-htmlparser.min.js b/lib/node-htmlparser.min.js
deleted file mode 100644
index 27d5eea..0000000
--- a/lib/node-htmlparser.min.js
+++ /dev/null
@@ -1,6 +0,0 @@
-var htmlparser = require("./htmlparser.min");
-exports.Parser = htmlparser.Parser;
-exports.DefaultHandler = htmlparser.DefaultHandler;
-exports.RssHandler = htmlparser.RssHandler;
-exports.ElementType = htmlparser.ElementType;
-exports.DomUtils = htmlparser.DomUtils;
diff --git a/profile.js b/profile.js
deleted file mode 100644
index f9d0ef2..0000000
--- a/profile.js
+++ /dev/null
@@ -1,63 +0,0 @@
-//node --prof --prof_auto profile.js
-//deps/v8/tools/mac-tick-processor v8.log
-var sys = require("sys");
-var fs = require("fs");
-var http = require("http");
-var htmlparser = require("./lib/htmlparser");
-//var libxml = require('./libxmljs');
-
-var testNHP = true; //Should node-htmlparser be exercised?
-var testLXJS = false; //Should libxmljs be exercised?
-var testIterations = 100; //Number of test loops to run
-
-var testHost = "localhost"; //Host to fetch test HTML from
-var testPort = 80; //Port on host to fetch test HTML from
-var testPath = "/~chris/feed.xml"; //Path on host to fetch HTML from
-
-function getMillisecs () {
-	return((new Date()).getTime());
-}
-
-function timeExecutions (loops, func) {
-	var start = getMillisecs();
-
-	while (loops--)
-		func();
-
-	return(getMillisecs() - start);
-}
-
-var html = "";
-http.createClient(testPort, testHost)
-	.request("GET", testPath, { host: testHost })
-	.addListener("response", function (response) {
-		if (response.statusCode == "200") {
-			response.setEncoding("utf8");
-			response.addListener("data", function (chunk) {
-				html += chunk;
-			}).addListener("end", function() {
-				var timeNodeHtmlParser = !testNHP ? 0 : timeExecutions(testIterations, function () {
-					var handler = new htmlparser.DefaultHandler(function(err, dom) {
-						if (err)
-							sys.debug("Error: " + err);
-					});
-					var parser = new htmlparser.Parser(handler, { includeLocation: true });
-					parser.parseComplete(html);
-				})
-				
-				var timeLibXmlJs = !testLXJS ? 0 : timeExecutions(testIterations, function () {
-					var dom = libxml.parseHtmlString(html);
-				})
-
-				if (testNHP)
-					sys.debug("NodeHtmlParser: "  + timeNodeHtmlParser);
-				if (testLXJS)
-					sys.debug("LibXmlJs: "  + timeLibXmlJs);
-				if (testNHP && testLXJS)
-					sys.debug("Difference: " + ((timeNodeHtmlParser - timeLibXmlJs) / timeLibXmlJs) * 100);
-			});
-		}
-		else
-			sys.debug("Error: got response status " + response.statusCode);
-	})
-	.end();
diff --git a/runtests.min.html b/runtests.min.html
deleted file mode 100644
index 73ea4c7..0000000
--- a/runtests.min.html
+++ /dev/null
@@ -1,108 +0,0 @@
-<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
-<html>
-	<head>
-		<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
-		<title>Node.js HTML Parser</title>
-		<style type="text/css">
-			.good {
-				color: #363;
-			}
-			.bad {
-				color: #633;
-				font-style: italic;
-			}
-		</style>
-		<script language="JavaScript">
-			if ((typeof JSON) != "object") {
-				var head = document.getElementsByTagName("head")[0];
-				var script = document.createElement('script');
-				script.type = "text/javascript";
-				script.src = "json2.js";
-				head.insertBefore(script, head.firstChild)
-			}
-		</script>
-		<script language="JavaScript" src="lib/htmlparser.min.js"></script>
-		<script language="JavaScript" src="tests/01-basic.js"></script>
-		<script language="JavaScript" src="tests/02-single_tag_1.js"></script>
-		<script language="JavaScript" src="tests/03-single_tag_2.js"></script>
-		<script language="JavaScript" src="tests/04-unescaped_in_script.js"></script>
-		<script language="JavaScript" src="tests/05-tags_in_comment.js"></script>
-		<script language="JavaScript" src="tests/06-comment_in_script.js"></script>
-		<script language="JavaScript" src="tests/07-unescaped_in_style.js"></script>
-		<script language="JavaScript" src="tests/08-extra_spaces_in_tag.js"></script>
-		<script language="JavaScript" src="tests/09-unquoted_attrib.js"></script>
-		<script language="JavaScript" src="tests/10-singular_attribute.js"></script>
-		<script language="JavaScript" src="tests/11-text_outside_tags.js"></script>
-		<script language="JavaScript" src="tests/12-text_only.js"></script>
-		<script language="JavaScript" src="tests/13-comment_in_text.js"></script>
-		<script language="JavaScript" src="tests/14-comment_in_text_in_script.js"></script>
-		<script language="JavaScript" src="tests/15-non-verbose.js"></script>
-		<script language="JavaScript" src="tests/16-ignore_whitespace.js"></script>
-		<script language="JavaScript" src="tests/17-xml_namespace.js"></script>
-		<script language="JavaScript" src="tests/18-enforce_empty_tags.js"></script>
-		<script language="JavaScript" src="tests/19-ignore_empty_tags.js"></script>
-		<script language="JavaScript" src="tests/20-rss.js"></script>
-		<script language="JavaScript" src="tests/21-atom.js"></script>
-		<script language="JavaScript" src="tests/22-position_data.js"></script>
-		<!-- //TODO: dynamic loading of test files -->
-	</head>
-	<body style="font-size: small; font-family:Arial, Helvetica, sans-serif;">
-
-		<script language="JavaScript">
-			var chunkSize = 5;
-			var testCount = 0;
-			var failedCount = 0;
-			while (Tautologistics.NodeHtmlParser.Tests.length) {
-				testCount++;
-				var test = Tautologistics.NodeHtmlParser.Tests.shift();
-				try {
-					var handlerCallback = function handlerCallback (error) {
-						if (error)
-							document.write("<hr>Handler error: " + error + "<hr>");
-					}
-					var handler = (test.type == "rss") ?
-						new Tautologistics.NodeHtmlParser.RssHandler(handlerCallback, test.options.handler)
-						:
-						new Tautologistics.NodeHtmlParser.DefaultHandler(handlerCallback, test.options.handler)
-						;
-					var parser = new Tautologistics.NodeHtmlParser.Parser(handler, test.options.parser);
-					document.write("<b>" + test.name + "</b>: ");
-					parser.parseComplete(test.html);
-					var resultComplete = handler.dom;
-					var chunkPos = 0;
-					parser.reset();
-					while (chunkPos < test.html.length) {
-						parser.parseChunk(test.html.substring(chunkPos, chunkPos + chunkSize));
-						chunkPos += chunkSize;
-					}
-					parser.done();
-					var resultChunk = handler.dom;
-					var testResult =
-						JSON.stringify(resultComplete).toString() === JSON.stringify(test.expected).toString()
-						&&
-						JSON.stringify(resultChunk).toString() === JSON.stringify(test.expected).toString()
-						;
-					document.write(testResult ? "<font class='good'>passed</font>" : "<font class='bad'>FAILED</font>");
-					if (!testResult) {
-						failedCount++;
-						document.write("<pre>");
-						document.write("<b>Complete</b>\n");
-						document.write(JSON.stringify(resultComplete, null, 2));
-						document.write("<b>Chunked</b>\n");
-						document.write(JSON.stringify(resultChunk, null, 2));
-						document.write("<h2>Expected</h2>\n");
-						document.write(JSON.stringify(test.expected, null, 2));
-						document.write("</pre>");
-					}
-				} catch (ex) {
-					document.write("<h1>Exception occured during test: " + ex + "</h1>")
-				}
-				document.write("<br>");
-			}
-			document.write("<hr>");
-			document.write("Total tests: " + testCount + "<br>");
-			document.write("Failed tests: " + failedCount + "<br>");
-		</script>
-	
-	</body>
-</html>
\ No newline at end of file
diff --git a/runtests.min.js b/runtests.min.js
deleted file mode 100644
index df33736..0000000
--- a/runtests.min.js
+++ /dev/null
@@ -1,75 +0,0 @@
-/***********************************************
-Copyright 2010, Chris Winberry <chris@winberry.net>. All rights reserved.
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to
-deal in the Software without restriction, including without limitation the
-rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-sell copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
- 
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
- 
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-IN THE SOFTWARE.
-***********************************************/
-
-var sys = require("sys");
-var fs = require("fs");
-var htmlparser = require("./lib/htmlparser.min");
-
-var testFolder = "./tests";
-var chunkSize = 5;
-
-var testFiles = fs.readdirSync(testFolder);
-var testCount = 0;
-var failedCount = 0;
-for (var i in testFiles) {
-	testCount++;
-	var fileParts = testFiles[i].split(".");
-	fileParts.pop();
-	var moduleName = fileParts.join(".");
-	var test = require(testFolder + "/" + moduleName);
-	var handlerCallback = function handlerCallback (error) {
-		if (error)
-			sys.puts("Handler error: " + error);
-	}
-	var handler = (test.type == "rss") ?
-		new htmlparser.RssHandler(handlerCallback, test.options.handler)
-		:
-		new htmlparser.DefaultHandler(handlerCallback, test.options.handler)
-		;
-	var parser = new htmlparser.Parser(handler, test.options.parser);
-	parser.parseComplete(test.html);
-	var resultComplete = handler.dom;
-	var chunkPos = 0;
-	parser.reset();
-	while (chunkPos < test.html.length) {
-		parser.parseChunk(test.html.substring(chunkPos, chunkPos + chunkSize));
-		chunkPos += chunkSize;
-	}
-	parser.done();
-	var resultChunk = handler.dom;
-	var testResult =
-		sys.inspect(resultComplete, false, null) === sys.inspect(test.expected, false, null)
-		&&
-		sys.inspect(resultChunk, false, null) === sys.inspect(test.expected, false, null)
-		;
-	sys.puts("[" + test.name + "\]: " + (testResult ? "passed" : "FAILED"));
-	if (!testResult) {
-		failedCount++;
-		sys.puts("== Complete ==");
-		sys.puts(sys.inspect(resultComplete, false, null));
-		sys.puts("== Chunked ==");
-		sys.puts(sys.inspect(resultChunk, false, null));
-		sys.puts("== Expected ==");
-		sys.puts(sys.inspect(test.expected, false, null));
-	}
-}
-sys.puts("Total tests: " + testCount);
-sys.puts("Failed tests: " + failedCount);
diff --git a/snippet.js b/snippet.js
deleted file mode 100644
index 9448ea3..0000000
--- a/snippet.js
+++ /dev/null
@@ -1,15 +0,0 @@
-//node --prof --prof_auto profile.js
-//deps/v8/tools/mac-tick-processor v8.log
-var sys = require("sys");
-var htmlparser = require("./htmlparser");
-
-var html = "<link>text</link>";
-
-var handler = new htmlparser.DefaultHandler(function(err, dom) {
-	if (err)
-		sys.debug("Error: " + err);
-	else
-		sys.debug(sys.inspect(dom, false, null));
-}, { enforceEmptyTags: true });
-var parser = new htmlparser.Parser(handler);
-parser.parseComplete(html);
diff --git a/utils_example.js b/utils_example.js
deleted file mode 100644
index d219de5..0000000
--- a/utils_example.js
+++ /dev/null
@@ -1,35 +0,0 @@
-//node --prof --prof_auto profile.js
-//deps/v8/tools/mac-tick-processor v8.log
-var sys = require("sys");
-var htmlparser = require("./lib/htmlparser");
-
-var html = "<a>text a</a><b id='x'>text b</b><c class='y'>text c</c><d id='z' class='w'><e>text e</e></d><g class='g h i'>hhh</g><yy>hellow</yy><yy id='secondyy'>world</yy>";
-
-var handler = new htmlparser.DefaultHandler(function(err, dom) {
-	if (err) {
-		sys.debug("Error: " + err);
-	}
-	else {
-		sys.debug(sys.inspect(dom, false, null));
-		var id = htmlparser.DomUtils.getElementById("x", dom);
-		sys.debug("id: " + sys.inspect(id, false, null));
-		var class = htmlparser.DomUtils.getElements({ class: "y" }, dom);
-		sys.debug("class: " + sys.inspect(class, false, null));
-		var multiclass = htmlparser.DomUtils.getElements({ class: function (value) { return(value && value.indexOf("h") > -1); } }, dom);
-		sys.debug("multiclass: " + sys.inspect(multiclass, false, null));
-		var name = htmlparser.DomUtils.getElementsByTagName("a", dom);
-		sys.debug("name: " + sys.inspect(name, false, null));
-		var text = htmlparser.DomUtils.getElementsByTagType("text", dom);
-		sys.debug("text: " + sys.inspect(text, false, null));
-		var nested = htmlparser.DomUtils.getElements({ tag_name: "d", id: "z", class: "w" }, dom);
-		nested = htmlparser.DomUtils.getElementsByTagName("e", nested);
-		nested = htmlparser.DomUtils.getElementsByTagType("text", nested);
-		sys.debug("nested: " + sys.inspect(nested, false, null));
-		var double = htmlparser.DomUtils.getElementsByTagName("yy", dom);
-		sys.debug("double: " + sys.inspect(double, false, null));
-		var single = htmlparser.DomUtils.getElements( { tag_name: "yy", id: "secondyy" }, dom);
-		sys.debug("single: " + sys.inspect(single, false, null));
-	}
-}, { verbose: false });
-var parser = new htmlparser.Parser(handler);
-parser.parseComplete(html);

From 01fb1badef8c2be003211903e8349c7e6404c478 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 27 Aug 2011 15:10:14 +0200
Subject: [PATCH 002/450] didn't export constructors

---
 lib/DefaultHandler.js |  12 +++--
 lib/DomUtils.js       |   2 +-
 lib/ElementType.js    |   2 +-
 lib/Parser.js         |   4 +-
 lib/RssHandler.js     |   4 +-
 runtests.html         | 108 ------------------------------------------
 runtests.js           |   1 +
 7 files changed, 16 insertions(+), 117 deletions(-)
 delete mode 100644 runtests.html

diff --git a/lib/DefaultHandler.js b/lib/DefaultHandler.js
index 1536f6e..5b06fc2 100644
--- a/lib/DefaultHandler.js
+++ b/lib/DefaultHandler.js
@@ -18,7 +18,7 @@ function DefaultHandler (callback, options) {
 //**"Static"**//
 //HTML Tags that shouldn't contain child nodes
 DefaultHandler._emptyTags = {
-		area: 1
+	area: 1
 	, base: 1
 	, basefont: 1
 	, br: 1
@@ -34,7 +34,7 @@ DefaultHandler._emptyTags = {
 	, embed: 1
 };
 //Regex to detect whitespace only text nodes
-DefaultHandler.reWhitespace = /^\s*$/;
+var reWhitespace = /^\s*$/;
 
 //**Public**//
 //Methods//
@@ -53,8 +53,8 @@ DefaultHandler.prototype.done = function() {
 	this.handleCallback(null);
 };
 DefaultHandler.prototype.writeText = function(element) {
-	if (this._options.ignoreWhitespace)
-		if (DefaultHandler.reWhitespace.test(element.data))
+	if(this._options.ignoreWhitespace)
+		if(reWhitespace.test(element.data))
 			return;
 	this.handleElement(element);
 };
@@ -132,4 +132,6 @@ DefaultHandler.prototype.handleElement = function(element) {
 			this._tagStack.last().children.push(element);
 		}
 	}
-};
\ No newline at end of file
+};
+
+exports = DefaultHandler;
\ No newline at end of file
diff --git a/lib/DomUtils.js b/lib/DomUtils.js
index f930ba0..a7420f6 100644
--- a/lib/DomUtils.js
+++ b/lib/DomUtils.js
@@ -1,4 +1,4 @@
-var DomUtils = {
+exports = {
 		testElement: function(options, element) {
 		if (!element) {
 			return false;
diff --git a/lib/ElementType.js b/lib/ElementType.js
index 09d3d9f..96c89fa 100644
--- a/lib/ElementType.js
+++ b/lib/ElementType.js
@@ -1,5 +1,5 @@
 //Types of elements found in the DOM
-var ElementType = {
+exports = {
 	Text: "text" //Plain text
 	, Directive: "directive" //Special tag <!...>
 	, Comment: "comment" //Special tag <!--...-->
diff --git a/lib/Parser.js b/lib/Parser.js
index ecb7be3..2e818f1 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -394,4 +394,6 @@ Parser.prototype.handleError = function(error) {
 	if ((typeof this._handler.error) === "function")
 		this._handler.error(error);
 	else throw error;
-};
\ No newline at end of file
+};
+
+exports = Parser;
\ No newline at end of file
diff --git a/lib/RssHandler.js b/lib/RssHandler.js
index caf7f83..259b52e 100644
--- a/lib/RssHandler.js
+++ b/lib/RssHandler.js
@@ -109,4 +109,6 @@ RssHandler.prototype.done = function() {
 		this.dom = feed;
 	}
 	RssHandler.super_.prototype.done.call(this);
-};
\ No newline at end of file
+};
+
+exports = RssHandler;
\ No newline at end of file
diff --git a/runtests.html b/runtests.html
deleted file mode 100644
index e89702d..0000000
--- a/runtests.html
+++ /dev/null
@@ -1,108 +0,0 @@
-<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
-<html>
-	<head>
-		<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
-		<title>Node.js HTML Parser</title>
-		<style type="text/css">
-			.good {
-				color: #363;
-			}
-			.bad {
-				color: #633;
-				font-style: italic;
-			}
-		</style>
-		<script language="JavaScript">
-			if ((typeof JSON) != "object") {
-				var head = document.getElementsByTagName("head")[0];
-				var script = document.createElement('script');
-				script.type = "text/javascript";
-				script.src = "json2.js";
-				head.insertBefore(script, head.firstChild)
-			}
-		</script>
-		<script language="JavaScript" src="lib/htmlparser.js"></script>
-		<script language="JavaScript" src="tests/01-basic.js"></script>
-		<script language="JavaScript" src="tests/02-single_tag_1.js"></script>
-		<script language="JavaScript" src="tests/03-single_tag_2.js"></script>
-		<script language="JavaScript" src="tests/04-unescaped_in_script.js"></script>
-		<script language="JavaScript" src="tests/05-tags_in_comment.js"></script>
-		<script language="JavaScript" src="tests/06-comment_in_script.js"></script>
-		<script language="JavaScript" src="tests/07-unescaped_in_style.js"></script>
-		<script language="JavaScript" src="tests/08-extra_spaces_in_tag.js"></script>
-		<script language="JavaScript" src="tests/09-unquoted_attrib.js"></script>
-		<script language="JavaScript" src="tests/10-singular_attribute.js"></script>
-		<script language="JavaScript" src="tests/11-text_outside_tags.js"></script>
-		<script language="JavaScript" src="tests/12-text_only.js"></script>
-		<script language="JavaScript" src="tests/13-comment_in_text.js"></script>
-		<script language="JavaScript" src="tests/14-comment_in_text_in_script.js"></script>
-		<script language="JavaScript" src="tests/15-non-verbose.js"></script>
-		<script language="JavaScript" src="tests/16-ignore_whitespace.js"></script>
-		<script language="JavaScript" src="tests/17-xml_namespace.js"></script>
-		<script language="JavaScript" src="tests/18-enforce_empty_tags.js"></script>
-		<script language="JavaScript" src="tests/19-ignore_empty_tags.js"></script>
-		<script language="JavaScript" src="tests/20-rss.js"></script>
-		<script language="JavaScript" src="tests/21-atom.js"></script>
-		<script language="JavaScript" src="tests/22-position_data.js"></script>
-		<!-- //TODO: dynamic loading of test files -->
-	</head>
-	<body style="font-size: small; font-family:Arial, Helvetica, sans-serif;">
-
-		<script language="JavaScript">
-			var chunkSize = 5;
-			var testCount = 0;
-			var failedCount = 0;
-			while (Tautologistics.NodeHtmlParser.Tests.length) {
-				testCount++;
-				var test = Tautologistics.NodeHtmlParser.Tests.shift();
-				try {
-					var handlerCallback = function handlerCallback (error) {
-						if (error)
-							document.write("<hr>Handler error: " + error + "<hr>");
-					}
-					var handler = (test.type == "rss") ?
-						new Tautologistics.NodeHtmlParser.RssHandler(handlerCallback, test.options.handler)
-						:
-						new Tautologistics.NodeHtmlParser.DefaultHandler(handlerCallback, test.options.handler)
-						;
-					var parser = new Tautologistics.NodeHtmlParser.Parser(handler, test.options.parser);
-					document.write("<b>" + test.name + "</b>: ");
-					parser.parseComplete(test.html);
-					var resultComplete = handler.dom;
-					var chunkPos = 0;
-					parser.reset();
-					while (chunkPos < test.html.length) {
-						parser.parseChunk(test.html.substring(chunkPos, chunkPos + chunkSize));
-						chunkPos += chunkSize;
-					}
-					parser.done();
-					var resultChunk = handler.dom;
-					var testResult =
-						JSON.stringify(resultComplete).toString() === JSON.stringify(test.expected).toString()
-						&&
-						JSON.stringify(resultChunk).toString() === JSON.stringify(test.expected).toString()
-						;
-					document.write(testResult ? "<font class='good'>passed</font>" : "<font class='bad'>FAILED</font>");
-					if (!testResult) {
-						failedCount++;
-						document.write("<pre>");
-						document.write("<b>Complete</b>\n");
-						document.write(JSON.stringify(resultComplete, null, 2));
-						document.write("<b>Chunked</b>\n");
-						document.write(JSON.stringify(resultChunk, null, 2));
-						document.write("<h2>Expected</h2>\n");
-						document.write(JSON.stringify(test.expected, null, 2));
-						document.write("</pre>");
-					}
-				} catch (ex) {
-					document.write("<h1>Exception occured during test: " + ex + "</h1>")
-				}
-				document.write("<br>");
-			}
-			document.write("<hr>");
-			document.write("Total tests: " + testCount + "<br>");
-			document.write("Failed tests: " + failedCount + "<br>");
-		</script>
-	
-	</body>
-</html>
\ No newline at end of file
diff --git a/runtests.js b/runtests.js
index e906fe4..3ddcd9e 100644
--- a/runtests.js
+++ b/runtests.js
@@ -39,6 +39,7 @@ for (var i in testFiles) {
 		if (error)
 			sys.puts("Handler error: " + error);
 	}
+	console.log(testFiles[i]);
 	var handler = (test.type == "rss") ?
 		new htmlparser.RssHandler(handlerCallback, test.options.handler)
 		:

From b92ecd7c09575f262a8d10be2d807a83e3750121 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 27 Aug 2011 15:12:22 +0200
Subject: [PATCH 003/450] Now all tests pass

---
 lib/DefaultHandler.js | 2 +-
 lib/DomUtils.js       | 4 +++-
 lib/ElementType.js    | 2 +-
 lib/Parser.js         | 2 +-
 lib/RssHandler.js     | 2 +-
 5 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/lib/DefaultHandler.js b/lib/DefaultHandler.js
index 5b06fc2..3f79c70 100644
--- a/lib/DefaultHandler.js
+++ b/lib/DefaultHandler.js
@@ -134,4 +134,4 @@ DefaultHandler.prototype.handleElement = function(element) {
 	}
 };
 
-exports = DefaultHandler;
\ No newline at end of file
+module.exports = DefaultHandler;
\ No newline at end of file
diff --git a/lib/DomUtils.js b/lib/DomUtils.js
index a7420f6..f7a6f3e 100644
--- a/lib/DomUtils.js
+++ b/lib/DomUtils.js
@@ -1,4 +1,4 @@
-exports = {
+var DomUtils = {
 		testElement: function(options, element) {
 		if (!element) {
 			return false;
@@ -92,3 +92,5 @@ exports = {
 		return(DomUtils.getElements({ tag_type: type }, currentElement, recurse, limit));
 	}
 };
+
+module.exports = DomUtils;
\ No newline at end of file
diff --git a/lib/ElementType.js b/lib/ElementType.js
index 96c89fa..c112c91 100644
--- a/lib/ElementType.js
+++ b/lib/ElementType.js
@@ -1,5 +1,5 @@
 //Types of elements found in the DOM
-exports = {
+module.exports = {
 	Text: "text" //Plain text
 	, Directive: "directive" //Special tag <!...>
 	, Comment: "comment" //Special tag <!--...-->
diff --git a/lib/Parser.js b/lib/Parser.js
index 2e818f1..b74bc2c 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -396,4 +396,4 @@ Parser.prototype.handleError = function(error) {
 	else throw error;
 };
 
-exports = Parser;
\ No newline at end of file
+module.exports = Parser;
\ No newline at end of file
diff --git a/lib/RssHandler.js b/lib/RssHandler.js
index 259b52e..e14ad58 100644
--- a/lib/RssHandler.js
+++ b/lib/RssHandler.js
@@ -111,4 +111,4 @@ RssHandler.prototype.done = function() {
 	RssHandler.super_.prototype.done.call(this);
 };
 
-exports = RssHandler;
\ No newline at end of file
+module.exports = RssHandler;
\ No newline at end of file

From 31bcb1213f6b614f33e4e7961dcb2c0f88607a67 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 27 Aug 2011 15:14:07 +0200
Subject: [PATCH 004/450] moved runtests.js to tests-directory

---
 runtests.js => tests/00-runtests.js | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename runtests.js => tests/00-runtests.js (100%)

diff --git a/runtests.js b/tests/00-runtests.js
similarity index 100%
rename from runtests.js
rename to tests/00-runtests.js

From c11def8225e4985e2c45083c3e233b34dfc1c4e2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 27 Aug 2011 15:29:46 +0200
Subject: [PATCH 005/450] fixed tests

---
 tests/00-runtests.js | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/00-runtests.js b/tests/00-runtests.js
index 3ddcd9e..b879227 100644
--- a/tests/00-runtests.js
+++ b/tests/00-runtests.js
@@ -21,15 +21,15 @@ IN THE SOFTWARE.
 
 var sys = require("sys");
 var fs = require("fs");
-var htmlparser = require("./lib/htmlparser");
+var htmlparser = require("../lib/htmlparser");
 
-var testFolder = "./tests";
+var testFolder = ".";
 var chunkSize = 5;
 
 var testFiles = fs.readdirSync(testFolder);
 var testCount = 0;
 var failedCount = 0;
-for (var i in testFiles) {
+for (var i = 1; i < testFiles.length; i++) {
 	testCount++;
 	var fileParts = testFiles[i].split(".");
 	fileParts.pop();

From 2119bde08611a84cb8326b00ce43d4e5a198bbc3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 27 Aug 2011 15:30:47 +0200
Subject: [PATCH 006/450] moved "last()"-method from _callStack to
 DefaultHandlers prototype

---
 lib/DefaultHandler.js | 55 ++++++++++++++++++++++---------------------
 1 file changed, 28 insertions(+), 27 deletions(-)

diff --git a/lib/DefaultHandler.js b/lib/DefaultHandler.js
index 3f79c70..8bfdfb0 100644
--- a/lib/DefaultHandler.js
+++ b/lib/DefaultHandler.js
@@ -15,23 +15,27 @@ function DefaultHandler (callback, options) {
 		this._callback = callback;
 }
 
-//**"Static"**//
+DefaultHandler.prototype._lastTag = function() {
+	var stack = this._tagStack;
+	return(stack.length ? stack[stack.length - 1] : null);
+};
+
 //HTML Tags that shouldn't contain child nodes
-DefaultHandler._emptyTags = {
-	area: 1
-	, base: 1
-	, basefont: 1
-	, br: 1
-	, col: 1
-	, frame: 1
-	, hr: 1
-	, img: 1
-	, input: 1
-	, isindex: 1
-	, link: 1
-	, meta: 1
-	, param: 1
-	, embed: 1
+var _emptyTags = {
+	area: true
+	, base: true
+	, basefont: true
+	, br: true
+	, col: true
+	, frame: true
+	, hr: true
+	, img: true
+	, input: true
+	, isindex: true
+	, link: true
+	, meta: true
+	, param: true
+	, embed: true
 };
 //Regex to detect whitespace only text nodes
 var reWhitespace = /^\s*$/;
@@ -43,9 +47,6 @@ DefaultHandler.prototype.reset = function() {
 	this.dom = [];
 	this._done = false;
 	this._tagStack = [];
-	this._tagStack.last = function() {
-		return(this.length ? this[this.length - 1] : null);
-	};
 };
 //Signals the handler that parsing is done
 DefaultHandler.prototype.done = function() {
@@ -75,7 +76,7 @@ DefaultHandler.prototype.isEmptyTag = function(element) {
 	if (name.charAt(0) === '/') {
 		name = name.substring(1);
 	}
-	return this._options.enforceEmptyTags && !!DefaultHandler._emptyTags[name];
+	return this._options.enforceEmptyTags && _emptyTags[name];
 };
 
 DefaultHandler.prototype.writeTag = DefaultHandler.prototype.writeDirective = DefaultHandler.prototype.writeComment =
@@ -89,7 +90,7 @@ DefaultHandler.prototype.handleElement = function(element) {
 		if (element.type === "tag" || element.type === "script" || element.type === "style")
 			delete element.data;
 	}
-	if (!this._tagStack.last()) { //There are no parent elements
+	if (!this._lastTag()) { //There are no parent elements
 		//If the element can be a container, add it to the tag stack and the top level list
 		if (element.type !== ElementType.Text && element.type !== ElementType.Comment && element.type !== ElementType.Directive) {
 			if (element.name.charAt(0) !== "/") { //Ignore closing tags that obviously don't have an opening tag
@@ -119,17 +120,17 @@ DefaultHandler.prototype.handleElement = function(element) {
 				}
 			}
 			else { //This is not a closing tag
-				if (!this._tagStack.last().children)
-					this._tagStack.last().children = [];
-				this._tagStack.last().children.push(element);
+				if (!this._lastTag().children)
+					this._lastTag().children = [];
+				this._lastTag().children.push(element);
 				if (!this.isEmptyTag(element)) //Don't add tags to the tag stack that can't have children
 					this._tagStack.push(element);
 			}
 		}
 		else { //This is not a container element
-			if (!this._tagStack.last().children)
-				this._tagStack.last().children = [];
-			this._tagStack.last().children.push(element);
+			if (!this._lastTag().children)
+				this._lastTag().children = [];
+			this._lastTag().children.push(element);
 		}
 	}
 };

From 4a4110b431b2046c1d298f5973c2a2aa3ae642d6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 27 Aug 2011 15:54:37 +0200
Subject: [PATCH 007/450] removed repeating code in RssHandler

---
 lib/RssHandler.js | 111 ++++++++++++++++++++--------------------------
 1 file changed, 48 insertions(+), 63 deletions(-)

diff --git a/lib/RssHandler.js b/lib/RssHandler.js
index e14ad58..7273c10 100644
--- a/lib/RssHandler.js
+++ b/lib/RssHandler.js
@@ -16,9 +16,15 @@ function inherits (ctor, superCtor) {
 
 inherits(RssHandler, DefaultHandler);
 
+function fetch(what, where, recurse){
+	try{ return DomUtils.getElementsByTagName(what, where, !!recurse)[0].children[0].data;
+	} catch(e){return false;}
+}
+
 RssHandler.prototype.done = function() {
 	var feed = { };
 	var feedRoot;
+	var tmp;
 
 	var found = DomUtils.getElementsByTagName(function (value) { return(value === "rss" || value === "feed"); }, this.dom, false);
 	if (found.length) {
@@ -29,79 +35,58 @@ RssHandler.prototype.done = function() {
 			feed.type = "rss";
 			feedRoot = feedRoot.children[0]; //<channel/>
 			feed.id = "";
-			try {
-				feed.title = DomUtils.getElementsByTagName("title", feedRoot.children, false)[0].children[0].data;
-			} catch (ex) { }
-			try {
-				feed.link = DomUtils.getElementsByTagName("link", feedRoot.children, false)[0].children[0].data;
-			} catch (ex) { }
-			try {
-				feed.description = DomUtils.getElementsByTagName("description", feedRoot.children, false)[0].children[0].data;
-			} catch (ex) { }
-			try {
-				feed.updated = new Date(DomUtils.getElementsByTagName("lastBuildDate", feedRoot.children, false)[0].children[0].data);
-			} catch (ex) { }
-			try {
-				feed.author = DomUtils.getElementsByTagName("managingEditor", feedRoot.children, false)[0].children[0].data;
-			} catch (ex) { }
+			if(tmp = fetch("title", feedRoot.children))
+				feed.title = tmp;
+			if(tmp = fetch("link", feedRoot.children))
+				feed.link = tmp;
+			if(tmp = fetch("description", feedRoot.children))
+				feed.description = tmp;
+			if(tmp = fetch("lastBuildDate", feedRoot.children))
+				feed.updated = new Date(tmp);
+			if(tmp = fetch("managingEditor", feedRoot.children))
+				feed.author = tmp;
 			feed.items = [];
 			DomUtils.getElementsByTagName("item", feedRoot.children).forEach(function (item, index, list) {
 				var entry = {};
-				try {
-					entry.id = DomUtils.getElementsByTagName("guid", item.children, false)[0].children[0].data;
-				} catch (ex) { }
-				try {
-					entry.title = DomUtils.getElementsByTagName("title", item.children, false)[0].children[0].data;
-				} catch (ex) { }
-				try {
-					entry.link = DomUtils.getElementsByTagName("link", item.children, false)[0].children[0].data;
-				} catch (ex) { }
-				try {
-					entry.description = DomUtils.getElementsByTagName("description", item.children, false)[0].children[0].data;
-				} catch (ex) { }
-				try {
-					entry.pubDate = new Date(DomUtils.getElementsByTagName("pubDate", item.children, false)[0].children[0].data);
-				} catch (ex) { }
+				if(tmp = fetch("guid", item.children))
+					entry.id = tmp;
+				if(tmp = fetch("title", item.children))
+					entry.title = tmp;
+				if(tmp = fetch("link", item.children))
+					entry.link = tmp;
+				if(tmp = fetch("description", item.children))
+					entry.description = tmp;
+				if(tmp = fetch("pubDate", item.children))
+					entry.pubDate = new Date(tmp);
 				feed.items.push(entry);
 			});
 		} else {
 			feed.type = "atom";
-			try {
-				feed.id = DomUtils.getElementsByTagName("id", feedRoot.children, false)[0].children[0].data;
-			} catch (ex) { }
-			try {
-				feed.title = DomUtils.getElementsByTagName("title", feedRoot.children, false)[0].children[0].data;
-			} catch (ex) { }
-			try {
-				feed.link = DomUtils.getElementsByTagName("link", feedRoot.children, false)[0].attribs.href;
-			} catch (ex) { }
-			try {
-				feed.description = DomUtils.getElementsByTagName("subtitle", feedRoot.children, false)[0].children[0].data;
-			} catch (ex) { }
-			try {
-				feed.updated = new Date(DomUtils.getElementsByTagName("updated", feedRoot.children, false)[0].children[0].data);
-			} catch (ex) { }
-			try {
-				feed.author = DomUtils.getElementsByTagName("email", feedRoot.children, true)[0].children[0].data;
-			} catch (ex) { }
+			if(tmp = fetch("id", feedRoot.children))
+				feed.id = tmp;
+			if(tmp = fetch("title", feedRoot.children))
+				feed.title = tmp;
+			try{ feed.link = DomUtils.getElementsByTagName("link", feedRoot.children, false)[0].attribs.href;
+			}catch (ex){}
+			if(tmp = fetch("subtitle", feedRoot.children))
+				feed.description = tmp;
+			if(tmp = fetch("updated", feedRoot.children))
+				feed.updated = new Date(tmp);
+			if(tmp = fetch("email", feedRoot.children, true))
+				feed.author = tmp;
 			feed.items = [];
 			DomUtils.getElementsByTagName("entry", feedRoot.children).forEach(function (item, index, list) {
 				var entry = {};
-				try {
-					entry.id = DomUtils.getElementsByTagName("id", item.children, false)[0].children[0].data;
-				} catch (ex) { }
-				try {
-					entry.title = DomUtils.getElementsByTagName("title", item.children, false)[0].children[0].data;
-				} catch (ex) { }
-				try {
-					entry.link = DomUtils.getElementsByTagName("link", item.children, false)[0].attribs.href;
-				} catch (ex) { }
-				try {
-					entry.description = DomUtils.getElementsByTagName("summary", item.children, false)[0].children[0].data;
-				} catch (ex) { }
-				try {
-					entry.pubDate = new Date(DomUtils.getElementsByTagName("updated", item.children, false)[0].children[0].data);
-				} catch (ex) { }
+				if(tmp = fetch("id", item.children))
+					entry.id = tmp;
+				if(tmp = fetch("title", item.children))
+					entry.title = tmp;
+				try { entry.link = DomUtils.getElementsByTagName("link", item.children)[0].attribs.href;
+				} catch(ex){}
+				if(tmp = fetch("summary", item.children))
+					entry.description = tmp;
+				if(tmp = fetch("updated", item.children))
+					entry.pubDate = new Date(tmp);
 				feed.items.push(entry);
 			});
 		}

From 91a6a86f12b33b9db867fe4891c6e99a70b81b64 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 27 Aug 2011 16:02:51 +0200
Subject: [PATCH 008/450] again some cleanup

---
 lib/RssHandler.js | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/lib/RssHandler.js b/lib/RssHandler.js
index 7273c10..00edbf0 100644
--- a/lib/RssHandler.js
+++ b/lib/RssHandler.js
@@ -16,9 +16,15 @@ function inherits (ctor, superCtor) {
 
 inherits(RssHandler, DefaultHandler);
 
+function getElements(what, where, one, recurse){
+	var ret = DomUtils.getElementsByTagName(what, where, !!recurse);
+	if(one) try{ return ret[0]; } catch(e){return false;}
+	else return ret;
+}
 function fetch(what, where, recurse){
-	try{ return DomUtils.getElementsByTagName(what, where, !!recurse)[0].children[0].data;
-	} catch(e){return false;}
+	var ret = getElements(what, where, true, !!recurse);
+	if(ret) try{ return ret.children[0].data; } catch(e){return false;}
+	else return false;
 }
 
 RssHandler.prototype.done = function() {
@@ -26,7 +32,7 @@ RssHandler.prototype.done = function() {
 	var feedRoot;
 	var tmp;
 
-	var found = DomUtils.getElementsByTagName(function (value) { return(value === "rss" || value === "feed"); }, this.dom, false);
+	var found = getElements(function (value) { return(value === "rss" || value === "feed"); }, this.dom);
 	if (found.length) {
 		feedRoot = found[0];
 	}
@@ -46,7 +52,7 @@ RssHandler.prototype.done = function() {
 			if(tmp = fetch("managingEditor", feedRoot.children))
 				feed.author = tmp;
 			feed.items = [];
-			DomUtils.getElementsByTagName("item", feedRoot.children).forEach(function (item, index, list) {
+			getElements("item", feedRoot.children).forEach(function (item, index, list) {
 				var entry = {};
 				if(tmp = fetch("guid", item.children))
 					entry.id = tmp;
@@ -66,7 +72,7 @@ RssHandler.prototype.done = function() {
 				feed.id = tmp;
 			if(tmp = fetch("title", feedRoot.children))
 				feed.title = tmp;
-			try{ feed.link = DomUtils.getElementsByTagName("link", feedRoot.children, false)[0].attribs.href;
+			try{ feed.link = getElements("link", feedRoot.children, true).attribs.href;
 			}catch (ex){}
 			if(tmp = fetch("subtitle", feedRoot.children))
 				feed.description = tmp;
@@ -75,13 +81,13 @@ RssHandler.prototype.done = function() {
 			if(tmp = fetch("email", feedRoot.children, true))
 				feed.author = tmp;
 			feed.items = [];
-			DomUtils.getElementsByTagName("entry", feedRoot.children).forEach(function (item, index, list) {
+			getElements("entry", feedRoot.children).forEach(function (item, index, list) {
 				var entry = {};
 				if(tmp = fetch("id", item.children))
 					entry.id = tmp;
 				if(tmp = fetch("title", item.children))
 					entry.title = tmp;
-				try { entry.link = DomUtils.getElementsByTagName("link", item.children)[0].attribs.href;
+				try { entry.link = getElements("link", item.children, true).attribs.href;
 				} catch(ex){}
 				if(tmp = fetch("summary", item.children))
 					entry.description = tmp;

From 9a8786055c08fe9118140aa03c5280591b8902cb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 28 Aug 2011 10:49:26 +0200
Subject: [PATCH 009/450] Added EventedHandler, using an interface like sax.js

---
 lib/DefaultHandler.js |  4 +--
 lib/EventedHandler.js | 79 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 80 insertions(+), 3 deletions(-)
 create mode 100644 lib/EventedHandler.js

diff --git a/lib/DefaultHandler.js b/lib/DefaultHandler.js
index 8bfdfb0..545e688 100644
--- a/lib/DefaultHandler.js
+++ b/lib/DefaultHandler.js
@@ -37,8 +37,6 @@ var _emptyTags = {
 	, param: true
 	, embed: true
 };
-//Regex to detect whitespace only text nodes
-var reWhitespace = /^\s*$/;
 
 //**Public**//
 //Methods//
@@ -55,7 +53,7 @@ DefaultHandler.prototype.done = function() {
 };
 DefaultHandler.prototype.writeText = function(element) {
 	if(this._options.ignoreWhitespace)
-		if(reWhitespace.test(element.data))
+		if(element.data.trim() === "")
 			return;
 	this.handleElement(element);
 };
diff --git a/lib/EventedHandler.js b/lib/EventedHandler.js
new file mode 100644
index 0000000..d233976
--- /dev/null
+++ b/lib/EventedHandler.js
@@ -0,0 +1,79 @@
+var EventedHandler = function(cbs){
+	//map the handlers to their callbacks
+	this.writeComment = stripData(cbs.oncomment);
+	this.writeDirective = stripData(cbs.onprocessinginstruction);
+	this.writeText = stripData(cbs.ontext);
+	this.done = cbs.onend || emptyFunction;
+	
+	//if someone wants to listen to that
+	this.reset = cbs.onreset || emptyFunction;
+	this.error = cbs.onerror; //if nothing was set, the error is thrown
+	
+	//functions to be called within writeTag
+	this.onOpenTag = openTagCB(cbs.onopentag, cbs.onattribute);
+	this.onCloseTag = cbs.onclosetag || emptyFunction;
+	
+	//privates
+	this._stack = [];
+};
+
+var emptyFunction = function(){};
+var stripData = function(callback){
+	if(typeof callback !== "function") return emptyFunction;
+	return function(data){
+		callback(data.data);
+	};
+};
+var openTagCB = function(openTag, attribute){
+	function open(name, attributes){ openTag({name:name, attributes:attributes}); }
+	function attr(name, attributes){ for(var i in attributes) attribute({name:i, value:attributes[i]}); }
+	if(openTag){
+		if(attribute) return function(name, attributes){open(name,attributes); attr(attributes);};
+		else return open;
+	}
+	else if(attribute) return attr;
+		else return emptyFunction;
+};
+
+//HTML Tags that shouldn't contain child nodes
+var emptyTags = {
+	area: true
+	, base: true
+	, basefont: true
+	, br: true
+	, col: true
+	, frame: true
+	, hr: true
+	, img: true
+	, input: true
+	, isindex: true
+	, link: true
+	, meta: true
+	, param: true
+	, embed: true
+};
+
+EventedHandler.prototype.writeTag = function(element){
+	var closing = element.name.charAt(0) === "/",
+		name = closing ? element.name.substring(1) : element.name,
+		attributes = element.attribs || {},
+		empty = emptyTags[name];
+	
+	if(closing){
+		if(!empty){
+			var i = this._stack.length - 1;
+			while(i !== -1 && this._stack[i--].name !== name){}
+			if( (i+=1) !== 0)
+				while(i < this._stack.length) this.onCloseTag(this._stack.pop().name);
+		}
+		else if(name === "br"){ //special case for <br>s
+			this.onOpenTag(name, attributes);
+			this.onCloseTag(name);
+		}
+	}
+	else{
+		this.onOpenTag(name, attributes);
+		if(empty) this.onCloseTag(name);
+		else this._tagStack.push(element);
+	}
+};
\ No newline at end of file

From 38a3502a0cfdfb65ffeb68e0e8fc6fa0eba77e16 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 28 Aug 2011 11:06:18 +0200
Subject: [PATCH 010/450] Some improvements inside the parser. Still very ugly.

---
 lib/Parser.js | 196 +++++++++++++++++++++++++-------------------------
 1 file changed, 97 insertions(+), 99 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index b74bc2c..9375f6f 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -1,8 +1,8 @@
 var ElementType = require("./ElementType.js");
 
-function Parser (handler, options) {
+function Parser (handler, options){
 	this._options = options ? options : { };
-	if (this._options.includeLocation === undefined) {
+	if(this._options.includeLocation === undefined){
 		this._options.includeLocation = false; //Do not track element position in document by default
 	}
 
@@ -28,49 +28,54 @@ function Parser (handler, options) {
 
 //**"Static"**//
 //Regular expressions used for cleaning up and parsing (stateless)
-Parser._reTrim = /(^\s+|\s+$)/g; //Trim leading/trailing whitespace
-Parser._reTrimComment = /(^\!--|--$)/g; //Remove comment tag markup from comment contents
-Parser._reWhitespace = /\s/g; //Used to find any whitespace to split on
-Parser._reTagName = /^\s*(\/?)\s*([^\s\/]+)/; //Used to find the tag name for an element
+var _reTrim = /(^\s+|\s+$)/g; //Trim leading/trailing whitespace
+var _reTrimComment = /(^\!--|--$)/g; //Remove comment tag markup from comment contents
+var _reWhitespace = /\s/g; //Used to find any whitespace to split on
+var _reTagName = /^\s*(\/?)\s*([^\s\/]+)/; //Used to find the tag name for an element
 
 //Regular expressions used for parsing (stateful)
-Parser._reAttrib = //Find attributes in a tag
+var _reAttrib = //Find attributes in a tag
 	/([^=<>\"\'\s]+)\s*=\s*"([^"]*)"|([^=<>\"\'\s]+)\s*=\s*'([^']*)'|([^=<>\"\'\s]+)\s*=\s*([^'"\s]+)|([^=<>\"\'\s\/]+)/g;
-Parser._reTags = /[<\>]/g; //Find tag markers
+var _reTags = /[<\>]/g; //Find tag markers
+
+var tagTypes = {};
+tagTypes[ ElementType.Script ] = true;
+tagTypes[ ElementType.Style ] = true;
+tagTypes[ ElementType.Tag ] = true;
 
 //**Public**//
 //Methods//
 //Parses a complete HTML and pushes it to the handler
-Parser.prototype.parseComplete = function(data) {
+Parser.prototype.parseComplete = function(data){
 	this.reset();
 	this.parseChunk(data);
 	this.done();
 };
 
 //Parses a piece of an HTML document
-Parser.prototype.parseChunk = function(data) {
-	if (this._done)
+Parser.prototype.parseChunk = function(data){
+	if(this._done)
 		this.handleError(new Error("Attempted to parse chunk after parsing already done"));
 	this._buffer += data; //FIXME: this can be a bottleneck
 	this.parseTags();
 };
 
 //Tells the parser that the HTML being parsed is complete
-Parser.prototype.done = function() {
-	if (this._done)
+Parser.prototype.done = function(){
+	if(this._done)
 		return;
 	this._done = true;
 
 	//Push any unparsed text into a final element in the element list
-	if (this._buffer.length) {
+	if(this._buffer.length){
 		var rawData = this._buffer;
 		this._buffer = "";
 		var element = {
 				raw: rawData
-			, data: (this._parseState === ElementType.Text) ? rawData : rawData.replace(Parser._reTrim, "")
+			, data: (this._parseState === ElementType.Text) ? rawData : rawData.replace(_reTrim, "")
 			, type: this._parseState
 			};
-		if (this._parseState === ElementType.Tag || this._parseState === ElementType.Script || this._parseState === ElementType.Style)
+		if(this._parseState === ElementType.Tag || this._parseState === ElementType.Script || this._parseState === ElementType.Style)
 			element.name = this.parseTagName(element.data);
 		this.parseAttribs(element);
 		this._elements.push(element);
@@ -81,7 +86,7 @@ Parser.prototype.done = function() {
 };
 
 //Resets the parser to a blank state, ready to parse a new HTML document
-Parser.prototype.reset = function() {
+Parser.prototype.reset = function(){
 	this._buffer = "";
 	this._done = false;
 	this._elements = [];
@@ -103,71 +108,65 @@ Parser.prototype.reset = function() {
 //**Private**//
 //Methods//
 //Takes an array of elements and parses any found attributes
-Parser.prototype.parseTagAttribs = function(elements) {
-	var idxEnd = elements.length;
-	var idx = 0;
-
-	while (idx < idxEnd) {
-		var element = elements[idx++];
-		if (element.type === ElementType.Tag || element.type === ElementType.Script || element.type === ElementType.style)
+Parser.prototype.parseTagAttribs = function(elements){
+	for(var i = 0, j = elements.length; i < j; i++){
+		var element = elements[i];
+		if(tagTypes[element.type])
 			this.parseAttribs(element);
 	}
-
+	
 	return(elements);
 };
 
 //Takes an element and adds an "attribs" property for any element attributes found 
-Parser.prototype.parseAttribs = function(element) {
+Parser.prototype.parseAttribs = function(element){
 	//Only parse attributes for tags
-	if (element.type !== ElementType.Script && element.type !== ElementType.Style && element.type !== ElementType.Tag)
-		return;
+	if(!tagTypes[element.type]) return;
 
-	var tagName = element.data.split(Parser._reWhitespace, 1)[0];
+	var tagName = element.data.split(_reWhitespace, 1)[0];
 	var attribRaw = element.data.substring(tagName.length);
-	if (attribRaw.length < 1)
+	if(attribRaw.length < 1)
 		return;
 
 	var match;
-	Parser._reAttrib.lastIndex = 0;
-	while (match = Parser._reAttrib.exec(attribRaw)) {
-		if (element.attribs === undefined)
+	_reAttrib.lastIndex = 0;
+	while (match = _reAttrib.exec(attribRaw)){
+		if(element.attribs === undefined)
 			element.attribs = {};
 
-		if (typeof match[1] === "string" && match[1].length) {
+		if(typeof match[1] === "string" && match[1].length){
 			element.attribs[match[1]] = match[2];
-		} else if (typeof match[3] === "string" && match[3].length) {
+		} else if(typeof match[3] === "string" && match[3].length){
 			element.attribs[match[3].toString()] = match[4].toString();
-		} else if (typeof match[5] === "string" && match[5].length) {
+		} else if(typeof match[5] === "string" && match[5].length){
 			element.attribs[match[5]] = match[6];
-		} else if (typeof match[7] === "string" && match[7].length) {
+		} else if(typeof match[7] === "string" && match[7].length){
 			element.attribs[match[7]] = match[7];
 		}
 	}
 };
 
 //Extracts the base tag name from the data value of an element
-Parser.prototype.parseTagName = function(data) {
-	if (data === null || data === "")
-		return("");
-	var match = Parser._reTagName.exec(data);
-	if (!match)
-		return("");
-	return((match[1] ? "/" : "") + match[2]);
+Parser.prototype.parseTagName = function(data){
+	if(!data) return "";
+	var match = _reTagName.exec(data);
+	if(!match) return "";
+	return (match[1] ? "/" : "") + match[2];
 };
 
 //Parses through HTML text and returns an array of found elements
 //I admit, this function is rather large but splitting up had an noticeable impact on speed
-Parser.prototype.parseTags = function() {
+Parser.prototype.parseTags = function(){
 	var bufferEnd = this._buffer.length - 1;
-	while (Parser._reTags.test(this._buffer)) {
-		this._next = Parser._reTags.lastIndex - 1;
+	while (_reTags.test(this._buffer)){
+		this._next = _reTags.lastIndex - 1;
 		var tagSep = this._buffer.charAt(this._next); //The currently found tag marker
 		var rawData = this._buffer.substring(this._current, this._next); //The next chunk of data to parse
 
 		//A new element to eventually be appended to the element list
 		var element = {
 				raw: rawData
-			, data: (this._parseState === ElementType.Text) ? rawData : rawData.replace(Parser._reTrim, "")
+			, data: (this._parseState === ElementType.Text) ? rawData : rawData.replace(_reTrim, "")
 			, type: this._parseState
 		};
 
@@ -175,16 +174,16 @@ Parser.prototype.parseTags = function() {
 
 		//This section inspects the current tag stack and modifies the current
 		//element if we're actually parsing a special area (script/comment/style tag)
-		if (this._tagStack.length) { //We're parsing inside a script/comment/style tag
-			if (this._tagStack[this._tagStack.length - 1] === ElementType.Script) { //We're currently in a script tag
-				if (elementName === "/script") //Actually, we're no longer in a script tag, so pop it off the stack
+		if(this._tagStack.length){ //We're parsing inside a script/comment/style tag
+			if(this._tagStack[this._tagStack.length - 1] === ElementType.Script){ //We're currently in a script tag
+				if(elementName === "/script") //Actually, we're no longer in a script tag, so pop it off the stack
 					this._tagStack.pop();
 				else { //Not a closing script tag
-					if (element.raw.indexOf("!--") !== 0) { //Make sure we're not in a comment
+					if(element.raw.indexOf("!--") !== 0){ //Make sure we're not in a comment
 						//All data from here to script close is now a text element
 						element.type = ElementType.Text;
 						//If the previous element is text, append the current text to it
-						if (this._elements.length && this._elements[this._elements.length - 1].type === ElementType.Text) {
+						if(this._elements.length && this._elements[this._elements.length - 1].type === ElementType.Text){
 							prevElement = this._elements[this._elements.length - 1];
 							prevElement.raw = prevElement.data = prevElement.raw + this._prevTagSep + element.raw;
 							element.raw = element.data = ""; //This causes the current element to not be added to the element list
@@ -192,39 +191,39 @@ Parser.prototype.parseTags = function() {
 					}
 				}
 			}
-			else if (this._tagStack[this._tagStack.length - 1] === ElementType.Style) { //We're currently in a style tag
-				if (elementName === "/style") //Actually, we're no longer in a style tag, so pop it off the stack
+			else if(this._tagStack[this._tagStack.length - 1] === ElementType.Style){ //We're currently in a style tag
+				if(elementName === "/style") //Actually, we're no longer in a style tag, so pop it off the stack
 					this._tagStack.pop();
 				else {
-					if (element.raw.indexOf("!--") !== 0) { //Make sure we're not in a comment
+					if(element.raw.indexOf("!--") !== 0){ //Make sure we're not in a comment
 						//All data from here to style close is now a text element
 						element.type = ElementType.Text;
 						//If the previous element is text, append the current text to it
-						if (this._elements.length && this._elements[this._elements.length - 1].type === ElementType.Text) {
+						if(this._elements.length && this._elements[this._elements.length - 1].type === ElementType.Text){
 							prevElement = this._elements[this._elements.length - 1];
-							if (element.raw !== "") {
+							if(element.raw !== ""){
 								prevElement.raw = prevElement.data = prevElement.raw + this._prevTagSep + element.raw;
 								element.raw = element.data = ""; //This causes the current element to not be added to the element list
 							} else { //Element is empty, so just append the last tag marker found
 								prevElement.raw = prevElement.data = prevElement.raw + this._prevTagSep;
 							}
 						} else { //The previous element was not text
-							if (element.raw !== "") {
+							if(element.raw !== ""){
 								element.raw = element.data = element.raw;
 							}
 						}
 					}
 				}
 			}
-			else if (this._tagStack[this._tagStack.length - 1] === ElementType.Comment) { //We're currently in a comment tag
+			else if(this._tagStack[this._tagStack.length - 1] === ElementType.Comment){ //We're currently in a comment tag
 				rawLen = element.raw.length;
-				if (element.raw.charAt(rawLen - 2) === "-" && element.raw.charAt(rawLen - 1) === "-" && tagSep === ">") {
+				if(element.raw.charAt(rawLen - 2) === "-" && element.raw.charAt(rawLen - 1) === "-" && tagSep === ">"){
 					//Actually, we're no longer in a style tag, so pop it off the stack
 					this._tagStack.pop();
 					//If the previous element is a comment, append the current text to it
-					if (this._elements.length && this._elements[this._elements.length - 1].type === ElementType.Comment) {
+					if(this._elements.length && this._elements[this._elements.length - 1].type === ElementType.Comment){
 						prevElement = this._elements[this._elements.length - 1];
-						prevElement.raw = prevElement.data = (prevElement.raw + element.raw).replace(Parser._reTrimComment, "");
+						prevElement.raw = prevElement.data = (prevElement.raw + element.raw).replace(_reTrimComment, "");
 						element.raw = element.data = ""; //This causes the current element to not be added to the element list
 						element.type = ElementType.Text;
 					}
@@ -234,7 +233,7 @@ Parser.prototype.parseTags = function() {
 				else { //Still in a comment tag
 					element.type = ElementType.Comment;
 					//If the previous element is a comment, append the current text to it
-					if (this._elements.length && this._elements[this._elements.length - 1].type === ElementType.Comment) {
+					if(this._elements.length && this._elements[this._elements.length - 1].type === ElementType.Comment){
 						prevElement = this._elements[this._elements.length - 1];
 						prevElement.raw = prevElement.data = prevElement.raw + element.raw + tagSep;
 						element.raw = element.data = ""; //This causes the current element to not be added to the element list
@@ -247,54 +246,54 @@ Parser.prototype.parseTags = function() {
 		}
 
 		//Processing of non-special tags
-		if (element.type === ElementType.Tag) {
+		if(element.type === ElementType.Tag){
 			element.name = elementName;
 			
-			if (element.raw.indexOf("!--") === 0) { //This tag is really comment
+			if(element.raw.indexOf("!--") === 0){ //This tag is really comment
 				element.type = ElementType.Comment;
 				delete element.name;
 				rawLen = element.raw.length;
 				//Check if the comment is terminated in the current element
-				if (element.raw.charAt(rawLen - 1) === "-" && element.raw.charAt(rawLen - 2) === "-" && tagSep === ">")
-					element.raw = element.data = element.raw.replace(Parser._reTrimComment, "");
+				if(element.raw.charAt(rawLen - 1) === "-" && element.raw.charAt(rawLen - 2) === "-" && tagSep === ">")
+					element.raw = element.data = element.raw.replace(_reTrimComment, "");
 				else { //It's not so push the comment onto the tag stack
 					element.raw += tagSep;
 					this._tagStack.push(ElementType.Comment);
 				}
 			}
-			else if (element.raw.indexOf("!") === 0 || element.raw.indexOf("?") === 0) {
+			else if(element.raw.indexOf("!") === 0 || element.raw.indexOf("?") === 0){
 				element.type = ElementType.Directive;
 				//TODO: what about CDATA?
 			}
-			else if (element.name === "script") {
+			else if(element.name === "script"){
 				element.type = ElementType.Script;
 				//Special tag, push onto the tag stack if not terminated
-				if (element.data.charAt(element.data.length - 1) !== "/")
+				if(element.data.charAt(element.data.length - 1) !== "/")
 					this._tagStack.push(ElementType.Script);
 			}
-			else if (element.name === "/script")
+			else if(element.name === "/script")
 				element.type = ElementType.Script;
-			else if (element.name === "style") {
+			else if(element.name === "style"){
 				element.type = ElementType.Style;
 				//Special tag, push onto the tag stack if not terminated
-				if (element.data.charAt(element.data.length - 1) !== "/")
+				if(element.data.charAt(element.data.length - 1) !== "/")
 					this._tagStack.push(ElementType.Style);
 			}
-			else if (element.name === "/style")
+			else if(element.name === "/style")
 				element.type = ElementType.Style;
-			if (element.name && element.name.charAt(0) === "/")
+			if(element.name && element.name.charAt(0) === "/")
 				element.data = element.name;
 		}
 
 		//Add all tags and non-empty text elements to the element list
-		if (element.raw !== "" || element.type !== ElementType.Text) {
-			if (this._options.includeLocation && !element.location) {
+		if(element.raw !== "" || element.type !== ElementType.Text){
+			if(this._options.includeLocation && !element.location){
 				element.location = this.getLocation(element.type === ElementType.Tag);
 			}
 			this.parseAttribs(element);
 			this._elements.push(element);
 			//If tag self-terminates, add an explicit, separate closing tag
-			if (
+			if(
 				element.type !== ElementType.Text
 				&&
 				element.type !== ElementType.Comment
@@ -315,7 +314,7 @@ Parser.prototype.parseTags = function() {
 		this._prevTagSep = tagSep;
 	}
 
-	if (this._options.includeLocation) {
+	if(this._options.includeLocation){
 		this.getLocation();
 		this._location.row += this._location.inBuffer;
 		this._location.inBuffer = 0;
@@ -327,18 +326,18 @@ Parser.prototype.parseTags = function() {
 	this.writeHandler();
 };
 
-Parser.prototype.getLocation = function(startTag) {
+Parser.prototype.getLocation = function(startTag){
 	var c,
 		l = this._location,
 		end = this._current - (startTag ? 1 : 0),
 		chunk = startTag && l.charOffset === 0 && this._current === 0;
 	
-	for (; l.charOffset < end; l.charOffset++) {
+	for (; l.charOffset < end; l.charOffset++){
 		c = this._buffer.charAt(l.charOffset);
-		if (c === '\n') {
+		if(c === '\n'){
 			l.inBuffer++;
 			l.col = 0;
-		} else if (c !== '\r') {
+		} else if(c !== '\r'){
 			l.col++;
 		}
 	}
@@ -349,31 +348,30 @@ Parser.prototype.getLocation = function(startTag) {
 };
 
 //Checks the handler to make it is an object with the right "interface"
-Parser.prototype.validateHandler = function(handler) {
-	if ((typeof handler) !== "object")
+Parser.prototype.validateHandler = function(handler){
+	if((typeof handler) !== "object")
 		throw new Error("Handler is not an object");
-	if ((typeof handler.reset) !== "function")
+	if((typeof handler.reset) !== "function")
 		throw new Error("Handler method 'reset' is invalid");
-	if ((typeof handler.done) !== "function")
+	if((typeof handler.done) !== "function")
 		throw new Error("Handler method 'done' is invalid");
-	if ((typeof handler.writeTag) !== "function")
+	if((typeof handler.writeTag) !== "function")
 		throw new Error("Handler method 'writeTag' is invalid");
-	if ((typeof handler.writeText) !== "function")
+	if((typeof handler.writeText) !== "function")
 		throw new Error("Handler method 'writeText' is invalid");
-	if ((typeof handler.writeComment) !== "function")
+	if((typeof handler.writeComment) !== "function")
 		throw new Error("Handler method 'writeComment' is invalid");
-	if ((typeof handler.writeDirective) !== "function")
+	if((typeof handler.writeDirective) !== "function")
 		throw new Error("Handler method 'writeDirective' is invalid");
 };
 
 //Writes parsed elements out to the handler
-Parser.prototype.writeHandler = function(forceFlush) {
-	forceFlush = !!forceFlush;
-	if (this._tagStack.length && !forceFlush)
+Parser.prototype.writeHandler = function(forceFlush){
+	if(this._tagStack.length && !forceFlush)
 		return;
-	while (this._elements.length) {
+	while (this._elements.length){
 		var element = this._elements.shift();
-		switch (element.type) {
+		switch (element.type){
 			case ElementType.Comment:
 				this._handler.writeComment(element);
 				break;
@@ -390,8 +388,8 @@ Parser.prototype.writeHandler = function(forceFlush) {
 	}
 };
 
-Parser.prototype.handleError = function(error) {
-	if ((typeof this._handler.error) === "function")
+Parser.prototype.handleError = function(error){
+	if((typeof this._handler.error) === "function")
 		this._handler.error(error);
 	else throw error;
 };

From a94fedf8c70198aefd9328226b5ec30dff263043 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 28 Aug 2011 11:10:07 +0200
Subject: [PATCH 011/450] Export the evented handler

---
 lib/EventedHandler.js |  4 +++-
 lib/htmlparser.js     | 13 ++++++++-----
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/lib/EventedHandler.js b/lib/EventedHandler.js
index d233976..f4cd91e 100644
--- a/lib/EventedHandler.js
+++ b/lib/EventedHandler.js
@@ -76,4 +76,6 @@ EventedHandler.prototype.writeTag = function(element){
 		if(empty) this.onCloseTag(name);
 		else this._tagStack.push(element);
 	}
-};
\ No newline at end of file
+};
+
+module.exports = EventedHandler;
\ No newline at end of file
diff --git a/lib/htmlparser.js b/lib/htmlparser.js
index e14ae86..72457cd 100644
--- a/lib/htmlparser.js
+++ b/lib/htmlparser.js
@@ -1,5 +1,8 @@
-exports.Parser = require("./Parser.js");
-exports.DefaultHandler = require("./DefaultHandler.js");
-exports.RssHandler = require("./RssHandler.js");
-exports.ElementType = require("./ElementType.js");
-exports.DomUtils = require("./DomUtils.js");
\ No newline at end of file
+module.exports = {
+	Parser: require("./Parser.js"),
+	DefaultHandler: require("./DefaultHandler.js"),
+	RssHandler: require("./RssHandler.js"),
+	ElementType: require("./ElementType.js"),
+	DomUtils: require("./DomUtils.js"),
+	EventedHandler: require("./EventedHandler.js")
+}
\ No newline at end of file

From 2b57694b591e9933d8260b6b01fcb176648b64c6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 28 Aug 2011 11:34:14 +0200
Subject: [PATCH 012/450] Fixed a bug & restructured some code

---
 lib/EventedHandler.js | 19 ++-----------------
 lib/htmlparser.js     |  4 ++--
 2 files changed, 4 insertions(+), 19 deletions(-)

diff --git a/lib/EventedHandler.js b/lib/EventedHandler.js
index f4cd91e..eb5aa9d 100644
--- a/lib/EventedHandler.js
+++ b/lib/EventedHandler.js
@@ -36,22 +36,7 @@ var openTagCB = function(openTag, attribute){
 };
 
 //HTML Tags that shouldn't contain child nodes
-var emptyTags = {
-	area: true
-	, base: true
-	, basefont: true
-	, br: true
-	, col: true
-	, frame: true
-	, hr: true
-	, img: true
-	, input: true
-	, isindex: true
-	, link: true
-	, meta: true
-	, param: true
-	, embed: true
-};
+var emptyTags={area:true,base:true,basefont:true,br:true,col:true,frame:true,hr:true,img:true,input:true,isindex:true,link:true,meta:true,param:true,embed:true};
 
 EventedHandler.prototype.writeTag = function(element){
 	var closing = element.name.charAt(0) === "/",
@@ -74,7 +59,7 @@ EventedHandler.prototype.writeTag = function(element){
 	else{
 		this.onOpenTag(name, attributes);
 		if(empty) this.onCloseTag(name);
-		else this._tagStack.push(element);
+		else this._stack.push(element);
 	}
 };
 
diff --git a/lib/htmlparser.js b/lib/htmlparser.js
index 72457cd..9aa5bde 100644
--- a/lib/htmlparser.js
+++ b/lib/htmlparser.js
@@ -2,7 +2,7 @@ module.exports = {
 	Parser: require("./Parser.js"),
 	DefaultHandler: require("./DefaultHandler.js"),
 	RssHandler: require("./RssHandler.js"),
+	EventedHandler: require("./EventedHandler.js"),
 	ElementType: require("./ElementType.js"),
-	DomUtils: require("./DomUtils.js"),
-	EventedHandler: require("./EventedHandler.js")
+	DomUtils: require("./DomUtils.js")
 }
\ No newline at end of file

From 1883157fd81e8dac9dc2ce6ffcbbc21333f01412 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 28 Aug 2011 12:04:02 +0200
Subject: [PATCH 013/450] Wrong number of arguments was passed in
 EventedHandler

---
 lib/EventedHandler.js | 2 +-
 lib/Parser.js         | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/EventedHandler.js b/lib/EventedHandler.js
index eb5aa9d..1787e43 100644
--- a/lib/EventedHandler.js
+++ b/lib/EventedHandler.js
@@ -28,7 +28,7 @@ var openTagCB = function(openTag, attribute){
 	function open(name, attributes){ openTag({name:name, attributes:attributes}); }
 	function attr(name, attributes){ for(var i in attributes) attribute({name:i, value:attributes[i]}); }
 	if(openTag){
-		if(attribute) return function(name, attributes){open(name,attributes); attr(attributes);};
+		if(attribute) return function(name, attributes){open(name,attributes); attr(null, attributes);};
 		else return open;
 	}
 	else if(attribute) return attr;
diff --git a/lib/Parser.js b/lib/Parser.js
index 9375f6f..c092b8c 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -75,7 +75,7 @@ Parser.prototype.done = function(){
 			, data: (this._parseState === ElementType.Text) ? rawData : rawData.replace(_reTrim, "")
 			, type: this._parseState
 			};
-		if(this._parseState === ElementType.Tag || this._parseState === ElementType.Script || this._parseState === ElementType.Style)
+		if(tagTypes[this._parseState])
 			element.name = this.parseTagName(element.data);
 		this.parseAttribs(element);
 		this._elements.push(element);

From 19c01c3623e7f159af857dc198b16d7186ef3566 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 28 Aug 2011 12:13:45 +0200
Subject: [PATCH 014/450] Removed repeating code

---
 lib/Parser.js | 24 ++++++++----------------
 1 file changed, 8 insertions(+), 16 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index c092b8c..fbbaabb 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -55,7 +55,7 @@ Parser.prototype.parseComplete = function(data){
 //Parses a piece of an HTML document
 Parser.prototype.parseChunk = function(data){
 	if(this._done)
-		this.handleError(new Error("Attempted to parse chunk after parsing already done"));
+		this.handleError(Error("Attempted to parse chunk after parsing already done"));
 	this._buffer += data; //FIXME: this can be a bottleneck
 	this.parseTags();
 };
@@ -349,20 +349,12 @@ Parser.prototype.getLocation = function(startTag){
 
 //Checks the handler to make it is an object with the right "interface"
 Parser.prototype.validateHandler = function(handler){
-	if((typeof handler) !== "object")
-		throw new Error("Handler is not an object");
-	if((typeof handler.reset) !== "function")
-		throw new Error("Handler method 'reset' is invalid");
-	if((typeof handler.done) !== "function")
-		throw new Error("Handler method 'done' is invalid");
-	if((typeof handler.writeTag) !== "function")
-		throw new Error("Handler method 'writeTag' is invalid");
-	if((typeof handler.writeText) !== "function")
-		throw new Error("Handler method 'writeText' is invalid");
-	if((typeof handler.writeComment) !== "function")
-		throw new Error("Handler method 'writeComment' is invalid");
-	if((typeof handler.writeDirective) !== "function")
-		throw new Error("Handler method 'writeDirective' is invalid");
+	if(typeof handler !== "object")
+		throw Error("Handler is not an object");
+	["reset", "done", "writeTag", "writeText", "writeComment", "writeDirective"].forEach(function(name){
+		if(typeof handler[name] !== "function")
+			throw Error("Handler method '" + name + "' is invalid");
+	});
 };
 
 //Writes parsed elements out to the handler
@@ -389,7 +381,7 @@ Parser.prototype.writeHandler = function(forceFlush){
 };
 
 Parser.prototype.handleError = function(error){
-	if((typeof this._handler.error) === "function")
+	if(typeof this._handler.error === "function")
 		this._handler.error(error);
 	else throw error;
 };

From 262bbc6b2421505af2292d07ad1bd382ec649272 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 28 Aug 2011 19:37:33 +0200
Subject: [PATCH 015/450] renamed module, added it to npm (as "htmlparser2")

---
 package.json | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/package.json b/package.json
index b395c90..82865a1 100644
--- a/package.json
+++ b/package.json
@@ -1,23 +1,22 @@
 {
-	  "name": "htmlparser"
-	, "description": "Forgiving HTML/XML/RSS Parser in JS for *both* Node and Browsers"
-	, "version": "1.7.3"
-	, "author": "Chris Winberry <chris@winberry.net>"
-	, "contributors": []
+	  "name": "htmlparser2"
+	, "description": "Forgiving HTML/XML/RSS Parser for Node. This version is optimised and cleaned and provides a SAX interface (EventedHandler)."
+	, "version": "1.0.0"
+	, "author": "Felix Boehm <me@feedic.com>"
+	, "contributors": [ "Chris Winberry <chris@winberry.net>" ]
 	, "repository": {
 		  "type": "git"
-		, "url": "git://github.com/tautologistics/node-htmlparser.git"
+		, "url": "git://github.com/fb55/node-htmlparser.git"
 	}
 	, "bugs": {
-		  "mail": "chris@winberry.net"
-		, "web": "http://github.com/tautologistics/node-htmlparser/issues"
+		  "mail": "me@feedic.com"
+		, "web": "http://github.com/fb55/node-htmlparser/issues"
 	}
-	, "os": [ "linux", "darwin", "freebsd", "win32" ]
 	, "directories": { "lib": "./lib/" }
 	, "main": "./lib/htmlparser"
-	, "engines": { "node": ">=0.1.33" }
+	, "engines": { "node": ">0" }
 	, "licenses": [{
 		  "type": "MIT"
 		, "url": "http://github.com/tautologistics/node-htmlparser/raw/master/LICENSE"
 	}]
-}
+}
\ No newline at end of file

From 14ee72eb9eb61e1d9ee25e7fef14f39d7415b773 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 21 Oct 2011 14:38:21 +0200
Subject: [PATCH 016/450] Replaced indexOf(a)!==0 with substring(a.length)!==a,
 charAt(length-1) with substr(-1) + many other improvements

---
 lib/Parser.js | 116 +++++++++++++++++++++++---------------------------
 1 file changed, 53 insertions(+), 63 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index fbbaabb..1eb557f 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -28,9 +28,8 @@ function Parser (handler, options){
 
 //**"Static"**//
 //Regular expressions used for cleaning up and parsing (stateless)
-var _reTrim = /(^\s+|\s+$)/g; //Trim leading/trailing whitespace
 var _reTrimComment = /(^\!--|--$)/g; //Remove comment tag markup from comment contents
-var _reWhitespace = /\s/g; //Used to find any whitespace to split on
+var _reWhitespace = /\s/; //Used to find any whitespace to split on
 var _reTagName = /^\s*(\/?)\s*([^\s\/]+)/; //Used to find the tag name for an element
 
 //Regular expressions used for parsing (stateful)
@@ -62,8 +61,7 @@ Parser.prototype.parseChunk = function(data){
 
 //Tells the parser that the HTML being parsed is complete
 Parser.prototype.done = function(){
-	if(this._done)
-		return;
+	if(this._done) return;
 	this._done = true;
 
 	//Push any unparsed text into a final element in the element list
@@ -72,7 +70,7 @@ Parser.prototype.done = function(){
 		this._buffer = "";
 		var element = {
 				raw: rawData
-			, data: (this._parseState === ElementType.Text) ? rawData : rawData.replace(_reTrim, "")
+			, data: (this._parseState === ElementType.Text) ? rawData : rawData.trim()
 			, type: this._parseState
 			};
 		if(tagTypes[this._parseState])
@@ -88,8 +86,8 @@ Parser.prototype.done = function(){
 //Resets the parser to a blank state, ready to parse a new HTML document
 Parser.prototype.reset = function(){
 	this._buffer = "";
+	this._prevTagSep = "";
 	this._done = false;
-	this._elements = [];
 	this._elementsCurrent = 0;
 	this._current = 0;
 	this._next = 0;
@@ -100,8 +98,8 @@ Parser.prototype.reset = function(){
 		, inBuffer: 0
 	};
 	this._parseState = ElementType.Text;
-	this._prevTagSep = '';
 	this._tagStack = [];
+	this._elements = [];
 	this._handler.reset();
 };
 
@@ -166,7 +164,7 @@ Parser.prototype.parseTags = function(){
 		//A new element to eventually be appended to the element list
 		var element = {
 				raw: rawData
-			, data: (this._parseState === ElementType.Text) ? rawData : rawData.replace(_reTrim, "")
+			, data: (this._parseState === ElementType.Text) ? rawData : rawData.trim()
 			, type: this._parseState
 		};
 
@@ -175,49 +173,47 @@ Parser.prototype.parseTags = function(){
 		//This section inspects the current tag stack and modifies the current
 		//element if we're actually parsing a special area (script/comment/style tag)
 		if(this._tagStack.length){ //We're parsing inside a script/comment/style tag
-			if(this._tagStack[this._tagStack.length - 1] === ElementType.Script){ //We're currently in a script tag
+			var type = this._tagStack[this._tagStack.length - 1];
+			if(type === ElementType.Script){ //We're currently in a script tag
 				if(elementName === "/script") //Actually, we're no longer in a script tag, so pop it off the stack
 					this._tagStack.pop();
 				else { //Not a closing script tag
-					if(element.raw.indexOf("!--") !== 0){ //Make sure we're not in a comment
+					if(rawData.substring(0, 3) !== "!--"){ //Make sure we're not in a comment
 						//All data from here to script close is now a text element
 						element.type = ElementType.Text;
 						//If the previous element is text, append the current text to it
 						if(this._elements.length && this._elements[this._elements.length - 1].type === ElementType.Text){
 							prevElement = this._elements[this._elements.length - 1];
-							prevElement.raw = prevElement.data = prevElement.raw + this._prevTagSep + element.raw;
+							prevElement.raw = prevElement.data = prevElement.raw + this._prevTagSep + rawData;
 							element.raw = element.data = ""; //This causes the current element to not be added to the element list
 						}
 					}
 				}
 			}
-			else if(this._tagStack[this._tagStack.length - 1] === ElementType.Style){ //We're currently in a style tag
+			else if(type === ElementType.Style){ //We're currently in a style tag
 				if(elementName === "/style") //Actually, we're no longer in a style tag, so pop it off the stack
 					this._tagStack.pop();
 				else {
-					if(element.raw.indexOf("!--") !== 0){ //Make sure we're not in a comment
+					if(rawData.substring(0, 3) !== "!--"){ //Make sure we're not in a comment
 						//All data from here to style close is now a text element
 						element.type = ElementType.Text;
 						//If the previous element is text, append the current text to it
 						if(this._elements.length && this._elements[this._elements.length - 1].type === ElementType.Text){
 							prevElement = this._elements[this._elements.length - 1];
-							if(element.raw !== ""){
-								prevElement.raw = prevElement.data = prevElement.raw + this._prevTagSep + element.raw;
+							if(rawData !== ""){
+								prevElement.raw = prevElement.data = prevElement.raw + this._prevTagSep + rawData;
 								element.raw = element.data = ""; //This causes the current element to not be added to the element list
 							} else { //Element is empty, so just append the last tag marker found
 								prevElement.raw = prevElement.data = prevElement.raw + this._prevTagSep;
 							}
-						} else { //The previous element was not text
-							if(element.raw !== ""){
-								element.raw = element.data = element.raw;
-							}
+						} else {//The previous element was not text
+							if(rawData !== "") element.data = rawData;
 						}
 					}
 				}
 			}
-			else if(this._tagStack[this._tagStack.length - 1] === ElementType.Comment){ //We're currently in a comment tag
-				rawLen = element.raw.length;
-				if(element.raw.charAt(rawLen - 2) === "-" && element.raw.charAt(rawLen - 1) === "-" && tagSep === ">"){
+			else if(type === ElementType.Comment){ //We're currently in a comment tag
+				if(rawData.substr(-2) === "--" && tagSep === ">"){
 					//Actually, we're no longer in a style tag, so pop it off the stack
 					this._tagStack.pop();
 					//If the previous element is a comment, append the current text to it
@@ -247,42 +243,40 @@ Parser.prototype.parseTags = function(){
 
 		//Processing of non-special tags
 		if(element.type === ElementType.Tag){
-			element.name = elementName;
-			
-			if(element.raw.indexOf("!--") === 0){ //This tag is really comment
+			if(element.raw.substring(0, 3) === "!--"){ //This tag is really comment
 				element.type = ElementType.Comment;
-				delete element.name;
 				rawLen = element.raw.length;
 				//Check if the comment is terminated in the current element
-				if(element.raw.charAt(rawLen - 1) === "-" && element.raw.charAt(rawLen - 2) === "-" && tagSep === ">")
+				if(element.raw.substr(-2) === "--" && tagSep === ">")
 					element.raw = element.data = element.raw.replace(_reTrimComment, "");
 				else { //It's not so push the comment onto the tag stack
 					element.raw += tagSep;
 					this._tagStack.push(ElementType.Comment);
 				}
 			}
-			else if(element.raw.indexOf("!") === 0 || element.raw.indexOf("?") === 0){
-				element.type = ElementType.Directive;
-				//TODO: what about CDATA?
-			}
-			else if(element.name === "script"){
-				element.type = ElementType.Script;
-				//Special tag, push onto the tag stack if not terminated
-				if(element.data.charAt(element.data.length - 1) !== "/")
-					this._tagStack.push(ElementType.Script);
-			}
-			else if(element.name === "/script")
-				element.type = ElementType.Script;
-			else if(element.name === "style"){
-				element.type = ElementType.Style;
-				//Special tag, push onto the tag stack if not terminated
-				if(element.data.charAt(element.data.length - 1) !== "/")
-					this._tagStack.push(ElementType.Style);
+			else {
+				element.name = elementName;
+				
+				if(element.raw[0] === "!" || element.raw[0] === "?"){
+					element.type = ElementType.Directive;
+					//TODO: what about CDATA?
+				}
+				else if(elementName[0] === "/"){
+					element.data = element.name;
+					if(elementName === "/script") element.type = ElementType.Script;
+					else if(elementName === "/style") element.type = ElementType.Style;
+				}
+				else if(elementName === "script"){
+					element.type = ElementType.Script;
+					//Special tag, push onto the tag stack if not terminated
+					if(element.data.substr(-1) !== "/") this._tagStack.push(ElementType.Script);
+				}
+				else if(elementName === "style"){
+					element.type = ElementType.Style;
+					//Special tag, push onto the tag stack if not terminated
+					if(element.data.substr(-1) !== "/") this._tagStack.push(ElementType.Style);
+				}
 			}
-			else if(element.name === "/style")
-				element.type = ElementType.Style;
-			if(element.name && element.name.charAt(0) === "/")
-				element.data = element.name;
 		}
 
 		//Add all tags and non-empty text elements to the element list
@@ -300,7 +294,7 @@ Parser.prototype.parseTags = function(){
 				&&
 				element.type !== ElementType.Directive
 				&&
-				element.data.charAt(element.data.length - 1) === "/"
+				element.data.substr(-1) === "/"
 				)
 				this._elements.push({
 						raw: "/" + element.name
@@ -329,17 +323,18 @@ Parser.prototype.parseTags = function(){
 Parser.prototype.getLocation = function(startTag){
 	var c,
 		l = this._location,
-		end = this._current - (startTag ? 1 : 0),
-		chunk = startTag && l.charOffset === 0 && this._current === 0;
+		end = this._current,
+		chunk = startTag && l.charOffset === 0 && end === 0;
+	
+	if(startTag) end--;
 	
 	for (; l.charOffset < end; l.charOffset++){
-		c = this._buffer.charAt(l.charOffset);
+		c = this._buffer[l.charOffset];
 		if(c === '\n'){
 			l.inBuffer++;
 			l.col = 0;
-		} else if(c !== '\r'){
+		} else if(c !== '\r')
 			l.col++;
-		}
 	}
 	return {
 		 line: l.row + l.inBuffer + 1
@@ -364,18 +359,13 @@ Parser.prototype.writeHandler = function(forceFlush){
 	while (this._elements.length){
 		var element = this._elements.shift();
 		switch (element.type){
-			case ElementType.Comment:
-				this._handler.writeComment(element);
-				break;
-			case ElementType.Directive:
-				this._handler.writeDirective(element);
+			case ElementType.Comment: this._handler.writeComment(element);
 				break;
-			case ElementType.Text:
-				this._handler.writeText(element);
+			case ElementType.Directive: this._handler.writeDirective(element);
 				break;
-			default:
-				this._handler.writeTag(element);
+			case ElementType.Text: this._handler.writeText(element);
 				break;
+			default: this._handler.writeTag(element);
 		}
 	}
 };

From fd49686da4f9faa50b8d42e1c9267f110eb55f82 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 21 Oct 2011 14:38:57 +0200
Subject: [PATCH 017/450] 1.1.0

---
 package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package.json b/package.json
index 82865a1..7129c9d 100644
--- a/package.json
+++ b/package.json
@@ -1,7 +1,7 @@
 {
 	  "name": "htmlparser2"
 	, "description": "Forgiving HTML/XML/RSS Parser for Node. This version is optimised and cleaned and provides a SAX interface (EventedHandler)."
-	, "version": "1.0.0"
+	, "version": "1.1.0"
 	, "author": "Felix Boehm <me@feedic.com>"
 	, "contributors": [ "Chris Winberry <chris@winberry.net>" ]
 	, "repository": {

From cf39b1c42c506fc2a0bf096ba2a68f9fa4697d48 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 21 Oct 2011 15:21:54 +0200
Subject: [PATCH 018/450] Restructured DomUtils

---
 lib/DomUtils.js | 86 ++++++++++++++++++-------------------------------
 1 file changed, 32 insertions(+), 54 deletions(-)

diff --git a/lib/DomUtils.js b/lib/DomUtils.js
index f7a6f3e..a6e0bdf 100644
--- a/lib/DomUtils.js
+++ b/lib/DomUtils.js
@@ -1,96 +1,74 @@
-var DomUtils = {
-		testElement: function(options, element) {
-		if (!element) {
-			return false;
-		}
+module.exports = {
+	testElement: function(options, element) {
+		if (!element) return false;
+		
+		var type = element.type;
 
 		for (var key in options) {
 			if (key === "tag_name") {
-				if (element.type !== "tag" && element.type !== "script" && element.type !== "style") {
-					return false;
-				}
-				if (!options.tag_name(element.name)) {
-					return false;
-				}
+				if (type !== "tag" && type !== "script" && type !== "style") return false;
+				if (!options.tag_name(element.name)) return false;
 			} else if (key === "tag_type") {
-				if (!options.tag_type(element.type)) {
-					return false;
-				}
+				if (!options.tag_type(type)) return false;
 			} else if (key === "tag_contains") {
-				if (element.type !== "text" && element.type !== "comment" && element.type !== "directive") {
-					return false;
-				}
-				if (!options.tag_contains(element.data)) {
-					return false;
-				}
-			} else {
-				if (!element.attribs || !options[key](element.attribs[key])) {
-					return false;
-				}
-			}
+				if (type !== "text" && type !== "comment" && type !== "directive") return false;
+				if (!options.tag_contains(element.data)) return false;
+			} else if (!element.attribs || !options[key](element.attribs[key]))
+				return false;
 		}
 	
 		return true;
 	}
 
 	, getElements: function(options, currentElement, recurse, limit) {
+		if (!currentElement) return [];
+
 		recurse = (recurse === undefined || recurse === null) || !!recurse;
-		limit = isNaN(parseInt(limit, 10)) ? -1 : parseInt(limit, 10);
 
-		if (!currentElement) {
-			return([]);
-		}
+		var parsed_limit = parseInt(limit, 10);
+		limit = isNaN(parsed_limit) ? -1 : parsed_limit;
 
 		var found = [];
 		var elementList;
 
 		function getTest (checkVal) {
-			return(function (value) { return(value === checkVal); });
+			return function (value) { return value === checkVal; };
 		}
 		for (var key in options) {
-			if ((typeof options[key]) !== "function") {
+			if (typeof options[key] !== "function") {
 				options[key] = getTest(options[key]);
 			}
 		}
 
-		if (DomUtils.testElement(options, currentElement)) {
+		if (this.testElement(options, currentElement)) {
 			found.push(currentElement);
 		}
 
-		if (limit >= 0 && found.length >= limit) {
-			return(found);
-		}
+		if (limit >= 0 && found.length >= limit) return found;
 
-		if (recurse && currentElement.children) {
-			elementList = currentElement.children;
-		} else if (currentElement instanceof Array) {
-			elementList = currentElement;
-		} else {
-			return(found);
-		}
+		if(recurse && currentElement.children) elementList = currentElement.children;
+		else if(Array.isArray(currentElement)) elementList = currentElement;
+		else return found;
 
 		for (var i = 0; i < elementList.length; i++) {
-			found = found.concat(DomUtils.getElements(options, elementList[i], recurse, limit));
-			if (limit >= 0 && found.length >= limit) {
-				break;
-			}
+			found = found.concat(this.getElements(options, elementList[i], recurse, limit));
+
+			if (limit >= 0 && found.length >= limit) break;
 		}
 
-		return(found);
+		return found;
 	}
 	
 	, getElementById: function(id, currentElement, recurse) {
-		var result = DomUtils.getElements({ id: id }, currentElement, recurse, 1);
-		return(result.length ? result[0] : null);
+		var result = this.getElements({ id: id }, currentElement, recurse, 1);
+		return result.length ? result[0] : null;
 	}
 	
 	, getElementsByTagName: function(name, currentElement, recurse, limit) {
-		return(DomUtils.getElements({ tag_name: name }, currentElement, recurse, limit));
+		return this.getElements({ tag_name: name }, currentElement, recurse, limit);
 	}
 	
 	, getElementsByTagType: function(type, currentElement, recurse, limit) {
-		return(DomUtils.getElements({ tag_type: type }, currentElement, recurse, limit));
+		return this.getElements({ tag_type: type }, currentElement, recurse, limit);
 	}
-};
-
-module.exports = DomUtils;
\ No newline at end of file
+};
\ No newline at end of file

From 796fec61fedb32a35b9cc38d0c485803166a11e8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 22 Oct 2011 19:57:42 +0200
Subject: [PATCH 019/450] Some small improvements

---
 lib/Parser.js        | 131 +++++++++++++++++++++----------------------
 tests/00-runtests.js |  13 +++--
 2 files changed, 73 insertions(+), 71 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index 1eb557f..aafbaa5 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -14,7 +14,6 @@ function Parser (handler, options){
 	this._elements = [];
 	this._elementsCurrent = 0;
 	this._current = 0;
-	this._next = 0;
 	this._location = {
 		 row: 0
 		, col: 0
@@ -35,7 +34,7 @@ var _reTagName = /^\s*(\/?)\s*([^\s\/]+)/; //Used to find the tag name for an el
 //Regular expressions used for parsing (stateful)
 var _reAttrib = //Find attributes in a tag
 	/([^=<>\"\'\s]+)\s*=\s*"([^"]*)"|([^=<>\"\'\s]+)\s*=\s*'([^']*)'|([^=<>\"\'\s]+)\s*=\s*([^'"\s]+)|([^=<>\"\'\s\/]+)/g;
-var _reTags = /[<\>]/g; //Find tag markers
+var _reTags = /[<>]/g; //Find tag markers
 
 var tagTypes = {};
 tagTypes[ ElementType.Script ] = true;
@@ -69,17 +68,18 @@ Parser.prototype.done = function(){
 		var rawData = this._buffer;
 		this._buffer = "";
 		var element = {
-				raw: rawData
-			, data: (this._parseState === ElementType.Text) ? rawData : rawData.trim()
+			  raw: rawData
+			, data: this._parseState === ElementType.Text ? rawData : rawData.trim()
 			, type: this._parseState
 			};
-		if(tagTypes[this._parseState])
+		if(tagTypes[this._parseState]){
 			element.name = this.parseTagName(element.data);
-		this.parseAttribs(element);
+			this.parseAttribs(element);
+		}
 		this._elements.push(element);
 	}
 
-	this.writeHandler();
+	this.writeHandler(true);
 	this._handler.done();
 };
 
@@ -90,7 +90,6 @@ Parser.prototype.reset = function(){
 	this._done = false;
 	this._elementsCurrent = 0;
 	this._current = 0;
-	this._next = 0;
 	this._location = {
 		 row: 0
 		, col: 0
@@ -109,7 +108,6 @@ Parser.prototype.reset = function(){
 Parser.prototype.parseTagAttribs = function(elements){
 	for(var i = 0, j = elements.length; i < j; i++){
 		var element = elements[i];
-		if(tagTypes[element.type])
 			this.parseAttribs(element);
 	}
 	
@@ -120,11 +118,11 @@ Parser.prototype.parseTagAttribs = function(elements){
 Parser.prototype.parseAttribs = function(element){
 	//Only parse attributes for tags
 	if(!tagTypes[element.type]) return;
-
-	var tagName = element.data.split(_reWhitespace, 1)[0];
-	var attribRaw = element.data.substring(tagName.length);
-	if(attribRaw.length < 1)
-		return;
+	
+	var pos = element.data.search(_reWhitespace);
+	if(pos === -1) return;
+	var attribRaw = element.data.substr(pos);
+	if(attribRaw === "") return;
 
 	var match;
 	_reAttrib.lastIndex = 0;
@@ -132,13 +130,13 @@ Parser.prototype.parseAttribs = function(element){
 		if(element.attribs === undefined)
 			element.attribs = {};
 
-		if(typeof match[1] === "string" && match[1].length){
+		if(match[1]){
 			element.attribs[match[1]] = match[2];
-		} else if(typeof match[3] === "string" && match[3].length){
-			element.attribs[match[3].toString()] = match[4].toString();
-		} else if(typeof match[5] === "string" && match[5].length){
+		} else if(match[3]){
+			element.attribs[match[3]] = match[4];
+		} else if(match[5]){
 			element.attribs[match[5]] = match[6];
-		} else if(typeof match[7] === "string" && match[7].length){
+		} else if(match[7]){
 			element.attribs[match[7]] = match[7];
 		}
 	}
@@ -147,44 +145,47 @@ Parser.prototype.parseAttribs = function(element){
 //Extracts the base tag name from the data value of an element
 Parser.prototype.parseTagName = function(data){
 	if(!data) return "";
-	var match = _reTagName.exec(data);
-	if(!match) return "";
-	return (match[1] ? "/" : "") + match[2];
+	var match = data.match(_reTagName);
+	if(match === null) return "";
+	return match[1] + match[2];
 };
 
 //Parses through HTML text and returns an array of found elements
 //I admit, this function is rather large but splitting up had an noticeable impact on speed
 Parser.prototype.parseTags = function(){
-	var bufferEnd = this._buffer.length - 1;
-	while (_reTags.test(this._buffer)){
-		this._next = _reTags.lastIndex - 1;
-		var tagSep = this._buffer.charAt(this._next); //The currently found tag marker
-		var rawData = this._buffer.substring(this._current, this._next); //The next chunk of data to parse
+	var buffer = this._buffer, stack = this._tagStack;
+	
+	var next, tagSep, rawData, element, elementName, prevElement, rawLen;
+	
+	while (_reTags.test(buffer)){
+		next = _reTags.lastIndex - 1;
+		tagSep = buffer.charAt(next); //The currently found tag marker
+		rawData = buffer.substring(this._current, next); //The next chunk of data to parse
 
 		//A new element to eventually be appended to the element list
-		var element = {
+		element = {
 				raw: rawData
 			, data: (this._parseState === ElementType.Text) ? rawData : rawData.trim()
 			, type: this._parseState
 		};
 
-		var elementName = this.parseTagName(element.data), prevElement, rawLen;
+		elementName = this.parseTagName(element.data);
 
 		//This section inspects the current tag stack and modifies the current
 		//element if we're actually parsing a special area (script/comment/style tag)
-		if(this._tagStack.length){ //We're parsing inside a script/comment/style tag
-			var type = this._tagStack[this._tagStack.length - 1];
+		if(stack.length){ //We're parsing inside a script/comment/style tag
+			var type = stack[stack.length - 1];
 			if(type === ElementType.Script){ //We're currently in a script tag
 				if(elementName === "/script") //Actually, we're no longer in a script tag, so pop it off the stack
-					this._tagStack.pop();
+					stack.pop();
 				else { //Not a closing script tag
 					if(rawData.substring(0, 3) !== "!--"){ //Make sure we're not in a comment
 						//All data from here to script close is now a text element
 						element.type = ElementType.Text;
 						//If the previous element is text, append the current text to it
-						if(this._elements.length && this._elements[this._elements.length - 1].type === ElementType.Text){
-							prevElement = this._elements[this._elements.length - 1];
-							prevElement.raw = prevElement.data = prevElement.raw + this._prevTagSep + rawData;
+						prevElement = this._elements && this._elements[this._elements.length - 1];
+						if(prevElement && prevElement.type === ElementType.Text){
+							prevElement.data = prevElement.raw += this._prevTagSep + rawData;
 							element.raw = element.data = ""; //This causes the current element to not be added to the element list
 						}
 					}
@@ -192,19 +193,19 @@ Parser.prototype.parseTags = function(){
 			}
 			else if(type === ElementType.Style){ //We're currently in a style tag
 				if(elementName === "/style") //Actually, we're no longer in a style tag, so pop it off the stack
-					this._tagStack.pop();
+					stack.pop();
 				else {
 					if(rawData.substring(0, 3) !== "!--"){ //Make sure we're not in a comment
 						//All data from here to style close is now a text element
 						element.type = ElementType.Text;
 						//If the previous element is text, append the current text to it
-						if(this._elements.length && this._elements[this._elements.length - 1].type === ElementType.Text){
-							prevElement = this._elements[this._elements.length - 1];
+						prevElement = this._elements && this._elements[this._elements.length - 1];
+						if(prevElement && prevElement.type === ElementType.Text){
 							if(rawData !== ""){
-								prevElement.raw = prevElement.data = prevElement.raw + this._prevTagSep + rawData;
+								prevElement.data = prevElement.raw += this._prevTagSep + rawData;
 								element.raw = element.data = ""; //This causes the current element to not be added to the element list
 							} else { //Element is empty, so just append the last tag marker found
-								prevElement.raw = prevElement.data = prevElement.raw + this._prevTagSep;
+								prevElement.data = prevElement.raw += this._prevTagSep;
 							}
 						} else {//The previous element was not text
 							if(rawData !== "") element.data = rawData;
@@ -213,12 +214,14 @@ Parser.prototype.parseTags = function(){
 				}
 			}
 			else if(type === ElementType.Comment){ //We're currently in a comment tag
+
+				prevElement = this._elements && this._elements[this._elements.length - 1];
+
 				if(rawData.substr(-2) === "--" && tagSep === ">"){
 					//Actually, we're no longer in a style tag, so pop it off the stack
-					this._tagStack.pop();
+					stack.pop();
 					//If the previous element is a comment, append the current text to it
-					if(this._elements.length && this._elements[this._elements.length - 1].type === ElementType.Comment){
-						prevElement = this._elements[this._elements.length - 1];
+					if(prevElement && prevElement.type === ElementType.Comment){
 						prevElement.raw = prevElement.data = (prevElement.raw + element.raw).replace(_reTrimComment, "");
 						element.raw = element.data = ""; //This causes the current element to not be added to the element list
 						element.type = ElementType.Text;
@@ -229,14 +232,13 @@ Parser.prototype.parseTags = function(){
 				else { //Still in a comment tag
 					element.type = ElementType.Comment;
 					//If the previous element is a comment, append the current text to it
-					if(this._elements.length && this._elements[this._elements.length - 1].type === ElementType.Comment){
-						prevElement = this._elements[this._elements.length - 1];
-						prevElement.raw = prevElement.data = prevElement.raw + element.raw + tagSep;
+					if(prevElement && prevElement.type === ElementType.Comment){
+						prevElement.data = prevElement.raw += element.raw + tagSep;
 						element.raw = element.data = ""; //This causes the current element to not be added to the element list
 						element.type = ElementType.Text;
 					}
 					else
-						element.raw = element.data = element.raw + tagSep;
+						element.data = element.raw += tagSep;
 				}
 			}
 		}
@@ -247,34 +249,34 @@ Parser.prototype.parseTags = function(){
 				element.type = ElementType.Comment;
 				rawLen = element.raw.length;
 				//Check if the comment is terminated in the current element
-				if(element.raw.substr(-2) === "--" && tagSep === ">")
+				if(tagSep === ">" && element.raw.substr(-2) === "--")
 					element.raw = element.data = element.raw.replace(_reTrimComment, "");
 				else { //It's not so push the comment onto the tag stack
 					element.raw += tagSep;
-					this._tagStack.push(ElementType.Comment);
+					stack.push(ElementType.Comment);
 				}
 			}
 			else {
 				element.name = elementName;
 				
-				if(element.raw[0] === "!" || element.raw[0] === "?"){
+				if(element.raw.charAt(0) === "!" || element.raw.charAt(0) === "?"){
 					element.type = ElementType.Directive;
 					//TODO: what about CDATA?
 				}
-				else if(elementName[0] === "/"){
-					element.data = element.name;
+				else if(elementName.charAt(0) === "/"){
+					element.data = elementName;
 					if(elementName === "/script") element.type = ElementType.Script;
 					else if(elementName === "/style") element.type = ElementType.Style;
 				}
 				else if(elementName === "script"){
 					element.type = ElementType.Script;
 					//Special tag, push onto the tag stack if not terminated
-					if(element.data.substr(-1) !== "/") this._tagStack.push(ElementType.Script);
+					if(element.data.substr(-1) !== "/") stack.push(ElementType.Script);
 				}
 				else if(elementName === "style"){
 					element.type = ElementType.Style;
 					//Special tag, push onto the tag stack if not terminated
-					if(element.data.substr(-1) !== "/") this._tagStack.push(ElementType.Style);
+					if(element.data.substr(-1) !== "/") stack.push(ElementType.Style);
 				}
 			}
 		}
@@ -287,24 +289,21 @@ Parser.prototype.parseTags = function(){
 			this.parseAttribs(element);
 			this._elements.push(element);
 			//If tag self-terminates, add an explicit, separate closing tag
-			if(
-				element.type !== ElementType.Text
-				&&
-				element.type !== ElementType.Comment
-				&&
-				element.type !== ElementType.Directive
-				&&
-				element.data.substr(-1) === "/"
-				)
+			if(    element.data.substr(-1) === "/"
+				&& element.type !== ElementType.Text
+				&& element.type !== ElementType.Comment
+				&& element.type !== ElementType.Directive
+				){
 				this._elements.push({
-						raw: "/" + element.name
+					  raw: "/" + element.name
 					, data: "/" + element.name
 					, name: "/" + element.name
 					, type: element.type
 				});
+			}
 		}
 		this._parseState = (tagSep === "<") ? ElementType.Tag : ElementType.Text;
-		this._current = this._next + 1;
+		this._current = next + 1;
 		this._prevTagSep = tagSep;
 	}
 
@@ -314,7 +313,7 @@ Parser.prototype.parseTags = function(){
 		this._location.inBuffer = 0;
 		this._location.charOffset = 0;
 	}
-	this._buffer = (this._current <= bufferEnd) ? this._buffer.substring(this._current) : "";
+	this._buffer = this._buffer.substring(this._current);
 	this._current = 0;
 
 	this.writeHandler();
diff --git a/tests/00-runtests.js b/tests/00-runtests.js
index b879227..f20464c 100644
--- a/tests/00-runtests.js
+++ b/tests/00-runtests.js
@@ -29,18 +29,18 @@ var chunkSize = 5;
 var testFiles = fs.readdirSync(testFolder);
 var testCount = 0;
 var failedCount = 0;
+var totalTime = 0;
 for (var i = 1; i < testFiles.length; i++) {
 	testCount++;
-	var fileParts = testFiles[i].split(".");
-	fileParts.pop();
-	var moduleName = fileParts.join(".");
+	var moduleName = testFiles[i];
 	var test = require(testFolder + "/" + moduleName);
 	var handlerCallback = function handlerCallback (error) {
 		if (error)
 			sys.puts("Handler error: " + error);
 	}
 	console.log(testFiles[i]);
-	var handler = (test.type == "rss") ?
+	var start = Date.now();
+	var handler = (test.type === "rss") ?
 		new htmlparser.RssHandler(handlerCallback, test.options.handler)
 		:
 		new htmlparser.DefaultHandler(handlerCallback, test.options.handler)
@@ -61,7 +61,9 @@ for (var i = 1; i < testFiles.length; i++) {
 		&&
 		sys.inspect(resultChunk, false, null) === sys.inspect(test.expected, false, null)
 		;
-	sys.puts("[" + test.name + "\]: " + (testResult ? "passed" : "FAILED"));
+	var took = Date.now() - start;
+	totalTime += took;
+	sys.puts("[" + test.name + "\]: " + (testResult ? "passed" : "FAILED") + " (took: " + took + "ms)");
 	if (!testResult) {
 		failedCount++;
 		sys.puts("== Complete ==");
@@ -74,3 +76,4 @@ for (var i = 1; i < testFiles.length; i++) {
 }
 sys.puts("Total tests: " + testCount);
 sys.puts("Failed tests: " + failedCount);
+sys.puts("Total time: " + totalTime);

From 499bfbd3956a21aa21236d261d1f0b9c8aca6065 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Tue, 25 Oct 2011 21:32:41 +0200
Subject: [PATCH 020/450] npm bugfix

---
 package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package.json b/package.json
index 7129c9d..e3950ab 100644
--- a/package.json
+++ b/package.json
@@ -10,7 +10,7 @@
 	}
 	, "bugs": {
 		  "mail": "me@feedic.com"
-		, "web": "http://github.com/fb55/node-htmlparser/issues"
+		, "url": "http://github.com/fb55/node-htmlparser/issues"
 	}
 	, "directories": { "lib": "./lib/" }
 	, "main": "./lib/htmlparser"

From 718b5ccc080cf4a6e94a0c15bedf868b6f61896f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Tue, 25 Oct 2011 21:33:24 +0200
Subject: [PATCH 021/450] use util.inherits (function was taken from node
 anyway)

---
 lib/RssHandler.js | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)

diff --git a/lib/RssHandler.js b/lib/RssHandler.js
index 00edbf0..8481e9c 100644
--- a/lib/RssHandler.js
+++ b/lib/RssHandler.js
@@ -1,29 +1,24 @@
 var DefaultHandler = require("./DefaultHandler.js"),
-	DomUtils = require("./DomUtils.js");
+	DomUtils = require("./DomUtils.js"),
+	inherits = require("util").inherits;
 
 //TODO: make this a trully streamable handler
 function RssHandler (callback) {
 	RssHandler.super_.call(this, callback, { ignoreWhitespace: true, verbose: false, enforceEmptyTags: false });
 }
 
-function inherits (ctor, superCtor) {
-	var tempCtor = function(){};
-	tempCtor.prototype = superCtor.prototype;
-	ctor.super_ = superCtor;
-	ctor.prototype = new tempCtor();
-	ctor.prototype.constructor = ctor;
-}
-
 inherits(RssHandler, DefaultHandler);
 
 function getElements(what, where, one, recurse){
 	var ret = DomUtils.getElementsByTagName(what, where, !!recurse);
-	if(one) try{ return ret[0]; } catch(e){return false;}
+	if(one)
+		if(ret && ret.length > 0) return ret[0];
+		else return false;
 	else return ret;
 }
 function fetch(what, where, recurse){
 	var ret = getElements(what, where, true, !!recurse);
-	if(ret) try{ return ret.children[0].data; } catch(e){return false;}
+	if(ret && ret.children && ret.children.length > 0) return ret.children[0].data;
 	else return false;
 }
 
@@ -32,7 +27,7 @@ RssHandler.prototype.done = function() {
 	var feedRoot;
 	var tmp;
 
-	var found = getElements(function (value) { return(value === "rss" || value === "feed"); }, this.dom);
+	var found = getElements(function(value) { return value === "rss" || value === "feed"; }, this.dom);
 	if (found.length) {
 		feedRoot = found[0];
 	}

From 4da4cfdf0de4a5805d8345686d62d4174c151430 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Tue, 25 Oct 2011 21:33:55 +0200
Subject: [PATCH 022/450] Code cleanup, preparation for big changes

---
 lib/Parser.js | 233 +++++++++++++++++++++-----------------------------
 1 file changed, 99 insertions(+), 134 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index aafbaa5..26c01af 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -1,18 +1,17 @@
 var ElementType = require("./ElementType.js");
 
 function Parser (handler, options){
-	this._options = options ? options : { };
-	if(this._options.includeLocation === undefined){
-		this._options.includeLocation = false; //Do not track element position in document by default
-	}
+	this._options = options ? options : {
+		includeLocation: false, //Do not track element position in document by default
+		xmlMode: false //Special behaviour for script/style tags by default
+	};
 
-	this.validateHandler(handler);
+	validateHandler(handler);
 	this._handler = handler;
-	
+
 	this._buffer = "";
 	this._done = false;
 	this._elements = [];
-	this._elementsCurrent = 0;
 	this._current = 0;
 	this._location = {
 		 row: 0
@@ -73,8 +72,8 @@ Parser.prototype.done = function(){
 			, type: this._parseState
 			};
 		if(tagTypes[this._parseState]){
-			element.name = this.parseTagName(element.data);
-			this.parseAttribs(element);
+			element.name = parseTagName(element.data);
+			parseAttribs(element);
 		}
 		this._elements.push(element);
 	}
@@ -88,7 +87,6 @@ Parser.prototype.reset = function(){
 	this._buffer = "";
 	this._prevTagSep = "";
 	this._done = false;
-	this._elementsCurrent = 0;
 	this._current = 0;
 	this._location = {
 		 row: 0
@@ -103,47 +101,32 @@ Parser.prototype.reset = function(){
 };
 
 //**Private**//
-//Methods//
-//Takes an array of elements and parses any found attributes
-Parser.prototype.parseTagAttribs = function(elements){
-	for(var i = 0, j = elements.length; i < j; i++){
-		var element = elements[i];
-			this.parseAttribs(element);
-	}
-	
-	return(elements);
-};
-
-//Takes an element and adds an "attribs" property for any element attributes found 
-Parser.prototype.parseAttribs = function(element){
+//Takes an element and adds an "attribs" property for any element attributes found
+var parseAttribs = function(element){
 	//Only parse attributes for tags
 	if(!tagTypes[element.type]) return;
-	
+
 	var pos = element.data.search(_reWhitespace);
 	if(pos === -1) return;
 	var attribRaw = element.data.substr(pos);
 	if(attribRaw === "") return;
 
-	var match;
 	_reAttrib.lastIndex = 0;
-	while (match = _reAttrib.exec(attribRaw)){
-		if(element.attribs === undefined)
-			element.attribs = {};
-
-		if(match[1]){
-			element.attribs[match[1]] = match[2];
-		} else if(match[3]){
-			element.attribs[match[3]] = match[4];
-		} else if(match[5]){
-			element.attribs[match[5]] = match[6];
-		} else if(match[7]){
-			element.attribs[match[7]] = match[7];
+	var match = _reAttrib.exec(attribRaw);
+	if(match){
+		element.attribs = {};
+		do{
+			if(match[1])		element.attribs[match[1]] = match[2];
+			else if(match[3])	element.attribs[match[3]] = match[4];
+			else if(match[5])	element.attribs[match[5]] = match[6];
+			else if(match[7])	element.attribs[match[7]] = match[7];
 		}
+		while(match = _reAttrib.exec(attribRaw));
 	}
 };
 
 //Extracts the base tag name from the data value of an element
-Parser.prototype.parseTagName = function(data){
+var parseTagName = function(data){
 	if(!data) return "";
 	var match = data.match(_reTagName);
 	if(match === null) return "";
@@ -153,14 +136,16 @@ Parser.prototype.parseTagName = function(data){
 //Parses through HTML text and returns an array of found elements
 //I admit, this function is rather large but splitting up had an noticeable impact on speed
 Parser.prototype.parseTags = function(){
-	var buffer = this._buffer, stack = this._tagStack;
-	
-	var next, tagSep, rawData, element, elementName, prevElement, rawLen;
-	
+	var buffer = this._buffer, stack = this._tagStack, handler = this._handler;
+
+	var next, type, tagSep, rawData, element, elementName, prevElement, elementType;
+
 	while (_reTags.test(buffer)){
 		next = _reTags.lastIndex - 1;
 		tagSep = buffer.charAt(next); //The currently found tag marker
 		rawData = buffer.substring(this._current, next); //The next chunk of data to parse
+		elementType = this._parseState;
+		type = stack.slice(-1)[0];
 
 		//A new element to eventually be appended to the element list
 		element = {
@@ -169,112 +154,88 @@ Parser.prototype.parseTags = function(){
 			, type: this._parseState
 		};
 
-		elementName = this.parseTagName(element.data);
+		if(this._parseState === ElementType.Tag) elementName = parseTagName(rawData);
+		else elementName = "";
+
 
 		//This section inspects the current tag stack and modifies the current
 		//element if we're actually parsing a special area (script/comment/style tag)
-		if(stack.length){ //We're parsing inside a script/comment/style tag
-			var type = stack[stack.length - 1];
-			if(type === ElementType.Script){ //We're currently in a script tag
-				if(elementName === "/script") //Actually, we're no longer in a script tag, so pop it off the stack
-					stack.pop();
-				else { //Not a closing script tag
-					if(rawData.substring(0, 3) !== "!--"){ //Make sure we're not in a comment
-						//All data from here to script close is now a text element
-						element.type = ElementType.Text;
-						//If the previous element is text, append the current text to it
-						prevElement = this._elements && this._elements[this._elements.length - 1];
-						if(prevElement && prevElement.type === ElementType.Text){
-							prevElement.data = prevElement.raw += this._prevTagSep + rawData;
-							element.raw = element.data = ""; //This causes the current element to not be added to the element list
-						}
-					}
-				}
-			}
-			else if(type === ElementType.Style){ //We're currently in a style tag
-				if(elementName === "/style") //Actually, we're no longer in a style tag, so pop it off the stack
-					stack.pop();
-				else {
-					if(rawData.substring(0, 3) !== "!--"){ //Make sure we're not in a comment
-						//All data from here to style close is now a text element
-						element.type = ElementType.Text;
-						//If the previous element is text, append the current text to it
-						prevElement = this._elements && this._elements[this._elements.length - 1];
-						if(prevElement && prevElement.type === ElementType.Text){
-							if(rawData !== ""){
-								prevElement.data = prevElement.raw += this._prevTagSep + rawData;
-								element.raw = element.data = ""; //This causes the current element to not be added to the element list
-							} else { //Element is empty, so just append the last tag marker found
-								prevElement.data = prevElement.raw += this._prevTagSep;
-							}
-						} else {//The previous element was not text
-							if(rawData !== "") element.data = rawData;
-						}
-					}
-				}
-			}
-			else if(type === ElementType.Comment){ //We're currently in a comment tag
-
-				prevElement = this._elements && this._elements[this._elements.length - 1];
-
-				if(rawData.substr(-2) === "--" && tagSep === ">"){
-					//Actually, we're no longer in a style tag, so pop it off the stack
-					stack.pop();
-					//If the previous element is a comment, append the current text to it
-					if(prevElement && prevElement.type === ElementType.Comment){
-						prevElement.raw = prevElement.data = (prevElement.raw + element.raw).replace(_reTrimComment, "");
-						element.raw = element.data = ""; //This causes the current element to not be added to the element list
-						element.type = ElementType.Text;
-					}
-					else //Previous element not a comment
-						element.type = ElementType.Comment; //Change the current element's type to a comment
-				}
-				else { //Still in a comment tag
-					element.type = ElementType.Comment;
-					//If the previous element is a comment, append the current text to it
-					if(prevElement && prevElement.type === ElementType.Comment){
-						prevElement.data = prevElement.raw += element.raw + tagSep;
-						element.raw = element.data = ""; //This causes the current element to not be added to the element list
-						element.type = ElementType.Text;
-					}
-					else
-						element.data = element.raw += tagSep;
-				}
+		if(!type){ /* nothing */ }
+		else if(type === ElementType.Script && elementName === "/script") stack.pop();
+		else if(type === ElementType.Style && elementName === "/style") stack.pop();
+		else if(!this._options.xmlMode && (type === ElementType.Script || type === ElementType.Style)){
+			//special behaviour for script & style tags
+			if(rawData.substring(0, 3) !== "!--"){ //Make sure we're not in a comment
+				//All data from here to style close is now a text element
+			    elementType = ElementType.Text;
+			    //If the previous element is text, append the current text to it
+			    prevElement = this._elements && this._elements[this._elements.length - 1];
+			    if(prevElement && prevElement.type === ElementType.Text){
+			    	prevElement.data = prevElement.raw += this._prevTagSep + rawData;
+			    	rawData = element.data = ""; //This causes the current element to not be added to the element list
+			    } else element.data = rawData; //The previous element was not text
 			}
 		}
+		else if(type === ElementType.Comment){ //We're currently in a comment tag
+
+			prevElement = this._elements && this._elements[this._elements.length - 1];
+
+    		if(rawData.substr(-2) === "--" && tagSep === ">"){
+    			stack.pop();
+    			//If the previous element is a comment, append the current text to it
+    			if(prevElement && prevElement.type === ElementType.Comment){ //Previous element was a comment
+    				prevElement.raw = prevElement.data = (prevElement.raw + rawData).replace(_reTrimComment, "");
+    				rawData = element.data = ""; //This causes the current element to not be added to the element list
+    				elementType = ElementType.Text;
+    			}
+    			else elementType = ElementType.Comment; //Change the current element's type to a comment
+    		}
+    		else { //Still in a comment tag
+    			elementType = ElementType.Comment;
+    			//If the previous element is a comment, append the current text to it
+    			if(prevElement && prevElement.type === ElementType.Comment){
+    				prevElement.data = prevElement.raw += rawData + tagSep;
+    				rawData = element.data = ""; //This causes the current element to not be added to the element list
+    				elementType = ElementType.Text;
+    			}
+    			else
+    				element.data = rawData += tagSep;
+    		}
+    	}
+
+
 
 		//Processing of non-special tags
-		if(element.type === ElementType.Tag){
-			if(element.raw.substring(0, 3) === "!--"){ //This tag is really comment
-				element.type = ElementType.Comment;
-				rawLen = element.raw.length;
+		if(elementType === ElementType.Tag){
+			if(rawData.substring(0, 3) === "!--"){ //This tag is really comment
+				elementType = ElementType.Comment;
 				//Check if the comment is terminated in the current element
-				if(tagSep === ">" && element.raw.substr(-2) === "--")
-					element.raw = element.data = element.raw.replace(_reTrimComment, "");
+				if(tagSep === ">" && rawData.substr(-2) === "--")
+					rawData = element.data = rawData.replace(_reTrimComment, "");
 				else { //It's not so push the comment onto the tag stack
-					element.raw += tagSep;
+					rawData += tagSep;
 					stack.push(ElementType.Comment);
 				}
 			}
 			else {
 				element.name = elementName;
-				
-				if(element.raw.charAt(0) === "!" || element.raw.charAt(0) === "?"){
-					element.type = ElementType.Directive;
+
+				if(rawData.charAt(0) === "!" || rawData.charAt(0) === "?"){
+					elementType = ElementType.Directive;
 					//TODO: what about CDATA?
 				}
 				else if(elementName.charAt(0) === "/"){
 					element.data = elementName;
-					if(elementName === "/script") element.type = ElementType.Script;
-					else if(elementName === "/style") element.type = ElementType.Style;
+					if(elementName === "/script") elementType = ElementType.Script;
+					else if(elementName === "/style") elementType = ElementType.Style;
 				}
 				else if(elementName === "script"){
-					element.type = ElementType.Script;
+					elementType = ElementType.Script;
 					//Special tag, push onto the tag stack if not terminated
 					if(element.data.substr(-1) !== "/") stack.push(ElementType.Script);
 				}
 				else if(elementName === "style"){
-					element.type = ElementType.Style;
+					elementType = ElementType.Style;
 					//Special tag, push onto the tag stack if not terminated
 					if(element.data.substr(-1) !== "/") stack.push(ElementType.Style);
 				}
@@ -282,23 +243,26 @@ Parser.prototype.parseTags = function(){
 		}
 
 		//Add all tags and non-empty text elements to the element list
-		if(element.raw !== "" || element.type !== ElementType.Text){
+		if(rawData !== "" || elementType !== ElementType.Text){
+			element.raw = rawData;
+			element.type = elementType;
+
 			if(this._options.includeLocation && !element.location){
-				element.location = this.getLocation(element.type === ElementType.Tag);
+				element.location = this.getLocation(elementType === ElementType.Tag);
 			}
-			this.parseAttribs(element);
+			parseAttribs(element);
 			this._elements.push(element);
 			//If tag self-terminates, add an explicit, separate closing tag
 			if(    element.data.substr(-1) === "/"
-				&& element.type !== ElementType.Text
-				&& element.type !== ElementType.Comment
-				&& element.type !== ElementType.Directive
+				&& elementType !== ElementType.Text
+				&& elementType !== ElementType.Comment
+				&& elementType !== ElementType.Directive
 				){
 				this._elements.push({
 					  raw: "/" + element.name
 					, data: "/" + element.name
 					, name: "/" + element.name
-					, type: element.type
+					, type: elementType
 				});
 			}
 		}
@@ -315,6 +279,7 @@ Parser.prototype.parseTags = function(){
 	}
 	this._buffer = this._buffer.substring(this._current);
 	this._current = 0;
+	_reTags.lastIndex = 0;
 
 	this.writeHandler();
 };
@@ -324,9 +289,9 @@ Parser.prototype.getLocation = function(startTag){
 		l = this._location,
 		end = this._current,
 		chunk = startTag && l.charOffset === 0 && end === 0;
-	
+
 	if(startTag) end--;
-	
+
 	for (; l.charOffset < end; l.charOffset++){
 		c = this._buffer[l.charOffset];
 		if(c === '\n'){
@@ -342,7 +307,7 @@ Parser.prototype.getLocation = function(startTag){
 };
 
 //Checks the handler to make it is an object with the right "interface"
-Parser.prototype.validateHandler = function(handler){
+var validateHandler = function(handler){
 	if(typeof handler !== "object")
 		throw Error("Handler is not an object");
 	["reset", "done", "writeTag", "writeText", "writeComment", "writeDirective"].forEach(function(name){

From db4f6383b56921ad1e87d74004e3c54428e05531 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Tue, 25 Oct 2011 21:48:13 +0200
Subject: [PATCH 023/450] Create element object later

---
 lib/Parser.js | 60 +++++++++++++++++++++++++++------------------------
 1 file changed, 32 insertions(+), 28 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index 26c01af..5f70387 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -138,25 +138,25 @@ var parseTagName = function(data){
 Parser.prototype.parseTags = function(){
 	var buffer = this._buffer, stack = this._tagStack, handler = this._handler;
 
-	var next, type, tagSep, rawData, element, elementName, prevElement, elementType;
+	var next, type, tagSep, rawData, element, elementName, prevElement, elementType, elementData, includeName;
 
 	while (_reTags.test(buffer)){
 		next = _reTags.lastIndex - 1;
 		tagSep = buffer.charAt(next); //The currently found tag marker
 		rawData = buffer.substring(this._current, next); //The next chunk of data to parse
+		includeName = false;
 		elementType = this._parseState;
+		
+		if(elementType === ElementType.Text){
+			elementData = rawData;
+			elementName = "";
+		}
+		else{
+			elementData = rawData.trim();
+			elementName = parseTagName(elementData);
+		}
 		type = stack.slice(-1)[0];
 
-		//A new element to eventually be appended to the element list
-		element = {
-				raw: rawData
-			, data: (this._parseState === ElementType.Text) ? rawData : rawData.trim()
-			, type: this._parseState
-		};
-
-		if(this._parseState === ElementType.Tag) elementName = parseTagName(rawData);
-		else elementName = "";
-
 
 		//This section inspects the current tag stack and modifies the current
 		//element if we're actually parsing a special area (script/comment/style tag)
@@ -172,8 +172,8 @@ Parser.prototype.parseTags = function(){
 			    prevElement = this._elements && this._elements[this._elements.length - 1];
 			    if(prevElement && prevElement.type === ElementType.Text){
 			    	prevElement.data = prevElement.raw += this._prevTagSep + rawData;
-			    	rawData = element.data = ""; //This causes the current element to not be added to the element list
-			    } else element.data = rawData; //The previous element was not text
+			    	rawData = elementData = ""; //This causes the current element to not be added to the element list
+			    } else elementData = rawData; //The previous element was not text
 			}
 		}
 		else if(type === ElementType.Comment){ //We're currently in a comment tag
@@ -185,7 +185,7 @@ Parser.prototype.parseTags = function(){
     			//If the previous element is a comment, append the current text to it
     			if(prevElement && prevElement.type === ElementType.Comment){ //Previous element was a comment
     				prevElement.raw = prevElement.data = (prevElement.raw + rawData).replace(_reTrimComment, "");
-    				rawData = element.data = ""; //This causes the current element to not be added to the element list
+    				rawData = elementData = ""; //This causes the current element to not be added to the element list
     				elementType = ElementType.Text;
     			}
     			else elementType = ElementType.Comment; //Change the current element's type to a comment
@@ -195,11 +195,11 @@ Parser.prototype.parseTags = function(){
     			//If the previous element is a comment, append the current text to it
     			if(prevElement && prevElement.type === ElementType.Comment){
     				prevElement.data = prevElement.raw += rawData + tagSep;
-    				rawData = element.data = ""; //This causes the current element to not be added to the element list
+    				rawData = elementData = ""; //This causes the current element to not be added to the element list
     				elementType = ElementType.Text;
     			}
     			else
-    				element.data = rawData += tagSep;
+    				elementData = rawData += tagSep;
     		}
     	}
 
@@ -211,49 +211,53 @@ Parser.prototype.parseTags = function(){
 				elementType = ElementType.Comment;
 				//Check if the comment is terminated in the current element
 				if(tagSep === ">" && rawData.substr(-2) === "--")
-					rawData = element.data = rawData.replace(_reTrimComment, "");
+					rawData = elementData = rawData.replace(_reTrimComment, "");
 				else { //It's not so push the comment onto the tag stack
 					rawData += tagSep;
 					stack.push(ElementType.Comment);
 				}
 			}
 			else {
-				element.name = elementName;
+				includeName = true;
 
 				if(rawData.charAt(0) === "!" || rawData.charAt(0) === "?"){
 					elementType = ElementType.Directive;
 					//TODO: what about CDATA?
 				}
 				else if(elementName.charAt(0) === "/"){
-					element.data = elementName;
+					elementData = elementName;
 					if(elementName === "/script") elementType = ElementType.Script;
 					else if(elementName === "/style") elementType = ElementType.Style;
 				}
 				else if(elementName === "script"){
 					elementType = ElementType.Script;
 					//Special tag, push onto the tag stack if not terminated
-					if(element.data.substr(-1) !== "/") stack.push(ElementType.Script);
+					if(elementData.substr(-1) !== "/") stack.push(ElementType.Script);
 				}
 				else if(elementName === "style"){
 					elementType = ElementType.Style;
 					//Special tag, push onto the tag stack if not terminated
-					if(element.data.substr(-1) !== "/") stack.push(ElementType.Style);
+					if(elementData.substr(-1) !== "/") stack.push(ElementType.Style);
 				}
 			}
 		}
 
 		//Add all tags and non-empty text elements to the element list
 		if(rawData !== "" || elementType !== ElementType.Text){
-			element.raw = rawData;
-			element.type = elementType;
-
-			if(this._options.includeLocation && !element.location){
-				element.location = this.getLocation(elementType === ElementType.Tag);
-			}
+			element = {
+				raw: rawData,
+				data: elementData,
+				type: elementType
+			};
+			
+			if(includeName) element.name = elementName;
+			if(this._options.includeLocation) element.location = this.getLocation(elementType === ElementType.Tag);
+			
 			parseAttribs(element);
 			this._elements.push(element);
+
 			//If tag self-terminates, add an explicit, separate closing tag
-			if(    element.data.substr(-1) === "/"
+			if( elementData.substr(-1) === "/"
 				&& elementType !== ElementType.Text
 				&& elementType !== ElementType.Comment
 				&& elementType !== ElementType.Directive

From 7529828f7affe34591f5b3e55a56c2a42c0e41b6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Tue, 25 Oct 2011 22:05:22 +0200
Subject: [PATCH 024/450] Prepared big changes

The element stack will be removed, a new event structure will be
introduced
---
 lib/Parser.js | 29 +++++++++++++++++++++++++----
 1 file changed, 25 insertions(+), 4 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index 5f70387..b02a26f 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -255,17 +255,38 @@ Parser.prototype.parseTags = function(){
 			
 			parseAttribs(element);
 			this._elements.push(element);
+			
+			/*
+			switch(elementType){
+				case ElementType.Text:
+					this._handler.ontext(element);
+				case ElementType.Tag:
+				case ElementType.Style:
+				case ElementType.Script:
+					if(elementName[0] === "/") this._handler.onclosetag(elementName.substr(1));
+					else this._handler.onopentag(element);
+					break;
+				case ElementType.Comment:
+					this._handler.oncomment(element);
+					break;
+				case ElementType.Directive:
+					this._handler.onprocessinginstruction;
+					break;
+				default: throw Error("Unsupported type: " + elementType);
+			}
+			*/
 
 			//If tag self-terminates, add an explicit, separate closing tag
 			if( elementData.substr(-1) === "/"
 				&& elementType !== ElementType.Text
 				&& elementType !== ElementType.Comment
 				&& elementType !== ElementType.Directive
-				){
+			){
+				//this._handler.onclosetag(elementName);
 				this._elements.push({
-					  raw: "/" + element.name
-					, data: "/" + element.name
-					, name: "/" + element.name
+					  raw: "/" + elementName
+					, data: "/" + elementName
+					, name: "/" + elementName
 					, type: elementType
 				});
 			}

From ce7636e9a4b8575e2eea9c094892d30d18e2862c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Tue, 25 Oct 2011 22:06:06 +0200
Subject: [PATCH 025/450] Some steps preparing for the upcoming changes

---
 lib/EventedHandler.js | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/lib/EventedHandler.js b/lib/EventedHandler.js
index 1787e43..a64bf75 100644
--- a/lib/EventedHandler.js
+++ b/lib/EventedHandler.js
@@ -10,8 +10,8 @@ var EventedHandler = function(cbs){
 	this.error = cbs.onerror; //if nothing was set, the error is thrown
 	
 	//functions to be called within writeTag
-	this.onOpenTag = openTagCB(cbs.onopentag, cbs.onattribute);
-	this.onCloseTag = cbs.onclosetag || emptyFunction;
+	this.onopentag = openTagCB(cbs.onopentag, cbs.onattribute);
+	this.onclosetag = cbs.onclosetag || emptyFunction;
 	
 	//privates
 	this._stack = [];
@@ -49,16 +49,16 @@ EventedHandler.prototype.writeTag = function(element){
 			var i = this._stack.length - 1;
 			while(i !== -1 && this._stack[i--].name !== name){}
 			if( (i+=1) !== 0)
-				while(i < this._stack.length) this.onCloseTag(this._stack.pop().name);
+				while(i < this._stack.length) this.onclosetag(this._stack.pop().name);
 		}
 		else if(name === "br"){ //special case for <br>s
-			this.onOpenTag(name, attributes);
-			this.onCloseTag(name);
+			this.onopentag(name, attributes);
+			this.onclosetag(name);
 		}
 	}
 	else{
-		this.onOpenTag(name, attributes);
-		if(empty) this.onCloseTag(name);
+		this.onopentag(name, attributes);
+		if(empty) this.onclosetag(name);
 		else this._stack.push(element);
 	}
 };

From f22f65d9627c35aecc06bcc749b3718acf5531e2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Tue, 25 Oct 2011 22:19:26 +0200
Subject: [PATCH 026/450] Removed regexp for comment trimming

use String#slice instead
---
 lib/Parser.js | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index b02a26f..4552cd9 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -1,7 +1,7 @@
 var ElementType = require("./ElementType.js");
 
 function Parser (handler, options){
-	this._options = options ? options : {
+	this._options = options || {
 		includeLocation: false, //Do not track element position in document by default
 		xmlMode: false //Special behaviour for script/style tags by default
 	};
@@ -10,7 +10,9 @@ function Parser (handler, options){
 	this._handler = handler;
 
 	this._buffer = "";
+	this._prevTagSep = "";
 	this._done = false;
+	this._tagStack = [];
 	this._elements = [];
 	this._current = 0;
 	this._location = {
@@ -20,13 +22,10 @@ function Parser (handler, options){
 		, inBuffer: 0
 	};
 	this._parseState = ElementType.Text;
-	this._prevTagSep = '';
-	this._tagStack = [];
 }
 
 //**"Static"**//
 //Regular expressions used for cleaning up and parsing (stateless)
-var _reTrimComment = /(^\!--|--$)/g; //Remove comment tag markup from comment contents
 var _reWhitespace = /\s/; //Used to find any whitespace to split on
 var _reTagName = /^\s*(\/?)\s*([^\s\/]+)/; //Used to find the tag name for an element
 
@@ -182,9 +181,10 @@ Parser.prototype.parseTags = function(){
 
     		if(rawData.substr(-2) === "--" && tagSep === ">"){
     			stack.pop();
+    			rawData = rawData.slice(0, -2);
     			//If the previous element is a comment, append the current text to it
     			if(prevElement && prevElement.type === ElementType.Comment){ //Previous element was a comment
-    				prevElement.raw = prevElement.data = (prevElement.raw + rawData).replace(_reTrimComment, "");
+    				prevElement.data = prevElement.raw += rawData;
     				rawData = elementData = ""; //This causes the current element to not be added to the element list
     				elementType = ElementType.Text;
     			}
@@ -209,9 +209,10 @@ Parser.prototype.parseTags = function(){
 		if(elementType === ElementType.Tag){
 			if(rawData.substring(0, 3) === "!--"){ //This tag is really comment
 				elementType = ElementType.Comment;
+				rawData = rawData.substr(3);
 				//Check if the comment is terminated in the current element
 				if(tagSep === ">" && rawData.substr(-2) === "--")
-					rawData = elementData = rawData.replace(_reTrimComment, "");
+					elementData = rawData = rawData.slice(0, -2);
 				else { //It's not so push the comment onto the tag stack
 					rawData += tagSep;
 					stack.push(ElementType.Comment);

From b6937eacfb59e0ec62b52c2c375940344a1a14c6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Tue, 25 Oct 2011 22:46:16 +0200
Subject: [PATCH 027/450] Deleted browser related content in tests

---
 tests/01-basic.js                     | 29 -------------------------
 tests/02-single_tag_1.js              | 31 +--------------------------
 tests/03-single_tag_2.js              | 31 +--------------------------
 tests/04-unescaped_in_script.js       | 31 +--------------------------
 tests/05-tags_in_comment.js           | 31 +--------------------------
 tests/06-comment_in_script.js         | 31 +--------------------------
 tests/07-unescaped_in_style.js        | 31 +--------------------------
 tests/08-extra_spaces_in_tag.js       | 29 -------------------------
 tests/09-unquoted_attrib.js           | 31 +--------------------------
 tests/10-singular_attribute.js        | 31 +--------------------------
 tests/11-text_outside_tags.js         | 31 +--------------------------
 tests/12-text_only.js                 | 31 +--------------------------
 tests/13-comment_in_text.js           | 31 +--------------------------
 tests/14-comment_in_text_in_script.js | 31 +--------------------------
 tests/15-non-verbose.js               | 31 +--------------------------
 tests/16-ignore_whitespace.js         | 31 +--------------------------
 tests/17-xml_namespace.js             | 31 +--------------------------
 tests/18-enforce_empty_tags.js        | 31 +--------------------------
 tests/19-ignore_empty_tags.js         | 31 +--------------------------
 tests/20-rss.js                       | 31 +--------------------------
 tests/21-atom.js                      | 31 +--------------------------
 tests/22-position_data.js             | 31 +--------------------------
 22 files changed, 20 insertions(+), 658 deletions(-)

diff --git a/tests/01-basic.js b/tests/01-basic.js
index 7846898..4f1ce6b 100644
--- a/tests/01-basic.js
+++ b/tests/01-basic.js
@@ -1,30 +1,3 @@
-(function () {
-
-function RunningInNode () {
-	return(
-		(typeof require) == "function"
-		&&
-		(typeof exports) == "object"
-		&&
-		(typeof module) == "object"
-		&&
-		(typeof __filename) == "string"
-		&&
-		(typeof __dirname) == "string"
-		);
-}
-
-if (!RunningInNode()) {
-	if (!this.Tautologistics)
-		this.Tautologistics = {};
-	if (!this.Tautologistics.NodeHtmlParser)
-		this.Tautologistics.NodeHtmlParser = {};
-	if (!this.Tautologistics.NodeHtmlParser.Tests)
-		this.Tautologistics.NodeHtmlParser.Tests = [];
-	exports = {};
-	this.Tautologistics.NodeHtmlParser.Tests.push(exports);
-}
-
 exports.name = "Basic test";
 exports.options = {
 	  handler: {}
@@ -57,5 +30,3 @@ exports.expected =
 		     ]
 		  }
 		];
-
-})();
diff --git a/tests/02-single_tag_1.js b/tests/02-single_tag_1.js
index 1735b5e..0180f55 100644
--- a/tests/02-single_tag_1.js
+++ b/tests/02-single_tag_1.js
@@ -1,30 +1,3 @@
-(function () {
-
-function RunningInNode () {
-	return(
-		(typeof require) == "function"
-		&&
-		(typeof exports) == "object"
-		&&
-		(typeof module) == "object"
-		&&
-		(typeof __filename) == "string"
-		&&
-		(typeof __dirname) == "string"
-		);
-}
-
-if (!RunningInNode()) {
-	if (!this.Tautologistics)
-		this.Tautologistics = {};
-	if (!this.Tautologistics.NodeHtmlParser)
-		this.Tautologistics.NodeHtmlParser = {};
-	if (!this.Tautologistics.NodeHtmlParser.Tests)
-		this.Tautologistics.NodeHtmlParser.Tests = [];
-	exports = {};
-	this.Tautologistics.NodeHtmlParser.Tests.push(exports);
-}
-
 exports.name = "Single Tag 1";
 exports.options = {
 	  handler: {}
@@ -34,6 +7,4 @@ exports.html = "<br>text</br>";
 exports.expected =
 	[ { raw: 'br', data: 'br', type: 'tag', name: 'br' }
 	, { raw: 'text', data: 'text', type: 'text' }
-	];
-
-})();
+	];
\ No newline at end of file
diff --git a/tests/03-single_tag_2.js b/tests/03-single_tag_2.js
index 2e6e92c..9363dda 100644
--- a/tests/03-single_tag_2.js
+++ b/tests/03-single_tag_2.js
@@ -1,30 +1,3 @@
-(function () {
-
-function RunningInNode () {
-	return(
-		(typeof require) == "function"
-		&&
-		(typeof exports) == "object"
-		&&
-		(typeof module) == "object"
-		&&
-		(typeof __filename) == "string"
-		&&
-		(typeof __dirname) == "string"
-		);
-}
-
-if (!RunningInNode()) {
-	if (!this.Tautologistics)
-		this.Tautologistics = {};
-	if (!this.Tautologistics.NodeHtmlParser)
-		this.Tautologistics.NodeHtmlParser = {};
-	if (!this.Tautologistics.NodeHtmlParser.Tests)
-		this.Tautologistics.NodeHtmlParser.Tests = [];
-	exports = {};
-	this.Tautologistics.NodeHtmlParser.Tests.push(exports);
-}
-
 exports.name = "Single Tag 2";
 exports.options = {
 	  handler: {}
@@ -35,6 +8,4 @@ exports.expected =
 	[ { raw: 'br', data: 'br', type: 'tag', name: 'br' }
 	, { raw: 'text', data: 'text', type: 'text' }
 	, { raw: 'br', data: 'br', type: 'tag', name: 'br' }
-	];
-
-})();
+	];
\ No newline at end of file
diff --git a/tests/04-unescaped_in_script.js b/tests/04-unescaped_in_script.js
index fb2cc3a..8f0bc3a 100644
--- a/tests/04-unescaped_in_script.js
+++ b/tests/04-unescaped_in_script.js
@@ -1,30 +1,3 @@
-(function () {
-
-function RunningInNode () {
-	return(
-		(typeof require) == "function"
-		&&
-		(typeof exports) == "object"
-		&&
-		(typeof module) == "object"
-		&&
-		(typeof __filename) == "string"
-		&&
-		(typeof __dirname) == "string"
-		);
-}
-
-if (!RunningInNode()) {
-	if (!this.Tautologistics)
-		this.Tautologistics = {};
-	if (!this.Tautologistics.NodeHtmlParser)
-		this.Tautologistics.NodeHtmlParser = {};
-	if (!this.Tautologistics.NodeHtmlParser.Tests)
-		this.Tautologistics.NodeHtmlParser.Tests = [];
-	exports = {};
-	this.Tautologistics.NodeHtmlParser.Tests.push(exports);
-}
-
 exports.name = "Unescaped chars in script";
 exports.options = {
 	  handler: {}
@@ -51,6 +24,4 @@ exports.expected =
        }
      ]
   }
-];
-
-})();
+];
\ No newline at end of file
diff --git a/tests/05-tags_in_comment.js b/tests/05-tags_in_comment.js
index 68a0779..9f66f6b 100644
--- a/tests/05-tags_in_comment.js
+++ b/tests/05-tags_in_comment.js
@@ -1,30 +1,3 @@
-(function () {
-
-function RunningInNode () {
-	return(
-		(typeof require) == "function"
-		&&
-		(typeof exports) == "object"
-		&&
-		(typeof module) == "object"
-		&&
-		(typeof __filename) == "string"
-		&&
-		(typeof __dirname) == "string"
-		);
-}
-
-if (!RunningInNode()) {
-	if (!this.Tautologistics)
-		this.Tautologistics = {};
-	if (!this.Tautologistics.NodeHtmlParser)
-		this.Tautologistics.NodeHtmlParser = {};
-	if (!this.Tautologistics.NodeHtmlParser.Tests)
-		this.Tautologistics.NodeHtmlParser.Tests = [];
-	exports = {};
-	this.Tautologistics.NodeHtmlParser.Tests.push(exports);
-}
-
 exports.name = "Special char in comment";
 exports.options = {
 	  handler: {}
@@ -43,6 +16,4 @@ exports.expected =
        }
      ]
   }
-];
-
-})();
+];
\ No newline at end of file
diff --git a/tests/06-comment_in_script.js b/tests/06-comment_in_script.js
index 2d04ec0..af8468a 100644
--- a/tests/06-comment_in_script.js
+++ b/tests/06-comment_in_script.js
@@ -1,30 +1,3 @@
-(function () {
-
-function RunningInNode () {
-	return(
-		(typeof require) == "function"
-		&&
-		(typeof exports) == "object"
-		&&
-		(typeof module) == "object"
-		&&
-		(typeof __filename) == "string"
-		&&
-		(typeof __dirname) == "string"
-		);
-}
-
-if (!RunningInNode()) {
-	if (!this.Tautologistics)
-		this.Tautologistics = {};
-	if (!this.Tautologistics.NodeHtmlParser)
-		this.Tautologistics.NodeHtmlParser = {};
-	if (!this.Tautologistics.NodeHtmlParser.Tests)
-		this.Tautologistics.NodeHtmlParser.Tests = [];
-	exports = {};
-	this.Tautologistics.NodeHtmlParser.Tests.push(exports);
-}
-
 exports.name = "Script source in comment";
 exports.options = {
 	  handler: {}
@@ -43,6 +16,4 @@ exports.expected =
        }
      ]
   }
-];
-
-})();
+];
\ No newline at end of file
diff --git a/tests/07-unescaped_in_style.js b/tests/07-unescaped_in_style.js
index 563a64a..c5817fc 100644
--- a/tests/07-unescaped_in_style.js
+++ b/tests/07-unescaped_in_style.js
@@ -1,30 +1,3 @@
-(function () {
-
-function RunningInNode () {
-	return(
-		(typeof require) == "function"
-		&&
-		(typeof exports) == "object"
-		&&
-		(typeof module) == "object"
-		&&
-		(typeof __filename) == "string"
-		&&
-		(typeof __dirname) == "string"
-		);
-}
-
-if (!RunningInNode()) {
-	if (!this.Tautologistics)
-		this.Tautologistics = {};
-	if (!this.Tautologistics.NodeHtmlParser)
-		this.Tautologistics.NodeHtmlParser = {};
-	if (!this.Tautologistics.NodeHtmlParser.Tests)
-		this.Tautologistics.NodeHtmlParser.Tests = [];
-	exports = {};
-	this.Tautologistics.NodeHtmlParser.Tests.push(exports);
-}
-
 exports.name = "Unescaped chars in style";
 exports.options = {
 	  handler: {}
@@ -44,6 +17,4 @@ exports.expected =
        }
      ]
   }
-];
-
-})();
+];
\ No newline at end of file
diff --git a/tests/08-extra_spaces_in_tag.js b/tests/08-extra_spaces_in_tag.js
index 1767565..5c85bed 100644
--- a/tests/08-extra_spaces_in_tag.js
+++ b/tests/08-extra_spaces_in_tag.js
@@ -1,30 +1,3 @@
-(function () {
-
-function RunningInNode () {
-	return(
-		(typeof require) == "function"
-		&&
-		(typeof exports) == "object"
-		&&
-		(typeof module) == "object"
-		&&
-		(typeof __filename) == "string"
-		&&
-		(typeof __dirname) == "string"
-		);
-}
-
-if (!RunningInNode()) {
-	if (!this.Tautologistics)
-		this.Tautologistics = {};
-	if (!this.Tautologistics.NodeHtmlParser)
-		this.Tautologistics.NodeHtmlParser = {};
-	if (!this.Tautologistics.NodeHtmlParser.Tests)
-		this.Tautologistics.NodeHtmlParser.Tests = [];
-	exports = {};
-	this.Tautologistics.NodeHtmlParser.Tests.push(exports);
-}
-
 exports.name = "Extra spaces in tag";
 exports.options = {
 	  handler: {}
@@ -45,5 +18,3 @@ exports.expected =
      ]
   }
 ];
-
-})();
diff --git a/tests/09-unquoted_attrib.js b/tests/09-unquoted_attrib.js
index da6bac7..d448a54 100644
--- a/tests/09-unquoted_attrib.js
+++ b/tests/09-unquoted_attrib.js
@@ -1,30 +1,3 @@
-(function () {
-
-function RunningInNode () {
-	return(
-		(typeof require) == "function"
-		&&
-		(typeof exports) == "object"
-		&&
-		(typeof module) == "object"
-		&&
-		(typeof __filename) == "string"
-		&&
-		(typeof __dirname) == "string"
-		);
-}
-
-if (!RunningInNode()) {
-	if (!this.Tautologistics)
-		this.Tautologistics = {};
-	if (!this.Tautologistics.NodeHtmlParser)
-		this.Tautologistics.NodeHtmlParser = {};
-	if (!this.Tautologistics.NodeHtmlParser.Tests)
-		this.Tautologistics.NodeHtmlParser.Tests = [];
-	exports = {};
-	this.Tautologistics.NodeHtmlParser.Tests.push(exports);
-}
-
 exports.name = "Unquoted attributes";
 exports.options = {
 	  handler: {}
@@ -44,6 +17,4 @@ exports.expected =
        }
      ]
   }
-];
-
-})();
+];
\ No newline at end of file
diff --git a/tests/10-singular_attribute.js b/tests/10-singular_attribute.js
index 6c22e1a..d749b94 100644
--- a/tests/10-singular_attribute.js
+++ b/tests/10-singular_attribute.js
@@ -1,30 +1,3 @@
-(function () {
-
-function RunningInNode () {
-	return(
-		(typeof require) == "function"
-		&&
-		(typeof exports) == "object"
-		&&
-		(typeof module) == "object"
-		&&
-		(typeof __filename) == "string"
-		&&
-		(typeof __dirname) == "string"
-		);
-}
-
-if (!RunningInNode()) {
-	if (!this.Tautologistics)
-		this.Tautologistics = {};
-	if (!this.Tautologistics.NodeHtmlParser)
-		this.Tautologistics.NodeHtmlParser = {};
-	if (!this.Tautologistics.NodeHtmlParser.Tests)
-		this.Tautologistics.NodeHtmlParser.Tests = [];
-	exports = {};
-	this.Tautologistics.NodeHtmlParser.Tests.push(exports);
-}
-
 exports.name = "Singular attribute";
 exports.options = {
 	  handler: {}
@@ -38,6 +11,4 @@ exports.expected =
   , name: 'option'
   , attribs: { value: 'foo', selected: 'selected' }
   }
-];
-
-})();
+];
\ No newline at end of file
diff --git a/tests/11-text_outside_tags.js b/tests/11-text_outside_tags.js
index ae63136..ae40c76 100644
--- a/tests/11-text_outside_tags.js
+++ b/tests/11-text_outside_tags.js
@@ -1,30 +1,3 @@
-(function () {
-
-function RunningInNode () {
-	return(
-		(typeof require) == "function"
-		&&
-		(typeof exports) == "object"
-		&&
-		(typeof module) == "object"
-		&&
-		(typeof __filename) == "string"
-		&&
-		(typeof __dirname) == "string"
-		);
-}
-
-if (!RunningInNode()) {
-	if (!this.Tautologistics)
-		this.Tautologistics = {};
-	if (!this.Tautologistics.NodeHtmlParser)
-		this.Tautologistics.NodeHtmlParser = {};
-	if (!this.Tautologistics.NodeHtmlParser.Tests)
-		this.Tautologistics.NodeHtmlParser.Tests = [];
-	exports = {};
-	this.Tautologistics.NodeHtmlParser.Tests.push(exports);
-}
-
 exports.name = "Text outside tags";
 exports.options = {
 	  handler: {}
@@ -45,6 +18,4 @@ exports.expected =
   , data: '\nline two'
   , type: 'text'
   }
-];
-
-})();
+];
\ No newline at end of file
diff --git a/tests/12-text_only.js b/tests/12-text_only.js
index 64fab9e..9612840 100644
--- a/tests/12-text_only.js
+++ b/tests/12-text_only.js
@@ -1,30 +1,3 @@
-(function () {
-
-function RunningInNode () {
-	return(
-		(typeof require) == "function"
-		&&
-		(typeof exports) == "object"
-		&&
-		(typeof module) == "object"
-		&&
-		(typeof __filename) == "string"
-		&&
-		(typeof __dirname) == "string"
-		);
-}
-
-if (!RunningInNode()) {
-	if (!this.Tautologistics)
-		this.Tautologistics = {};
-	if (!this.Tautologistics.NodeHtmlParser)
-		this.Tautologistics.NodeHtmlParser = {};
-	if (!this.Tautologistics.NodeHtmlParser.Tests)
-		this.Tautologistics.NodeHtmlParser.Tests = [];
-	exports = {};
-	this.Tautologistics.NodeHtmlParser.Tests.push(exports);
-}
-
 exports.name = "Only text";
 exports.options = {
 	  handler: {}
@@ -36,6 +9,4 @@ exports.expected =
   , data: 'this is the text'
   , type: 'text'
   }
-];
-
-})();
+];
\ No newline at end of file
diff --git a/tests/13-comment_in_text.js b/tests/13-comment_in_text.js
index e201ef6..c40d891 100644
--- a/tests/13-comment_in_text.js
+++ b/tests/13-comment_in_text.js
@@ -1,30 +1,3 @@
-(function () {
-
-function RunningInNode () {
-	return(
-		(typeof require) == "function"
-		&&
-		(typeof exports) == "object"
-		&&
-		(typeof module) == "object"
-		&&
-		(typeof __filename) == "string"
-		&&
-		(typeof __dirname) == "string"
-		);
-}
-
-if (!RunningInNode()) {
-	if (!this.Tautologistics)
-		this.Tautologistics = {};
-	if (!this.Tautologistics.NodeHtmlParser)
-		this.Tautologistics.NodeHtmlParser = {};
-	if (!this.Tautologistics.NodeHtmlParser.Tests)
-		this.Tautologistics.NodeHtmlParser.Tests = [];
-	exports = {};
-	this.Tautologistics.NodeHtmlParser.Tests.push(exports);
-}
-
 exports.name = "Comment within text";
 exports.options = {
 	  handler: {}
@@ -44,6 +17,4 @@ exports.expected =
   , data: ' the text'
   , type: 'text'
   }
-];
-
-})();
+];
\ No newline at end of file
diff --git a/tests/14-comment_in_text_in_script.js b/tests/14-comment_in_text_in_script.js
index 215a02e..8534610 100644
--- a/tests/14-comment_in_text_in_script.js
+++ b/tests/14-comment_in_text_in_script.js
@@ -1,30 +1,3 @@
-(function () {
-
-function RunningInNode () {
-	return(
-		(typeof require) == "function"
-		&&
-		(typeof exports) == "object"
-		&&
-		(typeof module) == "object"
-		&&
-		(typeof __filename) == "string"
-		&&
-		(typeof __dirname) == "string"
-		);
-}
-
-if (!RunningInNode()) {
-	if (!this.Tautologistics)
-		this.Tautologistics = {};
-	if (!this.Tautologistics.NodeHtmlParser)
-		this.Tautologistics.NodeHtmlParser = {};
-	if (!this.Tautologistics.NodeHtmlParser.Tests)
-		this.Tautologistics.NodeHtmlParser.Tests = [];
-	exports = {};
-	this.Tautologistics.NodeHtmlParser.Tests.push(exports);
-}
-
 exports.name = "Comment within text within script";
 exports.options = {
 	  handler: {}
@@ -52,6 +25,4 @@ exports.expected =
 
      ]
   }
-];
-
-})();
+];
\ No newline at end of file
diff --git a/tests/15-non-verbose.js b/tests/15-non-verbose.js
index 829fce4..9d5a30f 100644
--- a/tests/15-non-verbose.js
+++ b/tests/15-non-verbose.js
@@ -1,30 +1,3 @@
-(function () {
-
-function RunningInNode () {
-	return(
-		(typeof require) == "function"
-		&&
-		(typeof exports) == "object"
-		&&
-		(typeof module) == "object"
-		&&
-		(typeof __filename) == "string"
-		&&
-		(typeof __dirname) == "string"
-		);
-}
-
-if (!RunningInNode()) {
-	if (!this.Tautologistics)
-		this.Tautologistics = {};
-	if (!this.Tautologistics.NodeHtmlParser)
-		this.Tautologistics.NodeHtmlParser = {};
-	if (!this.Tautologistics.NodeHtmlParser.Tests)
-		this.Tautologistics.NodeHtmlParser.Tests = [];
-	exports = {};
-	this.Tautologistics.NodeHtmlParser.Tests.push(exports);
-}
-
 exports.name = "Option 'verbose' set to 'false'";
 exports.options = {
 	  handler: { verbose: false }
@@ -41,6 +14,4 @@ exports.expected =
        }
      ]
   }
-];
-
-})();
+];
\ No newline at end of file
diff --git a/tests/16-ignore_whitespace.js b/tests/16-ignore_whitespace.js
index 68f4439..beb0f34 100644
--- a/tests/16-ignore_whitespace.js
+++ b/tests/16-ignore_whitespace.js
@@ -1,30 +1,3 @@
-(function () {
-
-function RunningInNode () {
-	return(
-		(typeof require) == "function"
-		&&
-		(typeof exports) == "object"
-		&&
-		(typeof module) == "object"
-		&&
-		(typeof __filename) == "string"
-		&&
-		(typeof __dirname) == "string"
-		);
-}
-
-if (!RunningInNode()) {
-	if (!this.Tautologistics)
-		this.Tautologistics = {};
-	if (!this.Tautologistics.NodeHtmlParser)
-		this.Tautologistics.NodeHtmlParser = {};
-	if (!this.Tautologistics.NodeHtmlParser.Tests)
-		this.Tautologistics.NodeHtmlParser.Tests = [];
-	exports = {};
-	this.Tautologistics.NodeHtmlParser.Tests.push(exports);
-}
-
 exports.name = "Options 'ignoreWhitespace' set to 'true'";
 exports.options = {
 	  handler: { ignoreWhitespace: true }
@@ -66,6 +39,4 @@ exports.expected =
   }
 	  ]
 	}
-];
-
-})();
+];
\ No newline at end of file
diff --git a/tests/17-xml_namespace.js b/tests/17-xml_namespace.js
index 562f26b..a2c0d1f 100644
--- a/tests/17-xml_namespace.js
+++ b/tests/17-xml_namespace.js
@@ -1,30 +1,3 @@
-(function () {
-
-function RunningInNode () {
-	return(
-		(typeof require) == "function"
-		&&
-		(typeof exports) == "object"
-		&&
-		(typeof module) == "object"
-		&&
-		(typeof __filename) == "string"
-		&&
-		(typeof __dirname) == "string"
-		);
-}
-
-if (!RunningInNode()) {
-	if (!this.Tautologistics)
-		this.Tautologistics = {};
-	if (!this.Tautologistics.NodeHtmlParser)
-		this.Tautologistics.NodeHtmlParser = {};
-	if (!this.Tautologistics.NodeHtmlParser.Tests)
-		this.Tautologistics.NodeHtmlParser.Tests = [];
-	exports = {};
-	this.Tautologistics.NodeHtmlParser.Tests.push(exports);
-}
-
 exports.name = "XML Namespace";
 exports.options = {
 	  handler: {}
@@ -33,6 +6,4 @@ exports.options = {
 exports.html = "<ns:tag>text</ns:tag>";
 exports.expected =
 	[ { raw: 'ns:tag', data: 'ns:tag', type: 'tag', name: 'ns:tag', children: [ { raw: 'text', data: 'text', type: 'text' } ] }
-	];
-
-})();
+	];
\ No newline at end of file
diff --git a/tests/18-enforce_empty_tags.js b/tests/18-enforce_empty_tags.js
index 3ea3757..01af3e3 100644
--- a/tests/18-enforce_empty_tags.js
+++ b/tests/18-enforce_empty_tags.js
@@ -1,30 +1,3 @@
-(function () {
-
-function RunningInNode () {
-	return(
-		(typeof require) == "function"
-		&&
-		(typeof exports) == "object"
-		&&
-		(typeof module) == "object"
-		&&
-		(typeof __filename) == "string"
-		&&
-		(typeof __dirname) == "string"
-		);
-}
-
-if (!RunningInNode()) {
-	if (!this.Tautologistics)
-		this.Tautologistics = {};
-	if (!this.Tautologistics.NodeHtmlParser)
-		this.Tautologistics.NodeHtmlParser = {};
-	if (!this.Tautologistics.NodeHtmlParser.Tests)
-		this.Tautologistics.NodeHtmlParser.Tests = [];
-	exports = {};
-	this.Tautologistics.NodeHtmlParser.Tests.push(exports);
-}
-
 exports.name = "Enforce empty tags";
 exports.options = {
 	  handler: {}
@@ -35,6 +8,4 @@ exports.expected =
 	[
 		  { raw: 'link', data: 'link', type: 'tag', name: 'link' }
 		, { raw: 'text', data: 'text', type: 'text' }
-	];
-
-})();
+	];
\ No newline at end of file
diff --git a/tests/19-ignore_empty_tags.js b/tests/19-ignore_empty_tags.js
index 4f47a59..abb508f 100644
--- a/tests/19-ignore_empty_tags.js
+++ b/tests/19-ignore_empty_tags.js
@@ -1,30 +1,3 @@
-(function () {
-
-function RunningInNode () {
-	return(
-		(typeof require) == "function"
-		&&
-		(typeof exports) == "object"
-		&&
-		(typeof module) == "object"
-		&&
-		(typeof __filename) == "string"
-		&&
-		(typeof __dirname) == "string"
-		);
-}
-
-if (!RunningInNode()) {
-	if (!this.Tautologistics)
-		this.Tautologistics = {};
-	if (!this.Tautologistics.NodeHtmlParser)
-		this.Tautologistics.NodeHtmlParser = {};
-	if (!this.Tautologistics.NodeHtmlParser.Tests)
-		this.Tautologistics.NodeHtmlParser.Tests = [];
-	exports = {};
-	this.Tautologistics.NodeHtmlParser.Tests.push(exports);
-}
-
 exports.name = "Ignore empty tags";
 exports.options = {
 	  handler: { enforceEmptyTags: false }
@@ -36,6 +9,4 @@ exports.expected =
 		  { raw: 'link', data: 'link', type: 'tag', name: 'link', children: [
 		  	{ raw: 'text', data: 'text', type: 'text' }
 		  ] }
-	];
-
-})();
+	];
\ No newline at end of file
diff --git a/tests/20-rss.js b/tests/20-rss.js
index 52442d6..8179702 100644
--- a/tests/20-rss.js
+++ b/tests/20-rss.js
@@ -1,30 +1,3 @@
-(function () {
-
-function RunningInNode () {
-	return(
-		(typeof require) == "function"
-		&&
-		(typeof exports) == "object"
-		&&
-		(typeof module) == "object"
-		&&
-		(typeof __filename) == "string"
-		&&
-		(typeof __dirname) == "string"
-		);
-}
-
-if (!RunningInNode()) {
-	if (!this.Tautologistics)
-		this.Tautologistics = {};
-	if (!this.Tautologistics.NodeHtmlParser)
-		this.Tautologistics.NodeHtmlParser = {};
-	if (!this.Tautologistics.NodeHtmlParser.Tests)
-		this.Tautologistics.NodeHtmlParser.Tests = [];
-	exports = {};
-	this.Tautologistics.NodeHtmlParser.Tests.push(exports);
-}
-
 exports.name = "RSS (2.0)";
 exports.options = {
 	  handler: {}
@@ -115,6 +88,4 @@ exports.expected = {
 			, pubDate: new Date("Tue, 20 May 2003 08:56:02 GMT")
 			}
 		]
-	};
-
-})();
+	};
\ No newline at end of file
diff --git a/tests/21-atom.js b/tests/21-atom.js
index 4d8c279..f78fdd2 100644
--- a/tests/21-atom.js
+++ b/tests/21-atom.js
@@ -1,30 +1,3 @@
-(function () {
-
-function RunningInNode () {
-	return(
-		(typeof require) == "function"
-		&&
-		(typeof exports) == "object"
-		&&
-		(typeof module) == "object"
-		&&
-		(typeof __filename) == "string"
-		&&
-		(typeof __dirname) == "string"
-		);
-}
-
-if (!RunningInNode()) {
-	if (!this.Tautologistics)
-		this.Tautologistics = {};
-	if (!this.Tautologistics.NodeHtmlParser)
-		this.Tautologistics.NodeHtmlParser = {};
-	if (!this.Tautologistics.NodeHtmlParser.Tests)
-		this.Tautologistics.NodeHtmlParser.Tests = [];
-	exports = {};
-	this.Tautologistics.NodeHtmlParser.Tests.push(exports);
-}
-
 exports.name = "Atom (1.0)";
 exports.options = {
 	  handler: {}
@@ -75,6 +48,4 @@ exports.expected = {
 			, pubDate: new Date("2003-12-13T18:30:02Z")
 			}
 		]
-	};
-
-})();
+	};
\ No newline at end of file
diff --git a/tests/22-position_data.js b/tests/22-position_data.js
index fcd7c90..b9fecb6 100644
--- a/tests/22-position_data.js
+++ b/tests/22-position_data.js
@@ -1,30 +1,3 @@
-(function () {
-
-function RunningInNode () {
-	return(
-		(typeof require) == "function"
-		&&
-		(typeof exports) == "object"
-		&&
-		(typeof module) == "object"
-		&&
-		(typeof __filename) == "string"
-		&&
-		(typeof __dirname) == "string"
-		);
-}
-
-if (!RunningInNode()) {
-	if (!this.Tautologistics)
-		this.Tautologistics = {};
-	if (!this.Tautologistics.NodeHtmlParser)
-		this.Tautologistics.NodeHtmlParser = {};
-	if (!this.Tautologistics.NodeHtmlParser.Tests)
-		this.Tautologistics.NodeHtmlParser.Tests = [];
-	exports = {};
-	this.Tautologistics.NodeHtmlParser.Tests.push(exports);
-}
-
 exports.name = "Postion data";
 exports.options = {
 	  handler: {}
@@ -95,6 +68,4 @@ exports.expected = [
 			}
 		}]
 	}
-	];
-
-})();
+	];
\ No newline at end of file

From dc5fe9c9ae212a09ea31948499d8c31672042191 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Tue, 25 Oct 2011 22:47:06 +0200
Subject: [PATCH 028/450] Added a test

https://github.com/tautologistics/node-htmlparser/issues/29
---
 tests/23-template_script_tags.js | 15 +++++++++++++++
 1 file changed, 15 insertions(+)
 create mode 100644 tests/23-template_script_tags.js

diff --git a/tests/23-template_script_tags.js b/tests/23-template_script_tags.js
new file mode 100644
index 0000000..fb63af1
--- /dev/null
+++ b/tests/23-template_script_tags.js
@@ -0,0 +1,15 @@
+exports.name = "Template script tags";
+exports.options = {
+	  handler: {}
+	, parser: {}
+};
+exports.html = "<script type=\"text/template\"> <h1>Heading1</h1></script>";
+exports.expected = [ { raw: 'script type="text/template"',
+    data: 'script type="text/template"',
+    type: 'script',
+    name: 'script',
+    attribs: { type: 'text/template' },
+    children: 
+     [ { raw: '<h1>Heading1</h1>',
+         data: '<h1>Heading1</h1>',
+         type: 'text' } ] } ];
\ No newline at end of file

From 5c7ec40364f86ecc822aa2e9d89826d874595fa3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Tue, 25 Oct 2011 22:48:48 +0200
Subject: [PATCH 029/450] Fixed test 23

---
 tests/23-template_script_tags.js | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/23-template_script_tags.js b/tests/23-template_script_tags.js
index fb63af1..6e256d2 100644
--- a/tests/23-template_script_tags.js
+++ b/tests/23-template_script_tags.js
@@ -10,6 +10,6 @@ exports.expected = [ { raw: 'script type="text/template"',
     name: 'script',
     attribs: { type: 'text/template' },
     children: 
-     [ { raw: '<h1>Heading1</h1>',
-         data: '<h1>Heading1</h1>',
+     [ { raw: ' <h1>Heading1</h1>',
+         data: ' <h1>Heading1</h1>',
          type: 'text' } ] } ];
\ No newline at end of file

From bf7f439bfee88906e2bea08a0a54564cabb01d78 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Wed, 26 Oct 2011 10:11:53 +0200
Subject: [PATCH 030/450] Removed trash in runtests.js

---
 tests/00-runtests.js | 23 +----------------------
 1 file changed, 1 insertion(+), 22 deletions(-)

diff --git a/tests/00-runtests.js b/tests/00-runtests.js
index f20464c..486d69c 100644
--- a/tests/00-runtests.js
+++ b/tests/00-runtests.js
@@ -1,24 +1,3 @@
-/***********************************************
-Copyright 2010, Chris Winberry <chris@winberry.net>. All rights reserved.
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to
-deal in the Software without restriction, including without limitation the
-rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-sell copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
- 
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
- 
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-IN THE SOFTWARE.
-***********************************************/
-
 var sys = require("sys");
 var fs = require("fs");
 var htmlparser = require("../lib/htmlparser");
@@ -74,6 +53,6 @@ for (var i = 1; i < testFiles.length; i++) {
 		sys.puts(sys.inspect(test.expected, false, null));
 	}
 }
+sys.puts("Total time: " + totalTime);
 sys.puts("Total tests: " + testCount);
 sys.puts("Failed tests: " + failedCount);
-sys.puts("Total time: " + totalTime);

From 6051bf19fd240db54e2ebbaf0ec2317d0c1fe271 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Wed, 26 Oct 2011 10:13:39 +0200
Subject: [PATCH 031/450] Replaced _reTags with indexOf

---
 lib/Parser.js | 31 +++++++++++++++++++------------
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index 4552cd9..c58ed57 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -32,7 +32,6 @@ var _reTagName = /^\s*(\/?)\s*([^\s\/]+)/; //Used to find the tag name for an el
 //Regular expressions used for parsing (stateful)
 var _reAttrib = //Find attributes in a tag
 	/([^=<>\"\'\s]+)\s*=\s*"([^"]*)"|([^=<>\"\'\s]+)\s*=\s*'([^']*)'|([^=<>\"\'\s]+)\s*=\s*([^'"\s]+)|([^=<>\"\'\s\/]+)/g;
-var _reTags = /[<>]/g; //Find tag markers
 
 var tagTypes = {};
 tagTypes[ ElementType.Script ] = true;
@@ -137,13 +136,20 @@ var parseTagName = function(data){
 Parser.prototype.parseTags = function(){
 	var buffer = this._buffer, stack = this._tagStack, handler = this._handler;
 
-	var next, type, tagSep, rawData, element, elementName, prevElement, elementType, elementData, includeName;
+	var next, type, tagSep, rawData, element, elementName, prevElement, elementType, elementData, includeName = false;
+	
+	var opening = buffer.indexOf("<"), closing = buffer.indexOf(">");
 
-	while (_reTags.test(buffer)){
-		next = _reTags.lastIndex - 1;
-		tagSep = buffer.charAt(next); //The currently found tag marker
+	while(opening !== -1 || closing !== -1){
+		if(closing === -1 || (opening !== -1 && opening < closing)){
+			next = opening;
+			opening = buffer.indexOf(tagSep = "<", next + 1);
+		}
+		else{
+			next = closing;
+			closing = buffer.indexOf(tagSep = ">", next + 1);
+		}
 		rawData = buffer.substring(this._current, next); //The next chunk of data to parse
-		includeName = false;
 		elementType = this._parseState;
 		
 		if(elementType === ElementType.Text){
@@ -198,8 +204,7 @@ Parser.prototype.parseTags = function(){
     				rawData = elementData = ""; //This causes the current element to not be added to the element list
     				elementType = ElementType.Text;
     			}
-    			else
-    				elementData = rawData += tagSep;
+    			else elementData = rawData += tagSep;
     		}
     	}
 
@@ -251,7 +256,10 @@ Parser.prototype.parseTags = function(){
 				type: elementType
 			};
 			
-			if(includeName) element.name = elementName;
+			if(includeName){
+				element.name = elementName;
+				includeName = false;
+			}
 			if(this._options.includeLocation) element.location = this.getLocation(elementType === ElementType.Tag);
 			
 			parseAttribs(element);
@@ -278,10 +286,10 @@ Parser.prototype.parseTags = function(){
 			*/
 
 			//If tag self-terminates, add an explicit, separate closing tag
-			if( elementData.substr(-1) === "/"
-				&& elementType !== ElementType.Text
+			if( elementType !== ElementType.Text
 				&& elementType !== ElementType.Comment
 				&& elementType !== ElementType.Directive
+				&& elementData.substr(-1) === "/"
 			){
 				//this._handler.onclosetag(elementName);
 				this._elements.push({
@@ -305,7 +313,6 @@ Parser.prototype.parseTags = function(){
 	}
 	this._buffer = this._buffer.substring(this._current);
 	this._current = 0;
-	_reTags.lastIndex = 0;
 
 	this.writeHandler();
 };

From b51764486672bf81ca8ec766ab4405c33aea0bbb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Wed, 26 Oct 2011 15:43:51 +0200
Subject: [PATCH 032/450] Added doctype to first test

---
 tests/01-basic.js | 50 ++++++++++++++++++++++-------------------------
 1 file changed, 23 insertions(+), 27 deletions(-)

diff --git a/tests/01-basic.js b/tests/01-basic.js
index 4f1ce6b..2901583 100644
--- a/tests/01-basic.js
+++ b/tests/01-basic.js
@@ -3,30 +3,26 @@ exports.options = {
 	  handler: {}
 	, parser: {}
 };
-exports.html = "<html><title>The Title</title><body>Hello world</body></html>";
-exports.expected =
-	[ { raw: 'html'
-		  , data: 'html'
-		  , type: 'tag'
-		  , name: 'html'
-		  , children: 
-		     [ { raw: 'title'
-		       , data: 'title'
-		       , type: 'tag'
-		       , name: 'title'
-		       , children: [ { raw: 'The Title', data: 'The Title', type: 'text' } ]
-		       }
-		     , { raw: 'body'
-		       , data: 'body'
-		       , type: 'tag'
-		       , name: 'body'
-		       , children: 
-		          [ { raw: 'Hello world'
-		            , data: 'Hello world'
-		            , type: 'text'
-		            }
-		          ]
-		       }
-		     ]
-		  }
-		];
+exports.html = "<!DOCTYPE html><html><title>The Title</title><body>Hello world</body></html>";
+exports.expected = [ { raw: '!DOCTYPE html',
+    data: '!DOCTYPE html',
+    type: 'directive',
+    name: '!DOCTYPE' },
+  { raw: 'html',
+    data: 'html',
+    type: 'tag',
+    name: 'html',
+    children: 
+     [ { raw: 'title',
+         data: 'title',
+         type: 'tag',
+         name: 'title',
+         children: [ { raw: 'The Title', data: 'The Title', type: 'text' } ] },
+       { raw: 'body',
+         data: 'body',
+         type: 'tag',
+         name: 'body',
+         children: 
+          [ { raw: 'Hello world',
+              data: 'Hello world',
+              type: 'text' } ] } ] } ];

From bcf8aeb3d4ce3555c81be709dc935e88ab71a357 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Wed, 26 Oct 2011 16:13:20 +0200
Subject: [PATCH 033/450] Removed char loop from Parser#getLocation

Use String#split & String#replace instead
---
 lib/Parser.js | 50 ++++++++++++++++++++++++++------------------------
 1 file changed, 26 insertions(+), 24 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index c58ed57..5377291 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -16,10 +16,10 @@ function Parser (handler, options){
 	this._elements = [];
 	this._current = 0;
 	this._location = {
-		 row: 0
-		, col: 0
-		, charOffset: 0
-		, inBuffer: 0
+		row: 0,
+		col: 0,
+		charOffset: 0,
+		inBuffer: 0
 	};
 	this._parseState = ElementType.Text;
 }
@@ -28,10 +28,10 @@ function Parser (handler, options){
 //Regular expressions used for cleaning up and parsing (stateless)
 var _reWhitespace = /\s/; //Used to find any whitespace to split on
 var _reTagName = /^\s*(\/?)\s*([^\s\/]+)/; //Used to find the tag name for an element
+var _reRow = RegExp("\r","g");
 
-//Regular expressions used for parsing (stateful)
-var _reAttrib = //Find attributes in a tag
-	/([^=<>\"\'\s]+)\s*=\s*"([^"]*)"|([^=<>\"\'\s]+)\s*=\s*'([^']*)'|([^=<>\"\'\s]+)\s*=\s*([^'"\s]+)|([^=<>\"\'\s\/]+)/g;
+//Find attributes in a tag
+var _reAttrib = /([^=<>\"\'\s]+)\s*=\s*"([^"]*)"|([^=<>\"\'\s]+)\s*=\s*'([^']*)'|([^=<>\"\'\s]+)\s*=\s*([^'"\s]+)|([^=<>\"\'\s\/]+)/g;
 
 var tagTypes = {};
 tagTypes[ ElementType.Script ] = true;
@@ -49,8 +49,7 @@ Parser.prototype.parseComplete = function(data){
 
 //Parses a piece of an HTML document
 Parser.prototype.parseChunk = function(data){
-	if(this._done)
-		this.handleError(Error("Attempted to parse chunk after parsing already done"));
+	if(this._done) this.handleError(Error("Attempted to parse chunk after parsing already done"));
 	this._buffer += data; //FIXME: this can be a bottleneck
 	this.parseTags();
 };
@@ -318,24 +317,27 @@ Parser.prototype.parseTags = function(){
 };
 
 Parser.prototype.getLocation = function(startTag){
-	var c,
-		l = this._location,
+	var c, end, chunk,
+		l = this._location;
+	if(startTag){
+		end = this._current-1,
+		chunk = l.charOffset === 0 && end === -1;
+	} else {
 		end = this._current,
-		chunk = startTag && l.charOffset === 0 && end === 0;
-
-	if(startTag) end--;
-
-	for (; l.charOffset < end; l.charOffset++){
-		c = this._buffer[l.charOffset];
-		if(c === '\n'){
-			l.inBuffer++;
-			l.col = 0;
-		} else if(c !== '\r')
-			l.col++;
+		chunk = false;
 	}
+	
+	var str = this._buffer.substring(l.charOffset, l.charOffset = end);
+	var rows = str.split("\n"),
+		rowNum = rows.length - 1;
+	
+	l.inBuffer += rowNum;
+	if(rowNum !== 0) l.col = rows[rowNum].replace(_reRow,"").length;
+	else l.col += str.replace(_reRow,"").length;
+	
 	return {
-		 line: l.row + l.inBuffer + 1
-		, col: l.col + (chunk ? 0: 1)
+		line: l.row + l.inBuffer + 1,
+		col: l.col + (chunk ? 0: 1)
 	};
 };
 

From 8175f9ea506543812a7a028295158874fd6a6cf4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 28 Oct 2011 18:34:11 +0200
Subject: [PATCH 034/450] Renamed htmlparser.js to index.js

The old name was misleading.
---
 lib/{htmlparser.js => index.js} | 0
 package.json                    | 2 +-
 2 files changed, 1 insertion(+), 1 deletion(-)
 rename lib/{htmlparser.js => index.js} (100%)

diff --git a/lib/htmlparser.js b/lib/index.js
similarity index 100%
rename from lib/htmlparser.js
rename to lib/index.js
diff --git a/package.json b/package.json
index e3950ab..c5e6fa3 100644
--- a/package.json
+++ b/package.json
@@ -13,7 +13,7 @@
 		, "url": "http://github.com/fb55/node-htmlparser/issues"
 	}
 	, "directories": { "lib": "./lib/" }
-	, "main": "./lib/htmlparser"
+	, "main": "./lib/"
 	, "engines": { "node": ">0" }
 	, "licenses": [{
 		  "type": "MIT"

From e614ee0509767e65620fc726520a6020a0891acf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 28 Oct 2011 18:35:50 +0200
Subject: [PATCH 035/450] Don't allow <tagname <tagname

---
 lib/Parser.js | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index 5377291..3942130 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -60,7 +60,7 @@ Parser.prototype.done = function(){
 	this._done = true;
 
 	//Push any unparsed text into a final element in the element list
-	if(this._buffer.length){
+	if(this._buffer){
 		var rawData = this._buffer;
 		this._buffer = "";
 		var element = {
@@ -151,7 +151,7 @@ Parser.prototype.parseTags = function(){
 		rawData = buffer.substring(this._current, next); //The next chunk of data to parse
 		elementType = this._parseState;
 		
-		if(elementType === ElementType.Text){
+		if(elementType === ElementType.Text || tagSep === "<"){
 			elementData = rawData;
 			elementName = "";
 		}

From 59292dc72f6d35308ffc3df573672e05e07e2b07 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 28 Oct 2011 20:02:24 +0200
Subject: [PATCH 036/450] corrected reference in runtests

---
 tests/00-runtests.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/00-runtests.js b/tests/00-runtests.js
index 486d69c..071bf64 100644
--- a/tests/00-runtests.js
+++ b/tests/00-runtests.js
@@ -1,6 +1,6 @@
 var sys = require("sys");
 var fs = require("fs");
-var htmlparser = require("../lib/htmlparser");
+var htmlparser = require("..");
 
 var testFolder = ".";
 var chunkSize = 5;

From cb0ab4ba812f8f64d43b2fb1b044302338519708 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 28 Oct 2011 20:02:59 +0200
Subject: [PATCH 037/450] Small changes to Parser#getLocation

---
 lib/Parser.js | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index 3942130..7bab01f 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -327,13 +327,14 @@ Parser.prototype.getLocation = function(startTag){
 		chunk = false;
 	}
 	
-	var str = this._buffer.substring(l.charOffset, l.charOffset = end);
-	var rows = str.split("\n"),
+	var rows = this._buffer.substring(l.charOffset, l.charOffset = end).split("\n"),
 		rowNum = rows.length - 1;
 	
 	l.inBuffer += rowNum;
-	if(rowNum !== 0) l.col = rows[rowNum].replace(_reRow,"").length;
-	else l.col += str.replace(_reRow,"").length;
+	
+	var num = rows[rowNum].replace(_reRow,"").length;
+	if(rowNum == 0) l.col += num;
+	else l.col = num;
 	
 	return {
 		line: l.row + l.inBuffer + 1,

From a997bc0715651c925aecca8aa721e78fd0176c53 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 28 Oct 2011 20:07:17 +0200
Subject: [PATCH 038/450] Updated readme

---
 README.md | 99 ++++++++++++++++++++-----------------------------------
 1 file changed, 36 insertions(+), 63 deletions(-)

diff --git a/README.md b/README.md
index 52467ba..edd9028 100644
--- a/README.md
+++ b/README.md
@@ -1,21 +1,15 @@
 #NodeHtmlParser
-A forgiving HTML/XML/RSS parser written in JS for both the browser and NodeJS (yes, despite the name it works just fine in any modern browser). The parser can handle streams (chunked data) and supports custom handlers for writing custom DOMs/output.
+A forgiving HTML/XML/RSS parser written in JS for NodeJS. The parser can handle streams (chunked data) and supports custom handlers for writing custom DOMs/output.
 
 ##Installing
-
-	npm install htmlparser
+	`npm install htmlparser`
 
 ##Running Tests
+	`node tests/00-runtests.js`
 
-###Run tests under node:
-	node runtests.js
-
-###Run tests in browser:
-View runtests.html in any browser
-
-##Usage In Node
+##Usage
 	var htmlparser = require("htmlparser");
-	var rawHtml = "Xyz <script language= javascript>var foo = '<<bar>>';< /  script><!--<!-- Waah! -- -->";
+	var rawHtml = "Xyz &lt;script language= javascript>var foo = '&lt;&lt;bar>>';&lt; /  script>&lt;!--&lt;!-- Waah! -- -->";
 	var handler = new htmlparser.DefaultHandler(function (error, dom) {
 		if (error)
 			[...do something for errors...]
@@ -26,16 +20,6 @@ View runtests.html in any browser
 	parser.parseComplete(rawHtml);
 	sys.puts(sys.inspect(handler.dom, false, null));
 
-##Usage In Browser
-	var handler = new Tautologistics.NodeHtmlParser.DefaultHandler(function (error, dom) {
-		if (error)
-			[...do something for errors...]
-		else
-			[...parsing done, do something...]
-	});
-	var parser = new Tautologistics.NodeHtmlParser.Parser(handler);
-	parser.parseComplete(document.body.innerHTML);
-	alert(JSON.stringify(handler.dom, null, 2));
 
 ##Example output
 	[ { raw: 'Xyz ', data: 'Xyz ', type: 'text' }
@@ -45,47 +29,47 @@ View runtests.html in any browser
 	  , name: 'script'
 	  , attribs: { language: 'javascript' }
 	  , children: 
-	     [ { raw: 'var foo = \'<bar>\';<'
-	       , data: 'var foo = \'<bar>\';<'
+	     [ { raw: 'var foo = \'&lt;bar>\';&lt;'
+	       , data: 'var foo = \'&lt;bar>\';&lt;'
 	       , type: 'text'
 	       }
 	     ]
 	  }
-	, { raw: '<!-- Waah! -- '
-	  , data: '<!-- Waah! -- '
+	, { raw: '&lt;!-- Waah! -- '
+	  , data: '&lt;!-- Waah! -- '
 	  , type: 'comment'
 	  }
 	]
 
 ##Streaming To Parser
-	while (...) {
+	`while (...) {
 		...
 		parser.parseChunk(chunk);
 	}
-	parser.done();	
+	parser.done();`
 
 ##Parsing RSS/Atom Feeds
 
-	new htmlparser.RssHandler(function (error, dom) {
+	`new htmlparser.RssHandler(function (error, dom) {
 		...
-	});
+	});`
 
 ##DefaultHandler Options
 
 ###Usage
-	var handler = new htmlparser.DefaultHandler(
+	`var handler = new htmlparser.DefaultHandler(
 		  function (error) { ... }
 		, { verbose: false, ignoreWhitespace: true }
-		);
+		);`
 	
 ###Option: ignoreWhitespace
 Indicates whether the DOM should exclude text nodes that consists solely of whitespace. The default value is "false".
 
 ####Example: true
 The following HTML:
-	<font>
-		<br>this is the text
-	<font>
+	`&lt;font>
+		&lt;br>this is the text
+	&lt;font>`
 becomes:
 	[ { raw: 'font'
 	  , data: 'font'
@@ -104,11 +88,11 @@ becomes:
 
 ####Example: false
 The following HTML:
-	<font>
-		<br>this is the text
-	<font>
+	`&lt;font>
+		&lt;br>this is the text
+	&lt;font>`
 becomes:
-	[ { raw: 'font'
+	`[ { raw: 'font'
 	  , data: 'font'
 	  , type: 'tag'
 	  , name: 'font'
@@ -122,65 +106,54 @@ becomes:
 	     , { raw: 'font', data: 'font', type: 'tag', name: 'font' }
 	     ]
 	  }
-	]
+	]`
 
 ###Option: verbose
-Indicates whether to include extra information on each node in the DOM. This information consists of the "raw" attribute (original, unparsed text found between "<" and ">") and the "data" attribute on "tag", "script", and "comment" nodes. The default value is "true". 
+Indicates whether to include extra information on each node in the DOM. This information consists of the "raw" attribute (original, unparsed text found between "&lt;" and ">") and the "data" attribute on "tag", "script", and "comment" nodes. The default value is "true". 
 
 ####Example: true
 The following HTML:
-	<a href="test.html">xxx</a>
+	`&lt;a href="test.html">xxx&lt;/a>`
 becomes:
-	[ { raw: 'a href="test.html"'
+	`[ { raw: 'a href="test.html"'
 	  , data: 'a href="test.html"'
 	  , type: 'tag'
 	  , name: 'a'
 	  , attribs: { href: 'test.html' }
 	  , children: [ { raw: 'xxx', data: 'xxx', type: 'text' } ]
 	  }
-	]
+	]`
 
 ####Example: false
 The following HTML:
-	<a href="test.html">xxx</a>
+	`&lt;a href="test.html">xxx&lt;/a>`
 becomes:
-	[ { type: 'tag'
+	`[ { type: 'tag'
 	  , name: 'a'
 	  , attribs: { href: 'test.html' }
 	  , children: [ { data: 'xxx', type: 'text' } ]
 	  }
-	]
+	]`
 
 ###Option: enforceEmptyTags
 Indicates whether the DOM should prevent children on tags marked as empty in the HTML spec. Typically this should be set to "true" HTML parsing and "false" for XML parsing. The default value is "true".
 
 ####Example: true
 The following HTML:
-	<link>text</link>
+	`&lt;link>text&lt;/link>`
 becomes:
-	[ { raw: 'link', data: 'link', type: 'tag', name: 'link' }
+	`[ { raw: 'link', data: 'link', type: 'tag', name: 'link' }
 	, { raw: 'text', data: 'text', type: 'text' }
-	]
+	]`
 
 ####Example: false
 The following HTML:
-	<link>text</link>
+	`&lt;link>text&lt;/link>`
 becomes:
-	[ { raw: 'link'
+	`[ { raw: 'link'
 	  , data: 'link'
 	  , type: 'tag'
 	  , name: 'link'
 	  , children: [ { raw: 'text', data: 'text', type: 'text' } ]
 	  }
-	]
-
-##DomUtils
-
-###TBD (see utils_example.js for now)
-
-##Related Projects
-
-Looking for CSS selectors to search the DOM? Try Node-SoupSelect, a port of SoupSelect to NodeJS: http://github.com/harryf/node-soupselect
-
-There's also a port of hpricot to NodeJS that uses HtmlParser for HTML parsing: http://github.com/silentrob/Apricot
-
+	]`
\ No newline at end of file

From 63b6e52eaa9bb80609f2362ebc495f0fd97ffd47 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 28 Oct 2011 20:08:56 +0200
Subject: [PATCH 039/450] readme, again

---
 README.md | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/README.md b/README.md
index edd9028..6210a50 100644
--- a/README.md
+++ b/README.md
@@ -9,7 +9,7 @@ A forgiving HTML/XML/RSS parser written in JS for NodeJS. The parser can handle
 
 ##Usage
 	var htmlparser = require("htmlparser");
-	var rawHtml = "Xyz &lt;script language= javascript>var foo = '&lt;&lt;bar>>';&lt; /  script>&lt;!--&lt;!-- Waah! -- -->";
+	var rawHtml = "Xyz <script language= javascript>var foo = '<<bar>>';< /  script><!--<!-- Waah! -- -->";
 	var handler = new htmlparser.DefaultHandler(function (error, dom) {
 		if (error)
 			[...do something for errors...]
@@ -29,14 +29,14 @@ A forgiving HTML/XML/RSS parser written in JS for NodeJS. The parser can handle
 	  , name: 'script'
 	  , attribs: { language: 'javascript' }
 	  , children: 
-	     [ { raw: 'var foo = \'&lt;bar>\';&lt;'
-	       , data: 'var foo = \'&lt;bar>\';&lt;'
+	     [ { raw: 'var foo = \'<bar>\';<'
+	       , data: 'var foo = \'<bar>\';<'
 	       , type: 'text'
 	       }
 	     ]
 	  }
-	, { raw: '&lt;!-- Waah! -- '
-	  , data: '&lt;!-- Waah! -- '
+	, { raw: '<!-- Waah! -- '
+	  , data: '<!-- Waah! -- '
 	  , type: 'comment'
 	  }
 	]
@@ -67,9 +67,9 @@ Indicates whether the DOM should exclude text nodes that consists solely of whit
 
 ####Example: true
 The following HTML:
-	`&lt;font>
-		&lt;br>this is the text
-	&lt;font>`
+	`<font>
+		<br>this is the text
+	<font>`
 becomes:
 	[ { raw: 'font'
 	  , data: 'font'
@@ -88,9 +88,9 @@ becomes:
 
 ####Example: false
 The following HTML:
-	`&lt;font>
-		&lt;br>this is the text
-	&lt;font>`
+	`<font>
+		<br>this is the text
+	<font>`
 becomes:
 	`[ { raw: 'font'
 	  , data: 'font'
@@ -109,11 +109,11 @@ becomes:
 	]`
 
 ###Option: verbose
-Indicates whether to include extra information on each node in the DOM. This information consists of the "raw" attribute (original, unparsed text found between "&lt;" and ">") and the "data" attribute on "tag", "script", and "comment" nodes. The default value is "true". 
+Indicates whether to include extra information on each node in the DOM. This information consists of the "raw" attribute (original, unparsed text found between "<" and ">") and the "data" attribute on "tag", "script", and "comment" nodes. The default value is "true". 
 
 ####Example: true
 The following HTML:
-	`&lt;a href="test.html">xxx&lt;/a>`
+	`<a href="test.html">xxx</a>`
 becomes:
 	`[ { raw: 'a href="test.html"'
 	  , data: 'a href="test.html"'
@@ -126,7 +126,7 @@ becomes:
 
 ####Example: false
 The following HTML:
-	`&lt;a href="test.html">xxx&lt;/a>`
+	`<a href="test.html">xxx</a>`
 becomes:
 	`[ { type: 'tag'
 	  , name: 'a'
@@ -140,7 +140,7 @@ Indicates whether the DOM should prevent children on tags marked as empty in the
 
 ####Example: true
 The following HTML:
-	`&lt;link>text&lt;/link>`
+	`<link>text</link>`
 becomes:
 	`[ { raw: 'link', data: 'link', type: 'tag', name: 'link' }
 	, { raw: 'text', data: 'text', type: 'text' }
@@ -148,7 +148,7 @@ becomes:
 
 ####Example: false
 The following HTML:
-	`&lt;link>text&lt;/link>`
+	`<link>text</link>`
 becomes:
 	`[ { raw: 'link'
 	  , data: 'link'

From 06c05e3499f51464545a05517bcd28da373b4bd0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 28 Oct 2011 20:11:36 +0200
Subject: [PATCH 040/450] Added lines to readme

---
 README.md | 23 ++++++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 6210a50..f77485f 100644
--- a/README.md
+++ b/README.md
@@ -49,7 +49,6 @@ A forgiving HTML/XML/RSS parser written in JS for NodeJS. The parser can handle
 	parser.done();`
 
 ##Parsing RSS/Atom Feeds
-
 	`new htmlparser.RssHandler(function (error, dom) {
 		...
 	});`
@@ -67,11 +66,14 @@ Indicates whether the DOM should exclude text nodes that consists solely of whit
 
 ####Example: true
 The following HTML:
+
 	`<font>
 		<br>this is the text
 	<font>`
+
 becomes:
-	[ { raw: 'font'
+
+	`[ { raw: 'font'
 	  , data: 'font'
 	  , type: 'tag'
 	  , name: 'font'
@@ -84,14 +86,17 @@ becomes:
 	     , { raw: 'font', data: 'font', type: 'tag', name: 'font' }
 	     ]
 	  }
-	]
+	]`
 
 ####Example: false
 The following HTML:
+
 	`<font>
 		<br>this is the text
 	<font>`
+
 becomes:
+
 	`[ { raw: 'font'
 	  , data: 'font'
 	  , type: 'tag'
@@ -113,8 +118,11 @@ Indicates whether to include extra information on each node in the DOM. This inf
 
 ####Example: true
 The following HTML:
+
 	`<a href="test.html">xxx</a>`
+
 becomes:
+
 	`[ { raw: 'a href="test.html"'
 	  , data: 'a href="test.html"'
 	  , type: 'tag'
@@ -126,8 +134,11 @@ becomes:
 
 ####Example: false
 The following HTML:
+
 	`<a href="test.html">xxx</a>`
+
 becomes:
+
 	`[ { type: 'tag'
 	  , name: 'a'
 	  , attribs: { href: 'test.html' }
@@ -140,16 +151,22 @@ Indicates whether the DOM should prevent children on tags marked as empty in the
 
 ####Example: true
 The following HTML:
+
 	`<link>text</link>`
+
 becomes:
+
 	`[ { raw: 'link', data: 'link', type: 'tag', name: 'link' }
 	, { raw: 'text', data: 'text', type: 'text' }
 	]`
 
 ####Example: false
 The following HTML:
+
 	`<link>text</link>`
+
 becomes:
+
 	`[ { raw: 'link'
 	  , data: 'link'
 	  , type: 'tag'

From 1dcb73863fe368b0718e991ffa5226f438bfd59e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 28 Oct 2011 20:13:43 +0200
Subject: [PATCH 041/450] removed backticks from readme

---
 README.md | 56 +++++++++++++++++++++++++++----------------------------
 1 file changed, 28 insertions(+), 28 deletions(-)

diff --git a/README.md b/README.md
index f77485f..df19abc 100644
--- a/README.md
+++ b/README.md
@@ -2,10 +2,10 @@
 A forgiving HTML/XML/RSS parser written in JS for NodeJS. The parser can handle streams (chunked data) and supports custom handlers for writing custom DOMs/output.
 
 ##Installing
-	`npm install htmlparser`
+	npm install htmlparser
 
 ##Running Tests
-	`node tests/00-runtests.js`
+	node tests/00-runtests.js
 
 ##Usage
 	var htmlparser = require("htmlparser");
@@ -42,24 +42,24 @@ A forgiving HTML/XML/RSS parser written in JS for NodeJS. The parser can handle
 	]
 
 ##Streaming To Parser
-	`while (...) {
+	while (...) {
 		...
 		parser.parseChunk(chunk);
 	}
-	parser.done();`
+	parser.done();
 
 ##Parsing RSS/Atom Feeds
-	`new htmlparser.RssHandler(function (error, dom) {
+	new htmlparser.RssHandler(function (error, dom) {
 		...
-	});`
+	});
 
 ##DefaultHandler Options
 
 ###Usage
-	`var handler = new htmlparser.DefaultHandler(
+	var handler = new htmlparser.DefaultHandler(
 		  function (error) { ... }
 		, { verbose: false, ignoreWhitespace: true }
-		);`
+		);
 	
 ###Option: ignoreWhitespace
 Indicates whether the DOM should exclude text nodes that consists solely of whitespace. The default value is "false".
@@ -67,13 +67,13 @@ Indicates whether the DOM should exclude text nodes that consists solely of whit
 ####Example: true
 The following HTML:
 
-	`<font>
+	<font>
 		<br>this is the text
-	<font>`
+	<font>
 
 becomes:
 
-	`[ { raw: 'font'
+	[ { raw: 'font'
 	  , data: 'font'
 	  , type: 'tag'
 	  , name: 'font'
@@ -86,18 +86,18 @@ becomes:
 	     , { raw: 'font', data: 'font', type: 'tag', name: 'font' }
 	     ]
 	  }
-	]`
+	]
 
 ####Example: false
 The following HTML:
 
-	`<font>
+	<font>
 		<br>this is the text
-	<font>`
+	<font>
 
 becomes:
 
-	`[ { raw: 'font'
+	[ { raw: 'font'
 	  , data: 'font'
 	  , type: 'tag'
 	  , name: 'font'
@@ -111,7 +111,7 @@ becomes:
 	     , { raw: 'font', data: 'font', type: 'tag', name: 'font' }
 	     ]
 	  }
-	]`
+	]
 
 ###Option: verbose
 Indicates whether to include extra information on each node in the DOM. This information consists of the "raw" attribute (original, unparsed text found between "<" and ">") and the "data" attribute on "tag", "script", and "comment" nodes. The default value is "true". 
@@ -119,32 +119,32 @@ Indicates whether to include extra information on each node in the DOM. This inf
 ####Example: true
 The following HTML:
 
-	`<a href="test.html">xxx</a>`
+	<a href="test.html">xxx</a>
 
 becomes:
 
-	`[ { raw: 'a href="test.html"'
+	[ { raw: 'a href="test.html"'
 	  , data: 'a href="test.html"'
 	  , type: 'tag'
 	  , name: 'a'
 	  , attribs: { href: 'test.html' }
 	  , children: [ { raw: 'xxx', data: 'xxx', type: 'text' } ]
 	  }
-	]`
+	]
 
 ####Example: false
 The following HTML:
 
-	`<a href="test.html">xxx</a>`
+	<a href="test.html">xxx</a>
 
 becomes:
 
-	`[ { type: 'tag'
+	[ { type: 'tag'
 	  , name: 'a'
 	  , attribs: { href: 'test.html' }
 	  , children: [ { data: 'xxx', type: 'text' } ]
 	  }
-	]`
+	]
 
 ###Option: enforceEmptyTags
 Indicates whether the DOM should prevent children on tags marked as empty in the HTML spec. Typically this should be set to "true" HTML parsing and "false" for XML parsing. The default value is "true".
@@ -152,25 +152,25 @@ Indicates whether the DOM should prevent children on tags marked as empty in the
 ####Example: true
 The following HTML:
 
-	`<link>text</link>`
+	<link>text</link>
 
 becomes:
 
-	`[ { raw: 'link', data: 'link', type: 'tag', name: 'link' }
+	[ { raw: 'link', data: 'link', type: 'tag', name: 'link' }
 	, { raw: 'text', data: 'text', type: 'text' }
-	]`
+	]
 
 ####Example: false
 The following HTML:
 
-	`<link>text</link>`
+	<link>text</link>
 
 becomes:
 
-	`[ { raw: 'link'
+	[ { raw: 'link'
 	  , data: 'link'
 	  , type: 'tag'
 	  , name: 'link'
 	  , children: [ { raw: 'text', data: 'text', type: 'text' } ]
 	  }
-	]`
\ No newline at end of file
+	]
\ No newline at end of file

From 551fe4001e8e893a7a04aab5df150bfa5c638573 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 28 Oct 2011 20:18:48 +0200
Subject: [PATCH 042/450] Removed last try-catch-blocks from RssHandler

---
 lib/RssHandler.js | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib/RssHandler.js b/lib/RssHandler.js
index 8481e9c..bd6db17 100644
--- a/lib/RssHandler.js
+++ b/lib/RssHandler.js
@@ -67,8 +67,8 @@ RssHandler.prototype.done = function() {
 				feed.id = tmp;
 			if(tmp = fetch("title", feedRoot.children))
 				feed.title = tmp;
-			try{ feed.link = getElements("link", feedRoot.children, true).attribs.href;
-			}catch (ex){}
+			if((tmp = getElements("link", feedRoot.children, true)) && (tmp = tmp.attribs) && (tmp = tmp.href))
+				feed.link = tmp;
 			if(tmp = fetch("subtitle", feedRoot.children))
 				feed.description = tmp;
 			if(tmp = fetch("updated", feedRoot.children))
@@ -82,8 +82,8 @@ RssHandler.prototype.done = function() {
 					entry.id = tmp;
 				if(tmp = fetch("title", item.children))
 					entry.title = tmp;
-				try { entry.link = getElements("link", item.children, true).attribs.href;
-				} catch(ex){}
+				if((tmp = getElements("link", item.children, true)) && (tmp = tmp.attribs) && (tmp = tmp.href))
+					entry.link = tmp;
 				if(tmp = fetch("summary", item.children))
 					entry.description = tmp;
 				if(tmp = fetch("updated", item.children))

From de62ff5a3ca20a47a39a0269f938667c311e71f4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 28 Oct 2011 20:48:59 +0200
Subject: [PATCH 043/450] parseAttributes now returns an object

---
 lib/Parser.js | 50 ++++++++++++++++++++++++++------------------------
 1 file changed, 26 insertions(+), 24 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index 7bab01f..64c16fa 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -1,6 +1,6 @@
 var ElementType = require("./ElementType.js");
 
-function Parser (handler, options){
+function Parser(handler, options){
 	this._options = options || {
 		includeLocation: false, //Do not track element position in document by default
 		xmlMode: false //Special behaviour for script/style tags by default
@@ -99,7 +99,7 @@ Parser.prototype.reset = function(){
 
 //**Private**//
 //Takes an element and adds an "attribs" property for any element attributes found
-var parseAttribs = function(element){
+var parseAttributes = function(element){
 	//Only parse attributes for tags
 	if(!tagTypes[element.type]) return;
 
@@ -109,17 +109,16 @@ var parseAttribs = function(element){
 	if(attribRaw === "") return;
 
 	_reAttrib.lastIndex = 0;
-	var match = _reAttrib.exec(attribRaw);
-	if(match){
-		element.attribs = {};
-		do{
-			if(match[1])		element.attribs[match[1]] = match[2];
-			else if(match[3])	element.attribs[match[3]] = match[4];
-			else if(match[5])	element.attribs[match[5]] = match[6];
-			else if(match[7])	element.attribs[match[7]] = match[7];
-		}
-		while(match = _reAttrib.exec(attribRaw));
-	}
+	var match, attrs = {};
+	
+	while(match = _reAttrib.exec(attribRaw)){
+    	if(match[1])		attrs[match[1]] = match[2];
+    	else if(match[3])	attrs[match[3]] = match[4];
+    	else if(match[5])	attrs[match[5]] = match[6];
+    	else if(match[7])	attrs[match[7]] = match[7];
+    }
+    
+	return attrs;
 };
 
 //Extracts the base tag name from the data value of an element
@@ -135,37 +134,38 @@ var parseTagName = function(data){
 Parser.prototype.parseTags = function(){
 	var buffer = this._buffer, stack = this._tagStack, handler = this._handler;
 
-	var next, type, tagSep, rawData, element, elementName, prevElement, elementType, elementData, includeName = false;
+	var next, type, tagSep, rawData, element, elementName, prevElement, elementType, elementData, attributes, includeName = false;
 	
 	var opening = buffer.indexOf("<"), closing = buffer.indexOf(">");
 
 	while(opening !== -1 || closing !== -1){
 		if(closing === -1 || (opening !== -1 && opening < closing)){
 			next = opening;
-			opening = buffer.indexOf(tagSep = "<", next + 1);
+			tagSep = "<";
+			opening = buffer.indexOf(tagSep, next + 1);
 		}
 		else{
 			next = closing;
-			closing = buffer.indexOf(tagSep = ">", next + 1);
+			tagSep = ">";
+			closing = buffer.indexOf(tagSep, next + 1);
 		}
 		rawData = buffer.substring(this._current, next); //The next chunk of data to parse
 		elementType = this._parseState;
 		
-		if(elementType === ElementType.Text || tagSep === "<"){
-			elementData = rawData;
-			elementName = "";
-		}
-		else{
+		if(elementType === ElementType.Tag && tagSep === ">"){
 			elementData = rawData.trim();
 			elementName = parseTagName(elementData);
 		}
+		else{
+			elementData = rawData;
+			elementName = "";
+		}
 		type = stack.slice(-1)[0];
 
 
 		//This section inspects the current tag stack and modifies the current
 		//element if we're actually parsing a special area (script/comment/style tag)
-		if(!type){ /* nothing */ }
-		else if(type === ElementType.Script && elementName === "/script") stack.pop();
+		if(type === ElementType.Script && elementName === "/script") stack.pop();
 		else if(type === ElementType.Style && elementName === "/style") stack.pop();
 		else if(!this._options.xmlMode && (type === ElementType.Script || type === ElementType.Style)){
 			//special behaviour for script & style tags
@@ -261,7 +261,9 @@ Parser.prototype.parseTags = function(){
 			}
 			if(this._options.includeLocation) element.location = this.getLocation(elementType === ElementType.Tag);
 			
-			parseAttribs(element);
+			attributes = parseAttributes(element);
+			if(attributes) element.attribs = attributes;
+			
 			this._elements.push(element);
 			
 			/*

From 586fa63d759e4fa34e707edf1bb5d6cfaab116b2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 28 Oct 2011 21:00:47 +0200
Subject: [PATCH 044/450] parseAttributes now accepts an elements data

---
 lib/Parser.js | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index 64c16fa..f00fbfd 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -99,13 +99,10 @@ Parser.prototype.reset = function(){
 
 //**Private**//
 //Takes an element and adds an "attribs" property for any element attributes found
-var parseAttributes = function(element){
-	//Only parse attributes for tags
-	if(!tagTypes[element.type]) return;
-
-	var pos = element.data.search(_reWhitespace);
+var parseAttributes = function(data){
+	var pos = data.search(_reWhitespace);
 	if(pos === -1) return;
-	var attribRaw = element.data.substr(pos);
+	var attribRaw = data.substr(pos);
 	if(attribRaw === "") return;
 
 	_reAttrib.lastIndex = 0;
@@ -123,7 +120,6 @@ var parseAttributes = function(element){
 
 //Extracts the base tag name from the data value of an element
 var parseTagName = function(data){
-	if(!data) return "";
 	var match = data.match(_reTagName);
 	if(match === null) return "";
 	return match[1] + match[2];
@@ -261,26 +257,30 @@ Parser.prototype.parseTags = function(){
 			}
 			if(this._options.includeLocation) element.location = this.getLocation(elementType === ElementType.Tag);
 			
-			attributes = parseAttributes(element);
-			if(attributes) element.attribs = attributes;
+			//Only parse attributes for tags
+			if(tagTypes[element.type]){
+				attributes = parseAttributes(elementData);
+				if(attributes) element.attribs = attributes;
+			}
 			
 			this._elements.push(element);
 			
 			/*
 			switch(elementType){
 				case ElementType.Text:
-					this._handler.ontext(element);
+					this._handler.ontext(rawData);
+					break;
 				case ElementType.Tag:
 				case ElementType.Style:
 				case ElementType.Script:
 					if(elementName[0] === "/") this._handler.onclosetag(elementName.substr(1));
-					else this._handler.onopentag(element);
+					else this._handler.onopentag(elementName, parseAttributes(elementData));
 					break;
 				case ElementType.Comment:
-					this._handler.oncomment(element);
+					this._handler.oncomment(rawData);
 					break;
 				case ElementType.Directive:
-					this._handler.onprocessinginstruction;
+					this._handler.onprocessinginstruction(rawData);
 					break;
 				default: throw Error("Unsupported type: " + elementType);
 			}

From 847edf7bade396612667c5f065003b76f22f9016 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 28 Oct 2011 21:23:25 +0200
Subject: [PATCH 045/450] minor improvements

---
 lib/Parser.js | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index f00fbfd..5a7370f 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -128,7 +128,7 @@ var parseTagName = function(data){
 //Parses through HTML text and returns an array of found elements
 //I admit, this function is rather large but splitting up had an noticeable impact on speed
 Parser.prototype.parseTags = function(){
-	var buffer = this._buffer, stack = this._tagStack, handler = this._handler;
+	var buffer = this._buffer, stack = this._tagStack;
 
 	var next, type, tagSep, rawData, element, elementName, prevElement, elementType, elementData, attributes, includeName = false;
 	
@@ -148,7 +148,7 @@ Parser.prototype.parseTags = function(){
 		rawData = buffer.substring(this._current, next); //The next chunk of data to parse
 		elementType = this._parseState;
 		
-		if(elementType === ElementType.Tag && tagSep === ">"){
+		if(elementType === ElementType.Tag){
 			elementData = rawData.trim();
 			elementName = parseTagName(elementData);
 		}
@@ -294,9 +294,9 @@ Parser.prototype.parseTags = function(){
 			){
 				//this._handler.onclosetag(elementName);
 				this._elements.push({
-					  raw: "/" + elementName
-					, data: "/" + elementName
-					, name: "/" + elementName
+					  raw: elementName = "/" + elementName
+					, data: elementName
+					, name: elementName
 					, type: elementType
 				});
 			}

From e6d8f747fc3b8a305db49b5a2aa3ea041c6598c4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 29 Oct 2011 10:16:52 +0200
Subject: [PATCH 046/450] Restructured some code

---
 lib/Parser.js | 40 +++++++++++++++++++---------------------
 1 file changed, 19 insertions(+), 21 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index 5a7370f..58fd89b 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -103,7 +103,6 @@ var parseAttributes = function(data){
 	var pos = data.search(_reWhitespace);
 	if(pos === -1) return;
 	var attribRaw = data.substr(pos);
-	if(attribRaw === "") return;
 
 	_reAttrib.lastIndex = 0;
 	var match, attrs = {};
@@ -126,7 +125,6 @@ var parseTagName = function(data){
 };
 
 //Parses through HTML text and returns an array of found elements
-//I admit, this function is rather large but splitting up had an noticeable impact on speed
 Parser.prototype.parseTags = function(){
 	var buffer = this._buffer, stack = this._tagStack;
 
@@ -156,31 +154,16 @@ Parser.prototype.parseTags = function(){
 			elementData = rawData;
 			elementName = "";
 		}
-		type = stack.slice(-1)[0];
-
+		
+		type = stack[stack.length-1];
 
 		//This section inspects the current tag stack and modifies the current
 		//element if we're actually parsing a special area (script/comment/style tag)
-		if(type === ElementType.Script && elementName === "/script") stack.pop();
-		else if(type === ElementType.Style && elementName === "/style") stack.pop();
-		else if(!this._options.xmlMode && (type === ElementType.Script || type === ElementType.Style)){
-			//special behaviour for script & style tags
-			if(rawData.substring(0, 3) !== "!--"){ //Make sure we're not in a comment
-				//All data from here to style close is now a text element
-			    elementType = ElementType.Text;
-			    //If the previous element is text, append the current text to it
-			    prevElement = this._elements && this._elements[this._elements.length - 1];
-			    if(prevElement && prevElement.type === ElementType.Text){
-			    	prevElement.data = prevElement.raw += this._prevTagSep + rawData;
-			    	rawData = elementData = ""; //This causes the current element to not be added to the element list
-			    } else elementData = rawData; //The previous element was not text
-			}
-		}
-		else if(type === ElementType.Comment){ //We're currently in a comment tag
+		if(type === ElementType.Comment){ //We're currently in a comment tag
 
 			prevElement = this._elements && this._elements[this._elements.length - 1];
 
-    		if(rawData.substr(-2) === "--" && tagSep === ">"){
+    		if(tagSep === ">" && rawData.substr(-2) === "--"){
     			stack.pop();
     			rawData = rawData.slice(0, -2);
     			//If the previous element is a comment, append the current text to it
@@ -202,6 +185,21 @@ Parser.prototype.parseTags = function(){
     			else elementData = rawData += tagSep;
     		}
     	}
+		else if(type === ElementType.Script && elementName === "/script") stack.pop();
+		else if(type === ElementType.Style && elementName === "/style") stack.pop();
+		else if(!this._options.xmlMode && (type === ElementType.Script || type === ElementType.Style)){
+			//special behaviour for script & style tags
+			if(rawData.substring(0, 3) !== "!--"){ //Make sure we're not in a comment
+				//All data from here to style close is now a text element
+			    elementType = ElementType.Text;
+			    //If the previous element is text, append the current text to it
+			    prevElement = this._elements && this._elements[this._elements.length - 1];
+			    if(prevElement && prevElement.type === ElementType.Text){
+			    	prevElement.data = prevElement.raw += this._prevTagSep + rawData;
+			    	rawData = elementData = ""; //This causes the current element to not be added to the element list
+			    } else elementData = rawData; //The previous element was not text
+			}
+		}
 
 
From fd28431a04d200f0d422b802fcce5bebdbeb036a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 29 Oct 2011 11:43:15 +0200
Subject: [PATCH 047/450] bugfix

---
 lib/Parser.js | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index 58fd89b..b322874 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -70,7 +70,8 @@ Parser.prototype.done = function(){
 			};
 		if(tagTypes[this._parseState]){
 			element.name = parseTagName(element.data);
-			parseAttribs(element);
+			var attrs = parseAttributes(element.data);
+			if(attrs) element.attribs = attrs;
 		}
 		this._elements.push(element);
 	}
@@ -157,6 +158,7 @@ Parser.prototype.parseTags = function(){
 		
 		type = stack[stack.length-1];
 
+
 		//This section inspects the current tag stack and modifies the current
 		//element if we're actually parsing a special area (script/comment/style tag)
 		if(type === ElementType.Comment){ //We're currently in a comment tag
@@ -310,7 +312,7 @@ Parser.prototype.parseTags = function(){
 		this._location.inBuffer = 0;
 		this._location.charOffset = 0;
 	}
-	this._buffer = this._buffer.substring(this._current);
+	this._buffer = buffer.substring(this._current);
 	this._current = 0;
 
 	this.writeHandler();

From 74a43d33012f06bd7da62b1466cdf571b16b9c90 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 29 Oct 2011 11:49:43 +0200
Subject: [PATCH 048/450] Small adjustments

---
 lib/Parser.js | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index b322874..70d2397 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -133,7 +133,7 @@ Parser.prototype.parseTags = function(){
 	
 	var opening = buffer.indexOf("<"), closing = buffer.indexOf(">");
 
-	while(opening !== -1 || closing !== -1){
+	while(opening !== closing){ //just false if both are -1
 		if(closing === -1 || (opening !== -1 && opening < closing)){
 			next = opening;
 			tagSep = "<";
@@ -163,15 +163,16 @@ Parser.prototype.parseTags = function(){
 		//element if we're actually parsing a special area (script/comment/style tag)
 		if(type === ElementType.Comment){ //We're currently in a comment tag
 
-			prevElement = this._elements && this._elements[this._elements.length - 1];
+			prevElement = this._elements[this._elements.length - 1];
 
-    		if(tagSep === ">" && rawData.substr(-2) === "--"){
+    		if(tagSep === ">" && rawData.substr(-2) === "--"){ //comment ends
     			stack.pop();
     			rawData = rawData.slice(0, -2);
     			//If the previous element is a comment, append the current text to it
     			if(prevElement && prevElement.type === ElementType.Comment){ //Previous element was a comment
     				prevElement.data = prevElement.raw += rawData;
-    				rawData = elementData = ""; //This causes the current element to not be added to the element list
+    				//This causes the current element to not be added to the element list
+    				rawData = elementData = "";
     				elementType = ElementType.Text;
     			}
     			else elementType = ElementType.Comment; //Change the current element's type to a comment
@@ -181,7 +182,8 @@ Parser.prototype.parseTags = function(){
     			//If the previous element is a comment, append the current text to it
     			if(prevElement && prevElement.type === ElementType.Comment){
     				prevElement.data = prevElement.raw += rawData + tagSep;
-    				rawData = elementData = ""; //This causes the current element to not be added to the element list
+    				//This causes the current element to not be added to the element list
+    				rawData = elementData = "";
     				elementType = ElementType.Text;
     			}
     			else elementData = rawData += tagSep;
@@ -195,10 +197,11 @@ Parser.prototype.parseTags = function(){
 				//All data from here to style close is now a text element
 			    elementType = ElementType.Text;
 			    //If the previous element is text, append the current text to it
-			    prevElement = this._elements && this._elements[this._elements.length - 1];
+			    prevElement = this._elements[this._elements.length - 1];
 			    if(prevElement && prevElement.type === ElementType.Text){
 			    	prevElement.data = prevElement.raw += this._prevTagSep + rawData;
-			    	rawData = elementData = ""; //This causes the current element to not be added to the element list
+			    	//This causes the current element to not be added to the element list
+			    	rawData = elementData = "";
 			    } else elementData = rawData; //The previous element was not text
 			}
 		}

From 13c907542a0741796084f6d1dd1e0e06fb2b882d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 4 Nov 2011 09:31:31 +0100
Subject: [PATCH 049/450] 1.2.0

---
 package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package.json b/package.json
index c5e6fa3..35a642c 100644
--- a/package.json
+++ b/package.json
@@ -1,7 +1,7 @@
 {
 	  "name": "htmlparser2"
 	, "description": "Forgiving HTML/XML/RSS Parser for Node. This version is optimised and cleaned and provides a SAX interface (EventedHandler)."
-	, "version": "1.1.0"
+	, "version": "1.2.0"
 	, "author": "Felix Boehm <me@feedic.com>"
 	, "contributors": [ "Chris Winberry <chris@winberry.net>" ]
 	, "repository": {

From bd0f9a012deac796852d4591b80397d69f5051b3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 4 Nov 2011 17:53:15 +0100
Subject: [PATCH 050/450] Added RDF feed support to RssHandler

Idea & part of code from
https://github.com/tautologistics/node-htmlparser/pull/35
---
 lib/RssHandler.js    | 98 +++++++++++++++++++++++++-------------------
 tests/00-runtests.js |  3 ++
 tests/24-rdf.js      | 15 +++++++
 3 files changed, 74 insertions(+), 42 deletions(-)
 create mode 100644 tests/24-rdf.js

diff --git a/lib/RssHandler.js b/lib/RssHandler.js
index bd6db17..ac0b9eb 100644
--- a/lib/RssHandler.js
+++ b/lib/RssHandler.js
@@ -4,11 +4,13 @@ var DefaultHandler = require("./DefaultHandler.js"),
 
 //TODO: make this a trully streamable handler
 function RssHandler (callback) {
-	RssHandler.super_.call(this, callback, { ignoreWhitespace: true, verbose: false, enforceEmptyTags: false });
+	DefaultHandler.call(this, callback, { ignoreWhitespace: true, verbose: false, enforceEmptyTags: false });
 }
 
 inherits(RssHandler, DefaultHandler);
 
+RssHandler.prototype.done = DefaultHandler.prototype.done;
+
 function getElements(what, where, one, recurse){
 	var ret = DomUtils.getElementsByTagName(what, where, !!recurse);
 	if(one)
@@ -22,79 +24,91 @@ function fetch(what, where, recurse){
 	else return false;
 }
 
+var isValidFeed = function(value) {
+	return value === "rss" || value === "feed" || value === "rdf:RDF";
+}
+
 RssHandler.prototype.done = function() {
-	var feed = { };
+	var feed = {};
 	var feedRoot;
-	var tmp;
+	var tmp, items, childs;
 
-	var found = getElements(function(value) { return value === "rss" || value === "feed"; }, this.dom);
-	if (found.length) {
-		feedRoot = found[0];
-	}
+	feedRoot = getElements(isValidFeed, this.dom, true);
 	if (feedRoot) {
-		if (feedRoot.name === "rss") {
-			feed.type = "rss";
-			feedRoot = feedRoot.children[0]; //<channel/>
-			feed.id = "";
-			if(tmp = fetch("title", feedRoot.children))
+		if(feedRoot.name === "rdf:RDF"){
+			items = getElements("item", feedRoot.children);
+			childs = getElements("channel", feedRoot.children, true).children;
+		}
+		else if(feedRoot.name === "rss"){
+			childs = feedRoot.children[0].children;
+			items = getElements("item", childs);
+		}
+		else{
+			childs = feedRoot.children;
+			items = getElements("entry", childs);
+		}
+		
+		if (feedRoot.name === "feed"){
+			feed.type = "atom";
+			if(tmp = fetch("id", childs))
+				feed.id = tmp;
+			if(tmp = fetch("title", childs))
 				feed.title = tmp;
-			if(tmp = fetch("link", feedRoot.children))
+			if((tmp = getElements("link", childs, true)) && (tmp = tmp.attribs) && (tmp = tmp.href))
 				feed.link = tmp;
-			if(tmp = fetch("description", feedRoot.children))
+			if(tmp = fetch("subtitle", childs))
 				feed.description = tmp;
-			if(tmp = fetch("lastBuildDate", feedRoot.children))
+			if(tmp = fetch("updated", childs))
 				feed.updated = new Date(tmp);
-			if(tmp = fetch("managingEditor", feedRoot.children))
+			if(tmp = fetch("email", childs, true))
 				feed.author = tmp;
-			feed.items = [];
-			getElements("item", feedRoot.children).forEach(function (item, index, list) {
-				var entry = {};
-				if(tmp = fetch("guid", item.children))
+			feed.items = Array(items.length);
+			items.forEach(function(item, i){
+				var entry = {}, tmp;
+				if(tmp = fetch("id", item.children))
 					entry.id = tmp;
 				if(tmp = fetch("title", item.children))
 					entry.title = tmp;
-				if(tmp = fetch("link", item.children))
+				if((tmp = getElements("link", item.children, true)) && (tmp = tmp.attribs) && (tmp = tmp.href))
 					entry.link = tmp;
-				if(tmp = fetch("description", item.children))
+				if(tmp = fetch("summary", item.children))
 					entry.description = tmp;
-				if(tmp = fetch("pubDate", item.children))
+				if(tmp = fetch("updated", item.children))
 					entry.pubDate = new Date(tmp);
-				feed.items.push(entry);
+				feed.items[i] = entry;
 			});
 		} else {
-			feed.type = "atom";
-			if(tmp = fetch("id", feedRoot.children))
-				feed.id = tmp;
-			if(tmp = fetch("title", feedRoot.children))
+			feed.type = feedRoot.name;
+			feed.id = "";
+			if(tmp = fetch("title", childs))
 				feed.title = tmp;
-			if((tmp = getElements("link", feedRoot.children, true)) && (tmp = tmp.attribs) && (tmp = tmp.href))
+			if(tmp = fetch("link", childs))
 				feed.link = tmp;
-			if(tmp = fetch("subtitle", feedRoot.children))
+			if(tmp = fetch("description", childs))
 				feed.description = tmp;
-			if(tmp = fetch("updated", feedRoot.children))
+			if(tmp = fetch("lastBuildDate", childs))
 				feed.updated = new Date(tmp);
-			if(tmp = fetch("email", feedRoot.children, true))
+			if(tmp = fetch("managingEditor", childs))
 				feed.author = tmp;
-			feed.items = [];
-			getElements("entry", feedRoot.children).forEach(function (item, index, list) {
-				var entry = {};
-				if(tmp = fetch("id", item.children))
+			feed.items = Array(items.length);
+			items.forEach(function(item, i){
+				var entry = {}, tmp;
+				if(tmp = fetch("guid", item.children))
 					entry.id = tmp;
 				if(tmp = fetch("title", item.children))
 					entry.title = tmp;
-				if((tmp = getElements("link", item.children, true)) && (tmp = tmp.attribs) && (tmp = tmp.href))
+				if(tmp = fetch("link", item.children))
 					entry.link = tmp;
-				if(tmp = fetch("summary", item.children))
+				if(tmp = fetch("description", item.children))
 					entry.description = tmp;
-				if(tmp = fetch("updated", item.children))
+				if(tmp = fetch("pubDate", item.children))
 					entry.pubDate = new Date(tmp);
-				feed.items.push(entry);
+				feed.items[i] = entry;
 			});
 		}
-
 		this.dom = feed;
 	}
-	RssHandler.super_.prototype.done.call(this);
+	this.done;
 };
 
 module.exports = RssHandler;
\ No newline at end of file
diff --git a/tests/00-runtests.js b/tests/00-runtests.js
index 071bf64..55e3204 100644
--- a/tests/00-runtests.js
+++ b/tests/00-runtests.js
@@ -9,7 +9,10 @@ var testFiles = fs.readdirSync(testFolder);
 var testCount = 0;
 var failedCount = 0;
 var totalTime = 0;
+var name = __filename.split("/").slice(-1)[0];
 for (var i = 1; i < testFiles.length; i++) {
+	if(testFiles[i] === name) continue;
+	console.log(testFiles[i], __filename);
 	testCount++;
 	var moduleName = testFiles[i];
 	var test = require(testFolder + "/" + moduleName);
diff --git a/tests/24-rdf.js b/tests/24-rdf.js
new file mode 100644
index 0000000..2019d3a
--- /dev/null
+++ b/tests/24-rdf.js
@@ -0,0 +1,15 @@
+exports.name = "RDF test";
+exports.type = "rss";
+exports.options = {handler: {}, parser: {}};
+exports.html = '<?xml version="1.0" encoding="UTF-8"?>\n<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/" xmlns:ev="http://purl.org/rss/1.0/modules/event/" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:syn="http://purl.org/rss/1.0/modules/syndication/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:admin="http://webns.net/mvcb/">\n\t<channel rdf:about="http://sfbay.craigslist.org/ccc/">\n\t\t<title>craigslist | all community in SF bay area</title>\n\t\t<link>http://sfbay.craigslist.org/ccc/</link>\n\t\t<description/>\n\t\t<dc:language>en-us</dc:language>\n\t\t<dc:rights>Copyright 2011 craigslist, inc.</dc:rights>\n\t\t<dc:publisher>webmaster@craigslist.org</dc:publisher>\n\t\t<dc:creator>webmaster@craigslist.org</dc:creator>\n\t\t<dc:source>http://sfbay.craigslist.org/ccc//</dc:source>\n\t\t<dc:title>craigslist | all community in SF bay area</dc:title>\n\t\t<dc:type>Collection</dc:type>\n\t\t<syn:updateBase>2011-11-04T09:39:10-07:00</syn:updateBase>\n\t\t<syn:updateFrequency>4</syn:updateFrequency>\n\t\t<syn:updatePeriod>hourly</syn:updatePeriod>\n\t\t<items>\n\t\t\t<rdf:Seq>\n\t\t\t\t<rdf:li rdf:resource="http://sfbay.craigslist.org/sby/muc/2681301534.html"/>\n\t\t\t</rdf:Seq>\n\t\t</items>\n\t</channel>\n\t<item rdf:about="http://sfbay.craigslist.org/sby/muc/2681301534.html">\n\t\t<title><![CDATA[ Music Equipment Repair and Consignment ]]></title>\n\t\t<link>\nhttp://sfbay.craigslist.org/sby/muc/2681301534.html\n</link>\n\t\t<description><![CDATA[\nSan Jose Rock Shop offers musical instrument repair and consignment! (408) 215-2065<br> <br> We are pleased to announce our NEW LOCATION: 1199 N 5th st. San Jose, ca 95112. Please call ahead, by appointment only.<br> <br> Recently featured by Metro Newspaper in their 2011 Best of the Silicon Valley edition see it online here:<br> <a href="http://www.metroactive.com/best-of-silicon-valley/2011/music-nightlife/editor-picks.html" rel="nofollow">http://www.metroactive.com/best-of-silicon-valley/2011/music-nightlife/editor-picks.html</a><br> <br> Guitar Set up (acoustic and electronic) $40!<!-- END CLTAGS -->\n]]></description>\n\t\t<dc:date>2011-11-04T09:35:17-07:00</dc:date>\n\t\t<dc:language>en-us</dc:language>\n\t\t<dc:rights>Copyright 2011 craigslist, inc.</dc:rights>\n\t\t<dc:source>\nhttp://sfbay.craigslist.org/sby/muc/2681301534.html\n</dc:source>\n\t\t<dc:title><![CDATA[ Music Equipment Repair and Consignment ]]></dc:title>\n\t\t<dc:type>text</dc:type>\n\t\t<dcterms:issued>2011-11-04T09:35:17-07:00</dcterms:issued>\n\t</item>\n\t<item rdf:about="http://sfbay.craigslist.org/eby/rid/2685010755.html">\n\t\t<title><![CDATA[\nRide Offered - Oakland/BART to LA/SFV - TODAY 3PM 11/04 (oakland north / temescal)\n]]></title>\n\t\t<link>\nhttp://sfbay.craigslist.org/eby/rid/2685010755.html\n</link>\n\t\t<description><![CDATA[\nIm offering a lift for up to two people from Oakland (or near any BART station in the East Bay/580/880 Corridor, or San Jose/Morgan Hill, Gilroy) to the San Fernando Valley / Los Angeles area. Specifically, Im leaving from Oakland between 2:30 and 3:00pm (this is flexible, but if I leave too late my girlfriend will kill me), and heading to Woodland Hills via the 580, I-5, 405, and 101.<!-- END CLTAGS -->\n]]></description>\n\t\t<dc:date>2011-11-04T09:34:54-07:00</dc:date>\n\t\t<dc:language>en-us</dc:language>\n\t\t<dc:rights>Copyright 2011 craigslist, inc.</dc:rights>\n\t\t<dc:source>\nhttp://sfbay.craigslist.org/eby/rid/2685010755.html\n</dc:source>\n\t\t<dc:title><![CDATA[\nRide Offered - Oakland/BART to LA/SFV - TODAY 3PM 11/04 (oakland north / temescal)\n]]></dc:title>\n\t\t<dc:type>text</dc:type>\n\t\t<dcterms:issued>2011-11-04T09:34:54-07:00</dcterms:issued>\n\t</item>\n</rdf:RDF>';
+exports.expected = { type: 'rdf:RDF',
+  id: '',
+  title: 'craigslist | all community in SF bay area',
+  link: 'http://sfbay.craigslist.org/ccc/',
+  items: 
+   [ { title: '![CDATA[ Music Equipment Repair and Consignment ]]',
+       link: '\nhttp://sfbay.craigslist.org/sby/muc/2681301534.html\n',
+       description: '![CDATA[\nSan Jose Rock Shop offers musical instrument repair and consignment! (408) 215-2065' },
+     { title: '![CDATA[\nRide Offered - Oakland/BART to LA/SFV - TODAY 3PM 11/04 (oakland north / temescal)\n]]',
+       link: '\nhttp://sfbay.craigslist.org/eby/rid/2685010755.html\n',
+       description: '![CDATA[\nIm offering a lift for up to two people from Oakland (or near any BART station in the East Bay/580/880 Corridor, or San Jose/Morgan Hill, Gilroy) to the San Fernando Valley / Los Angeles area. Specifically, Im leaving from Oakland between 2:30 and 3:00pm (this is flexible, but if I leave too late my girlfriend will kill me), and heading to Woodland Hills via the 580, I-5, 405, and 101.' } ] };
\ No newline at end of file

From 692104ce11a49cbb7b8a8e560d18c8786732f86a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 4 Nov 2011 18:25:41 +0100
Subject: [PATCH 051/450] restructured code

---
 lib/ElementType.js   | 12 ++++++------
 tests/00-runtests.js |  1 -
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/lib/ElementType.js b/lib/ElementType.js
index c112c91..70194d2 100644
--- a/lib/ElementType.js
+++ b/lib/ElementType.js
@@ -1,9 +1,9 @@
 //Types of elements found in the DOM
 module.exports = {
-	Text: "text" //Plain text
-	, Directive: "directive" //Special tag <!...>
-	, Comment: "comment" //Special tag <!--...-->
-	, Script: "script" //Special tag <script>...</script>
-	, Style: "style" //Special tag <style>...</style>
-	, Tag: "tag" //Any tag that isn't special
+	Text: "text", /*Plain text*/
+	Directive: "directive", /*Special tag <!...>*/
+	Comment: "comment", /*Special tag <!--...-->*/
+	Script: "script", /*Special tag <script>...</script>*/
+	Style: "style", /*Special tag <style>...</style>*/
+	Tag: "tag" /*Any tag that isn't special*/
 };
\ No newline at end of file
diff --git a/tests/00-runtests.js b/tests/00-runtests.js
index 55e3204..8df0fc7 100644
--- a/tests/00-runtests.js
+++ b/tests/00-runtests.js
@@ -12,7 +12,6 @@ var totalTime = 0;
 var name = __filename.split("/").slice(-1)[0];
 for (var i = 1; i < testFiles.length; i++) {
 	if(testFiles[i] === name) continue;
-	console.log(testFiles[i], __filename);
 	testCount++;
 	var moduleName = testFiles[i];
 	var test = require(testFolder + "/" + moduleName);

From 03dd39e225e9ba31010479c02480fb76b3097fb7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 4 Nov 2011 18:26:07 +0100
Subject: [PATCH 052/450] Removed duplicated code

---
 lib/DefaultHandler.js | 28 +++++++++++++++-------------
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/lib/DefaultHandler.js b/lib/DefaultHandler.js
index 545e688..7073fd5 100644
--- a/lib/DefaultHandler.js
+++ b/lib/DefaultHandler.js
@@ -81,16 +81,18 @@ DefaultHandler.prototype.writeTag = DefaultHandler.prototype.writeDirective = De
 DefaultHandler.prototype.handleElement = function(element) {
 	if (this._done)
 		this.handleCallback(new Error("Writing to the handler after done() called is not allowed without a reset()"));
+	var couldBeContainer = element.type === ElementType.Tag || element.type === ElementType.Script || element.type === ElementType.Style;
 	if (!this._options.verbose) {
 		//element.raw = null; //FIXME: Not clean
 		//FIXME: Serious performance problem using delete
 		delete element.raw;
-		if (element.type === "tag" || element.type === "script" || element.type === "style")
+		if(couldBeContainer)
 			delete element.data;
 	}
-	if (!this._lastTag()) { //There are no parent elements
+	var lastTag = this._lastTag();
+	if (!lastTag) { //There are no parent elements
 		//If the element can be a container, add it to the tag stack and the top level list
-		if (element.type !== ElementType.Text && element.type !== ElementType.Comment && element.type !== ElementType.Directive) {
+		if(couldBeContainer){
 			if (element.name.charAt(0) !== "/") { //Ignore closing tags that obviously don't have an opening tag
 				this.dom.push(element);
 				if (!this.isEmptyTag(element)) { //Don't add tags to the tag stack that can't have children
@@ -104,31 +106,31 @@ DefaultHandler.prototype.handleElement = function(element) {
 	else { //There are parent elements
 		//If the element can be a container, add it as a child of the element
 		//on top of the tag stack and then add it to the tag stack
-		if (element.type !== ElementType.Text && element.type !== ElementType.Comment && element.type !== ElementType.Directive) {
+		if(couldBeContainer){
 			if (element.name.charAt(0) === "/") {
 				//This is a closing tag, scan the tagStack to find the matching opening tag
 				//and pop the stack up to the opening tag's parent
 				var baseName = element.name.substring(1);
 				if (!this.isEmptyTag(element)) {
 					var pos = this._tagStack.length - 1;
-					while (pos > -1 && this._tagStack[pos--].name !== baseName) { }
-					if (pos > -1 || this._tagStack[0].name === baseName)
-						while (pos < this._tagStack.length - 1)
+					while (pos !== -1 && this._tagStack[pos--].name !== baseName) { }
+					if (pos !== -1 || this._tagStack[0].name === baseName)
+						while(pos < this._tagStack.length - 1)
 							this._tagStack.pop();
 				}
 			}
 			else { //This is not a closing tag
-				if (!this._lastTag().children)
-					this._lastTag().children = [];
-				this._lastTag().children.push(element);
+				if (!lastTag.children)
+					lastTag.children = [];
+				lastTag.children.push(element);
 				if (!this.isEmptyTag(element)) //Don't add tags to the tag stack that can't have children
 					this._tagStack.push(element);
 			}
 		}
 		else { //This is not a container element
-			if (!this._lastTag().children)
-				this._lastTag().children = [];
-			this._lastTag().children.push(element);
+			if (!lastTag.children)
+				lastTag.children = [];
+			lastTag.children.push(element);
 		}
 	}
 };

From eb37d11ebbaf35aa833d2f36955ad9fdbf39d1c6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 4 Nov 2011 20:33:48 +0100
Subject: [PATCH 053/450] Splited writeTag in both handlers to smaller
 functions + added a test for events

---
 lib/DefaultHandler.js | 164 ++++++++++++++++--------------------------
 lib/EventedHandler.js |  50 +++++++------
 tests/00-runtests.js  |  22 ++++--
 tests/25-events.js    |  20 ++++++
 4 files changed, 126 insertions(+), 130 deletions(-)
 create mode 100644 tests/25-events.js

diff --git a/lib/DefaultHandler.js b/lib/DefaultHandler.js
index 7073fd5..471618b 100644
--- a/lib/DefaultHandler.js
+++ b/lib/DefaultHandler.js
@@ -1,42 +1,28 @@
 var ElementType = require("./ElementType.js");
 
-function DefaultHandler (callback, options) {
+function DefaultHandler(callback, options){
 	this.dom = [];
 	this._done = false;
 	this._tagStack = [];
-	this._options = options ? options : { };
-	if (this._options.ignoreWhitespace === undefined)
-		this._options.ignoreWhitespace = false; //Keep whitespace-only text nodes
-	if (this._options.verbose === undefined)
-		this._options.verbose = true; //Keep data property for tags and raw property for all
-	if (this._options.enforceEmptyTags === undefined)
-		this._options.enforceEmptyTags = true; //Don't allow children for HTML tags defined as empty in spec
-	if ((typeof callback) === "function")
-		this._callback = callback;
+	if(options){
+		this._options = options;
+		if(typeof this._options.verbose === "undefined")
+			this._options.verbose = true;
+		if (typeof this._options.enforceEmptyTags === "undefined")
+			this._options.enforceEmptyTags = true;
+	}
+	this._callback = callback;
 }
 
-DefaultHandler.prototype._lastTag = function() {
-	var stack = this._tagStack;
-	return(stack.length ? stack[stack.length - 1] : null);
+//default options
+DefaultHandler.prototype._options = {
+	ignoreWhitespace: false,	//Keep whitespace-only text nodes
+    verbose: true,				//Keep data property for tags and raw property for all
+    enforceEmptyTags: true		//Don't allow children for HTML tags defined as empty in spec
 };
 
 //HTML Tags that shouldn't contain child nodes
-var _emptyTags = {
-	area: true
-	, base: true
-	, basefont: true
-	, br: true
-	, col: true
-	, frame: true
-	, hr: true
-	, img: true
-	, input: true
-	, isindex: true
-	, link: true
-	, meta: true
-	, param: true
-	, embed: true
-};
+var emptyTags={area:true,base:true,basefont:true,br:true,col:true,frame:true,hr:true,img:true,input:true,isindex:true,link:true,meta:true,param:true,embed:true};
 
 //**Public**//
 //Methods//
@@ -51,88 +37,60 @@ DefaultHandler.prototype.done = function() {
 	this._done = true;
 	this.handleCallback(null);
 };
-DefaultHandler.prototype.writeText = function(element) {
-	if(this._options.ignoreWhitespace)
-		if(element.data.trim() === "")
-			return;
-	this.handleElement(element);
-};
 
 //Methods//
 DefaultHandler.prototype.error =
-DefaultHandler.prototype.handleCallback = function(error) {
-		if ((typeof this._callback) !== "function")
-			if (error)
-				throw error;
-			else
-				return;
-		this._callback(error, this.dom);
+DefaultHandler.prototype.handleCallback = function(error){
+		if(typeof this._callback === "function")
+			this._callback(error, this.dom);
+		else if(error) throw error;
 };
 
-DefaultHandler.prototype.isEmptyTag = function(element) {
-	var name = element.name.toLowerCase();
-	if (name.charAt(0) === '/') {
-		name = name.substring(1);
-	}
-	return this._options.enforceEmptyTags && _emptyTags[name];
+DefaultHandler.prototype._isEmptyTag = function(name) {
+	return this._options.enforceEmptyTags && emptyTags[name];
 };
 
-DefaultHandler.prototype.writeTag = DefaultHandler.prototype.writeDirective = DefaultHandler.prototype.writeComment =
-DefaultHandler.prototype.handleElement = function(element) {
-	if (this._done)
-		this.handleCallback(new Error("Writing to the handler after done() called is not allowed without a reset()"));
-	var couldBeContainer = element.type === ElementType.Tag || element.type === ElementType.Script || element.type === ElementType.Style;
-	if (!this._options.verbose) {
-		//element.raw = null; //FIXME: Not clean
-		//FIXME: Serious performance problem using delete
-		delete element.raw;
-		if(couldBeContainer)
-			delete element.data;
-	}
-	var lastTag = this._lastTag();
-	if (!lastTag) { //There are no parent elements
-		//If the element can be a container, add it to the tag stack and the top level list
-		if(couldBeContainer){
-			if (element.name.charAt(0) !== "/") { //Ignore closing tags that obviously don't have an opening tag
-				this.dom.push(element);
-				if (!this.isEmptyTag(element)) { //Don't add tags to the tag stack that can't have children
-					this._tagStack.push(element);
-				}
-			}
-		}
-		else //Otherwise just add to the top level list
-			this.dom.push(element);
-	}
-	else { //There are parent elements
-		//If the element can be a container, add it as a child of the element
-		//on top of the tag stack and then add it to the tag stack
-		if(couldBeContainer){
-			if (element.name.charAt(0) === "/") {
-				//This is a closing tag, scan the tagStack to find the matching opening tag
-				//and pop the stack up to the opening tag's parent
-				var baseName = element.name.substring(1);
-				if (!this.isEmptyTag(element)) {
-					var pos = this._tagStack.length - 1;
-					while (pos !== -1 && this._tagStack[pos--].name !== baseName) { }
-					if (pos !== -1 || this._tagStack[0].name === baseName)
-						while(pos < this._tagStack.length - 1)
-							this._tagStack.pop();
-				}
-			}
-			else { //This is not a closing tag
-				if (!lastTag.children)
-					lastTag.children = [];
-				lastTag.children.push(element);
-				if (!this.isEmptyTag(element)) //Don't add tags to the tag stack that can't have children
-					this._tagStack.push(element);
-			}
-		}
-		else { //This is not a container element
-			if (!lastTag.children)
-				lastTag.children = [];
-			lastTag.children.push(element);
-		}
+DefaultHandler.prototype._closeTag = function(name){
+	//Ignore closing tags that obviously don't have an opening tag
+	if(!this._tagStack || this._isEmptyTag(name)) return;
+	
+	var pos = this._tagStack.length - 1;
+	while (pos !== -1 && this._tagStack[pos--].name !== name) { }
+	if ( ++pos !== 0 || this._tagStack[0].name === name)
+	    while(pos < this._tagStack.length)
+	    	this._tagStack.pop();
+};
+
+DefaultHandler.prototype._addDomElement = function(element){
+	if(!this._options.verbose) delete element.raw;
+	
+	var lastTag = this._tagStack[this._tagStack.length-1];
+	if(!lastTag) this.dom.push(element);
+	else{ //There are parent elements
+		if(!lastTag.children) lastTag.children = [element];
+		else lastTag.children.push(element);
 	}
+}
+
+DefaultHandler.prototype._openTag = function(element){
+	if(!this._options.verbose) delete element.data;
+	
+	this._addDomElement(element);
+	
+	//Don't add tags to the tag stack that can't have children
+	if(!this._isEmptyTag(element.name)) this._tagStack.push(element);
+}
+
+DefaultHandler.prototype.writeText = function(element){
+	if(this._options.ignoreWhitespace && element.data.trim() === "") return;
+	this._addDomElement(element);
+};
+
+DefaultHandler.prototype.writeDirective = DefaultHandler.prototype.writeComment = DefaultHandler.prototype._addDomElement;
+
+DefaultHandler.prototype.writeTag = function(element) {
+	if(element.name.charAt(0) === "/") this._closeTag(element.name.substr(1));
+	else this._openTag(element);
 };
 
 module.exports = DefaultHandler;
\ No newline at end of file
diff --git a/lib/EventedHandler.js b/lib/EventedHandler.js
index a64bf75..428992f 100644
--- a/lib/EventedHandler.js
+++ b/lib/EventedHandler.js
@@ -25,9 +25,13 @@ var stripData = function(callback){
 	};
 };
 var openTagCB = function(openTag, attribute){
-	function open(name, attributes){ openTag({name:name, attributes:attributes}); }
 	function attr(name, attributes){ for(var i in attributes) attribute({name:i, value:attributes[i]}); }
 	if(openTag){
+		var open;
+		if(openTag.length === 1){ //to be compatible with sax.js
+			open = function open(name, attributes){ openTag({name:name, attributes:attributes}); }
+		}
+		else open = openTag
 		if(attribute) return function(name, attributes){open(name,attributes); attr(null, attributes);};
 		else return open;
 	}
@@ -38,29 +42,31 @@ var openTagCB = function(openTag, attribute){
 //HTML Tags that shouldn't contain child nodes
 var emptyTags={area:true,base:true,basefont:true,br:true,col:true,frame:true,hr:true,img:true,input:true,isindex:true,link:true,meta:true,param:true,embed:true};
 
-EventedHandler.prototype.writeTag = function(element){
-	var closing = element.name.charAt(0) === "/",
-		name = closing ? element.name.substring(1) : element.name,
-		attributes = element.attribs || {},
-		empty = emptyTags[name];
-	
-	if(closing){
-		if(!empty){
-			var i = this._stack.length - 1;
-			while(i !== -1 && this._stack[i--].name !== name){}
-			if( (i+=1) !== 0)
-				while(i < this._stack.length) this.onclosetag(this._stack.pop().name);
-		}
-		else if(name === "br"){ //special case for <br>s
-			this.onopentag(name, attributes);
-			this.onclosetag(name);
-		}
+EventedHandler.prototype._openTag = function(name, attrs){
+	this.onopentag(name, attrs);
+	if(emptyTags[name]) this.onclosetag(name);
+	else this._stack.push(name);
+};
+
+EventedHandler.prototype._closeTag = function(name){
+	if(!emptyTags[name] && this._stack){
+		var i = this._stack.length-1;
+		while(i !== -1 && this._stack[i--] !== name){};
+		if( ++i !== 0 || this._stack[0] === name)
+			while(i < this._stack.length)
+				this.onclosetag(this._stack.pop());
 	}
-	else{
-		this.onopentag(name, attributes);
-		if(empty) this.onclosetag(name);
-		else this._stack.push(element);
+	else if(name === "br"){//many browsers (eg. Safari) convert </br> to <br>
+		this.onopentag(name, {});
+		this.onclosetag(name);
 	}
 };
 
+EventedHandler.prototype.writeTag = function(element){
+	if(element.name.charAt(0) === "/")
+		this._closeTag(element.name.substr(1));
+	else
+		this._openTag(element.name, element.attribs || {});
+};
+
 module.exports = EventedHandler;
\ No newline at end of file
diff --git a/tests/00-runtests.js b/tests/00-runtests.js
index 8df0fc7..238dc59 100644
--- a/tests/00-runtests.js
+++ b/tests/00-runtests.js
@@ -10,6 +10,7 @@ var testCount = 0;
 var failedCount = 0;
 var totalTime = 0;
 var name = __filename.split("/").slice(-1)[0];
+var handler;
 for (var i = 1; i < testFiles.length; i++) {
 	if(testFiles[i] === name) continue;
 	testCount++;
@@ -21,14 +22,22 @@ for (var i = 1; i < testFiles.length; i++) {
 	}
 	console.log(testFiles[i]);
 	var start = Date.now();
-	var handler = (test.type === "rss") ?
-		new htmlparser.RssHandler(handlerCallback, test.options.handler)
-		:
-		new htmlparser.DefaultHandler(handlerCallback, test.options.handler)
-		;
+	if(test.type === "rss"){
+		handler = new htmlparser.RssHandler(handlerCallback, test.options.handler);
+	}
+	else if(test.type === "event"){
+		handler = new htmlparser.EventedHandler(test.options.handler);
+	}
+	else{
+		handler = new htmlparser.DefaultHandler(handlerCallback, test.options.handler);
+	}
 	var parser = new htmlparser.Parser(handler, test.options.parser);
 	parser.parseComplete(test.html);
 	var resultComplete = handler.dom;
+	if(test.type === "event"){
+		resultComplete = test.result;
+		test.result = [];
+	}
 	var chunkPos = 0;
 	parser.reset();
 	while (chunkPos < test.html.length) {
@@ -37,6 +46,9 @@ for (var i = 1; i < testFiles.length; i++) {
 	}
 	parser.done();
 	var resultChunk = handler.dom;
+	if(test.type === "event"){
+		resultChunk = test.result;
+	}
 	var testResult =
 		sys.inspect(resultComplete, false, null) === sys.inspect(test.expected, false, null)
 		&&
diff --git a/tests/25-events.js b/tests/25-events.js
new file mode 100644
index 0000000..1a27bd0
--- /dev/null
+++ b/tests/25-events.js
@@ -0,0 +1,20 @@
+exports.name = "Events";
+exports.type = "event";
+exports.result = [];
+exports.options = {handler: {
+	onopentag: function(name, attributes){
+		exports.result.push({event:"open", name: name, attributes: attributes});
+	},
+	onclosetag: function(name){
+		exports.result.push({event:"close", name: name});
+	},
+	ontext: function(text){
+		exports.result.push({event:"text", text: text});
+	}
+}, parser: {}};
+exports.html = "<h1 class=test>adsf</h1>";
+exports.expected = [ { event: 'open',
+    name: 'h1',
+    attributes: { class: 'test' } },
+  { event: 'text', text: 'adsf' },
+  { event: 'close', name: 'h1' } ];
\ No newline at end of file

From 38da6e0d0f6b05d3bd15a8aacc98f617f7996ea2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 4 Nov 2011 21:46:01 +0100
Subject: [PATCH 054/450] added a limit to the elements fetched inside
 RssHandler

---
 lib/RssHandler.js | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/lib/RssHandler.js b/lib/RssHandler.js
index ac0b9eb..377d9f5 100644
--- a/lib/RssHandler.js
+++ b/lib/RssHandler.js
@@ -12,14 +12,15 @@ inherits(RssHandler, DefaultHandler);
 RssHandler.prototype.done = DefaultHandler.prototype.done;
 
 function getElements(what, where, one, recurse){
-	var ret = DomUtils.getElementsByTagName(what, where, !!recurse);
-	if(one)
+	if(one){
+		var ret = DomUtils.getElementsByTagName(what, where, recurse, 1)
 		if(ret && ret.length > 0) return ret[0];
 		else return false;
-	else return ret;
+	}
+	else return DomUtils.getElementsByTagName(what, where, recurse);
 }
 function fetch(what, where, recurse){
-	var ret = getElements(what, where, true, !!recurse);
+	var ret = getElements(what, where, true, recurse);
 	if(ret && ret.children && ret.children.length > 0) return ret.children[0].data;
 	else return false;
 }

From 5101cc2b1d6eb4cd0ededcad348baca65b81869e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 4 Nov 2011 22:36:12 +0100
Subject: [PATCH 055/450] improved DomUtils

what I really wanted to do is now in comments (it failed multiple times)
---
 lib/DomUtils.js   | 110 +++++++++++++++++++++++-----------------------
 lib/RssHandler.js |  10 ++---
 2 files changed, 59 insertions(+), 61 deletions(-)

diff --git a/lib/DomUtils.js b/lib/DomUtils.js
index a6e0bdf..9f4c2b5 100644
--- a/lib/DomUtils.js
+++ b/lib/DomUtils.js
@@ -1,74 +1,76 @@
-module.exports = {
-	testElement: function(options, element) {
-		if (!element) return false;
-		
-		var type = element.type;
+var ElementType = require("./ElementType.js");
 
-		for (var key in options) {
-			if (key === "tag_name") {
-				if (type !== "tag" && type !== "script" && type !== "style") return false;
-				if (!options.tag_name(element.name)) return false;
-			} else if (key === "tag_type") {
-				if (!options.tag_type(type)) return false;
-			} else if (key === "tag_contains") {
-				if (type !== "text" && type !== "comment" && type !== "directive") return false;
-				if (!options.tag_contains(element.data)) return false;
-			} else if (!element.attribs || !options[key](element.attribs[key]))
-				return false;
-		}
-	
-		return true;
-	}
+function getTest (checkVal) {
+	return function (value) { return value === checkVal; };
+}
 
-	, getElements: function(options, currentElement, recurse, limit) {
-		if (!currentElement) return [];
+function testElement(options, element) {
+	if (!element) return false;
+    
+    var type = element.type;
 
-		recurse = (recurse === undefined || recurse === null) || !!recurse;
+    for (var key in options) {
+    	if (key === "tag_name") {
+    		if (type !== ElementType.Tag && type !== ElementType.Script && type !== ElementType.Style) return false;
+    		if (!options.tag_name(element.name)) return false;
+    	} else if (key === "tag_type") {
+    		if (!options.tag_type(type)) return false;
+    	} else if (key === "tag_contains") {
+    		if (type !== ElementType.Text && type !== ElementType.Comment && type !== ElementType.Directive) return false;
+    		if (!options.tag_contains(element.data)) return false;
+    	} else if (!element.attribs || !options[key](element.attribs[key]))
+    		return false;
+    }
 
-		var parsed_limit = parseInt(limit, 10);
-		limit = isNaN(parsed_limit) ? -1 : parsed_limit;
+    return true;
+}
 
-		var found = [];
-		var elementList;
+module.exports = {
+	testElement: testElement, 
+	
+	getElements: function(options, currentElement, recurse, limit){
+		recurse = recurse === undefined || recurse === null || recurse;
+		if(isNaN(limit)) limit = -1;
 
-		function getTest (checkVal) {
-			return function (value) { return value === checkVal; };
-		}
-		for (var key in options) {
-			if (typeof options[key] !== "function") {
+		for(var key in options){
+			if (typeof options[key] !== "function")
 				options[key] = getTest(options[key]);
-			}
-		}
-
-		if (this.testElement(options, currentElement)) {
-			found.push(currentElement);
 		}
-
-		if (limit >= 0 && found.length >= limit) return found;
-
-		if(recurse && currentElement.children) elementList = currentElement.children;
-		else if(Array.isArray(currentElement)) elementList = currentElement;
-		else return found;
-
-		for (var i = 0; i < elementList.length; i++) {
-			found = found.concat(this.getElements(options, elementList[i], recurse, limit));
-
-			if (limit >= 0 && found.length >= limit) break;
-		}
-
-		return found;
+		return this.testAttr(testElement.bind(null, options), currentElement, recurse, limit);
 	}
 	
 	, getElementById: function(id, currentElement, recurse) {
 		var result = this.getElements({ id: id }, currentElement, recurse, 1);
 		return result.length ? result[0] : null;
+		//function(elem){return elem.attribs && elem.attribs.id === id;}
 	}
 	
 	, getElementsByTagName: function(name, currentElement, recurse, limit) {
 		return this.getElements({ tag_name: name }, currentElement, recurse, limit);
+		/*function(elem){
+			var type = elem.type;
+			if(type !== ElementType.Tag && type !== ElementType.Script && type !== ElementType.Style) return false;
+			return elem.name === name;
+		};*/
 	}
 	
-	, getElementsByTagType: function(type, currentElement, recurse, limit) {
-		return this.getElements({ tag_type: type }, currentElement, recurse, limit);
+	, getElementsByTagType: function(type, currentElement, recurse, limit){
+		return this.testAttr(function(elem){return elem.type === type;}, currentElement, recurse, limit);
+		//function(elem){return elem.type === type;}
 	}
-};
\ No newline at end of file
+	
+	, testAttr: function(test, element, recurse, limit){
+		var found = [], elementList;
+		if(!element) return found;
+		if(test(element)) found.push(element);
+		
+		if(recurse && element.children) elementList = element.children;
+		else if(Array.isArray(element)) elementList = element;
+		else return found;
+		
+		for(var i = 0, j = elementList.length; i < j && (limit < 0 || found.length < limit); i++){
+			found = found.concat(this.testAttr(test, elementList[i], recurse, limit));
+		}
+		
+		return found;
+	}};
\ No newline at end of file
diff --git a/lib/RssHandler.js b/lib/RssHandler.js
index 377d9f5..f9b63d4 100644
--- a/lib/RssHandler.js
+++ b/lib/RssHandler.js
@@ -12,16 +12,12 @@ inherits(RssHandler, DefaultHandler);
 RssHandler.prototype.done = DefaultHandler.prototype.done;
 
 function getElements(what, where, one, recurse){
-	if(one){
-		var ret = DomUtils.getElementsByTagName(what, where, recurse, 1)
-		if(ret && ret.length > 0) return ret[0];
-		else return false;
-	}
-	else return DomUtils.getElementsByTagName(what, where, recurse);
+	if(one) return DomUtils.getElementsByTagName(what, where, recurse, 1)[0];
+	else	return DomUtils.getElementsByTagName(what, where, recurse);
 }
 function fetch(what, where, recurse){
 	var ret = getElements(what, where, true, recurse);
-	if(ret && ret.children && ret.children.length > 0) return ret.children[0].data;
+	if(ret && (ret = ret.children) && ret.length > 0) return ret[0].data;
 	else return false;
 }
 

From 0e78ab532b395cdd681f46ed02098e066163d6c5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 4 Nov 2011 22:53:34 +0100
Subject: [PATCH 056/450] some small adjustments

---
 lib/DefaultHandler.js | 5 ++---
 lib/Parser.js         | 2 +-
 tests/00-runtests.js  | 7 ++-----
 3 files changed, 5 insertions(+), 9 deletions(-)

diff --git a/lib/DefaultHandler.js b/lib/DefaultHandler.js
index 471618b..2201731 100644
--- a/lib/DefaultHandler.js
+++ b/lib/DefaultHandler.js
@@ -57,8 +57,7 @@ DefaultHandler.prototype._closeTag = function(name){
 	var pos = this._tagStack.length - 1;
 	while (pos !== -1 && this._tagStack[pos--].name !== name) { }
 	if ( ++pos !== 0 || this._tagStack[0].name === name)
-	    while(pos < this._tagStack.length)
-	    	this._tagStack.pop();
+	    this._tagStack.splice(pos, this._tagStack.length);
 };
 
 DefaultHandler.prototype._addDomElement = function(element){
@@ -86,7 +85,7 @@ DefaultHandler.prototype.writeText = function(element){
 	this._addDomElement(element);
 };
 
-DefaultHandler.prototype.writeDirective = DefaultHandler.prototype.writeComment = DefaultHandler.prototype._addDomElement;
+DefaultHandler.prototype.writeComment = DefaultHandler.prototype.writeDirective = DefaultHandler.prototype._addDomElement;
 
 DefaultHandler.prototype.writeTag = function(element) {
 	if(element.name.charAt(0) === "/") this._closeTag(element.name.substr(1));
diff --git a/lib/Parser.js b/lib/Parser.js
index 70d2397..a464cc7 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -212,7 +212,7 @@ Parser.prototype.parseTags = function(){
 		if(elementType === ElementType.Tag){
 			if(rawData.substring(0, 3) === "!--"){ //This tag is really comment
 				elementType = ElementType.Comment;
-				rawData = rawData.substr(3);
+				elementData = rawData = rawData.substr(3);
 				//Check if the comment is terminated in the current element
 				if(tagSep === ">" && rawData.substr(-2) === "--")
 					elementData = rawData = rawData.slice(0, -2);
diff --git a/tests/00-runtests.js b/tests/00-runtests.js
index 238dc59..63d8317 100644
--- a/tests/00-runtests.js
+++ b/tests/00-runtests.js
@@ -49,11 +49,8 @@ for (var i = 1; i < testFiles.length; i++) {
 	if(test.type === "event"){
 		resultChunk = test.result;
 	}
-	var testResult =
-		sys.inspect(resultComplete, false, null) === sys.inspect(test.expected, false, null)
-		&&
-		sys.inspect(resultChunk, false, null) === sys.inspect(test.expected, false, null)
-		;
+	var testResult = sys.inspect(resultComplete, false, null) === sys.inspect(test.expected, false, null)
+					&& sys.inspect(resultChunk, false, null) === sys.inspect(test.expected, false, null);
 	var took = Date.now() - start;
 	totalTime += took;
 	sys.puts("[" + test.name + "\]: " + (testResult ? "passed" : "FAILED") + " (took: " + took + "ms)");

From bc12cd8717f4ed1270538d39e2428c3d4f25d5fe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 5 Nov 2011 12:42:32 +0100
Subject: [PATCH 057/450] Replaced _tagStack with _contentFlags, tweaked
 DefaultHandler

That fixed https://github.com/tautologistics/node-htmlparser/issues/29.
---
 lib/DefaultHandler.js            |  30 +++++-
 lib/Parser.js                    | 159 +++++++++++++++----------------
 tests/23-template_script_tags.js |   6 +-
 3 files changed, 107 insertions(+), 88 deletions(-)

diff --git a/lib/DefaultHandler.js b/lib/DefaultHandler.js
index 2201731..6f3b663 100644
--- a/lib/DefaultHandler.js
+++ b/lib/DefaultHandler.js
@@ -3,6 +3,7 @@ var ElementType = require("./ElementType.js");
 function DefaultHandler(callback, options){
 	this.dom = [];
 	this._done = false;
+	this._inSpecialTag = false;
 	this._tagStack = [];
 	if(options){
 		this._options = options;
@@ -30,6 +31,7 @@ var emptyTags={area:true,base:true,basefont:true,br:true,col:true,frame:true,hr:
 DefaultHandler.prototype.reset = function() {
 	this.dom = [];
 	this._done = false;
+	this._inSpecialTag = false;
 	this._tagStack = [];
 };
 //Signals the handler that parsing is done
@@ -56,17 +58,33 @@ DefaultHandler.prototype._closeTag = function(name){
 	
 	var pos = this._tagStack.length - 1;
 	while (pos !== -1 && this._tagStack[pos--].name !== name) { }
-	if ( ++pos !== 0 || this._tagStack[0].name === name)
-	    this._tagStack.splice(pos, this._tagStack.length);
+	if ( pos !== -1 || this._tagStack[0].name === name)
+	    this._tagStack.splice(pos+1);
 };
 
 DefaultHandler.prototype._addDomElement = function(element){
 	if(!this._options.verbose) delete element.raw;
 	
-	var lastTag = this._tagStack[this._tagStack.length-1];
+	var lastTag = this._tagStack[this._tagStack.length-1], tmp;
 	if(!lastTag) this.dom.push(element);
 	else{ //There are parent elements
-		if(!lastTag.children) lastTag.children = [element];
+		if(!lastTag.children){
+			lastTag.children = [element];
+			return;
+		}
+		tmp = lastTag.children[lastTag.children.length-1];
+		if(element.type === ElementType.Comment && tmp.type === ElementType.Comment){
+			tmp.data += element.data;
+			if(this._options.verbose) tmp.raw = tmp.data;
+		}
+		else if(this._inSpecialTag && element.type === ElementType.Text){
+			if(tmp.type !== ElementType.Text) lastTag.children.push(element);
+			else {
+				tmp.data += element.data;
+				if(this._options.verbose)
+					tmp.raw = tmp.data;
+			}
+		}
 		else lastTag.children.push(element);
 	}
 }
@@ -76,6 +94,10 @@ DefaultHandler.prototype._openTag = function(element){
 	
 	this._addDomElement(element);
 	
+	if(element.type === ElementType.Script || element.type === ElementType.Style){
+		this._inSpecialTag = true;
+	}
+	
 	//Don't add tags to the tag stack that can't have children
 	if(!this._isEmptyTag(element.name)) this._tagStack.push(element);
 }
diff --git a/lib/Parser.js b/lib/Parser.js
index a464cc7..7566fca 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -11,8 +11,8 @@ function Parser(handler, options){
 
 	this._buffer = "";
 	this._prevTagSep = "";
+	this._contentFlags = 0;
 	this._done = false;
-	this._tagStack = [];
 	this._elements = [];
 	this._current = 0;
 	this._location = {
@@ -64,10 +64,10 @@ Parser.prototype.done = function(){
 		var rawData = this._buffer;
 		this._buffer = "";
 		var element = {
-			  raw: rawData
-			, data: this._parseState === ElementType.Text ? rawData : rawData.trim()
-			, type: this._parseState
-			};
+			raw: rawData,
+			data: this._parseState === ElementType.Text ? rawData : rawData.trim(),
+			type: this._parseState
+		};
 		if(tagTypes[this._parseState]){
 			element.name = parseTagName(element.data);
 			var attrs = parseAttributes(element.data);
@@ -76,7 +76,7 @@ Parser.prototype.done = function(){
 		this._elements.push(element);
 	}
 
-	this.writeHandler(true);
+	this.writeHandler();
 	this._handler.done();
 };
 
@@ -86,6 +86,7 @@ Parser.prototype.reset = function(){
 	this._prevTagSep = "";
 	this._done = false;
 	this._current = 0;
+	this._contentFlags = 0;
 	this._location = {
 		 row: 0
 		, col: 0
@@ -93,7 +94,6 @@ Parser.prototype.reset = function(){
 		, inBuffer: 0
 	};
 	this._parseState = ElementType.Text;
-	this._tagStack = [];
 	this._elements = [];
 	this._handler.reset();
 };
@@ -125,11 +125,18 @@ var parseTagName = function(data){
 	return match[1] + match[2];
 };
 
+//Special tags that are threated differently
+var SpecialTags = {};
+SpecialTags[ElementType.Style]  = 1; //2^0
+SpecialTags[ElementType.Script] = 2; //2^1
+SpecialTags["w"] = 4; //2^2 - if set, append prev tag sep to data
+SpecialTags[ElementType.Comment] = 8; //2^8
+
 //Parses through HTML text and returns an array of found elements
 Parser.prototype.parseTags = function(){
-	var buffer = this._buffer, stack = this._tagStack;
+	var buffer = this._buffer;
 
-	var next, type, tagSep, rawData, element, elementName, prevElement, elementType, elementData, attributes, includeName = false;
+	var next, tagSep, rawData, element, elementName, prevElement, elementType, elementData, attributes, includeName = false;
 	
 	var opening = buffer.indexOf("<"), closing = buffer.indexOf(">");
 
@@ -155,55 +162,44 @@ Parser.prototype.parseTags = function(){
 			elementData = rawData;
 			elementName = "";
 		}
-		
-		type = stack[stack.length-1];
 
 
 		//This section inspects the current tag stack and modifies the current
 		//element if we're actually parsing a special area (script/comment/style tag)
-		if(type === ElementType.Comment){ //We're currently in a comment tag
-
-			prevElement = this._elements[this._elements.length - 1];
+		if(this._contentFlags === 0){ /*do nothing*/ }
+		else if(this._contentFlags >= SpecialTags[ElementType.Comment]){ //We're currently in a comment tag
+			elementType = ElementType.Comment; //Change the current element's type to a comment
 
     		if(tagSep === ">" && rawData.substr(-2) === "--"){ //comment ends
-    			stack.pop();
-    			rawData = rawData.slice(0, -2);
-    			//If the previous element is a comment, append the current text to it
-    			if(prevElement && prevElement.type === ElementType.Comment){ //Previous element was a comment
-    				prevElement.data = prevElement.raw += rawData;
-    				//This causes the current element to not be added to the element list
-    				rawData = elementData = "";
-    				elementType = ElementType.Text;
-    			}
-    			else elementType = ElementType.Comment; //Change the current element's type to a comment
-    		}
-    		else { //Still in a comment tag
-    			elementType = ElementType.Comment;
-    			//If the previous element is a comment, append the current text to it
-    			if(prevElement && prevElement.type === ElementType.Comment){
-    				prevElement.data = prevElement.raw += rawData + tagSep;
-    				//This causes the current element to not be added to the element list
-    				rawData = elementData = "";
-    				elementType = ElementType.Text;
-    			}
-    			else elementData = rawData += tagSep;
+    			this._contentFlags -= SpecialTags[ElementType.Comment];
+    			elementData = rawData = rawData.slice(0, -2);
     		}
+    		else elementData = rawData += tagSep;
+    		this._prevTagSep = tagSep;
     	}
-		else if(type === ElementType.Script && elementName === "/script") stack.pop();
-		else if(type === ElementType.Style && elementName === "/style") stack.pop();
-		else if(!this._options.xmlMode && (type === ElementType.Script || type === ElementType.Style)){
-			//special behaviour for script & style tags
-			if(rawData.substring(0, 3) !== "!--"){ //Make sure we're not in a comment
-				//All data from here to style close is now a text element
-			    elementType = ElementType.Text;
-			    //If the previous element is text, append the current text to it
-			    prevElement = this._elements[this._elements.length - 1];
-			    if(prevElement && prevElement.type === ElementType.Text){
-			    	prevElement.data = prevElement.raw += this._prevTagSep + rawData;
-			    	//This causes the current element to not be added to the element list
-			    	rawData = elementData = "";
-			    } else elementData = rawData; //The previous element was not text
+    	//if it's a closing tag, remove the flag
+    	else if(this._contentFlags >= SpecialTags[ElementType.Script] && elementName === "/script"){
+    		this._contentFlags %= SpecialTags["w"]; //remove the written flag
+			this._contentFlags -= SpecialTags[ElementType.Script];
+		}
+		else if(this._contentFlags >= SpecialTags[ElementType.Style] && elementName === "/style"){
+			this._contentFlags %= SpecialTags["w"]; //remove the written flag
+			this._contentFlags -= SpecialTags[ElementType.Style];
+		}
+		//special behaviour for script & style tags
+		//Make sure we're not in a comment
+		else if(!this._options.xmlMode && rawData.substring(0, 3) !== "!--"){
+			//All data from here to style close is now a text element
+			elementType = ElementType.Text;
+			//If the previous element is text, append the last tag sep to element
+			if(this._contentFlags >= SpecialTags["w"]){
+			    elementData = rawData = this._prevTagSep + rawData;
+			}
+			else{ //The previous element was not text
+			    this._contentFlags += SpecialTags["w"];
+			    elementData = rawData;
 			}
+			this._prevTagSep = tagSep;
 		}
 
 
@@ -212,13 +208,14 @@ Parser.prototype.parseTags = function(){
 		if(elementType === ElementType.Tag){
 			if(rawData.substring(0, 3) === "!--"){ //This tag is really comment
 				elementType = ElementType.Comment;
-				elementData = rawData = rawData.substr(3);
+				this._contentFlags %= SpecialTags["w"]; //remove the written flag
 				//Check if the comment is terminated in the current element
 				if(tagSep === ">" && rawData.substr(-2) === "--")
-					elementData = rawData = rawData.slice(0, -2);
+					elementData = rawData = rawData.slice(3, -2);
 				else { //It's not so push the comment onto the tag stack
-					rawData += tagSep;
-					stack.push(ElementType.Comment);
+					elementData = rawData = rawData.substr(3) + tagSep;
+					this._contentFlags += SpecialTags[ElementType.Comment];
+					this._prevTagSep = tagSep;
 				}
 			}
 			else {
@@ -236,12 +233,18 @@ Parser.prototype.parseTags = function(){
 				else if(elementName === "script"){
 					elementType = ElementType.Script;
 					//Special tag, push onto the tag stack if not terminated
-					if(elementData.substr(-1) !== "/") stack.push(ElementType.Script);
+					if(elementData.substr(-1) !== "/"){
+						this._contentFlags += SpecialTags[ElementType.Script];
+						this._prevTagSep = tagSep;
+					}
 				}
 				else if(elementName === "style"){
 					elementType = ElementType.Style;
 					//Special tag, push onto the tag stack if not terminated
-					if(elementData.substr(-1) !== "/") stack.push(ElementType.Style);
+					if(elementData.substr(-1) !== "/"){
+						this._contentFlags += SpecialTags[ElementType.Style];
+						this._prevTagSep = tagSep;
+					}
 				}
 			}
 		}
@@ -271,42 +274,35 @@ Parser.prototype.parseTags = function(){
 			/*
 			switch(elementType){
 				case ElementType.Text:
-					this._handler.ontext(rawData);
-					break;
-				case ElementType.Tag:
-				case ElementType.Style:
-				case ElementType.Script:
-					if(elementName[0] === "/") this._handler.onclosetag(elementName.substr(1));
-					else this._handler.onopentag(elementName, parseAttributes(elementData));
+					this._handler.writeText(element);
 					break;
 				case ElementType.Comment:
-					this._handler.oncomment(rawData);
+					this._handler.writeComment(element);
 					break;
 				case ElementType.Directive:
-					this._handler.onprocessinginstruction(rawData);
+					this._handler.writeDirective(element);
 					break;
-				default: throw Error("Unsupported type: " + elementType);
+				//case ElementType.Tag:
+				//case ElementType.Style:
+				//case ElementType.Script:
+				default:
+					if(elementName[0] === "/") this._handler._closeTag(elementName.substr(1));
+					else this._handler._openTag(elementName, parseAttributes(elementData));
 			}
 			*/
 
 			//If tag self-terminates, add an explicit, separate closing tag
-			if( elementType !== ElementType.Text
-				&& elementType !== ElementType.Comment
-				&& elementType !== ElementType.Directive
-				&& elementData.substr(-1) === "/"
-			){
-				//this._handler.onclosetag(elementName);
+			if(tagTypes[elementType] && elementData.substr(-1) === "/"){
+				//this._handler._closeTag(elementName);
 				this._elements.push({
-					  raw: elementName = "/" + elementName
-					, data: elementName
-					, name: elementName
-					, type: elementType
+					raw: elementName = "/" + elementName,
+					data: elementName, name: elementName,
+					type: elementType
 				});
 			}
 		}
 		this._parseState = (tagSep === "<") ? ElementType.Tag : ElementType.Text;
 		this._current = next + 1;
-		this._prevTagSep = tagSep;
 	}
 
 	if(this._options.includeLocation){
@@ -332,15 +328,18 @@ Parser.prototype.getLocation = function(startTag){
 		chunk = false;
 	}
 	
-	var rows = this._buffer.substring(l.charOffset, l.charOffset = end).split("\n"),
+	var rows = this._buffer.substring(l.charOffset, end).split("\n"),
 		rowNum = rows.length - 1;
 	
+	l.charOffset = end;
 	l.inBuffer += rowNum;
 	
 	var num = rows[rowNum].replace(_reRow,"").length;
-	if(rowNum == 0) l.col += num;
+	if(rowNum === 0) l.col += num;
 	else l.col = num;
 	
+	if(arguments.length === 0) return;
+	
 	return {
 		line: l.row + l.inBuffer + 1,
 		col: l.col + (chunk ? 0: 1)
@@ -358,9 +357,7 @@ var validateHandler = function(handler){
 };
 
 //Writes parsed elements out to the handler
-Parser.prototype.writeHandler = function(forceFlush){
-	if(this._tagStack.length && !forceFlush)
-		return;
+Parser.prototype.writeHandler = function(){
 	while (this._elements.length){
 		var element = this._elements.shift();
 		switch (element.type){
diff --git a/tests/23-template_script_tags.js b/tests/23-template_script_tags.js
index 6e256d2..24864fd 100644
--- a/tests/23-template_script_tags.js
+++ b/tests/23-template_script_tags.js
@@ -3,13 +3,13 @@ exports.options = {
 	  handler: {}
 	, parser: {}
 };
-exports.html = "<script type=\"text/template\"> <h1>Heading1</h1></script>";
+exports.html = "<script type=\"text/template\"><h1>Heading1</h1></script>";
 exports.expected = [ { raw: 'script type="text/template"',
     data: 'script type="text/template"',
     type: 'script',
     name: 'script',
     attribs: { type: 'text/template' },
     children: 
-     [ { raw: ' <h1>Heading1</h1>',
-         data: ' <h1>Heading1</h1>',
+     [ { raw: '<h1>Heading1</h1>',
+         data: '<h1>Heading1</h1>',
          type: 'text' } ] } ];
\ No newline at end of file

From dbc23014abe7f831db3691224def8f56e308e0c0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 5 Nov 2011 12:53:42 +0100
Subject: [PATCH 058/450] Made openTag & closeTag public methods

Just deleted the "_" in front of them
---
 lib/DefaultHandler.js | 10 +++++-----
 lib/EventedHandler.js |  8 ++++----
 lib/Parser.js         |  6 +++---
 3 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/lib/DefaultHandler.js b/lib/DefaultHandler.js
index 6f3b663..b42906f 100644
--- a/lib/DefaultHandler.js
+++ b/lib/DefaultHandler.js
@@ -5,7 +5,7 @@ function DefaultHandler(callback, options){
 	this._done = false;
 	this._inSpecialTag = false;
 	this._tagStack = [];
-	if(options){
+	if(options){ //otherwise, the prototype is used
 		this._options = options;
 		if(typeof this._options.verbose === "undefined")
 			this._options.verbose = true;
@@ -52,7 +52,7 @@ DefaultHandler.prototype._isEmptyTag = function(name) {
 	return this._options.enforceEmptyTags && emptyTags[name];
 };
 
-DefaultHandler.prototype._closeTag = function(name){
+DefaultHandler.prototype.closeTag = function(name){
 	//Ignore closing tags that obviously don't have an opening tag
 	if(!this._tagStack || this._isEmptyTag(name)) return;
 	
@@ -89,7 +89,7 @@ DefaultHandler.prototype._addDomElement = function(element){
 	}
 }
 
-DefaultHandler.prototype._openTag = function(element){
+DefaultHandler.prototype.openTag = function(element){
 	if(!this._options.verbose) delete element.data;
 	
 	this._addDomElement(element);
@@ -110,8 +110,8 @@ DefaultHandler.prototype.writeText = function(element){
 DefaultHandler.prototype.writeComment = DefaultHandler.prototype.writeDirective = DefaultHandler.prototype._addDomElement;
 
 DefaultHandler.prototype.writeTag = function(element) {
-	if(element.name.charAt(0) === "/") this._closeTag(element.name.substr(1));
-	else this._openTag(element);
+	if(element.name.charAt(0) === "/") this.closeTag(element.name.substr(1));
+	else this.openTag(element);
 };
 
 module.exports = DefaultHandler;
\ No newline at end of file
diff --git a/lib/EventedHandler.js b/lib/EventedHandler.js
index 428992f..1ed2b02 100644
--- a/lib/EventedHandler.js
+++ b/lib/EventedHandler.js
@@ -42,13 +42,13 @@ var openTagCB = function(openTag, attribute){
 //HTML Tags that shouldn't contain child nodes
 var emptyTags={area:true,base:true,basefont:true,br:true,col:true,frame:true,hr:true,img:true,input:true,isindex:true,link:true,meta:true,param:true,embed:true};
 
-EventedHandler.prototype._openTag = function(name, attrs){
+EventedHandler.prototype.openTag = function(name, attrs){
 	this.onopentag(name, attrs);
 	if(emptyTags[name]) this.onclosetag(name);
 	else this._stack.push(name);
 };
 
-EventedHandler.prototype._closeTag = function(name){
+EventedHandler.prototype.closeTag = function(name){
 	if(!emptyTags[name] && this._stack){
 		var i = this._stack.length-1;
 		while(i !== -1 && this._stack[i--] !== name){};
@@ -64,9 +64,9 @@ EventedHandler.prototype._closeTag = function(name){
 
 EventedHandler.prototype.writeTag = function(element){
 	if(element.name.charAt(0) === "/")
-		this._closeTag(element.name.substr(1));
+		this.closeTag(element.name.substr(1));
 	else
-		this._openTag(element.name, element.attribs || {});
+		this.openTag(element.name, element.attribs || {});
 };
 
 module.exports = EventedHandler;
\ No newline at end of file
diff --git a/lib/Parser.js b/lib/Parser.js
index 7566fca..53a5cd7 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -286,14 +286,14 @@ Parser.prototype.parseTags = function(){
 				//case ElementType.Style:
 				//case ElementType.Script:
 				default:
-					if(elementName[0] === "/") this._handler._closeTag(elementName.substr(1));
-					else this._handler._openTag(elementName, parseAttributes(elementData));
+					if(elementName[0] === "/") this._handler.closeTag(elementName.substr(1));
+					else this._handler.openTag(elementName, parseAttributes(elementData));
 			}
 			*/
 
 			//If tag self-terminates, add an explicit, separate closing tag
 			if(tagTypes[elementType] && elementData.substr(-1) === "/"){
-				//this._handler._closeTag(elementName);
+				//this._handler.closeTag(elementName);
 				this._elements.push({
 					raw: elementName = "/" + elementName,
 					data: elementName, name: elementName,

From 000bd0231095924ef39a659a0e032e989cf6ab9a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 5 Nov 2011 13:00:54 +0100
Subject: [PATCH 059/450] Improved _addDomElement

---
 lib/DefaultHandler.js | 21 +++++++++------------
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/lib/DefaultHandler.js b/lib/DefaultHandler.js
index b42906f..6fc2bcd 100644
--- a/lib/DefaultHandler.js
+++ b/lib/DefaultHandler.js
@@ -65,25 +65,22 @@ DefaultHandler.prototype.closeTag = function(name){
 DefaultHandler.prototype._addDomElement = function(element){
 	if(!this._options.verbose) delete element.raw;
 	
-	var lastTag = this._tagStack[this._tagStack.length-1], tmp;
+	var lastTag = this._tagStack[this._tagStack.length-1], lastChild;
 	if(!lastTag) this.dom.push(element);
 	else{ //There are parent elements
 		if(!lastTag.children){
 			lastTag.children = [element];
 			return;
 		}
-		tmp = lastTag.children[lastTag.children.length-1];
-		if(element.type === ElementType.Comment && tmp.type === ElementType.Comment){
-			tmp.data += element.data;
-			if(this._options.verbose) tmp.raw = tmp.data;
+		lastChild = lastTag.children[lastTag.children.length-1];
+		if(element.type === ElementType.Comment && lastChild.type === ElementType.Comment){
+			lastChild.data += element.data;
+			if(this._options.verbose) lastChild.raw = lastChild.data;
 		}
-		else if(this._inSpecialTag && element.type === ElementType.Text){
-			if(tmp.type !== ElementType.Text) lastTag.children.push(element);
-			else {
-				tmp.data += element.data;
-				if(this._options.verbose)
-					tmp.raw = tmp.data;
-			}
+		else if(this._inSpecialTag && element.type === ElementType.Text && lastChild.type === ElementType.Text){
+			lastChild.data += element.data;
+		    if(this._options.verbose)
+		    	lastChild.raw = lastChild.data;
 		}
 		else lastTag.children.push(element);
 	}

From 6f2032f5d6f608aab8966cb9418322f9d95c0f35 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 5 Nov 2011 13:02:22 +0100
Subject: [PATCH 060/450] Set feeds to xmlMode

because they are xml, so <script> & <style> tags shouldn't get a
special treatment
---
 tests/20-rss.js  | 2 +-
 tests/21-atom.js | 2 +-
 tests/24-rdf.js  | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/20-rss.js b/tests/20-rss.js
index 8179702..1e4af05 100644
--- a/tests/20-rss.js
+++ b/tests/20-rss.js
@@ -1,7 +1,7 @@
 exports.name = "RSS (2.0)";
 exports.options = {
 	  handler: {}
-	, parser: {}
+	, parser: {xmlMode:true}
 };
 exports.type = "rss";
 //http://cyber.law.harvard.edu/rss/examples/rss2sample.xml
diff --git a/tests/21-atom.js b/tests/21-atom.js
index f78fdd2..b067206 100644
--- a/tests/21-atom.js
+++ b/tests/21-atom.js
@@ -1,7 +1,7 @@
 exports.name = "Atom (1.0)";
 exports.options = {
 	  handler: {}
-	, parser: {}
+	, parser: {xmlMode:true}
 };
 exports.type = "rss";
 //http://en.wikipedia.org/wiki/Atom_%28standard%29
diff --git a/tests/24-rdf.js b/tests/24-rdf.js
index 2019d3a..bd4a06e 100644
--- a/tests/24-rdf.js
+++ b/tests/24-rdf.js
@@ -1,6 +1,6 @@
 exports.name = "RDF test";
 exports.type = "rss";
-exports.options = {handler: {}, parser: {}};
+exports.options = {handler: {}, parser: {xmlMode:true}};
 exports.html = '<?xml version="1.0" encoding="UTF-8"?>\n<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/" xmlns:ev="http://purl.org/rss/1.0/modules/event/" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:syn="http://purl.org/rss/1.0/modules/syndication/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:admin="http://webns.net/mvcb/">\n\t<channel rdf:about="http://sfbay.craigslist.org/ccc/">\n\t\t<title>craigslist | all community in SF bay area</title>\n\t\t<link>http://sfbay.craigslist.org/ccc/</link>\n\t\t<description/>\n\t\t<dc:language>en-us</dc:language>\n\t\t<dc:rights>Copyright 2011 craigslist, inc.</dc:rights>\n\t\t<dc:publisher>webmaster@craigslist.org</dc:publisher>\n\t\t<dc:creator>webmaster@craigslist.org</dc:creator>\n\t\t<dc:source>http://sfbay.craigslist.org/ccc//</dc:source>\n\t\t<dc:title>craigslist | all community in SF bay area</dc:title>\n\t\t<dc:type>Collection</dc:type>\n\t\t<syn:updateBase>2011-11-04T09:39:10-07:00</syn:updateBase>\n\t\t<syn:updateFrequency>4</syn:updateFrequency>\n\t\t<syn:updatePeriod>hourly</syn:updatePeriod>\n\t\t<items>\n\t\t\t<rdf:Seq>\n\t\t\t\t<rdf:li rdf:resource="http://sfbay.craigslist.org/sby/muc/2681301534.html"/>\n\t\t\t</rdf:Seq>\n\t\t</items>\n\t</channel>\n\t<item rdf:about="http://sfbay.craigslist.org/sby/muc/2681301534.html">\n\t\t<title><![CDATA[ Music Equipment Repair and Consignment ]]></title>\n\t\t<link>\nhttp://sfbay.craigslist.org/sby/muc/2681301534.html\n</link>\n\t\t<description><![CDATA[\nSan Jose Rock Shop offers musical instrument repair and consignment! (408) 215-2065<br> <br> We are pleased to announce our NEW LOCATION: 1199 N 5th st. San Jose, ca 95112. Please call ahead, by appointment only.<br> <br> Recently featured by Metro Newspaper in their 2011 Best of the Silicon Valley edition see it online here:<br> <a href="http://www.metroactive.com/best-of-silicon-valley/2011/music-nightlife/editor-picks.html" rel="nofollow">http://www.metroactive.com/best-of-silicon-valley/2011/music-nightlife/editor-picks.html</a><br> <br> Guitar Set up (acoustic and electronic) $40!<!-- END CLTAGS -->\n]]></description>\n\t\t<dc:date>2011-11-04T09:35:17-07:00</dc:date>\n\t\t<dc:language>en-us</dc:language>\n\t\t<dc:rights>Copyright 2011 craigslist, inc.</dc:rights>\n\t\t<dc:source>\nhttp://sfbay.craigslist.org/sby/muc/2681301534.html\n</dc:source>\n\t\t<dc:title><![CDATA[ Music Equipment Repair and Consignment ]]></dc:title>\n\t\t<dc:type>text</dc:type>\n\t\t<dcterms:issued>2011-11-04T09:35:17-07:00</dcterms:issued>\n\t</item>\n\t<item rdf:about="http://sfbay.craigslist.org/eby/rid/2685010755.html">\n\t\t<title><![CDATA[\nRide Offered - Oakland/BART to LA/SFV - TODAY 3PM 11/04 (oakland north / temescal)\n]]></title>\n\t\t<link>\nhttp://sfbay.craigslist.org/eby/rid/2685010755.html\n</link>\n\t\t<description><![CDATA[\nIm offering a lift for up to two people from Oakland (or near any BART station in the East Bay/580/880 Corridor, or San Jose/Morgan Hill, Gilroy) to the San Fernando Valley / Los Angeles area. Specifically, Im leaving from Oakland between 2:30 and 3:00pm (this is flexible, but if I leave too late my girlfriend will kill me), and heading to Woodland Hills via the 580, I-5, 405, and 101.<!-- END CLTAGS -->\n]]></description>\n\t\t<dc:date>2011-11-04T09:34:54-07:00</dc:date>\n\t\t<dc:language>en-us</dc:language>\n\t\t<dc:rights>Copyright 2011 craigslist, inc.</dc:rights>\n\t\t<dc:source>\nhttp://sfbay.craigslist.org/eby/rid/2685010755.html\n</dc:source>\n\t\t<dc:title><![CDATA[\nRide Offered - Oakland/BART to LA/SFV - TODAY 3PM 11/04 (oakland north / temescal)\n]]></dc:title>\n\t\t<dc:type>text</dc:type>\n\t\t<dcterms:issued>2011-11-04T09:34:54-07:00</dcterms:issued>\n\t</item>\n</rdf:RDF>';
 exports.expected = { type: 'rdf:RDF',
   id: '',

From 183ca0ca6d7efdcb36d1f9fd4a1cf52399258981 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 5 Nov 2011 17:58:45 +0100
Subject: [PATCH 061/450] minor changes

---
 lib/DefaultHandler.js           | 13 +++++--------
 lib/EventedHandler.js           |  5 ++++-
 tests/08-extra_spaces_in_tag.js |  2 +-
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/lib/DefaultHandler.js b/lib/DefaultHandler.js
index 6fc2bcd..d4f6b93 100644
--- a/lib/DefaultHandler.js
+++ b/lib/DefaultHandler.js
@@ -29,10 +29,7 @@ var emptyTags={area:true,base:true,basefont:true,br:true,col:true,frame:true,hr:
 //Methods//
 //Resets the handler back to starting state
 DefaultHandler.prototype.reset = function() {
-	this.dom = [];
-	this._done = false;
-	this._inSpecialTag = false;
-	this._tagStack = [];
+	DefaultHandler.call(this, this._callback);
 };
 //Signals the handler that parsing is done
 DefaultHandler.prototype.done = function() {
@@ -89,11 +86,11 @@ DefaultHandler.prototype._addDomElement = function(element){
 DefaultHandler.prototype.openTag = function(element){
 	if(!this._options.verbose) delete element.data;
 	
-	this._addDomElement(element);
+	var isSpecial = element.type === ElementType.Script || element.type === ElementType.Style;
 	
-	if(element.type === ElementType.Script || element.type === ElementType.Style){
-		this._inSpecialTag = true;
-	}
+	if(isSpecial) this._inSpecialTag = true;
+	
+	this._addDomElement(element);
 	
 	//Don't add tags to the tag stack that can't have children
 	if(!this._isEmptyTag(element.name)) this._tagStack.push(element);
diff --git a/lib/EventedHandler.js b/lib/EventedHandler.js
index 1ed2b02..d8bc7dd 100644
--- a/lib/EventedHandler.js
+++ b/lib/EventedHandler.js
@@ -42,7 +42,10 @@ var openTagCB = function(openTag, attribute){
 //HTML Tags that shouldn't contain child nodes
 var emptyTags={area:true,base:true,basefont:true,br:true,col:true,frame:true,hr:true,img:true,input:true,isindex:true,link:true,meta:true,param:true,embed:true};
 
-EventedHandler.prototype.openTag = function(name, attrs){
+EventedHandler.prototype.openTag = function(name, attrs /*, type*/){
+	if(arguments.length === 1){ //TODO
+		attrs = name.attribs; name = name.name;
+	}
 	this.onopentag(name, attrs);
 	if(emptyTags[name]) this.onclosetag(name);
 	else this._stack.push(name);
diff --git a/tests/08-extra_spaces_in_tag.js b/tests/08-extra_spaces_in_tag.js
index 5c85bed..fca4335 100644
--- a/tests/08-extra_spaces_in_tag.js
+++ b/tests/08-extra_spaces_in_tag.js
@@ -3,7 +3,7 @@ exports.options = {
 	  handler: {}
 	, parser: {}
 };
-exports.html = "<\n font	\n size='14' \n>the text<\n /	\nfont	 \n>";
+exports.html = "<\n font\t\n size='14' \n>the text<\n /	\nfont	 \n>";
 exports.expected =
 [ { raw: '\n font	\n size=\'14\' \n'
   , data: 'font	\n size=\'14\''

From 46066802912035b9afc5a13be4c301978e195698 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 5 Nov 2011 18:03:24 +0100
Subject: [PATCH 062/450] Removed _elements, removed support for writeTag
 inside handlers

Use openTag and closeTag instead
---
 lib/DefaultHandler.js |   5 -
 lib/EventedHandler.js |   7 -
 lib/Parser.js         | 300 +++++++++++++++++-------------------------
 3 files changed, 118 insertions(+), 194 deletions(-)

diff --git a/lib/DefaultHandler.js b/lib/DefaultHandler.js
index d4f6b93..94f7caf 100644
--- a/lib/DefaultHandler.js
+++ b/lib/DefaultHandler.js
@@ -103,9 +103,4 @@ DefaultHandler.prototype.writeText = function(element){
 
 DefaultHandler.prototype.writeComment = DefaultHandler.prototype.writeDirective = DefaultHandler.prototype._addDomElement;
 
-DefaultHandler.prototype.writeTag = function(element) {
-	if(element.name.charAt(0) === "/") this.closeTag(element.name.substr(1));
-	else this.openTag(element);
-};
-
 module.exports = DefaultHandler;
\ No newline at end of file
diff --git a/lib/EventedHandler.js b/lib/EventedHandler.js
index d8bc7dd..f58973f 100644
--- a/lib/EventedHandler.js
+++ b/lib/EventedHandler.js
@@ -65,11 +65,4 @@ EventedHandler.prototype.closeTag = function(name){
 	}
 };
 
-EventedHandler.prototype.writeTag = function(element){
-	if(element.name.charAt(0) === "/")
-		this.closeTag(element.name.substr(1));
-	else
-		this.openTag(element.name, element.attribs || {});
-};
-
 module.exports = EventedHandler;
\ No newline at end of file
diff --git a/lib/Parser.js b/lib/Parser.js
index 53a5cd7..9e134ea 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -1,10 +1,7 @@
 var ElementType = require("./ElementType.js");
 
 function Parser(handler, options){
-	this._options = options || {
-		includeLocation: false, //Do not track element position in document by default
-		xmlMode: false //Special behaviour for script/style tags by default
-	};
+	if(options) this._options = options;
 
 	validateHandler(handler);
 	this._handler = handler;
@@ -13,13 +10,11 @@ function Parser(handler, options){
 	this._prevTagSep = "";
 	this._contentFlags = 0;
 	this._done = false;
-	this._elements = [];
 	this._current = 0;
 	this._location = {
 		row: 0,
 		col: 0,
-		charOffset: 0,
-		inBuffer: 0
+		charOffset: 0
 	};
 	this._parseState = ElementType.Text;
 }
@@ -38,6 +33,11 @@ tagTypes[ ElementType.Script ] = true;
 tagTypes[ ElementType.Style ] = true;
 tagTypes[ ElementType.Tag ] = true;
 
+Parser.prototype._options = {
+	includeLocation: false, //Do not track element position in document by default
+	xmlMode: false //Special behaviour for script/style tags by default
+};
+
 //**Public**//
 //Methods//
 //Parses a complete HTML and pushes it to the handler
@@ -61,40 +61,27 @@ Parser.prototype.done = function(){
 
 	//Push any unparsed text into a final element in the element list
 	if(this._buffer){
-		var rawData = this._buffer;
-		this._buffer = "";
-		var element = {
-			raw: rawData,
-			data: this._parseState === ElementType.Text ? rawData : rawData.trim(),
-			type: this._parseState
-		};
-		if(tagTypes[this._parseState]){
-			element.name = parseTagName(element.data);
-			var attrs = parseAttributes(element.data);
-			if(attrs) element.attribs = attrs;
+		var data = this._buffer;
+		if(this._parseState === ElementType.Tag){
+			data = data.trim();
+			var name = parseTagName(data);
+			if(name.charAt(0) === "/") this._handler.closeTag(name.substr(1));
+			else this._handler.openTag({
+				name: name, raw: data, data: data, attribs: parseAttributes(data)
+			});
 		}
-		this._elements.push(element);
+		else this._handler.writeText({
+				raw: data, data: data, type: ElementType.Text
+			});
+		
+		this._buffer = "";
 	}
-
-	this.writeHandler();
 	this._handler.done();
 };
 
 //Resets the parser to a blank state, ready to parse a new HTML document
 Parser.prototype.reset = function(){
-	this._buffer = "";
-	this._prevTagSep = "";
-	this._done = false;
-	this._current = 0;
-	this._contentFlags = 0;
-	this._location = {
-		 row: 0
-		, col: 0
-		, charOffset: 0
-		, inBuffer: 0
-	};
-	this._parseState = ElementType.Text;
-	this._elements = [];
+	Parser.call(this, this._handler);
 	this._handler.reset();
 };
 
@@ -109,12 +96,12 @@ var parseAttributes = function(data){
 	var match, attrs = {};
 	
 	while(match = _reAttrib.exec(attribRaw)){
-    	if(match[1])		attrs[match[1]] = match[2];
-    	else if(match[3])	attrs[match[3]] = match[4];
-    	else if(match[5])	attrs[match[5]] = match[6];
-    	else if(match[7])	attrs[match[7]] = match[7];
-    }
-    
+		if(match[1])		attrs[match[1]] = match[2];
+		else if(match[3])	attrs[match[3]] = match[4];
+		else if(match[5])	attrs[match[5]] = match[6];
+		else if(match[7])	attrs[match[7]] = match[7];
+	}
+	
 	return attrs;
 };
 
@@ -127,16 +114,17 @@ var parseTagName = function(data){
 
 //Special tags that are threated differently
 var SpecialTags = {};
+SpecialTags[ElementType.Tag] = 0;
 SpecialTags[ElementType.Style]  = 1; //2^0
 SpecialTags[ElementType.Script] = 2; //2^1
-SpecialTags["w"] = 4; //2^2 - if set, append prev tag sep to data
+SpecialTags.w = 4; //2^2 - if set, append prev tag sep to data
 SpecialTags[ElementType.Comment] = 8; //2^8
 
 //Parses through HTML text and returns an array of found elements
 Parser.prototype.parseTags = function(){
 	var buffer = this._buffer;
 
-	var next, tagSep, rawData, element, elementName, prevElement, elementType, elementData, attributes, includeName = false;
+	var next, tagSep, rawData, element, elementName, prevElement, elementType, elementData, attributes;
 	
 	var opening = buffer.indexOf("<"), closing = buffer.indexOf(">");
 
@@ -153,6 +141,7 @@ Parser.prototype.parseTags = function(){
 		}
 		rawData = buffer.substring(this._current, next); //The next chunk of data to parse
 		elementType = this._parseState;
+		this._parseState = (tagSep === "<") ? ElementType.Tag : ElementType.Text;
 		
 		if(elementType === ElementType.Tag){
 			elementData = rawData.trim();
@@ -163,28 +152,22 @@ Parser.prototype.parseTags = function(){
 			elementName = "";
 		}
 
-
 		//This section inspects the current tag stack and modifies the current
 		//element if we're actually parsing a special area (script/comment/style tag)
 		if(this._contentFlags === 0){ /*do nothing*/ }
-		else if(this._contentFlags >= SpecialTags[ElementType.Comment]){ //We're currently in a comment tag
-			elementType = ElementType.Comment; //Change the current element's type to a comment
-
-    		if(tagSep === ">" && rawData.substr(-2) === "--"){ //comment ends
-    			this._contentFlags -= SpecialTags[ElementType.Comment];
-    			elementData = rawData = rawData.slice(0, -2);
-    		}
-    		else elementData = rawData += tagSep;
-    		this._prevTagSep = tagSep;
-    	}
-    	//if it's a closing tag, remove the flag
-    	else if(this._contentFlags >= SpecialTags[ElementType.Script] && elementName === "/script"){
-    		this._contentFlags %= SpecialTags["w"]; //remove the written flag
-			this._contentFlags -= SpecialTags[ElementType.Script];
+		else if(this._contentFlags >= SpecialTags[ElementType.Comment]){
+			//We're currently in a comment tag
+			this._processComment(rawData, tagSep);
+			elementType = null;
+		}
+		//if it's a closing tag, remove the flag
+		else if(this._contentFlags >= SpecialTags[ElementType.Script] && elementName === "/script"){
+			//remove the script flag (also removes the written flag)
+			this._contentFlags %= SpecialTags[ElementType.Script];
 		}
 		else if(this._contentFlags >= SpecialTags[ElementType.Style] && elementName === "/style"){
-			this._contentFlags %= SpecialTags["w"]; //remove the written flag
-			this._contentFlags -= SpecialTags[ElementType.Style];
+			//remove the style flag (also removes the written flag)
+			this._contentFlags %= SpecialTags[ElementType.Style];
 		}
 		//special behaviour for script & style tags
 		//Make sure we're not in a comment
@@ -192,139 +175,108 @@ Parser.prototype.parseTags = function(){
 			//All data from here to style close is now a text element
 			elementType = ElementType.Text;
 			//If the previous element is text, append the last tag sep to element
-			if(this._contentFlags >= SpecialTags["w"]){
-			    elementData = rawData = this._prevTagSep + rawData;
+			if(this._contentFlags >= SpecialTags.w){
+				elementData = rawData = this._prevTagSep + rawData;
 			}
 			else{ //The previous element was not text
-			    this._contentFlags += SpecialTags["w"];
-			    elementData = rawData;
+				this._contentFlags += SpecialTags.w;
+				elementData = rawData;
 			}
 			this._prevTagSep = tagSep;
 		}
 
-
-
 		//Processing of non-special tags
 		if(elementType === ElementType.Tag){
-			if(rawData.substring(0, 3) === "!--"){ //This tag is really comment
-				elementType = ElementType.Comment;
-				this._contentFlags %= SpecialTags["w"]; //remove the written flag
-				//Check if the comment is terminated in the current element
-				if(tagSep === ">" && rawData.substr(-2) === "--")
-					elementData = rawData = rawData.slice(3, -2);
-				else { //It's not so push the comment onto the tag stack
-					elementData = rawData = rawData.substr(3) + tagSep;
-					this._contentFlags += SpecialTags[ElementType.Comment];
-					this._prevTagSep = tagSep;
-				}
-			}
-			else {
-				includeName = true;
-
-				if(rawData.charAt(0) === "!" || rawData.charAt(0) === "?"){
-					elementType = ElementType.Directive;
-					//TODO: what about CDATA?
-				}
-				else if(elementName.charAt(0) === "/"){
-					elementData = elementName;
-					if(elementName === "/script") elementType = ElementType.Script;
-					else if(elementName === "/style") elementType = ElementType.Style;
-				}
-				else if(elementName === "script"){
-					elementType = ElementType.Script;
-					//Special tag, push onto the tag stack if not terminated
-					if(elementData.substr(-1) !== "/"){
-						this._contentFlags += SpecialTags[ElementType.Script];
-						this._prevTagSep = tagSep;
-					}
-				}
-				else if(elementName === "style"){
-					elementType = ElementType.Style;
-					//Special tag, push onto the tag stack if not terminated
-					if(elementData.substr(-1) !== "/"){
-						this._contentFlags += SpecialTags[ElementType.Style];
-						this._prevTagSep = tagSep;
-					}
-				}
+			if(rawData.substring(0, 3) === "!--"){ //This tag is a comment
+				this._contentFlags += SpecialTags[ElementType.Comment];
+				this._processComment(rawData.substr(3), tagSep);
 			}
+			else if(rawData.charAt(0) === "!" || rawData.charAt(0) === "?"){
+				//ElementType.Directive
+				//TODO: what about CDATA?
+				element = {raw: elementData, data: elementData, type: ElementType.Directive, name: elementName};
+				if(this._options.includeLocation) element.location = this.getLocation(false);
+				this._handler.writeDirective(element);
+			} else
+				this._processTag(elementName, elementData, tagSep, rawData);
 		}
-
-		//Add all tags and non-empty text elements to the element list
-		if(rawData !== "" || elementType !== ElementType.Text){
-			element = {
-				raw: rawData,
-				data: elementData,
-				type: elementType
-			};
-			
-			if(includeName){
-				element.name = elementName;
-				includeName = false;
-			}
-			if(this._options.includeLocation) element.location = this.getLocation(elementType === ElementType.Tag);
-			
-			//Only parse attributes for tags
-			if(tagTypes[element.type]){
-				attributes = parseAttributes(elementData);
-				if(attributes) element.attribs = attributes;
-			}
-			
-			this._elements.push(element);
-			
-			/*
-			switch(elementType){
-				case ElementType.Text:
-					this._handler.writeText(element);
-					break;
-				case ElementType.Comment:
-					this._handler.writeComment(element);
-					break;
-				case ElementType.Directive:
-					this._handler.writeDirective(element);
-					break;
-				//case ElementType.Tag:
-				//case ElementType.Style:
-				//case ElementType.Script:
-				default:
-					if(elementName[0] === "/") this._handler.closeTag(elementName.substr(1));
-					else this._handler.openTag(elementName, parseAttributes(elementData));
-			}
-			*/
-
-			//If tag self-terminates, add an explicit, separate closing tag
-			if(tagTypes[elementType] && elementData.substr(-1) === "/"){
-				//this._handler.closeTag(elementName);
-				this._elements.push({
-					raw: elementName = "/" + elementName,
-					data: elementName, name: elementName,
-					type: elementType
-				});
-			}
+		else if(elementType === ElementType.Text && rawData !== ""){
+			element = {raw: rawData, data: elementData, type: ElementType.Text};
+			if(this._options.includeLocation) element.location = this.getLocation(false);
+			this._handler.writeText(element);
 		}
-		this._parseState = (tagSep === "<") ? ElementType.Tag : ElementType.Text;
+		
 		this._current = next + 1;
 	}
 
 	if(this._options.includeLocation){
 		this.getLocation();
-		this._location.row += this._location.inBuffer;
-		this._location.inBuffer = 0;
 		this._location.charOffset = 0;
 	}
 	this._buffer = buffer.substring(this._current);
 	this._current = 0;
+};
 
-	this.writeHandler();
+Parser.prototype._processComment = function(rawData, tagSep){
+	if(tagSep === ">" && rawData.substr(-2) === "--"){ //comment ends
+		//remove the written flag (also removes the comment flag)
+		this._contentFlags %= SpecialTags.w;
+		rawData = rawData.slice(0, -2);
+	}
+	else rawData += tagSep;
+	this._prevTagSep = tagSep;
+	
+	var element = {
+		raw: rawData,
+		data: rawData,
+		type: ElementType.Comment
+	};
+	
+	if(this._options.includeLocation) element.location = this.getLocation(false);
+	
+	this._handler.writeComment(element);
+};
+
+Parser.prototype._processTag = function(name, data, tagSep, raw){
+	if(name.charAt(0) === "/"){
+		this._handler.closeTag(name.substring(1));
+		return;
+	}
+	
+	var type = ElementType.Tag;
+	if(this._options.xmlMode){ /*do nothing*/ }
+	else if(name === "script") type = ElementType.Script;
+	else if(name === "style")  type = ElementType.Style;
+	
+	var element = {
+		raw: raw, data: data, type: type, name: name
+	};
+	
+	var attribs = parseAttributes(data);
+	if(attribs) element.attribs = attribs;
+	
+	if(this._options.includeLocation)
+		element.location = this.getLocation(type === ElementType.Tag);
+	
+	this._handler.openTag(element);
+	
+	//If tag self-terminates, add an explicit, separate closing tag
+	if(data.substr(-1) === "/"){
+		this._handler.closeTag(name);
+	} else {
+		this._contentFlags += SpecialTags[type];
+		this._prevTagSep = tagSep;
+	}
 };
 
 Parser.prototype.getLocation = function(startTag){
 	var c, end, chunk,
 		l = this._location;
 	if(startTag){
-		end = this._current-1,
+		end = this._current - 1;
 		chunk = l.charOffset === 0 && end === -1;
 	} else {
-		end = this._current,
+		end = this._current;
 		chunk = false;
 	}
 	
@@ -332,7 +284,7 @@ Parser.prototype.getLocation = function(startTag){
 		rowNum = rows.length - 1;
 	
 	l.charOffset = end;
-	l.inBuffer += rowNum;
+	l.row += rowNum;
 	
 	var num = rows[rowNum].replace(_reRow,"").length;
 	if(rowNum === 0) l.col += num;
@@ -341,7 +293,7 @@ Parser.prototype.getLocation = function(startTag){
 	if(arguments.length === 0) return;
 	
 	return {
-		line: l.row + l.inBuffer + 1,
+		line: l.row + 1,
 		col: l.col + (chunk ? 0: 1)
 	};
 };
@@ -350,28 +302,12 @@ Parser.prototype.getLocation = function(startTag){
 var validateHandler = function(handler){
 	if(typeof handler !== "object")
 		throw Error("Handler is not an object");
-	["reset", "done", "writeTag", "writeText", "writeComment", "writeDirective"].forEach(function(name){
+	["reset", "done", "openTag", "closeTag", "writeText", "writeComment", "writeDirective"].forEach(function(name){
 		if(typeof handler[name] !== "function")
 			throw Error("Handler method '" + name + "' is invalid");
 	});
 };
 
-//Writes parsed elements out to the handler
-Parser.prototype.writeHandler = function(){
-	while (this._elements.length){
-		var element = this._elements.shift();
-		switch (element.type){
-			case ElementType.Comment: this._handler.writeComment(element);
-				break;
-			case ElementType.Directive: this._handler.writeDirective(element);
-				break;
-			case ElementType.Text: this._handler.writeText(element);
-				break;
-			default: this._handler.writeTag(element);
-		}
-	}
-};
-
 Parser.prototype.handleError = function(error){
 	if(typeof this._handler.error === "function")
 		this._handler.error(error);

From fb065ce1ee386fe9dec3ba2dfb60583f349900ef Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 5 Nov 2011 18:32:27 +0100
Subject: [PATCH 063/450] RssHandler now calls a given callback

---
 lib/RssHandler.js | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/lib/RssHandler.js b/lib/RssHandler.js
index f9b63d4..4cfd933 100644
--- a/lib/RssHandler.js
+++ b/lib/RssHandler.js
@@ -3,14 +3,12 @@ var DefaultHandler = require("./DefaultHandler.js"),
 	inherits = require("util").inherits;
 
 //TODO: make this a trully streamable handler
-function RssHandler (callback) {
+function RssHandler(callback){
 	DefaultHandler.call(this, callback, { ignoreWhitespace: true, verbose: false, enforceEmptyTags: false });
 }
 
 inherits(RssHandler, DefaultHandler);
 
-RssHandler.prototype.done = DefaultHandler.prototype.done;
-
 function getElements(what, where, one, recurse){
 	if(one) return DomUtils.getElementsByTagName(what, where, recurse, 1)[0];
 	else	return DomUtils.getElementsByTagName(what, where, recurse);
@@ -105,7 +103,8 @@ RssHandler.prototype.done = function() {
 		}
 		this.dom = feed;
 	}
-	this.done;
+	this._done = true;
+	this.handleCallback(null, feed);
 };
 
 module.exports = RssHandler;
\ No newline at end of file

From 52cebcf6a945a734487e173c27f827c93bb7c6c1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 5 Nov 2011 18:51:26 +0100
Subject: [PATCH 064/450] updated readme

---
 README.md | 242 ++++++++++++++++++++++++++++++------------------------
 1 file changed, 136 insertions(+), 106 deletions(-)

diff --git a/README.md b/README.md
index df19abc..dc94dc9 100644
--- a/README.md
+++ b/README.md
@@ -1,13 +1,19 @@
-#NodeHtmlParser
+#htmlparser2
 A forgiving HTML/XML/RSS parser written in JS for NodeJS. The parser can handle streams (chunked data) and supports custom handlers for writing custom DOMs/output.
 
 ##Installing
-	npm install htmlparser
+	npm install htmlparser2
 
 ##Running Tests
 	node tests/00-runtests.js
 
+##How is this different from (node-htmlparser)[https://github.com/tautologistics/node-htmlparser]?
+This is a fork of the project above. The main difference is that this is just intended to be used with node. Besides, the code is much better structured, has less duplications and is ~20% faster than the original (messured using npm module `ben` using RssHandler with the TechCrunch feed, exact result: 54ms vs. 68ms). 
+
+Besides, it features an additional handler that provides the interface of (sax.js)[https://github.com/isaacs/sax-js] (written for my readability port (readabilitySAX)[https://github.com/fb55/readabilitysax] & performs there great). I also fixed a couple of bugs & included some pull requests for the original project (eg. RDF feed support).
+
 ##Usage
+
 	var htmlparser = require("htmlparser");
 	var rawHtml = "Xyz <script language= javascript>var foo = '<<bar>>';< /  script><!--<!-- Waah! -- -->";
 	var handler = new htmlparser.DefaultHandler(function (error, dom) {
@@ -22,24 +28,29 @@ A forgiving HTML/XML/RSS parser written in JS for NodeJS. The parser can handle
 
 
 ##Example output
-	[ { raw: 'Xyz ', data: 'Xyz ', type: 'text' }
-	, { raw: 'script language= javascript'
-	  , data: 'script language= javascript'
-	  , type: 'script'
-	  , name: 'script'
-	  , attribs: { language: 'javascript' }
-	  , children: 
-	     [ { raw: 'var foo = \'<bar>\';<'
-	       , data: 'var foo = \'<bar>\';<'
-	       , type: 'text'
-	       }
-	     ]
-	  }
-	, { raw: '<!-- Waah! -- '
-	  , data: '<!-- Waah! -- '
-	  , type: 'comment'
-	  }
-	]
+
+	[{
+		raw: 'Xyz ',
+		data: 'Xyz ',
+		type: 'text'
+	}, {
+		raw: 'script language= javascript',
+		data: 'script language= javascript',
+		type: 'script',
+		name: 'script',
+		attribs: {
+			language: 'javascript'
+		},
+		children: [{
+			raw: 'var foo = \'<bar>\';<',
+			data: 'var foo = \'<bar>\';<',
+			type: 'text'
+		}]
+	}, {
+		raw: '<!-- Waah! -- ',
+		data: '<!-- Waah! -- ',
+		type: 'comment'
+	}]
 
 ##Streaming To Parser
 	while (...) {
@@ -56,121 +67,140 @@ A forgiving HTML/XML/RSS parser written in JS for NodeJS. The parser can handle
 ##DefaultHandler Options
 
 ###Usage
-	var handler = new htmlparser.DefaultHandler(
-		  function (error) { ... }
-		, { verbose: false, ignoreWhitespace: true }
-		);
+	var handler = new htmlparser.DefaultHandler(function (error) {...}, {
+		verbose: false,
+		ignoreWhitespace: true
+	});
 	
 ###Option: ignoreWhitespace
-Indicates whether the DOM should exclude text nodes that consists solely of whitespace. The default value is "false".
+Indicates whether the DOM should exclude text nodes that consists solely of whitespace. The default value is "false". 
 
-####Example: true
-The following HTML:
+The following HTML will be used:
 
 	<font>
 		<br>this is the text
 	<font>
 
-becomes:
-
-	[ { raw: 'font'
-	  , data: 'font'
-	  , type: 'tag'
-	  , name: 'font'
-	  , children: 
-	     [ { raw: 'br', data: 'br', type: 'tag', name: 'br' }
-	     , { raw: 'this is the text\n'
-	       , data: 'this is the text\n'
-	       , type: 'text'
-	       }
-	     , { raw: 'font', data: 'font', type: 'tag', name: 'font' }
-	     ]
-	  }
-	]
+####Example: true
 
-####Example: false
-The following HTML:
+	[{
+		raw: 'font',
+		data: 'font',
+		type: 'tag',
+		name: 'font',
+		children: [{
+			raw: 'br',
+			data: 'br',
+			type: 'tag',
+			name: 'br'
+		}, {
+			raw: 'this is the text\n',
+			data: 'this is the text\n',
+			type: 'text'
+		}, {
+			raw: 'font',
+			data: 'font',
+			type: 'tag',
+			name: 'font'
+		}]
+	}]
 
-	<font>
-		<br>this is the text
-	<font>
+####Example: false
 
-becomes:
-
-	[ { raw: 'font'
-	  , data: 'font'
-	  , type: 'tag'
-	  , name: 'font'
-	  , children: 
-	     [ { raw: '\n\t', data: '\n\t', type: 'text' }
-	     , { raw: 'br', data: 'br', type: 'tag', name: 'br' }
-	     , { raw: 'this is the text\n'
-	       , data: 'this is the text\n'
-	       , type: 'text'
-	       }
-	     , { raw: 'font', data: 'font', type: 'tag', name: 'font' }
-	     ]
-	  }
-	]
+	[{
+		raw: 'font',
+		data: 'font',
+		type: 'tag',
+		name: 'font',
+		children: [{
+			raw: '\n\t',
+			data: '\n\t',
+			type: 'text'
+		}, {
+			raw: 'br',
+			data: 'br',
+			type: 'tag',
+			name: 'br'
+		}, {
+			raw: 'this is the text\n',
+			data: 'this is the text\n',
+			type: 'text'
+		}, {
+			raw: 'font',
+			data: 'font',
+			type: 'tag',
+			name: 'font'
+		}]
+	}]
 
 ###Option: verbose
-Indicates whether to include extra information on each node in the DOM. This information consists of the "raw" attribute (original, unparsed text found between "<" and ">") and the "data" attribute on "tag", "script", and "comment" nodes. The default value is "true". 
+Indicates whether to include extra information on each node in the DOM. This information consists of the "raw" attribute (original, unparsed text found between "<" and ">") and the "data" attribute on "tag", "script", and "comment" nodes. The default value is "true".
 
-####Example: true
-The following HTML:
+The following HTML is used:
 
 	<a href="test.html">xxx</a>
 
-becomes:
+####Example: true
 
-	[ { raw: 'a href="test.html"'
-	  , data: 'a href="test.html"'
-	  , type: 'tag'
-	  , name: 'a'
-	  , attribs: { href: 'test.html' }
-	  , children: [ { raw: 'xxx', data: 'xxx', type: 'text' } ]
-	  }
-	]
+	[{
+		raw: 'a href="test.html"',
+		data: 'a href="test.html"',
+		type: 'tag',
+		name: 'a',
+		attribs: {
+			href: 'test.html'
+		},
+		children: [{
+			raw: 'xxx',
+			data: 'xxx',
+			type: 'text'
+		}]
+	}]
 
 ####Example: false
-The following HTML:
-
-	<a href="test.html">xxx</a>
-
-becomes:
 
-	[ { type: 'tag'
-	  , name: 'a'
-	  , attribs: { href: 'test.html' }
-	  , children: [ { data: 'xxx', type: 'text' } ]
-	  }
-	]
+	[{
+		type: 'tag',
+		name: 'a',
+		attribs: {
+			href: 'test.html'
+		},
+		children: [{
+			data: 'xxx',
+			type: 'text'
+		}]
+	}]
 
 ###Option: enforceEmptyTags
 Indicates whether the DOM should prevent children on tags marked as empty in the HTML spec. Typically this should be set to "true" HTML parsing and "false" for XML parsing. The default value is "true".
 
-####Example: true
-The following HTML:
+The following HTML is used:
 
 	<link>text</link>
 
-becomes:
+####Example: true
 
-	[ { raw: 'link', data: 'link', type: 'tag', name: 'link' }
-	, { raw: 'text', data: 'text', type: 'text' }
-	]
+	[{
+		raw: 'link',
+		data: 'link',
+		type: 'tag',
+		name: 'link'
+	}, {
+		raw: 'text',
+		data: 'text',
+		type: 'text'
+	}]
 
 ####Example: false
-The following HTML:
-
-	<link>text</link>
-
-becomes:
 
-	[ { raw: 'link'
-	  , data: 'link'
-	  , type: 'tag'
-	  , name: 'link'
-	  , children: [ { raw: 'text', data: 'text', type: 'text' } ]
-	  }
-	]
\ No newline at end of file
+	[{
+		raw: 'link',
+		data: 'link',
+		type: 'tag',
+		name: 'link',
+		children: [{
+			raw: 'text',
+			data: 'text',
+			type: 'text'
+		}]
+	}]
\ No newline at end of file

From 93e4a7d361a4183f33a5c66bc89dd461a460d90d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 5 Nov 2011 18:53:45 +0100
Subject: [PATCH 065/450] fixed links in readme

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index dc94dc9..b842466 100644
--- a/README.md
+++ b/README.md
@@ -7,10 +7,10 @@ A forgiving HTML/XML/RSS parser written in JS for NodeJS. The parser can handle
 ##Running Tests
 	node tests/00-runtests.js
 
-##How is this different from (node-htmlparser)[https://github.com/tautologistics/node-htmlparser]?
+##How is this different from [node-htmlparser](https://github.com/tautologistics/node-htmlparser)?
 This is a fork of the project above. The main difference is that this is just intended to be used with node. Besides, the code is much better structured, has less duplications and is ~20% faster than the original (messured using npm module `ben` using RssHandler with the TechCrunch feed, exact result: 54ms vs. 68ms). 
 
-Besides, it features an additional handler that provides the interface of (sax.js)[https://github.com/isaacs/sax-js] (written for my readability port (readabilitySAX)[https://github.com/fb55/readabilitysax] & performs there great). I also fixed a couple of bugs & included some pull requests for the original project (eg. RDF feed support).
+Besides, it features an additional handler that provides the interface of [sax.js](https://github.com/isaacs/sax-js) (written for my readability port [readabilitySAX](https://github.com/fb55/readabilitysax) & performs there great). I also fixed a couple of bugs & included some pull requests for the original project (eg. [RDF feed support](https://github.com/tautologistics/node-htmlparser/pull/35)).
 
 ##Usage
 

From 6fa3bfecc18677ae3d7264554c75ed15b81f3144 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 5 Nov 2011 19:01:20 +0100
Subject: [PATCH 066/450] Updated readme

---
 README.md | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index b842466..4ac1489 100644
--- a/README.md
+++ b/README.md
@@ -10,7 +10,7 @@ A forgiving HTML/XML/RSS parser written in JS for NodeJS. The parser can handle
 ##How is this different from [node-htmlparser](https://github.com/tautologistics/node-htmlparser)?
 This is a fork of the project above. The main difference is that this is just intended to be used with node. Besides, the code is much better structured, has less duplications and is ~20% faster than the original (messured using npm module `ben` using RssHandler with the TechCrunch feed, exact result: 54ms vs. 68ms). 
 
-Besides, it features an additional handler that provides the interface of [sax.js](https://github.com/isaacs/sax-js) (written for my readability port [readabilitySAX](https://github.com/fb55/readabilitysax) & performs there great). I also fixed a couple of bugs & included some pull requests for the original project (eg. [RDF feed support](https://github.com/tautologistics/node-htmlparser/pull/35)).
+Besides, it features an additional handler that provides the interface of [sax.js](https://github.com/isaacs/sax-js) (written for my readability port [readabilitySAX](https://github.com/fb55/readabilitysax)). I also fixed a couple of bugs & included some pull requests for the original project (eg. [RDF feed support](https://github.com/tautologistics/node-htmlparser/pull/35)).
 
 ##Usage
 
@@ -64,7 +64,18 @@ Besides, it features an additional handler that provides the interface of [sax.j
 		...
 	});
 
-##DefaultHandler Options
+##Parser options
+
+###Usage
+	var Parser = new htmlparser.Parser(handler, options);
+
+###Option: includeLocation
+Indicates whether the parser should include the location of a token as part of it. Default: false.
+
+###Option: xmlMode
+Indicates whether `<script>` and `<style>` tags should get special treatment. If false, their content will be text only. For RSS feeds and other XML content (not HTML), set this to true. Default: false.
+
+##DefaultHandler options
 
 ###Usage
 	var handler = new htmlparser.DefaultHandler(function (error) {...}, {

From d6f787b12e68f66d6a7741b4e63b2d06bf60ad34 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 5 Nov 2011 19:14:11 +0100
Subject: [PATCH 067/450] Minor changes

---
 lib/DefaultHandler.js | 10 +++++-----
 lib/RssHandler.js     |  3 +--
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/lib/DefaultHandler.js b/lib/DefaultHandler.js
index 94f7caf..e3cdd76 100644
--- a/lib/DefaultHandler.js
+++ b/lib/DefaultHandler.js
@@ -54,8 +54,8 @@ DefaultHandler.prototype.closeTag = function(name){
 	if(!this._tagStack || this._isEmptyTag(name)) return;
 	
 	var pos = this._tagStack.length - 1;
-	while (pos !== -1 && this._tagStack[pos--].name !== name) { }
-	if ( pos !== -1 || this._tagStack[0].name === name)
+	while(pos !== -1 && this._tagStack[pos--].name !== name){}
+	if (pos !== -1 || this._tagStack[0].name === name)
 	    this._tagStack.splice(pos+1);
 };
 
@@ -77,11 +77,11 @@ DefaultHandler.prototype._addDomElement = function(element){
 		else if(this._inSpecialTag && element.type === ElementType.Text && lastChild.type === ElementType.Text){
 			lastChild.data += element.data;
 		    if(this._options.verbose)
-		    	lastChild.raw = lastChild.data;
+				lastChild.raw = lastChild.data;
 		}
 		else lastTag.children.push(element);
 	}
-}
+};
 
 DefaultHandler.prototype.openTag = function(element){
 	if(!this._options.verbose) delete element.data;
@@ -94,7 +94,7 @@ DefaultHandler.prototype.openTag = function(element){
 	
 	//Don't add tags to the tag stack that can't have children
 	if(!this._isEmptyTag(element.name)) this._tagStack.push(element);
-}
+};
 
 DefaultHandler.prototype.writeText = function(element){
 	if(this._options.ignoreWhitespace && element.data.trim() === "") return;
diff --git a/lib/RssHandler.js b/lib/RssHandler.js
index 4cfd933..339bf02 100644
--- a/lib/RssHandler.js
+++ b/lib/RssHandler.js
@@ -103,8 +103,7 @@ RssHandler.prototype.done = function() {
 		}
 		this.dom = feed;
 	}
-	this._done = true;
-	this.handleCallback(null, feed);
+	DefaultHandler.prototype.handleCallback.call(this);
 };
 
 module.exports = RssHandler;
\ No newline at end of file

From 9c24288c9473513eb70c1ac8eda0dea45bfb1872 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 5 Nov 2011 19:14:54 +0100
Subject: [PATCH 068/450] Added options to EventedHandler

for now, it's only enforceEmptyTags
---
 lib/EventedHandler.js | 61 +++++++++++++++++++++++++------------------
 1 file changed, 36 insertions(+), 25 deletions(-)

diff --git a/lib/EventedHandler.js b/lib/EventedHandler.js
index f58973f..eb8be54 100644
--- a/lib/EventedHandler.js
+++ b/lib/EventedHandler.js
@@ -1,22 +1,3 @@
-var EventedHandler = function(cbs){
-	//map the handlers to their callbacks
-	this.writeComment = stripData(cbs.oncomment);
-	this.writeDirective = stripData(cbs.onprocessinginstruction);
-	this.writeText = stripData(cbs.ontext);
-	this.done = cbs.onend || emptyFunction;
-	
-	//if someone wants to listen to that
-	this.reset = cbs.onreset || emptyFunction;
-	this.error = cbs.onerror; //if nothing was set, the error is thrown
-	
-	//functions to be called within writeTag
-	this.onopentag = openTagCB(cbs.onopentag, cbs.onattribute);
-	this.onclosetag = cbs.onclosetag || emptyFunction;
-	
-	//privates
-	this._stack = [];
-};
-
 var emptyFunction = function(){};
 var stripData = function(callback){
 	if(typeof callback !== "function") return emptyFunction;
@@ -29,9 +10,9 @@ var openTagCB = function(openTag, attribute){
 	if(openTag){
 		var open;
 		if(openTag.length === 1){ //to be compatible with sax.js
-			open = function open(name, attributes){ openTag({name:name, attributes:attributes}); }
+			open = function open(name, attributes){ openTag({name:name, attributes:attributes}); };
 		}
-		else open = openTag
+		else open = openTag;
 		if(attribute) return function(name, attributes){open(name,attributes); attr(null, attributes);};
 		else return open;
 	}
@@ -39,27 +20,57 @@ var openTagCB = function(openTag, attribute){
 		else return emptyFunction;
 };
 
+var EventedHandler = function(cbs, options){
+	//map the handlers to their callbacks
+	this.writeComment = stripData(cbs.oncomment);
+	this.writeDirective = stripData(cbs.onprocessinginstruction);
+	this.writeText = stripData(cbs.ontext);
+	this.done = cbs.onend || emptyFunction;
+	
+	if(options) this._options = options;
+	
+	//if someone wants to listen to that
+	this.reset = cbs.onreset || emptyFunction;
+	this.error = cbs.onerror; //if nothing was set, the error is thrown
+	
+	//functions to be called within writeTag
+	this.onopentag = openTagCB(cbs.onopentag, cbs.onattribute);
+	this.onclosetag = cbs.onclosetag || emptyFunction;
+	
+	//privates
+	this._stack = [];
+};
+
+EventedHandler.prototype._options = {
+	enforceEmptyTags: true //auto-close empty tags
+};
+
 //HTML Tags that shouldn't contain child nodes
 var emptyTags={area:true,base:true,basefont:true,br:true,col:true,frame:true,hr:true,img:true,input:true,isindex:true,link:true,meta:true,param:true,embed:true};
 
+EventedHandler.prototype.isEmptyTag = function(name){
+	return this._options.enforceEmptyTags && emptyTags[name];
+};
+
 EventedHandler.prototype.openTag = function(name, attrs /*, type*/){
 	if(arguments.length === 1){ //TODO
 		attrs = name.attribs; name = name.name;
 	}
 	this.onopentag(name, attrs);
-	if(emptyTags[name]) this.onclosetag(name);
+	if(this.isEmptyTag(name)) this.onclosetag(name);
 	else this._stack.push(name);
 };
 
 EventedHandler.prototype.closeTag = function(name){
-	if(!emptyTags[name] && this._stack){
+	if(!this.isEmptyTag(name) && this._stack){
 		var i = this._stack.length-1;
-		while(i !== -1 && this._stack[i--] !== name){};
+		while(i !== -1 && this._stack[i--] !== name){}
 		if( ++i !== 0 || this._stack[0] === name)
 			while(i < this._stack.length)
 				this.onclosetag(this._stack.pop());
 	}
-	else if(name === "br"){//many browsers (eg. Safari) convert </br> to <br>
+	//many browsers (eg. Safari) convert </br> to <br>
+	else if(this._options.enforceEmptyTags && name === "br"){
 		this.onopentag(name, {});
 		this.onclosetag(name);
 	}

From 1d46a2c5b72f7d414b9f43c3c2f059d5073943af Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 5 Nov 2011 19:28:09 +0100
Subject: [PATCH 069/450] fixed a potential bug

---
 lib/EventedHandler.js | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/EventedHandler.js b/lib/EventedHandler.js
index eb8be54..6ed6de0 100644
--- a/lib/EventedHandler.js
+++ b/lib/EventedHandler.js
@@ -10,7 +10,7 @@ var openTagCB = function(openTag, attribute){
 	if(openTag){
 		var open;
 		if(openTag.length === 1){ //to be compatible with sax.js
-			open = function open(name, attributes){ openTag({name:name, attributes:attributes}); };
+			open = function(name, attributes){ openTag({name:name, attributes:attributes}); };
 		}
 		else open = openTag;
 		if(attribute) return function(name, attributes){open(name,attributes); attr(null, attributes);};
@@ -54,7 +54,7 @@ EventedHandler.prototype.isEmptyTag = function(name){
 
 EventedHandler.prototype.openTag = function(name, attrs /*, type*/){
 	if(arguments.length === 1){ //TODO
-		attrs = name.attribs; name = name.name;
+		attrs = name.attribs || {}; name = name.name;
 	}
 	this.onopentag(name, attrs);
 	if(this.isEmptyTag(name)) this.onclosetag(name);

From aea6ab607d8b153a412d6a3417770a92929b5fa5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 5 Nov 2011 19:56:35 +0100
Subject: [PATCH 070/450] adjustments

---
 README.md     | 2 +-
 lib/Parser.js | 5 ++---
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index 4ac1489..e82b1fd 100644
--- a/README.md
+++ b/README.md
@@ -8,7 +8,7 @@ A forgiving HTML/XML/RSS parser written in JS for NodeJS. The parser can handle
 	node tests/00-runtests.js
 
 ##How is this different from [node-htmlparser](https://github.com/tautologistics/node-htmlparser)?
-This is a fork of the project above. The main difference is that this is just intended to be used with node. Besides, the code is much better structured, has less duplications and is ~20% faster than the original (messured using npm module `ben` using RssHandler with the TechCrunch feed, exact result: 54ms vs. 68ms). 
+This is a fork of the project above. The main difference is that this is just intended to be used with node. Besides, the code is much better structured, has less duplications and is remarkably faster than the original. 
 
 Besides, it features an additional handler that provides the interface of [sax.js](https://github.com/isaacs/sax-js) (written for my readability port [readabilitySAX](https://github.com/fb55/readabilitysax)). I also fixed a couple of bugs & included some pull requests for the original project (eg. [RDF feed support](https://github.com/tautologistics/node-htmlparser/pull/35)).
 
diff --git a/lib/Parser.js b/lib/Parser.js
index 9e134ea..a1c4e58 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -23,7 +23,6 @@ function Parser(handler, options){
 //Regular expressions used for cleaning up and parsing (stateless)
 var _reWhitespace = /\s/; //Used to find any whitespace to split on
 var _reTagName = /^\s*(\/?)\s*([^\s\/]+)/; //Used to find the tag name for an element
-var _reRow = RegExp("\r","g");
 
 //Find attributes in a tag
 var _reAttrib = /([^=<>\"\'\s]+)\s*=\s*"([^"]*)"|([^=<>\"\'\s]+)\s*=\s*'([^']*)'|([^=<>\"\'\s]+)\s*=\s*([^'"\s]+)|([^=<>\"\'\s\/]+)/g;
@@ -286,7 +285,7 @@ Parser.prototype.getLocation = function(startTag){
 	l.charOffset = end;
 	l.row += rowNum;
 	
-	var num = rows[rowNum].replace(_reRow,"").length;
+	var num = rows[rowNum].replace(/\r/g,"").length;
 	if(rowNum === 0) l.col += num;
 	else l.col = num;
 	
@@ -298,7 +297,7 @@ Parser.prototype.getLocation = function(startTag){
 	};
 };
 
-//Checks the handler to make it is an object with the right "interface"
+//Checks the handler to ensure it is an object with the right interface
 var validateHandler = function(handler){
 	if(typeof handler !== "object")
 		throw Error("Handler is not an object");

From d494ee5236f5409bbf473b510fef8ee76994f2bd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Tue, 8 Nov 2011 20:33:22 +0100
Subject: [PATCH 071/450] commented sax.js related part

will be added as an option
---
 lib/EventedHandler.js | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/lib/EventedHandler.js b/lib/EventedHandler.js
index 6ed6de0..d46a925 100644
--- a/lib/EventedHandler.js
+++ b/lib/EventedHandler.js
@@ -9,10 +9,10 @@ var openTagCB = function(openTag, attribute){
 	function attr(name, attributes){ for(var i in attributes) attribute({name:i, value:attributes[i]}); }
 	if(openTag){
 		var open;
-		if(openTag.length === 1){ //to be compatible with sax.js
+		/*if(openTag.length === 1){ //to be compatible with sax.js
 			open = function(name, attributes){ openTag({name:name, attributes:attributes}); };
 		}
-		else open = openTag;
+		else */open = openTag;
 		if(attribute) return function(name, attributes){open(name,attributes); attr(null, attributes);};
 		else return open;
 	}
@@ -53,9 +53,8 @@ EventedHandler.prototype.isEmptyTag = function(name){
 };
 
 EventedHandler.prototype.openTag = function(name, attrs /*, type*/){
-	if(arguments.length === 1){ //TODO
-		attrs = name.attribs || {}; name = name.name;
-	}
+    //TODO
+    attrs = name.attribs || {}; name = name.name;
 	this.onopentag(name, attrs);
 	if(this.isEmptyTag(name)) this.onclosetag(name);
 	else this._stack.push(name);

From c39734a6273845c59c86a0b751cdb4dbabd3e202 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Tue, 8 Nov 2011 20:33:37 +0100
Subject: [PATCH 072/450] 1.5

because a lot has changed
---
 package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package.json b/package.json
index 35a642c..e394215 100644
--- a/package.json
+++ b/package.json
@@ -1,7 +1,7 @@
 {
 	  "name": "htmlparser2"
 	, "description": "Forgiving HTML/XML/RSS Parser for Node. This version is optimised and cleaned and provides a SAX interface (EventedHandler)."
-	, "version": "1.2.0"
+	, "version": "1.5.0"
 	, "author": "Felix Boehm <me@feedic.com>"
 	, "contributors": [ "Chris Winberry <chris@winberry.net>" ]
 	, "repository": {

From 2d8b1f46d1c6b78b6c3bf870225aa5bd6c9a2dfe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Tue, 8 Nov 2011 20:34:19 +0100
Subject: [PATCH 073/450] restructured tests, rewrote 00-runtests.js

---
 tests/00-runtests.js                          | 118 ++++++++----------
 tests/01-html.js                              |  18 +++
 tests/02-feed.js                              |  14 +++
 tests/03-events.js                            |  21 ++++
 tests/25-events.js                            |  20 ---
 tests/Events/01-simple.js                     |  20 +++
 tests/{20-rss.js => Feeds/01-rss.js}          |   0
 tests/{21-atom.js => Feeds/02-atom.js}        |   0
 tests/{24-rdf.js => Feeds/03-rdf.js}          |   0
 tests/{ => HTML}/01-basic.js                  |   0
 tests/{ => HTML}/02-single_tag_1.js           |   0
 tests/{ => HTML}/03-single_tag_2.js           |   0
 tests/{ => HTML}/04-unescaped_in_script.js    |   0
 tests/{ => HTML}/05-tags_in_comment.js        |   0
 tests/{ => HTML}/06-comment_in_script.js      |   0
 tests/{ => HTML}/07-unescaped_in_style.js     |   0
 tests/{ => HTML}/08-extra_spaces_in_tag.js    |   0
 tests/{ => HTML}/09-unquoted_attrib.js        |   0
 tests/{ => HTML}/10-singular_attribute.js     |   0
 tests/{ => HTML}/11-text_outside_tags.js      |   0
 tests/{ => HTML}/12-text_only.js              |   0
 tests/{ => HTML}/13-comment_in_text.js        |   0
 .../14-comment_in_text_in_script.js           |   0
 tests/{ => HTML}/15-non-verbose.js            |   0
 tests/{ => HTML}/16-ignore_whitespace.js      |   0
 tests/{ => HTML}/17-xml_namespace.js          |   0
 tests/{ => HTML}/18-enforce_empty_tags.js     |   0
 tests/{ => HTML}/19-ignore_empty_tags.js      |   0
 tests/{ => HTML}/22-position_data.js          |   0
 tests/{ => HTML}/23-template_script_tags.js   |   0
 tests/test-helper.js                          |  13 ++
 31 files changed, 138 insertions(+), 86 deletions(-)
 create mode 100644 tests/01-html.js
 create mode 100644 tests/02-feed.js
 create mode 100644 tests/03-events.js
 delete mode 100644 tests/25-events.js
 create mode 100644 tests/Events/01-simple.js
 rename tests/{20-rss.js => Feeds/01-rss.js} (100%)
 rename tests/{21-atom.js => Feeds/02-atom.js} (100%)
 rename tests/{24-rdf.js => Feeds/03-rdf.js} (100%)
 rename tests/{ => HTML}/01-basic.js (100%)
 rename tests/{ => HTML}/02-single_tag_1.js (100%)
 rename tests/{ => HTML}/03-single_tag_2.js (100%)
 rename tests/{ => HTML}/04-unescaped_in_script.js (100%)
 rename tests/{ => HTML}/05-tags_in_comment.js (100%)
 rename tests/{ => HTML}/06-comment_in_script.js (100%)
 rename tests/{ => HTML}/07-unescaped_in_style.js (100%)
 rename tests/{ => HTML}/08-extra_spaces_in_tag.js (100%)
 rename tests/{ => HTML}/09-unquoted_attrib.js (100%)
 rename tests/{ => HTML}/10-singular_attribute.js (100%)
 rename tests/{ => HTML}/11-text_outside_tags.js (100%)
 rename tests/{ => HTML}/12-text_only.js (100%)
 rename tests/{ => HTML}/13-comment_in_text.js (100%)
 rename tests/{ => HTML}/14-comment_in_text_in_script.js (100%)
 rename tests/{ => HTML}/15-non-verbose.js (100%)
 rename tests/{ => HTML}/16-ignore_whitespace.js (100%)
 rename tests/{ => HTML}/17-xml_namespace.js (100%)
 rename tests/{ => HTML}/18-enforce_empty_tags.js (100%)
 rename tests/{ => HTML}/19-ignore_empty_tags.js (100%)
 rename tests/{ => HTML}/22-position_data.js (100%)
 rename tests/{ => HTML}/23-template_script_tags.js (100%)
 create mode 100644 tests/test-helper.js

diff --git a/tests/00-runtests.js b/tests/00-runtests.js
index 63d8317..68ca6fd 100644
--- a/tests/00-runtests.js
+++ b/tests/00-runtests.js
@@ -1,69 +1,55 @@
-var sys = require("sys");
 var fs = require("fs");
-var htmlparser = require("..");
 
-var testFolder = ".";
-var chunkSize = 5;
+var testCount = 0,
+	failCount = 0,
+	totalTime = 0;
 
-var testFiles = fs.readdirSync(testFolder);
-var testCount = 0;
-var failedCount = 0;
-var totalTime = 0;
-var name = __filename.split("/").slice(-1)[0];
-var handler;
-for (var i = 1; i < testFiles.length; i++) {
-	if(testFiles[i] === name) continue;
-	testCount++;
-	var moduleName = testFiles[i];
-	var test = require(testFolder + "/" + moduleName);
-	var handlerCallback = function handlerCallback (error) {
-		if (error)
-			sys.puts("Handler error: " + error);
-	}
-	console.log(testFiles[i]);
-	var start = Date.now();
-	if(test.type === "rss"){
-		handler = new htmlparser.RssHandler(handlerCallback, test.options.handler);
-	}
-	else if(test.type === "event"){
-		handler = new htmlparser.EventedHandler(test.options.handler);
-	}
-	else{
-		handler = new htmlparser.DefaultHandler(handlerCallback, test.options.handler);
-	}
-	var parser = new htmlparser.Parser(handler, test.options.parser);
-	parser.parseComplete(test.html);
-	var resultComplete = handler.dom;
-	if(test.type === "event"){
-		resultComplete = test.result;
-		test.result = [];
-	}
-	var chunkPos = 0;
-	parser.reset();
-	while (chunkPos < test.html.length) {
-		parser.parseChunk(test.html.substring(chunkPos, chunkPos + chunkSize));
-		chunkPos += chunkSize;
-	}
-	parser.done();
-	var resultChunk = handler.dom;
-	if(test.type === "event"){
-		resultChunk = test.result;
-	}
-	var testResult = sys.inspect(resultComplete, false, null) === sys.inspect(test.expected, false, null)
-					&& sys.inspect(resultChunk, false, null) === sys.inspect(test.expected, false, null);
-	var took = Date.now() - start;
-	totalTime += took;
-	sys.puts("[" + test.name + "\]: " + (testResult ? "passed" : "FAILED") + " (took: " + took + "ms)");
-	if (!testResult) {
-		failedCount++;
-		sys.puts("== Complete ==");
-		sys.puts(sys.inspect(resultComplete, false, null));
-		sys.puts("== Chunked ==");
-		sys.puts(sys.inspect(resultChunk, false, null));
-		sys.puts("== Expected ==");
-		sys.puts(sys.inspect(test.expected, false, null));
-	}
-}
-sys.puts("Total time: " + totalTime);
-sys.puts("Total tests: " + testCount);
-sys.puts("Failed tests: " + failedCount);
+function runTests(test){
+	var begin = Date.now();
+	//read files, load them, run them
+	fs.readdirSync(test.dir
+	).map(function(file){
+		return require(test.dir + file);
+	}).forEach(function(file){
+		var second = false,
+			failed = false,
+			start = Date.now()
+			took = 0;
+		
+		console.log("Testing:", file.name);
+		
+		test.test(file, function(err, dom){
+			if(err) console.log("Handler error:", err);
+			took += Date.now() - start;
+			
+			var expected = JSON.stringify(file.expected, null, 2),
+				got = JSON.stringify(dom, null, 2);
+			if(expected !== got){
+				failed = true;
+				console.log("Expected", expected, "Got", got, second);
+			}
+			
+			start = Date.now();
+			
+			if(second){
+				testCount+=1;
+				if(failed) failCount+=1;
+				
+				console.log("["+file.name+"]:",failed?"failed":"passed","(took",took,"ms)"); 
+			}
+			else second = true;
+		});
+	});
+	var took = Date.now()-begin;
+	totalTime+=took;
+	console.log(test.dir,"took",took);
+};
+
+//run all tests
+var tests = ["./01-html.js", "./02-feed.js", "./03-events.js"];
+tests.map(require).forEach(runTests);
+
+//log the results
+console.log("Total time:", totalTime);
+console.log("Total tests:", testCount);
+console.log("Failed tests:", failCount);
\ No newline at end of file
diff --git a/tests/01-html.js b/tests/01-html.js
new file mode 100644
index 0000000..3d995a5
--- /dev/null
+++ b/tests/01-html.js
@@ -0,0 +1,18 @@
+//Runs tests for HTML
+
+var helper = require("./test-helper.js"),
+	DefaultHandler = require("../lib/DefaultHandler.js");
+
+exports.dir = "./HTML/";
+
+/*
+	function test()
+	runs a test, calls the callback afterwards
+*/
+exports.test = function(test, cb){
+	var handler = new DefaultHandler(function(err, dom){
+		if(err) cb(err, 0); //return the error
+		else cb(null, dom);
+	}, test.options.handler);
+	helper.writeToParser(handler, test.options.parser, test.html);
+}
\ No newline at end of file
diff --git a/tests/02-feed.js b/tests/02-feed.js
new file mode 100644
index 0000000..569249b
--- /dev/null
+++ b/tests/02-feed.js
@@ -0,0 +1,14 @@
+//Runs tests for feeds
+
+var helper = require("./test-helper.js"),
+	RssHandler = require("../lib/RssHandler.js");
+
+exports.dir = "./Feeds/";
+
+exports.test = function(test, cb){
+	var handler = new RssHandler(function(err, dom){
+		if(err) cb(err, 0); //return the error
+		else cb(null, dom);
+	}, test.options.handler);
+	helper.writeToParser(handler, test.options.parser, test.html);
+}
\ No newline at end of file
diff --git a/tests/03-events.js b/tests/03-events.js
new file mode 100644
index 0000000..2d187d7
--- /dev/null
+++ b/tests/03-events.js
@@ -0,0 +1,21 @@
+var helper = require("./test-helper.js"),
+	EventedHandler = require("../lib/EventedHandler.js");
+
+exports.dir = "./Events/";
+
+exports.test = function(test, cb){
+	var tokens = [];
+	var cbs = {
+		onend: function(){
+			//deletes all tokens
+			cb(null, tokens.splice(0));
+		}
+	};
+	for(var i = 0; i < test.callbacks.length; i+=2){
+		cbs[test.callbacks[i]] = test.callbacks[i+1].bind(tokens);
+	};
+	var close = cbs.onclosetag;
+	cbs.onclosetag = function(b){console.log("close",b);close(b)}
+	var handler = new EventedHandler(cbs, test.options.handler);
+	helper.writeToParser(handler, test.options.parser, test.html);
+}
\ No newline at end of file
diff --git a/tests/25-events.js b/tests/25-events.js
deleted file mode 100644
index 1a27bd0..0000000
--- a/tests/25-events.js
+++ /dev/null
@@ -1,20 +0,0 @@
-exports.name = "Events";
-exports.type = "event";
-exports.result = [];
-exports.options = {handler: {
-	onopentag: function(name, attributes){
-		exports.result.push({event:"open", name: name, attributes: attributes});
-	},
-	onclosetag: function(name){
-		exports.result.push({event:"close", name: name});
-	},
-	ontext: function(text){
-		exports.result.push({event:"text", text: text});
-	}
-}, parser: {}};
-exports.html = "<h1 class=test>adsf</h1>";
-exports.expected = [ { event: 'open',
-    name: 'h1',
-    attributes: { class: 'test' } },
-  { event: 'text', text: 'adsf' },
-  { event: 'close', name: 'h1' } ];
\ No newline at end of file
diff --git a/tests/Events/01-simple.js b/tests/Events/01-simple.js
new file mode 100644
index 0000000..135c52d
--- /dev/null
+++ b/tests/Events/01-simple.js
@@ -0,0 +1,20 @@
+exports.name = "Events";
+exports.type = "event";
+exports.options = {handler: {}, parser: {}};
+exports.callbacks = [
+	"onopentag", function(name, attributes){
+		this.push({event:"open", name: name, attributes: attributes});
+	},
+	"onclosetag", function(name){
+		this.push({event:"close", name: name});
+	},
+	"ontext", function(text){
+		this.push({event:"text", text: text});
+	}
+];
+exports.html = "<h1 class=test>adsf</h1>";
+exports.expected = [ { event: 'open',
+    name: 'h1',
+    attributes: { class: 'test' } },
+  { event: 'text', text: 'adsf' },
+  { event: 'close', name: 'h1' } ];
\ No newline at end of file
diff --git a/tests/20-rss.js b/tests/Feeds/01-rss.js
similarity index 100%
rename from tests/20-rss.js
rename to tests/Feeds/01-rss.js
diff --git a/tests/21-atom.js b/tests/Feeds/02-atom.js
similarity index 100%
rename from tests/21-atom.js
rename to tests/Feeds/02-atom.js
diff --git a/tests/24-rdf.js b/tests/Feeds/03-rdf.js
similarity index 100%
rename from tests/24-rdf.js
rename to tests/Feeds/03-rdf.js
diff --git a/tests/01-basic.js b/tests/HTML/01-basic.js
similarity index 100%
rename from tests/01-basic.js
rename to tests/HTML/01-basic.js
diff --git a/tests/02-single_tag_1.js b/tests/HTML/02-single_tag_1.js
similarity index 100%
rename from tests/02-single_tag_1.js
rename to tests/HTML/02-single_tag_1.js
diff --git a/tests/03-single_tag_2.js b/tests/HTML/03-single_tag_2.js
similarity index 100%
rename from tests/03-single_tag_2.js
rename to tests/HTML/03-single_tag_2.js
diff --git a/tests/04-unescaped_in_script.js b/tests/HTML/04-unescaped_in_script.js
similarity index 100%
rename from tests/04-unescaped_in_script.js
rename to tests/HTML/04-unescaped_in_script.js
diff --git a/tests/05-tags_in_comment.js b/tests/HTML/05-tags_in_comment.js
similarity index 100%
rename from tests/05-tags_in_comment.js
rename to tests/HTML/05-tags_in_comment.js
diff --git a/tests/06-comment_in_script.js b/tests/HTML/06-comment_in_script.js
similarity index 100%
rename from tests/06-comment_in_script.js
rename to tests/HTML/06-comment_in_script.js
diff --git a/tests/07-unescaped_in_style.js b/tests/HTML/07-unescaped_in_style.js
similarity index 100%
rename from tests/07-unescaped_in_style.js
rename to tests/HTML/07-unescaped_in_style.js
diff --git a/tests/08-extra_spaces_in_tag.js b/tests/HTML/08-extra_spaces_in_tag.js
similarity index 100%
rename from tests/08-extra_spaces_in_tag.js
rename to tests/HTML/08-extra_spaces_in_tag.js
diff --git a/tests/09-unquoted_attrib.js b/tests/HTML/09-unquoted_attrib.js
similarity index 100%
rename from tests/09-unquoted_attrib.js
rename to tests/HTML/09-unquoted_attrib.js
diff --git a/tests/10-singular_attribute.js b/tests/HTML/10-singular_attribute.js
similarity index 100%
rename from tests/10-singular_attribute.js
rename to tests/HTML/10-singular_attribute.js
diff --git a/tests/11-text_outside_tags.js b/tests/HTML/11-text_outside_tags.js
similarity index 100%
rename from tests/11-text_outside_tags.js
rename to tests/HTML/11-text_outside_tags.js
diff --git a/tests/12-text_only.js b/tests/HTML/12-text_only.js
similarity index 100%
rename from tests/12-text_only.js
rename to tests/HTML/12-text_only.js
diff --git a/tests/13-comment_in_text.js b/tests/HTML/13-comment_in_text.js
similarity index 100%
rename from tests/13-comment_in_text.js
rename to tests/HTML/13-comment_in_text.js
diff --git a/tests/14-comment_in_text_in_script.js b/tests/HTML/14-comment_in_text_in_script.js
similarity index 100%
rename from tests/14-comment_in_text_in_script.js
rename to tests/HTML/14-comment_in_text_in_script.js
diff --git a/tests/15-non-verbose.js b/tests/HTML/15-non-verbose.js
similarity index 100%
rename from tests/15-non-verbose.js
rename to tests/HTML/15-non-verbose.js
diff --git a/tests/16-ignore_whitespace.js b/tests/HTML/16-ignore_whitespace.js
similarity index 100%
rename from tests/16-ignore_whitespace.js
rename to tests/HTML/16-ignore_whitespace.js
diff --git a/tests/17-xml_namespace.js b/tests/HTML/17-xml_namespace.js
similarity index 100%
rename from tests/17-xml_namespace.js
rename to tests/HTML/17-xml_namespace.js
diff --git a/tests/18-enforce_empty_tags.js b/tests/HTML/18-enforce_empty_tags.js
similarity index 100%
rename from tests/18-enforce_empty_tags.js
rename to tests/HTML/18-enforce_empty_tags.js
diff --git a/tests/19-ignore_empty_tags.js b/tests/HTML/19-ignore_empty_tags.js
similarity index 100%
rename from tests/19-ignore_empty_tags.js
rename to tests/HTML/19-ignore_empty_tags.js
diff --git a/tests/22-position_data.js b/tests/HTML/22-position_data.js
similarity index 100%
rename from tests/22-position_data.js
rename to tests/HTML/22-position_data.js
diff --git a/tests/23-template_script_tags.js b/tests/HTML/23-template_script_tags.js
similarity index 100%
rename from tests/23-template_script_tags.js
rename to tests/HTML/23-template_script_tags.js
diff --git a/tests/test-helper.js b/tests/test-helper.js
new file mode 100644
index 0000000..eaa532a
--- /dev/null
+++ b/tests/test-helper.js
@@ -0,0 +1,13 @@
+var Parser = require("../lib/Parser.js"),
+	chunkSize = 5;
+
+exports.writeToParser = function(handler, options, data){
+	var parser = new Parser(handler, options);
+	//first, try to run the test via chunks
+	for(var i = 0; i < data.length; i+=chunkSize){
+		parser.parseChunk(data.substring(i, i + chunkSize));
+	}
+	parser.done();
+	//then parse everything
+	parser.parseComplete(data);
+}
\ No newline at end of file

From e1ae2b231c66caf75ca9b1328925e0cf95bfecc2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 11 Nov 2011 17:19:40 +0100
Subject: [PATCH 074/450] Minor improvements

---
 README.md          |  1 -
 lib/RssHandler.js  | 14 +++++---------
 tests/03-events.js |  2 --
 3 files changed, 5 insertions(+), 12 deletions(-)

diff --git a/README.md b/README.md
index e82b1fd..f7daadb 100644
--- a/README.md
+++ b/README.md
@@ -26,7 +26,6 @@ Besides, it features an additional handler that provides the interface of [sax.j
 	parser.parseComplete(rawHtml);
 	sys.puts(sys.inspect(handler.dom, false, null));
 
-
 ##Example output
 
 	[{
diff --git a/lib/RssHandler.js b/lib/RssHandler.js
index 339bf02..2b82b1b 100644
--- a/lib/RssHandler.js
+++ b/lib/RssHandler.js
@@ -30,18 +30,14 @@ RssHandler.prototype.done = function() {
 
 	feedRoot = getElements(isValidFeed, this.dom, true);
 	if (feedRoot) {
-		if(feedRoot.name === "rdf:RDF"){
-			items = getElements("item", feedRoot.children);
-			childs = getElements("channel", feedRoot.children, true).children;
-		}
-		else if(feedRoot.name === "rss"){
-			childs = feedRoot.children[0].children;
-			items = getElements("item", childs);
-		}
-		else{
+		if(feedRoot.name === "feed"){
 			childs = feedRoot.children;
 			items = getElements("entry", childs);
 		}
+		else{
+			items = getElements("item", feedRoot.children);
+			childs = getElements("channel", feedRoot.children, true).children;
+		}
 		
 		if (feedRoot.name === "feed"){
 			feed.type = "atom";
diff --git a/tests/03-events.js b/tests/03-events.js
index 2d187d7..cc653c7 100644
--- a/tests/03-events.js
+++ b/tests/03-events.js
@@ -14,8 +14,6 @@ exports.test = function(test, cb){
 	for(var i = 0; i < test.callbacks.length; i+=2){
 		cbs[test.callbacks[i]] = test.callbacks[i+1].bind(tokens);
 	};
-	var close = cbs.onclosetag;
-	cbs.onclosetag = function(b){console.log("close",b);close(b)}
 	var handler = new EventedHandler(cbs, test.options.handler);
 	helper.writeToParser(handler, test.options.parser, test.html);
 }
\ No newline at end of file

From 0013a8710901c72a12b18f6542a72fbd5ee704c7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 11 Nov 2011 18:24:47 +0100
Subject: [PATCH 075/450] Renamed RssHandler to FeedHandler

The new name reflects better what it does (it's not just limited to RSS)
---
 lib/{RssHandler.js => FeedHandler.js} | 0
 lib/index.js                          | 2 +-
 tests/02-feed.js                      | 4 ++--
 3 files changed, 3 insertions(+), 3 deletions(-)
 rename lib/{RssHandler.js => FeedHandler.js} (100%)

diff --git a/lib/RssHandler.js b/lib/FeedHandler.js
similarity index 100%
rename from lib/RssHandler.js
rename to lib/FeedHandler.js
diff --git a/lib/index.js b/lib/index.js
index 9aa5bde..2ceb6eb 100644
--- a/lib/index.js
+++ b/lib/index.js
@@ -1,7 +1,7 @@
 module.exports = {
 	Parser: require("./Parser.js"),
 	DefaultHandler: require("./DefaultHandler.js"),
-	RssHandler: require("./RssHandler.js"),
+	FeedHandler: require("./FeedHandler.js"),
 	EventedHandler: require("./EventedHandler.js"),
 	ElementType: require("./ElementType.js"),
 	DomUtils: require("./DomUtils.js")
diff --git a/tests/02-feed.js b/tests/02-feed.js
index 569249b..26d85b5 100644
--- a/tests/02-feed.js
+++ b/tests/02-feed.js
@@ -1,12 +1,12 @@
 //Runs tests for feeds
 
 var helper = require("./test-helper.js"),
-	RssHandler = require("../lib/RssHandler.js");
+	FeedHandler = require("../lib/FeedHandler.js");
 
 exports.dir = "./Feeds/";
 
 exports.test = function(test, cb){
-	var handler = new RssHandler(function(err, dom){
+	var handler = new FeedHandler(function(err, dom){
 		if(err) cb(err, 0); //return the error
 		else cb(null, dom);
 	}, test.options.handler);

From 889fc2c4a8e660a2d46f986738b0bc9be8910782 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 11 Nov 2011 18:30:51 +0100
Subject: [PATCH 076/450] Removed .raw and .data from elements, removed
 position data (no more verbose)

Most tests fail now, but that was expected.

I don't know any project that relied on it, but for know, it's in a
separate branch (and won't be merged).
---
 lib/DefaultHandler.js | 44 +++++++++++++---------
 lib/EventedHandler.js | 86 ++++++++++++++++++++-----------------------
 lib/Parser.js         | 71 ++++-------------------------------
 3 files changed, 75 insertions(+), 126 deletions(-)

diff --git a/lib/DefaultHandler.js b/lib/DefaultHandler.js
index e3cdd76..91373d9 100644
--- a/lib/DefaultHandler.js
+++ b/lib/DefaultHandler.js
@@ -60,8 +60,6 @@ DefaultHandler.prototype.closeTag = function(name){
 };
 
 DefaultHandler.prototype._addDomElement = function(element){
-	if(!this._options.verbose) delete element.raw;
-	
 	var lastTag = this._tagStack[this._tagStack.length-1], lastChild;
 	if(!lastTag) this.dom.push(element);
 	else{ //There are parent elements
@@ -70,11 +68,7 @@ DefaultHandler.prototype._addDomElement = function(element){
 			return;
 		}
 		lastChild = lastTag.children[lastTag.children.length-1];
-		if(element.type === ElementType.Comment && lastChild.type === ElementType.Comment){
-			lastChild.data += element.data;
-			if(this._options.verbose) lastChild.raw = lastChild.data;
-		}
-		else if(this._inSpecialTag && element.type === ElementType.Text && lastChild.type === ElementType.Text){
+		if(this._inSpecialTag && element.type === ElementType.Text && lastChild.type === ElementType.Text){
 			lastChild.data += element.data;
 		    if(this._options.verbose)
 				lastChild.raw = lastChild.data;
@@ -83,24 +77,40 @@ DefaultHandler.prototype._addDomElement = function(element){
 	}
 };
 
-DefaultHandler.prototype.openTag = function(element){
-	if(!this._options.verbose) delete element.data;
+DefaultHandler.prototype.openTag = function(name, attribs, type){
+	if(type === ElementType.Script || type === ElementType.Style) this._inSpecialTag = true;
 	
-	var isSpecial = element.type === ElementType.Script || element.type === ElementType.Style;
-	
-	if(isSpecial) this._inSpecialTag = true;
+	var element = {type:type, name:name, attribs:attribs};
 	
 	this._addDomElement(element);
 	
 	//Don't add tags to the tag stack that can't have children
-	if(!this._isEmptyTag(element.name)) this._tagStack.push(element);
+	if(!this._isEmptyTag(name)) this._tagStack.push(element);
 };
 
-DefaultHandler.prototype.writeText = function(element){
-	if(this._options.ignoreWhitespace && element.data.trim() === "") return;
-	this._addDomElement(element);
+DefaultHandler.prototype.writeText = function(data){
+	if(this._options.ignoreWhitespace && data.trim() === "") return;
+	var lastTag = this._tagStack[this._tagStack.length-1];
+	if(this._inSpecialTag && lastTag && lastTag.children && lastTag.children[lastTag.children.length-1].type === ElementType.Text){
+		lastTag.children[lastTag.children.length-1].data += data;
+	}
+	this._addDomElement({data:data, type:ElementType.Text});
 };
 
-DefaultHandler.prototype.writeComment = DefaultHandler.prototype.writeDirective = DefaultHandler.prototype._addDomElement;
+DefaultHandler.prototype.writeComment = function(data){
+	var lastTag = this._tagStack[this._tagStack.length-1], element,
+		lastChild = lastTag && lastTag.children && lastTag.children[lastTag.children.length-1];
+	if(!lastChild || lastChild.type !== ElementType.Comment){
+		element = {data:data, type: ElementType.Comment};
+		if(!lastTag) this.dom.push(element);
+		else if(!lastChild) lastTag.children = [element];
+		else if(lastChild.type !== ElementType.Comment) lastTag.children.push(element);
+	}
+	else lastChild.data += data;
+}
+
+DefaultHandler.prototype.writeDirective = function(name, data){
+	this._addDomElement({name:name, data:data, type:ElementType.Directive});
+};
 
 module.exports = DefaultHandler;
\ No newline at end of file
diff --git a/lib/EventedHandler.js b/lib/EventedHandler.js
index d46a925..2f9df29 100644
--- a/lib/EventedHandler.js
+++ b/lib/EventedHandler.js
@@ -1,42 +1,7 @@
-var emptyFunction = function(){};
-var stripData = function(callback){
-	if(typeof callback !== "function") return emptyFunction;
-	return function(data){
-		callback(data.data);
-	};
-};
-var openTagCB = function(openTag, attribute){
-	function attr(name, attributes){ for(var i in attributes) attribute({name:i, value:attributes[i]}); }
-	if(openTag){
-		var open;
-		/*if(openTag.length === 1){ //to be compatible with sax.js
-			open = function(name, attributes){ openTag({name:name, attributes:attributes}); };
-		}
-		else */open = openTag;
-		if(attribute) return function(name, attributes){open(name,attributes); attr(null, attributes);};
-		else return open;
-	}
-	else if(attribute) return attr;
-		else return emptyFunction;
-};
-
 var EventedHandler = function(cbs, options){
-	//map the handlers to their callbacks
-	this.writeComment = stripData(cbs.oncomment);
-	this.writeDirective = stripData(cbs.onprocessinginstruction);
-	this.writeText = stripData(cbs.ontext);
-	this.done = cbs.onend || emptyFunction;
-	
+	this._cbs = cbs || {};
 	if(options) this._options = options;
 	
-	//if someone wants to listen to that
-	this.reset = cbs.onreset || emptyFunction;
-	this.error = cbs.onerror; //if nothing was set, the error is thrown
-	
-	//functions to be called within writeTag
-	this.onopentag = openTagCB(cbs.onopentag, cbs.onattribute);
-	this.onclosetag = cbs.onclosetag || emptyFunction;
-	
 	//privates
 	this._stack = [];
 };
@@ -53,26 +18,55 @@ EventedHandler.prototype.isEmptyTag = function(name){
 };
 
 EventedHandler.prototype.openTag = function(name, attrs /*, type*/){
-    //TODO
-    attrs = name.attribs || {}; name = name.name;
-	this.onopentag(name, attrs);
-	if(this.isEmptyTag(name)) this.onclosetag(name);
+	if(this._cbs.onopentag) this._cbs.onopentag(name, attrs);
+	if(this.isEmptyTag(name)){
+		if(this._cbs.onclosetag) this._cbs.onclosetag(name);
+	}
 	else this._stack.push(name);
 };
 
 EventedHandler.prototype.closeTag = function(name){
 	if(!this.isEmptyTag(name) && this._stack){
+		if(!this._cbs.onclosetag) return; //nothing to do
 		var i = this._stack.length-1;
 		while(i !== -1 && this._stack[i--] !== name){}
 		if( ++i !== 0 || this._stack[0] === name)
 			while(i < this._stack.length)
-				this.onclosetag(this._stack.pop());
-	}
-	//many browsers (eg. Safari) convert </br> to <br>
-	else if(this._options.enforceEmptyTags && name === "br"){
-		this.onopentag(name, {});
-		this.onclosetag(name);
+				this._cbs.onclosetag(this._stack.pop());
 	}
+	//many browsers (eg. Safari, Chrome) convert </br> to <br>
+	else if(name === "br" && this._options.enforceEmptyTags)
+		this.openTag(name, {});
+};
+
+//wrappers for the callbacks
+EventedHandler.prototype.writeComment = function(data){
+	var cb = this._cbs.oncomment;
+	if(cb) cb(data);
+};
+
+EventedHandler.prototype.writeText = function(text){
+	var cb = this._cbs.ontext;
+	if(cb) cb(text);
+};
+
+EventedHandler.prototype.writeDirective = function(name, data){
+	var cb = this._cbs.onprocessinginstruction;
+	if(cb) cb(name, data);
+};
+
+EventedHandler.prototype.done = function(){
+	if(this._cbs.onend) this._cbs.onend();
+};
+
+EventedHandler.prototype.reset = function(){
+	if(this._cbs.onreset) this._cbs.onreset();
+};
+
+EventedHandler.prototype.error = function(error){
+	if(this._cbs.onerror) this._cbs.onerror();
+	else throw error;
 };
 
+//export the evented handler
 module.exports = EventedHandler;
\ No newline at end of file
diff --git a/lib/Parser.js b/lib/Parser.js
index a1c4e58..6717d55 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -25,7 +25,7 @@ var _reWhitespace = /\s/; //Used to find any whitespace to split on
 var _reTagName = /^\s*(\/?)\s*([^\s\/]+)/; //Used to find the tag name for an element
 
 //Find attributes in a tag
-var _reAttrib = /([^=<>\"\'\s]+)\s*=\s*"([^"]*)"|([^=<>\"\'\s]+)\s*=\s*'([^']*)'|([^=<>\"\'\s]+)\s*=\s*([^'"\s]+)|([^=<>\"\'\s\/]+)/g;
+var _reAttrib = /([^=<>\"\'\s]+)\s*=\s*"([^"]*)"|([^=<>\"\'\s]+)\s*=\s*'([^']*)'|([^=<>\"\'\s]+)\s*=\s*([^'"\s]+)|([^=<>\"\'\s\/]+)/g;//"
 
 var tagTypes = {};
 tagTypes[ ElementType.Script ] = true;
@@ -117,13 +117,13 @@ SpecialTags[ElementType.Tag] = 0;
 SpecialTags[ElementType.Style]  = 1; //2^0
 SpecialTags[ElementType.Script] = 2; //2^1
 SpecialTags.w = 4; //2^2 - if set, append prev tag sep to data
-SpecialTags[ElementType.Comment] = 8; //2^8
+SpecialTags[ElementType.Comment] = 8; //2^3
 
 //Parses through HTML text and returns an array of found elements
 Parser.prototype.parseTags = function(){
 	var buffer = this._buffer;
 
-	var next, tagSep, rawData, element, elementName, prevElement, elementType, elementData, attributes;
+	var next, tagSep, rawData, elementName, prevElement, elementType, elementData, attributes;
 	
 	var opening = buffer.indexOf("<"), closing = buffer.indexOf(">");
 
@@ -193,25 +193,17 @@ Parser.prototype.parseTags = function(){
 			else if(rawData.charAt(0) === "!" || rawData.charAt(0) === "?"){
 				//ElementType.Directive
 				//TODO: what about CDATA?
-				element = {raw: elementData, data: elementData, type: ElementType.Directive, name: elementName};
-				if(this._options.includeLocation) element.location = this.getLocation(false);
-				this._handler.writeDirective(element);
+				this._handler.writeDirective(elementName, elementData);
 			} else
 				this._processTag(elementName, elementData, tagSep, rawData);
 		}
 		else if(elementType === ElementType.Text && rawData !== ""){
-			element = {raw: rawData, data: elementData, type: ElementType.Text};
-			if(this._options.includeLocation) element.location = this.getLocation(false);
-			this._handler.writeText(element);
+			this._handler.writeText(elementData);
 		}
 		
 		this._current = next + 1;
 	}
 
-	if(this._options.includeLocation){
-		this.getLocation();
-		this._location.charOffset = 0;
-	}
 	this._buffer = buffer.substring(this._current);
 	this._current = 0;
 };
@@ -225,15 +217,7 @@ Parser.prototype._processComment = function(rawData, tagSep){
 	else rawData += tagSep;
 	this._prevTagSep = tagSep;
 	
-	var element = {
-		raw: rawData,
-		data: rawData,
-		type: ElementType.Comment
-	};
-	
-	if(this._options.includeLocation) element.location = this.getLocation(false);
-	
-	this._handler.writeComment(element);
+	this._handler.writeComment(rawData);
 };
 
 Parser.prototype._processTag = function(name, data, tagSep, raw){
@@ -246,18 +230,8 @@ Parser.prototype._processTag = function(name, data, tagSep, raw){
 	if(this._options.xmlMode){ /*do nothing*/ }
 	else if(name === "script") type = ElementType.Script;
 	else if(name === "style")  type = ElementType.Style;
-	
-	var element = {
-		raw: raw, data: data, type: type, name: name
-	};
-	
-	var attribs = parseAttributes(data);
-	if(attribs) element.attribs = attribs;
-	
-	if(this._options.includeLocation)
-		element.location = this.getLocation(type === ElementType.Tag);
-	
-	this._handler.openTag(element);
+
+	this._handler.openTag(name, parseAttributes(data), type);
 	
 	//If tag self-terminates, add an explicit, separate closing tag
 	if(data.substr(-1) === "/"){
@@ -268,35 +242,6 @@ Parser.prototype._processTag = function(name, data, tagSep, raw){
 	}
 };
 
-Parser.prototype.getLocation = function(startTag){
-	var c, end, chunk,
-		l = this._location;
-	if(startTag){
-		end = this._current - 1;
-		chunk = l.charOffset === 0 && end === -1;
-	} else {
-		end = this._current;
-		chunk = false;
-	}
-	
-	var rows = this._buffer.substring(l.charOffset, end).split("\n"),
-		rowNum = rows.length - 1;
-	
-	l.charOffset = end;
-	l.row += rowNum;
-	
-	var num = rows[rowNum].replace(/\r/g,"").length;
-	if(rowNum === 0) l.col += num;
-	else l.col = num;
-	
-	if(arguments.length === 0) return;
-	
-	return {
-		line: l.row + 1,
-		col: l.col + (chunk ? 0: 1)
-	};
-};
-
 //Checks the handler to ensure it is an object with the right interface
 var validateHandler = function(handler){
 	if(typeof handler !== "object")

From 39e51513436681efa517dc5412ebc9cef7084d21 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 11 Nov 2011 18:31:53 +0100
Subject: [PATCH 077/450] Removed the verbose property from DefaultHandler

---
 lib/DefaultHandler.js | 1 -
 1 file changed, 1 deletion(-)

diff --git a/lib/DefaultHandler.js b/lib/DefaultHandler.js
index 91373d9..a59f12a 100644
--- a/lib/DefaultHandler.js
+++ b/lib/DefaultHandler.js
@@ -18,7 +18,6 @@ function DefaultHandler(callback, options){
 //default options
 DefaultHandler.prototype._options = {
 	ignoreWhitespace: false,	//Keep whitespace-only text nodes
-    verbose: true,				//Keep data property for tags and raw property for all
     enforceEmptyTags: true		//Don't allow children for HTML tags defined as empty in spec
 };
 

From 73c723f56a6cdd63957b775888a2775dfd233e61 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 11 Nov 2011 18:37:16 +0100
Subject: [PATCH 078/450] Removed Parser#_current

---
 lib/Parser.js | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index 6717d55..c15e430 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -10,12 +10,6 @@ function Parser(handler, options){
 	this._prevTagSep = "";
 	this._contentFlags = 0;
 	this._done = false;
-	this._current = 0;
-	this._location = {
-		row: 0,
-		col: 0,
-		charOffset: 0
-	};
 	this._parseState = ElementType.Text;
 }
 
@@ -121,7 +115,7 @@ SpecialTags[ElementType.Comment] = 8; //2^3
 
 //Parses through HTML text and returns an array of found elements
 Parser.prototype.parseTags = function(){
-	var buffer = this._buffer;
+	var buffer = this._buffer, current = 0;
 
 	var next, tagSep, rawData, elementName, prevElement, elementType, elementData, attributes;
 	
@@ -138,7 +132,7 @@ Parser.prototype.parseTags = function(){
 			tagSep = ">";
 			closing = buffer.indexOf(tagSep, next + 1);
 		}
-		rawData = buffer.substring(this._current, next); //The next chunk of data to parse
+		rawData = buffer.substring(current, next); //The next chunk of data to parse
 		elementType = this._parseState;
 		this._parseState = (tagSep === "<") ? ElementType.Tag : ElementType.Text;
 		
@@ -201,11 +195,10 @@ Parser.prototype.parseTags = function(){
 			this._handler.writeText(elementData);
 		}
 		
-		this._current = next + 1;
+		current = next + 1;
 	}
 
-	this._buffer = buffer.substring(this._current);
-	this._current = 0;
+	this._buffer = buffer.substring(current);
 };
 
 Parser.prototype._processComment = function(rawData, tagSep){

From d613e79b4aa47b43734840b5699d8ef0a56e1c2c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 11 Nov 2011 18:56:31 +0100
Subject: [PATCH 079/450] Moved assignment of current, started to use continue

---
 lib/Parser.js | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index c15e430..a3d161d 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -133,6 +133,7 @@ Parser.prototype.parseTags = function(){
 			closing = buffer.indexOf(tagSep, next + 1);
 		}
 		rawData = buffer.substring(current, next); //The next chunk of data to parse
+		current = next + 1;
 		elementType = this._parseState;
 		this._parseState = (tagSep === "<") ? ElementType.Tag : ElementType.Text;
 		
@@ -151,7 +152,7 @@ Parser.prototype.parseTags = function(){
 		else if(this._contentFlags >= SpecialTags[ElementType.Comment]){
 			//We're currently in a comment tag
 			this._processComment(rawData, tagSep);
-			elementType = null;
+			continue;
 		}
 		//if it's a closing tag, remove the flag
 		else if(this._contentFlags >= SpecialTags[ElementType.Script] && elementName === "/script"){
@@ -169,13 +170,14 @@ Parser.prototype.parseTags = function(){
 			elementType = ElementType.Text;
 			//If the previous element is text, append the last tag sep to element
 			if(this._contentFlags >= SpecialTags.w){
-				elementData = rawData = this._prevTagSep + rawData;
+				this._handler.writeText(this._prevTagSep + rawData);
 			}
 			else{ //The previous element was not text
 				this._contentFlags += SpecialTags.w;
-				elementData = rawData;
+				this._handler.writeText(rawData);
 			}
 			this._prevTagSep = tagSep;
+			continue;
 		}
 
 		//Processing of non-special tags
@@ -194,8 +196,6 @@ Parser.prototype.parseTags = function(){
 		else if(elementType === ElementType.Text && rawData !== ""){
 			this._handler.writeText(elementData);
 		}
-		
-		current = next + 1;
 	}
 
 	this._buffer = buffer.substring(current);

From e05bc3c0e326b1e94f95be6555c745bbc54f703d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 11 Nov 2011 18:57:03 +0100
Subject: [PATCH 080/450] Fixed some tests

---
 tests/HTML/01-basic.js               | 55 +++++++++++++++++-----------
 tests/HTML/02-single_tag_1.js        | 14 +++++--
 tests/HTML/03-single_tag_2.js        | 19 +++++++---
 tests/HTML/05-tags_in_comment.js     | 21 +++++------
 tests/HTML/08-extra_spaces_in_tag.js | 25 +++++++------
 tests/HTML/09-unquoted_attrib.js     | 25 +++++++------
 tests/HTML/10-singular_attribute.js  | 14 ++++---
 7 files changed, 101 insertions(+), 72 deletions(-)

diff --git a/tests/HTML/01-basic.js b/tests/HTML/01-basic.js
index 2901583..cba7edd 100644
--- a/tests/HTML/01-basic.js
+++ b/tests/HTML/01-basic.js
@@ -4,25 +4,36 @@ exports.options = {
 	, parser: {}
 };
 exports.html = "<!DOCTYPE html><html><title>The Title</title><body>Hello world</body></html>";
-exports.expected = [ { raw: '!DOCTYPE html',
-    data: '!DOCTYPE html',
-    type: 'directive',
-    name: '!DOCTYPE' },
-  { raw: 'html',
-    data: 'html',
-    type: 'tag',
-    name: 'html',
-    children: 
-     [ { raw: 'title',
-         data: 'title',
-         type: 'tag',
-         name: 'title',
-         children: [ { raw: 'The Title', data: 'The Title', type: 'text' } ] },
-       { raw: 'body',
-         data: 'body',
-         type: 'tag',
-         name: 'body',
-         children: 
-          [ { raw: 'Hello world',
-              data: 'Hello world',
-              type: 'text' } ] } ] } ];
+exports.expected = [
+  {
+    "name": "!DOCTYPE",
+    "data": "!DOCTYPE html",
+    "type": "directive"
+  },
+  {
+    "type": "tag",
+    "name": "html",
+    "children": [
+      {
+        "type": "tag",
+        "name": "title",
+        "children": [
+          {
+            "data": "The Title",
+            "type": "text"
+          }
+        ]
+      },
+      {
+        "type": "tag",
+        "name": "body",
+        "children": [
+          {
+            "data": "Hello world",
+            "type": "text"
+          }
+        ]
+      }
+    ]
+  }
+]
diff --git a/tests/HTML/02-single_tag_1.js b/tests/HTML/02-single_tag_1.js
index 0180f55..3f4871f 100644
--- a/tests/HTML/02-single_tag_1.js
+++ b/tests/HTML/02-single_tag_1.js
@@ -4,7 +4,13 @@ exports.options = {
 	, parser: {}
 };
 exports.html = "<br>text</br>";
-exports.expected =
-	[ { raw: 'br', data: 'br', type: 'tag', name: 'br' }
-	, { raw: 'text', data: 'text', type: 'text' }
-	];
\ No newline at end of file
+exports.expected = [
+  {
+    "type": "tag",
+    "name": "br"
+  },
+  {
+    "data": "text",
+    "type": "text"
+  }
+];
\ No newline at end of file
diff --git a/tests/HTML/03-single_tag_2.js b/tests/HTML/03-single_tag_2.js
index 9363dda..eaeec64 100644
--- a/tests/HTML/03-single_tag_2.js
+++ b/tests/HTML/03-single_tag_2.js
@@ -4,8 +4,17 @@ exports.options = {
 	, parser: {}
 };
 exports.html = "<br>text<br>";
-exports.expected =
-	[ { raw: 'br', data: 'br', type: 'tag', name: 'br' }
-	, { raw: 'text', data: 'text', type: 'text' }
-	, { raw: 'br', data: 'br', type: 'tag', name: 'br' }
-	];
\ No newline at end of file
+exports.expected = [
+  {
+    "type": "tag",
+    "name": "br"
+  },
+  {
+    "data": "text",
+    "type": "text"
+  },
+  {
+    "type": "tag",
+    "name": "br"
+  }
+];
\ No newline at end of file
diff --git a/tests/HTML/05-tags_in_comment.js b/tests/HTML/05-tags_in_comment.js
index 9f66f6b..e0c770e 100644
--- a/tests/HTML/05-tags_in_comment.js
+++ b/tests/HTML/05-tags_in_comment.js
@@ -4,16 +4,15 @@ exports.options = {
 	, parser: {}
 };
 exports.html = "<head><!-- commented out tags <title>Test</title>--></head>";
-exports.expected =
-[ { raw: 'head'
-  , data: 'head'
-  , type: 'tag'
-  , name: 'head'
-  , children: 
-     [ { raw: ' commented out tags <title>Test</title>'
-       , data: ' commented out tags <title>Test</title>'
-       , type: 'comment'
-       }
-     ]
+exports.expected = [
+  {
+    "type": "tag",
+    "name": "head",
+    "children": [
+      {
+        "data": " commented out tags <title>Test</title>",
+        "type": "comment"
+      }
+    ]
   }
 ];
\ No newline at end of file
diff --git a/tests/HTML/08-extra_spaces_in_tag.js b/tests/HTML/08-extra_spaces_in_tag.js
index fca4335..8b6cda5 100644
--- a/tests/HTML/08-extra_spaces_in_tag.js
+++ b/tests/HTML/08-extra_spaces_in_tag.js
@@ -4,17 +4,18 @@ exports.options = {
 	, parser: {}
 };
 exports.html = "<\n font\t\n size='14' \n>the text<\n /	\nfont	 \n>";
-exports.expected =
-[ { raw: '\n font	\n size=\'14\' \n'
-  , data: 'font	\n size=\'14\''
-  , type: 'tag'
-  , name: 'font'
-  , attribs: { size: '14' }
-  , children:
-     [ { raw: 'the text'
-       , data: 'the text'
-       , type: 'text'
-       }
-     ]
+exports.expected = [
+  {
+    "type": "tag",
+    "name": "font",
+    "attribs": {
+      "size": "14"
+    },
+    "children": [
+      {
+        "data": "the text",
+        "type": "text"
+      }
+    ]
   }
 ];
diff --git a/tests/HTML/09-unquoted_attrib.js b/tests/HTML/09-unquoted_attrib.js
index d448a54..c787422 100644
--- a/tests/HTML/09-unquoted_attrib.js
+++ b/tests/HTML/09-unquoted_attrib.js
@@ -4,17 +4,18 @@ exports.options = {
 	, parser: {}
 };
 exports.html = "<font size= 14>the text</font>";
-exports.expected =
-[ { raw: 'font size= 14'
-  , data: 'font size= 14'
-  , type: 'tag'
-  , name: 'font'
-  , attribs: { size: '14' }
-  , children:
-     [ { raw: 'the text'
-       , data: 'the text'
-       , type: 'text'
-       }
-     ]
+exports.expected = [
+  {
+    "type": "tag",
+    "name": "font",
+    "attribs": {
+      "size": "14"
+    },
+    "children": [
+      {
+        "data": "the text",
+        "type": "text"
+      }
+    ]
   }
 ];
\ No newline at end of file
diff --git a/tests/HTML/10-singular_attribute.js b/tests/HTML/10-singular_attribute.js
index d749b94..ca978e2 100644
--- a/tests/HTML/10-singular_attribute.js
+++ b/tests/HTML/10-singular_attribute.js
@@ -4,11 +4,13 @@ exports.options = {
 	, parser: {}
 };
 exports.html = "<option value='foo' selected>";
-exports.expected =
-[ { raw: 'option value=\'foo\' selected'
-  , data: 'option value=\'foo\' selected'
-  , type: 'tag'
-  , name: 'option'
-  , attribs: { value: 'foo', selected: 'selected' }
+exports.expected = [
+  {
+    "type": "tag",
+    "name": "option",
+    "attribs": {
+      "value": "foo",
+      "selected": "selected"
+    }
   }
 ];
\ No newline at end of file

From 4314dfacb758de5d04ade5dce8c8b2dacfbcc8e6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 11 Nov 2011 19:00:35 +0100
Subject: [PATCH 081/450] Fixed two bugs

---
 lib/Parser.js | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index a3d161d..5e47a70 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -59,13 +59,9 @@ Parser.prototype.done = function(){
 			data = data.trim();
 			var name = parseTagName(data);
 			if(name.charAt(0) === "/") this._handler.closeTag(name.substr(1));
-			else this._handler.openTag({
-				name: name, raw: data, data: data, attribs: parseAttributes(data)
-			});
+			else this._handler.openTag(name, parseAttributes(data), ElementType.Tag);
 		}
-		else this._handler.writeText({
-				raw: data, data: data, type: ElementType.Text
-			});
+		else this._handler.writeText(data);
 		
 		this._buffer = "";
 	}
@@ -166,15 +162,13 @@ Parser.prototype.parseTags = function(){
 		//special behaviour for script & style tags
 		//Make sure we're not in a comment
 		else if(!this._options.xmlMode && rawData.substring(0, 3) !== "!--"){
-			//All data from here to style close is now a text element
-			elementType = ElementType.Text;
 			//If the previous element is text, append the last tag sep to element
 			if(this._contentFlags >= SpecialTags.w){
 				this._handler.writeText(this._prevTagSep + rawData);
 			}
 			else{ //The previous element was not text
 				this._contentFlags += SpecialTags.w;
-				this._handler.writeText(rawData);
+				if(rawData !== "") this._handler.writeText(rawData);
 			}
 			this._prevTagSep = tagSep;
 			continue;

From a49fdb6769d379058fd0e8bc8a09a0708fb9022e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 11 Nov 2011 19:13:56 +0100
Subject: [PATCH 082/450] All tests now match the new pattern

---
 tests/00-runtests.js                          |  2 +
 tests/HTML/04-unescaped_in_script.js          |  7 +-
 tests/HTML/06-comment_in_script.js            | 21 +++---
 tests/HTML/07-unescaped_in_style.js           |  7 +-
 tests/HTML/11-text_outside_tags.js            | 25 ++++---
 tests/HTML/12-text_only.js                    |  8 +--
 tests/HTML/13-comment_in_text.js              | 24 +++----
 tests/HTML/14-comment_in_text_in_script.js    | 38 +++++-----
 tests/HTML/16-ignore_whitespace.js            | 64 ++++++++---------
 tests/HTML/17-xml_namespace.js                | 15 +++-
 tests/HTML/18-enforce_empty_tags.js           | 15 ++--
 tests/HTML/19-ignore_empty_tags.js            | 18 +++--
 ...ipt_tags.js => 20-template_script_tags.js} |  6 +-
 tests/HTML/22-position_data.js                | 71 -------------------
 14 files changed, 128 insertions(+), 193 deletions(-)
 rename tests/HTML/{23-template_script_tags.js => 20-template_script_tags.js} (63%)
 delete mode 100644 tests/HTML/22-position_data.js

diff --git a/tests/00-runtests.js b/tests/00-runtests.js
index 68ca6fd..15147aa 100644
--- a/tests/00-runtests.js
+++ b/tests/00-runtests.js
@@ -9,8 +9,10 @@ function runTests(test){
 	//read files, load them, run them
 	fs.readdirSync(test.dir
 	).map(function(file){
+		if(file[0] === ".") return false;
 		return require(test.dir + file);
 	}).forEach(function(file){
+		if(file === false) return;
 		var second = false,
 			failed = false,
 			start = Date.now()
diff --git a/tests/HTML/04-unescaped_in_script.js b/tests/HTML/04-unescaped_in_script.js
index 8f0bc3a..d284080 100644
--- a/tests/HTML/04-unescaped_in_script.js
+++ b/tests/HTML/04-unescaped_in_script.js
@@ -10,14 +10,11 @@ exports.expected =
   , type: 'tag'
   , name: 'head'
   , children: 
-     [ { raw: 'script language="Javascript"'
-       , data: 'script language="Javascript"'
-       , type: 'script'
+     [ { type: 'script'
        , name: 'script'
        , attribs: { language: 'Javascript' }
        , children: 
-          [ { raw: 'var foo = "<bar>"; alert(2 > foo); var baz = 10 << 2; var zip = 10 >> 1; var yap = \"<<>>>><<\";'
-            , data: 'var foo = "<bar>"; alert(2 > foo); var baz = 10 << 2; var zip = 10 >> 1; var yap = \"<<>>>><<\";'
+          [ { data: 'var foo = "<bar>"; alert(2 > foo); var baz = 10 << 2; var zip = 10 >> 1; var yap = \"<<>>>><<\";'
             , type: 'text'
             }
           ]
diff --git a/tests/HTML/06-comment_in_script.js b/tests/HTML/06-comment_in_script.js
index af8468a..6022b91 100644
--- a/tests/HTML/06-comment_in_script.js
+++ b/tests/HTML/06-comment_in_script.js
@@ -4,16 +4,15 @@ exports.options = {
 	, parser: {}
 };
 exports.html = "<script><!--var foo = 1;--></script>";
-exports.expected =
-[ { raw: 'script'
-  , data: 'script'
-  , type: 'script'
-  , name: 'script'
-  , children: 
-     [ { raw: 'var foo = 1;'
-       , data: 'var foo = 1;'
-       , type: 'comment'
-       }
-     ]
+exports.expected = [
+  {
+    "type": "script",
+    "name": "script",
+    "children": [
+      {
+        "data": "var foo = 1;",
+        "type": "comment"
+      }
+    ]
   }
 ];
\ No newline at end of file
diff --git a/tests/HTML/07-unescaped_in_style.js b/tests/HTML/07-unescaped_in_style.js
index c5817fc..3784336 100644
--- a/tests/HTML/07-unescaped_in_style.js
+++ b/tests/HTML/07-unescaped_in_style.js
@@ -5,14 +5,11 @@ exports.options = {
 };
 exports.html = "<style type=\"text/css\">\n body > p\n	{ font-weight: bold; }</style>";
 exports.expected =
-[ { raw: 'style type="text/css"'
-  , data: 'style type="text/css"'
-  , type: 'style'
+[ { type: 'style'
   , name: 'style'
   , attribs: { type: 'text/css' }
   , children:
-     [ { raw: '\n body > p\n	{ font-weight: bold; }'
-       , data: '\n body > p\n	{ font-weight: bold; }'
+     [ { data: '\n body > p\n	{ font-weight: bold; }'
        , type: 'text'
        }
      ]
diff --git a/tests/HTML/11-text_outside_tags.js b/tests/HTML/11-text_outside_tags.js
index ae40c76..d544b23 100644
--- a/tests/HTML/11-text_outside_tags.js
+++ b/tests/HTML/11-text_outside_tags.js
@@ -4,18 +4,17 @@ exports.options = {
 	, parser: {}
 };
 exports.html = "Line one\n<br>\nline two";
-exports.expected =
-[ { raw: 'Line one\n'
-  , data: 'Line one\n'
-  , type: 'text'
-  }
-  , { raw: 'br'
-  , data: 'br'
-  , type: 'tag'
-  , name: 'br'
-  }
-  , { raw: '\nline two'
-  , data: '\nline two'
-  , type: 'text'
+exports.expected = [
+  {
+    "data": "Line one\n",
+    "type": "text"
+  },
+  {
+    "type": "tag",
+    "name": "br"
+  },
+  {
+    "data": "\nline two",
+    "type": "text"
   }
 ];
\ No newline at end of file
diff --git a/tests/HTML/12-text_only.js b/tests/HTML/12-text_only.js
index 9612840..45d774f 100644
--- a/tests/HTML/12-text_only.js
+++ b/tests/HTML/12-text_only.js
@@ -4,9 +4,9 @@ exports.options = {
 	, parser: {}
 };
 exports.html = "this is the text";
-exports.expected =
-[ { raw: 'this is the text'
-  , data: 'this is the text'
-  , type: 'text'
+exports.expected = [
+  {
+    "data": "this is the text",
+    "type": "text"
   }
 ];
\ No newline at end of file
diff --git a/tests/HTML/13-comment_in_text.js b/tests/HTML/13-comment_in_text.js
index c40d891..46bd94d 100644
--- a/tests/HTML/13-comment_in_text.js
+++ b/tests/HTML/13-comment_in_text.js
@@ -4,17 +4,17 @@ exports.options = {
 	, parser: {}
 };
 exports.html = "this is <!-- the comment --> the text";
-exports.expected =
-[ { raw: 'this is '
-  , data: 'this is '
-  , type: 'text'
-  }
-, { raw: ' the comment '
-  , data: ' the comment '
-  , type: 'comment'
-  }
-, { raw: ' the text'
-  , data: ' the text'
-  , type: 'text'
+exports.expected = [
+  {
+    "data": "this is ",
+    "type": "text"
+  },
+  {
+    "data": " the comment ",
+    "type": "comment"
+  },
+  {
+    "data": " the text",
+    "type": "text"
   }
 ];
\ No newline at end of file
diff --git a/tests/HTML/14-comment_in_text_in_script.js b/tests/HTML/14-comment_in_text_in_script.js
index 8534610..c4fff65 100644
--- a/tests/HTML/14-comment_in_text_in_script.js
+++ b/tests/HTML/14-comment_in_text_in_script.js
@@ -4,25 +4,23 @@ exports.options = {
 	, parser: {}
 };
 exports.html = "<script>this is <!-- the comment --> the text</script>";
-exports.expected =
-[ { raw: 'script'
-  , data: 'script'
-  , type: 'script'
-  , name: 'script'
-  , children:
-     [ { raw: 'this is '
-       , data: 'this is '
-       , type: 'text'
-       }
-       , { raw: ' the comment '
-       , data: ' the comment '
-       , type: 'comment'
-       }
-       , { raw: ' the text'
-       , data: ' the text'
-       , type: 'text'
-       }
-
-     ]
+exports.expected = [
+  {
+    "type": "script",
+    "name": "script",
+    "children": [
+      {
+        "data": "this is ",
+        "type": "text"
+      },
+      {
+        "data": " the comment ",
+        "type": "comment"
+      },
+      {
+        "data": " the text",
+        "type": "text"
+      }
+    ]
   }
 ];
\ No newline at end of file
diff --git a/tests/HTML/16-ignore_whitespace.js b/tests/HTML/16-ignore_whitespace.js
index beb0f34..9049bd8 100644
--- a/tests/HTML/16-ignore_whitespace.js
+++ b/tests/HTML/16-ignore_whitespace.js
@@ -4,39 +4,35 @@ exports.options = {
 	, parser: {}
 };
 exports.html = "Line one\n<br> \t\n<br>\nline two<font>\n <br> x </font>";
-exports.expected =
-[ { raw: 'Line one\n'
-  , data: 'Line one\n'
-  , type: 'text'
+exports.expected = [
+  {
+    "data": "Line one\n",
+    "type": "text"
+  },
+  {
+    "type": "tag",
+    "name": "br"
+  },
+  {
+    "type": "tag",
+    "name": "br"
+  },
+  {
+    "data": "\nline two",
+    "type": "text"
+  },
+  {
+    "type": "tag",
+    "name": "font",
+    "children": [
+      {
+        "type": "tag",
+        "name": "br"
+      },
+      {
+        "data": " x ",
+        "type": "text"
+      }
+    ]
   }
-  , { raw: 'br'
-  , data: 'br'
-  , type: 'tag'
-  , name: 'br'
-  }
-  , { raw: 'br'
-  , data: 'br'
-  , type: 'tag'
-  , name: 'br'
-  }
-  , { raw: '\nline two'
-  , data: '\nline two'
-  , type: 'text'
-  }
-  , { raw: 'font'
-  , data: 'font'
-  , type: 'tag'
-  , name: 'font'
-  , children: 
-	[ { raw: 'br'
-  , data: 'br'
-  , type: 'tag'
-  , name: 'br'
-  }
-  , { raw: ' x '
-  , data: ' x '
-  , type: 'text'
-  }
-	  ]
-	}
 ];
\ No newline at end of file
diff --git a/tests/HTML/17-xml_namespace.js b/tests/HTML/17-xml_namespace.js
index a2c0d1f..2789a6e 100644
--- a/tests/HTML/17-xml_namespace.js
+++ b/tests/HTML/17-xml_namespace.js
@@ -4,6 +4,15 @@ exports.options = {
 	, parser: {}
 };
 exports.html = "<ns:tag>text</ns:tag>";
-exports.expected =
-	[ { raw: 'ns:tag', data: 'ns:tag', type: 'tag', name: 'ns:tag', children: [ { raw: 'text', data: 'text', type: 'text' } ] }
-	];
\ No newline at end of file
+exports.expected = [
+  {
+    "type": "tag",
+    "name": "ns:tag",
+    "children": [
+      {
+        "data": "text",
+        "type": "text"
+      }
+    ]
+  }
+];
\ No newline at end of file
diff --git a/tests/HTML/18-enforce_empty_tags.js b/tests/HTML/18-enforce_empty_tags.js
index 01af3e3..131a353 100644
--- a/tests/HTML/18-enforce_empty_tags.js
+++ b/tests/HTML/18-enforce_empty_tags.js
@@ -4,8 +4,13 @@ exports.options = {
 	, parser: {}
 };
 exports.html = "<link>text</link>";
-exports.expected =
-	[
-		  { raw: 'link', data: 'link', type: 'tag', name: 'link' }
-		, { raw: 'text', data: 'text', type: 'text' }
-	];
\ No newline at end of file
+exports.expected = [
+  {
+    "type": "tag",
+    "name": "link"
+  },
+  {
+    "data": "text",
+    "type": "text"
+  }
+];
\ No newline at end of file
diff --git a/tests/HTML/19-ignore_empty_tags.js b/tests/HTML/19-ignore_empty_tags.js
index abb508f..b50c086 100644
--- a/tests/HTML/19-ignore_empty_tags.js
+++ b/tests/HTML/19-ignore_empty_tags.js
@@ -4,9 +4,15 @@ exports.options = {
 	, parser: {}
 };
 exports.html = "<link>text</link>";
-exports.expected =
-	[
-		  { raw: 'link', data: 'link', type: 'tag', name: 'link', children: [
-		  	{ raw: 'text', data: 'text', type: 'text' }
-		  ] }
-	];
\ No newline at end of file
+exports.expected = [
+  {
+    "type": "tag",
+    "name": "link",
+    "children": [
+      {
+        "data": "text",
+        "type": "text"
+      }
+    ]
+  }
+];
\ No newline at end of file
diff --git a/tests/HTML/23-template_script_tags.js b/tests/HTML/20-template_script_tags.js
similarity index 63%
rename from tests/HTML/23-template_script_tags.js
rename to tests/HTML/20-template_script_tags.js
index 24864fd..e6b7b74 100644
--- a/tests/HTML/23-template_script_tags.js
+++ b/tests/HTML/20-template_script_tags.js
@@ -4,12 +4,10 @@ exports.options = {
 	, parser: {}
 };
 exports.html = "<script type=\"text/template\"><h1>Heading1</h1></script>";
-exports.expected = [ { raw: 'script type="text/template"',
-    data: 'script type="text/template"',
+exports.expected = [ {
     type: 'script',
     name: 'script',
     attribs: { type: 'text/template' },
     children: 
-     [ { raw: '<h1>Heading1</h1>',
-         data: '<h1>Heading1</h1>',
+     [ { data: '<h1>Heading1</h1>',
          type: 'text' } ] } ];
\ No newline at end of file
diff --git a/tests/HTML/22-position_data.js b/tests/HTML/22-position_data.js
deleted file mode 100644
index b9fecb6..0000000
--- a/tests/HTML/22-position_data.js
+++ /dev/null
@@ -1,71 +0,0 @@
-exports.name = "Postion data";
-exports.options = {
-	  handler: {}
-	, parser: { includeLocation: true }
-};
-exports.html = "<html>\r\n\n\t<title>The Title</title><body>\nHello world\r\n\n</body>\n\n</html>";
-exports.expected = [
-	{
-		raw: 'html',
-		data: 'html',
-		type: 'tag',
-		name: 'html',
-		location: {
-			line: 1,
-			col: 1
-		},
-		children: [{
-			raw: '\r\n\n\t',
-			data: '\r\n\n\t',
-			type: 'text',
-			location: {
-				line: 1,
-				col: 7
-			}
-		}, {
-			raw: 'title',
-			data: 'title',
-			type: 'tag',
-			name: 'title',
-			location: {
-				line: 3,
-				col: 2
-			},
-			children: [{
-				raw: 'The Title',
-				data: 'The Title',
-				type: 'text',
-				location: {
-					line: 3,
-					col: 9
-				}
-			}]
-		}, {
-			raw: 'body',
-			data: 'body',
-			type: 'tag',
-			name: 'body',
-			location: {
-				line: 3,
-				col: 26
-			},
-			children: [{
-				raw: '\nHello world\r\n\n',
-				data: '\nHello world\r\n\n',
-				type: 'text',
-				location: {
-					line: 3,
-					col: 32
-				}
-			}]
-		}, {
-			raw: '\n\n',
-			data: '\n\n',
-			type: 'text',
-			location: {
-				line: 6,
-				col: 8
-			}
-		}]
-	}
-	];
\ No newline at end of file

From 2206351f9f1003512fb9d03dc069e07ae34ad080 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 11 Nov 2011 19:18:12 +0100
Subject: [PATCH 083/450] Use the new name inside FeedHandler

---
 lib/FeedHandler.js | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib/FeedHandler.js b/lib/FeedHandler.js
index 2b82b1b..878992c 100644
--- a/lib/FeedHandler.js
+++ b/lib/FeedHandler.js
@@ -3,11 +3,11 @@ var DefaultHandler = require("./DefaultHandler.js"),
 	inherits = require("util").inherits;
 
 //TODO: make this a trully streamable handler
-function RssHandler(callback){
+function FeedHandler(callback){
 	DefaultHandler.call(this, callback, { ignoreWhitespace: true, verbose: false, enforceEmptyTags: false });
 }
 
-inherits(RssHandler, DefaultHandler);
+inherits(FeedHandler, DefaultHandler);
 
 function getElements(what, where, one, recurse){
 	if(one) return DomUtils.getElementsByTagName(what, where, recurse, 1)[0];
@@ -23,7 +23,7 @@ var isValidFeed = function(value) {
 	return value === "rss" || value === "feed" || value === "rdf:RDF";
 }
 
-RssHandler.prototype.done = function() {
+FeedHandler.prototype.done = function() {
 	var feed = {};
 	var feedRoot;
 	var tmp, items, childs;
@@ -102,4 +102,4 @@ RssHandler.prototype.done = function() {
 	DefaultHandler.prototype.handleCallback.call(this);
 };
 
-module.exports = RssHandler;
\ No newline at end of file
+module.exports = FeedHandler;
\ No newline at end of file

From 819069ce475c5424f368b1b00102d08351fa8f55 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 11 Nov 2011 19:20:55 +0100
Subject: [PATCH 084/450] Updated readme

---
 README.md | 64 ++-----------------------------------------------------
 1 file changed, 2 insertions(+), 62 deletions(-)

diff --git a/README.md b/README.md
index f7daadb..6e950cc 100644
--- a/README.md
+++ b/README.md
@@ -58,8 +58,8 @@ Besides, it features an additional handler that provides the interface of [sax.j
 	}
 	parser.done();
 
-##Parsing RSS/Atom Feeds
-	new htmlparser.RssHandler(function (error, dom) {
+##Parsing RSS/RDF/Atom Feeds
+	new htmlparser.FeedHandler(function (error, dom) {
 		...
 	});
 
@@ -78,7 +78,6 @@ Indicates whether `<script>` and `<style>` tags should get special treatment. If
 
 ###Usage
 	var handler = new htmlparser.DefaultHandler(function (error) {...}, {
-		verbose: false,
 		ignoreWhitespace: true
 	});
 	
@@ -94,22 +93,15 @@ The following HTML will be used:
 ####Example: true
 
 	[{
-		raw: 'font',
-		data: 'font',
 		type: 'tag',
 		name: 'font',
 		children: [{
-			raw: 'br',
-			data: 'br',
 			type: 'tag',
 			name: 'br'
 		}, {
-			raw: 'this is the text\n',
 			data: 'this is the text\n',
 			type: 'text'
 		}, {
-			raw: 'font',
-			data: 'font',
 			type: 'tag',
 			name: 'font'
 		}]
@@ -118,69 +110,23 @@ The following HTML will be used:
 ####Example: false
 
 	[{
-		raw: 'font',
-		data: 'font',
 		type: 'tag',
 		name: 'font',
 		children: [{
-			raw: '\n\t',
 			data: '\n\t',
 			type: 'text'
 		}, {
-			raw: 'br',
-			data: 'br',
 			type: 'tag',
 			name: 'br'
 		}, {
-			raw: 'this is the text\n',
 			data: 'this is the text\n',
 			type: 'text'
 		}, {
-			raw: 'font',
-			data: 'font',
 			type: 'tag',
 			name: 'font'
 		}]
 	}]
 
-###Option: verbose
-Indicates whether to include extra information on each node in the DOM. This information consists of the "raw" attribute (original, unparsed text found between "<" and ">") and the "data" attribute on "tag", "script", and "comment" nodes. The default value is "true".
-
-The following HTML is used:
-
-	<a href="test.html">xxx</a>
-
-####Example: true
-
-	[{
-		raw: 'a href="test.html"',
-		data: 'a href="test.html"',
-		type: 'tag',
-		name: 'a',
-		attribs: {
-			href: 'test.html'
-		},
-		children: [{
-			raw: 'xxx',
-			data: 'xxx',
-			type: 'text'
-		}]
-	}]
-
-####Example: false
-
-	[{
-		type: 'tag',
-		name: 'a',
-		attribs: {
-			href: 'test.html'
-		},
-		children: [{
-			data: 'xxx',
-			type: 'text'
-		}]
-	}]
-
 ###Option: enforceEmptyTags
 Indicates whether the DOM should prevent children on tags marked as empty in the HTML spec. Typically this should be set to "true" HTML parsing and "false" for XML parsing. The default value is "true".
 
@@ -191,12 +137,9 @@ The following HTML is used:
 ####Example: true
 
 	[{
-		raw: 'link',
-		data: 'link',
 		type: 'tag',
 		name: 'link'
 	}, {
-		raw: 'text',
 		data: 'text',
 		type: 'text'
 	}]
@@ -204,12 +147,9 @@ The following HTML is used:
 ####Example: false
 
 	[{
-		raw: 'link',
-		data: 'link',
 		type: 'tag',
 		name: 'link',
 		children: [{
-			raw: 'text',
 			data: 'text',
 			type: 'text'
 		}]

From 880808cc836c584d4d1319c330028d159c498f7e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 11 Nov 2011 19:28:39 +0100
Subject: [PATCH 085/450] updated readme

---
 README.md | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/README.md b/README.md
index 6e950cc..7356f86 100644
--- a/README.md
+++ b/README.md
@@ -29,24 +29,19 @@ Besides, it features an additional handler that provides the interface of [sax.j
 ##Example output
 
 	[{
-		raw: 'Xyz ',
 		data: 'Xyz ',
 		type: 'text'
 	}, {
-		raw: 'script language= javascript',
-		data: 'script language= javascript',
 		type: 'script',
 		name: 'script',
 		attribs: {
 			language: 'javascript'
 		},
 		children: [{
-			raw: 'var foo = \'<bar>\';<',
 			data: 'var foo = \'<bar>\';<',
 			type: 'text'
 		}]
 	}, {
-		raw: '<!-- Waah! -- ',
 		data: '<!-- Waah! -- ',
 		type: 'comment'
 	}]

From 256c5f3b6f1e21096a30bea17896350acfcf6f37 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 11 Nov 2011 19:29:33 +0100
Subject: [PATCH 086/450] Restructured some code, added 2x continue

---
 lib/Parser.js | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index 5e47a70..8d19f1f 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -129,8 +129,10 @@ Parser.prototype.parseTags = function(){
 			closing = buffer.indexOf(tagSep, next + 1);
 		}
 		rawData = buffer.substring(current, next); //The next chunk of data to parse
-		current = next + 1;
 		elementType = this._parseState;
+		
+		//set elements for next run
+		current = next + 1;
 		this._parseState = (tagSep === "<") ? ElementType.Tag : ElementType.Text;
 		
 		if(elementType === ElementType.Tag){
@@ -179,13 +181,17 @@ Parser.prototype.parseTags = function(){
 			if(rawData.substring(0, 3) === "!--"){ //This tag is a comment
 				this._contentFlags += SpecialTags[ElementType.Comment];
 				this._processComment(rawData.substr(3), tagSep);
+				continue;
 			}
-			else if(rawData.charAt(0) === "!" || rawData.charAt(0) === "?"){
+			
+			if(rawData.charAt(0) === "!" || rawData.charAt(0) === "?"){
 				//ElementType.Directive
 				//TODO: what about CDATA?
 				this._handler.writeDirective(elementName, elementData);
-			} else
-				this._processTag(elementName, elementData, tagSep, rawData);
+				continue;
+			}
+			
+			this._processTag(elementName, elementData, tagSep, rawData);
 		}
 		else if(elementType === ElementType.Text && rawData !== ""){
 			this._handler.writeText(elementData);

From 4988a806ffabcbfebc531a3d0e3df31e2023f94e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 11 Nov 2011 19:35:49 +0100
Subject: [PATCH 087/450] Don't call validateHandler by default

As the default handlers will be used in most cases, a check is not
necessary
---
 lib/Parser.js | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index 8d19f1f..d180911 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -3,7 +3,9 @@ var ElementType = require("./ElementType.js");
 function Parser(handler, options){
 	if(options) this._options = options;
 
-	validateHandler(handler);
+	//Parser.validateHandler(handler);
+	//most people will use the given handlers, so a check is not necessary
+	//if you want to check your parser, just call Parser.validateHandler
 	this._handler = handler;
 
 	this._buffer = "";
@@ -236,7 +238,7 @@ Parser.prototype._processTag = function(name, data, tagSep, raw){
 };
 
 //Checks the handler to ensure it is an object with the right interface
-var validateHandler = function(handler){
+Parser.validateHandler = function(handler){
 	if(typeof handler !== "object")
 		throw Error("Handler is not an object");
 	["reset", "done", "openTag", "closeTag", "writeText", "writeComment", "writeDirective"].forEach(function(name){

From c6fad57bd025fcf7d643a7d899500e02ebffa416 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 11 Nov 2011 19:44:34 +0100
Subject: [PATCH 088/450] Moved default callbacks for event tests to the
 processing part

---
 tests/03-events.js        | 18 +++++++++++++++---
 tests/Events/01-simple.js | 13 +------------
 2 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/tests/03-events.js b/tests/03-events.js
index cc653c7..b76528c 100644
--- a/tests/03-events.js
+++ b/tests/03-events.js
@@ -6,14 +6,26 @@ exports.dir = "./Events/";
 exports.test = function(test, cb){
 	var tokens = [];
 	var cbs = {
+		onopentag: function(name, attributes){
+			tokens.push({event:"open", name: name, attributes: attributes});
+		},
+		onclosetag: function(name){
+			tokens.push({event:"close", name: name});
+		},
+		ontext: function(text){
+			tokens.push({event:"text", text: text});
+		},
+		oncomment: function(data){
+			tokens.push({event:"comment", data:data});
+		},
+		onprocessinginstruction: function(name, data){
+			tokens.push({event:"processing", name:name, data:data});
+		},
 		onend: function(){
 			//deletes all tokens
 			cb(null, tokens.splice(0));
 		}
 	};
-	for(var i = 0; i < test.callbacks.length; i+=2){
-		cbs[test.callbacks[i]] = test.callbacks[i+1].bind(tokens);
-	};
 	var handler = new EventedHandler(cbs, test.options.handler);
 	helper.writeToParser(handler, test.options.parser, test.html);
 }
\ No newline at end of file
diff --git a/tests/Events/01-simple.js b/tests/Events/01-simple.js
index 135c52d..c0b888f 100644
--- a/tests/Events/01-simple.js
+++ b/tests/Events/01-simple.js
@@ -1,17 +1,6 @@
-exports.name = "Events";
+exports.name = "simple";
 exports.type = "event";
 exports.options = {handler: {}, parser: {}};
-exports.callbacks = [
-	"onopentag", function(name, attributes){
-		this.push({event:"open", name: name, attributes: attributes});
-	},
-	"onclosetag", function(name){
-		this.push({event:"close", name: name});
-	},
-	"ontext", function(text){
-		this.push({event:"text", text: text});
-	}
-];
 exports.html = "<h1 class=test>adsf</h1>";
 exports.expected = [ { event: 'open',
     name: 'h1',

From e336cfd5ea978383fb4ab0175d3ad4bccd74ba42 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 11 Nov 2011 19:47:24 +0100
Subject: [PATCH 089/450] removed type property

---
 tests/Events/01-simple.js | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/Events/01-simple.js b/tests/Events/01-simple.js
index c0b888f..2cf35f7 100644
--- a/tests/Events/01-simple.js
+++ b/tests/Events/01-simple.js
@@ -1,5 +1,4 @@
 exports.name = "simple";
-exports.type = "event";
 exports.options = {handler: {}, parser: {}};
 exports.html = "<h1 class=test>adsf</h1>";
 exports.expected = [ { event: 'open',

From 4a795e0c90d59c521ed4e1894a925b4aba6d85ee Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 11 Nov 2011 19:48:11 +0100
Subject: [PATCH 090/450] Added the template html-test as a event test

---
 tests/Events/02-template.js | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)
 create mode 100644 tests/Events/02-template.js

diff --git a/tests/Events/02-template.js b/tests/Events/02-template.js
new file mode 100644
index 0000000..76a29ab
--- /dev/null
+++ b/tests/Events/02-template.js
@@ -0,0 +1,32 @@
+exports.name = "Template script tags";
+exports.options = {handler: {}, parser: {}};
+exports.html = "<script type=\"text/template\"><h1>Heading1</h1></script>";
+exports.expected = [
+  {
+    "event": "open",
+    "name": "script",
+    "attributes": {
+      "type": "text/template"
+    }
+  },
+  {
+    "event": "text",
+    "text": "<h1"
+  },
+  {
+    "event": "text",
+    "text": ">Heading1"
+  },
+  {
+    "event": "text",
+    "text": "</h1"
+  },
+  {
+    "event": "text",
+    "text": ">"
+  },
+  {
+    "event": "close",
+    "name": "script"
+  }
+];
\ No newline at end of file

From 275f0c36f39061e8a0931eeb9713717386a4a7d0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 11 Nov 2011 20:04:47 +0100
Subject: [PATCH 091/450] Now all tests pass

---
 lib/DefaultHandler.js                |  6 -----
 tests/HTML/04-unescaped_in_script.js | 35 +++++++++++++++-------------
 2 files changed, 19 insertions(+), 22 deletions(-)

diff --git a/lib/DefaultHandler.js b/lib/DefaultHandler.js
index a59f12a..dbc5d1a 100644
--- a/lib/DefaultHandler.js
+++ b/lib/DefaultHandler.js
@@ -69,8 +69,6 @@ DefaultHandler.prototype._addDomElement = function(element){
 		lastChild = lastTag.children[lastTag.children.length-1];
 		if(this._inSpecialTag && element.type === ElementType.Text && lastChild.type === ElementType.Text){
 			lastChild.data += element.data;
-		    if(this._options.verbose)
-				lastChild.raw = lastChild.data;
 		}
 		else lastTag.children.push(element);
 	}
@@ -89,10 +87,6 @@ DefaultHandler.prototype.openTag = function(name, attribs, type){
 
 DefaultHandler.prototype.writeText = function(data){
 	if(this._options.ignoreWhitespace && data.trim() === "") return;
-	var lastTag = this._tagStack[this._tagStack.length-1];
-	if(this._inSpecialTag && lastTag && lastTag.children && lastTag.children[lastTag.children.length-1].type === ElementType.Text){
-		lastTag.children[lastTag.children.length-1].data += data;
-	}
 	this._addDomElement({data:data, type:ElementType.Text});
 };
 
diff --git a/tests/HTML/04-unescaped_in_script.js b/tests/HTML/04-unescaped_in_script.js
index d284080..822c107 100644
--- a/tests/HTML/04-unescaped_in_script.js
+++ b/tests/HTML/04-unescaped_in_script.js
@@ -4,21 +4,24 @@ exports.options = {
 	, parser: {}
 };
 exports.html = "<head><script language=\"Javascript\">var foo = \"<bar>\"; alert(2 > foo); var baz = 10 << 2; var zip = 10 >> 1; var yap = \"<<>>>><<\";</script></head>";
-exports.expected =
-[ { raw: 'head'
-  , data: 'head'
-  , type: 'tag'
-  , name: 'head'
-  , children: 
-     [ { type: 'script'
-       , name: 'script'
-       , attribs: { language: 'Javascript' }
-       , children: 
-          [ { data: 'var foo = "<bar>"; alert(2 > foo); var baz = 10 << 2; var zip = 10 >> 1; var yap = \"<<>>>><<\";'
-            , type: 'text'
-            }
-          ]
-       }
-     ]
+exports.expected = [
+  {
+    'type': 'tag',
+    'name': 'head',
+    'children': [
+      {
+        'type': 'script',
+        'name': 'script',
+        'attribs': {
+          'language': 'Javascript'
+        },
+        'children': [
+          {
+            'data': 'var foo = \'<bar>\'; alert(2 > foo); var baz = 10 << 2; var zip = 10 >> 1; var yap = \'<<>>>><<\';',
+            'type': 'text'
+          }
+        ]
+      }
+    ]
   }
 ];
\ No newline at end of file

From 7dee91c14126ca465fa26548d519f1cd810a9688 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 12 Nov 2011 13:38:21 +0100
Subject: [PATCH 092/450] fix

---
 tests/HTML/04-unescaped_in_script.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/HTML/04-unescaped_in_script.js b/tests/HTML/04-unescaped_in_script.js
index 822c107..5fdefc4 100644
--- a/tests/HTML/04-unescaped_in_script.js
+++ b/tests/HTML/04-unescaped_in_script.js
@@ -17,7 +17,7 @@ exports.expected = [
         },
         'children': [
           {
-            'data': 'var foo = \'<bar>\'; alert(2 > foo); var baz = 10 << 2; var zip = 10 >> 1; var yap = \'<<>>>><<\';',
+            'data': 'var foo = "<bar>"; alert(2 > foo); var baz = 10 << 2; var zip = 10 >> 1; var yap = "<<>>>><<";',
             'type': 'text'
           }
         ]

From b411bc6355c379591120c97ed9456da8a96b7a44 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 12 Nov 2011 13:38:45 +0100
Subject: [PATCH 093/450] Moved empty tags to ClosingTags, improved closeTag
 logic

---
 lib/ClosingTags.js    | 40 ++++++++++++++++++++++++++++++++++++++++
 lib/DefaultHandler.js | 27 +++++++++++++--------------
 lib/EventedHandler.js | 21 +++++++++++----------
 3 files changed, 64 insertions(+), 24 deletions(-)
 create mode 100644 lib/ClosingTags.js

diff --git a/lib/ClosingTags.js b/lib/ClosingTags.js
new file mode 100644
index 0000000..a65ab62
--- /dev/null
+++ b/lib/ClosingTags.js
@@ -0,0 +1,40 @@
+/*
+*	List of tags that close others / are self-closing
+*/
+
+//Tags that close others
+exports.others = {
+	body: "head",
+	p: "p",
+	li: {
+		close: "li",
+		not: "ul"
+	},
+	tr: {
+		close: "tr",
+		not: "table"
+	},
+	td: {
+		close: "td",
+		not: "table"
+	}
+	//... TODO
+};
+
+//HTML Tags that shouldn't contain child nodes
+exports.self = {
+	area: true,
+	base: true,
+	basefont: true,
+	br: true,
+	col: true,
+	frame: true,
+	hr: true,
+	img: true,
+	input: true,
+	isindex: true,
+	link: true,
+	meta: true,
+	param: true,
+	embed: true
+};
\ No newline at end of file
diff --git a/lib/DefaultHandler.js b/lib/DefaultHandler.js
index dbc5d1a..82785a1 100644
--- a/lib/DefaultHandler.js
+++ b/lib/DefaultHandler.js
@@ -7,29 +7,28 @@ function DefaultHandler(callback, options){
 	this._tagStack = [];
 	if(options){ //otherwise, the prototype is used
 		this._options = options;
-		if(typeof this._options.verbose === "undefined")
-			this._options.verbose = true;
 		if (typeof this._options.enforceEmptyTags === "undefined")
 			this._options.enforceEmptyTags = true;
 	}
-	this._callback = callback;
+	if(callback) this._callback = callback;
 }
 
 //default options
 DefaultHandler.prototype._options = {
-	ignoreWhitespace: false,	//Keep whitespace-only text nodes
-    enforceEmptyTags: true		//Don't allow children for HTML tags defined as empty in spec
+	ignoreWhitespace: false,//Keep whitespace-only text nodes
+    enforceEmptyTags: true,	//Don't allow children for HTML tags defined as empty in spec
+	closeOtherTags: false	//Let tags close others (eg. <p> closes other <p>s) TODO
 };
 
-//HTML Tags that shouldn't contain child nodes
-var emptyTags={area:true,base:true,basefont:true,br:true,col:true,frame:true,hr:true,img:true,input:true,isindex:true,link:true,meta:true,param:true,embed:true};
+var closing = require("./ClosingTags.js");
+var closingOthers = closing.others;
+var emptyTags = closing.self;
 
 //**Public**//
 //Methods//
 //Resets the handler back to starting state
-DefaultHandler.prototype.reset = function() {
-	DefaultHandler.call(this, this._callback);
-};
+DefaultHandler.prototype.reset = DefaultHandler;
+
 //Signals the handler that parsing is done
 DefaultHandler.prototype.done = function() {
 	this._done = true;
@@ -52,10 +51,10 @@ DefaultHandler.prototype.closeTag = function(name){
 	//Ignore closing tags that obviously don't have an opening tag
 	if(!this._tagStack || this._isEmptyTag(name)) return;
 	
-	var pos = this._tagStack.length - 1;
-	while(pos !== -1 && this._tagStack[pos--].name !== name){}
-	if (pos !== -1 || this._tagStack[0].name === name)
-	    this._tagStack.splice(pos+1);
+	var pos = this._tagStack.length;
+	while(pos !== 0 && this._tagStack[--pos].name !== name){}
+	if (pos !== 0 || this._tagStack[0].name === name)
+	    this._tagStack.splice(pos);
 };
 
 DefaultHandler.prototype._addDomElement = function(element){
diff --git a/lib/EventedHandler.js b/lib/EventedHandler.js
index 2f9df29..038f6c7 100644
--- a/lib/EventedHandler.js
+++ b/lib/EventedHandler.js
@@ -10,8 +10,7 @@ EventedHandler.prototype._options = {
 	enforceEmptyTags: true //auto-close empty tags
 };
 
-//HTML Tags that shouldn't contain child nodes
-var emptyTags={area:true,base:true,basefont:true,br:true,col:true,frame:true,hr:true,img:true,input:true,isindex:true,link:true,meta:true,param:true,embed:true};
+var emptyTags = require("./ClosingTags.js").self;
 
 EventedHandler.prototype.isEmptyTag = function(name){
 	return this._options.enforceEmptyTags && emptyTags[name];
@@ -26,11 +25,11 @@ EventedHandler.prototype.openTag = function(name, attrs /*, type*/){
 };
 
 EventedHandler.prototype.closeTag = function(name){
-	if(!this.isEmptyTag(name) && this._stack){
+	if(this._stack && !this.isEmptyTag(name)){
 		if(!this._cbs.onclosetag) return; //nothing to do
-		var i = this._stack.length-1;
-		while(i !== -1 && this._stack[i--] !== name){}
-		if( ++i !== 0 || this._stack[0] === name)
+		var i = this._stack.length;
+		while(i !== 0 && this._stack[--i] !== name){}
+		if(i !== 0 || this._stack[0] === name)
 			while(i < this._stack.length)
 				this._cbs.onclosetag(this._stack.pop());
 	}
@@ -39,6 +38,12 @@ EventedHandler.prototype.closeTag = function(name){
 		this.openTag(name, {});
 };
 
+EventedHandler.prototype.done = function(){
+	//close all tags that are still opened
+	this.closeTag(this._stack[0]); //TODO what about self-closing tags?
+	if(this._cbs.onend) this._cbs.onend();
+};
+
 //wrappers for the callbacks
 EventedHandler.prototype.writeComment = function(data){
 	var cb = this._cbs.oncomment;
@@ -55,10 +60,6 @@ EventedHandler.prototype.writeDirective = function(name, data){
 	if(cb) cb(name, data);
 };
 
-EventedHandler.prototype.done = function(){
-	if(this._cbs.onend) this._cbs.onend();
-};
-
 EventedHandler.prototype.reset = function(){
 	if(this._cbs.onreset) this._cbs.onreset();
 };

From f8914d43a350e33d1e5c134c3c36e10f1e626e30 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 13 Nov 2011 13:05:27 +0100
Subject: [PATCH 094/450] Moved the interface of EventedHandler to the parser

Requires less (duplicated!) logic, performs better
---
 lib/DefaultHandler.js              |  46 ++++--------
 lib/EventedHandler.js              |  73 -------------------
 lib/FeedHandler.js                 |   8 +--
 lib/Parser.js                      | 108 ++++++++++++++++-------------
 tests/03-events.js                 |   3 +-
 tests/99-benchmark.js              |   8 +++
 tests/HTML/02-single_tag_1.js      |   4 ++
 tests/HTML/19-ignore_empty_tags.js |   6 +-
 8 files changed, 91 insertions(+), 165 deletions(-)
 delete mode 100644 lib/EventedHandler.js
 create mode 100644 tests/99-benchmark.js

diff --git a/lib/DefaultHandler.js b/lib/DefaultHandler.js
index 82785a1..77cd08d 100644
--- a/lib/DefaultHandler.js
+++ b/lib/DefaultHandler.js
@@ -7,8 +7,6 @@ function DefaultHandler(callback, options){
 	this._tagStack = [];
 	if(options){ //otherwise, the prototype is used
 		this._options = options;
-		if (typeof this._options.enforceEmptyTags === "undefined")
-			this._options.enforceEmptyTags = true;
 	}
 	if(callback) this._callback = callback;
 }
@@ -16,45 +14,30 @@ function DefaultHandler(callback, options){
 //default options
 DefaultHandler.prototype._options = {
 	ignoreWhitespace: false,//Keep whitespace-only text nodes
-    enforceEmptyTags: true,	//Don't allow children for HTML tags defined as empty in spec
-	closeOtherTags: false	//Let tags close others (eg. <p> closes other <p>s) TODO
 };
 
-var closing = require("./ClosingTags.js");
-var closingOthers = closing.others;
-var emptyTags = closing.self;
-
 //**Public**//
 //Methods//
 //Resets the handler back to starting state
-DefaultHandler.prototype.reset = DefaultHandler;
+DefaultHandler.prototype.onreset = DefaultHandler;
 
 //Signals the handler that parsing is done
-DefaultHandler.prototype.done = function() {
+DefaultHandler.prototype.onend = function() {
+	if(this._done) return;
 	this._done = true;
-	this.handleCallback(null);
+	this._handleCallback(null);
 };
 
 //Methods//
-DefaultHandler.prototype.error =
-DefaultHandler.prototype.handleCallback = function(error){
+DefaultHandler.prototype.onerror =
+DefaultHandler.prototype._handleCallback = function(error){
 		if(typeof this._callback === "function")
 			this._callback(error, this.dom);
 		else if(error) throw error;
 };
 
-DefaultHandler.prototype._isEmptyTag = function(name) {
-	return this._options.enforceEmptyTags && emptyTags[name];
-};
-
-DefaultHandler.prototype.closeTag = function(name){
-	//Ignore closing tags that obviously don't have an opening tag
-	if(!this._tagStack || this._isEmptyTag(name)) return;
-	
-	var pos = this._tagStack.length;
-	while(pos !== 0 && this._tagStack[--pos].name !== name){}
-	if (pos !== 0 || this._tagStack[0].name === name)
-	    this._tagStack.splice(pos);
+DefaultHandler.prototype.onclosetag = function(name){
+	this._tagStack.pop();
 };
 
 DefaultHandler.prototype._addDomElement = function(element){
@@ -73,23 +56,20 @@ DefaultHandler.prototype._addDomElement = function(element){
 	}
 };
 
-DefaultHandler.prototype.openTag = function(name, attribs, type){
+DefaultHandler.prototype.onopentag = function(name, attribs, type){
 	if(type === ElementType.Script || type === ElementType.Style) this._inSpecialTag = true;
 	
 	var element = {type:type, name:name, attribs:attribs};
-	
 	this._addDomElement(element);
-	
-	//Don't add tags to the tag stack that can't have children
-	if(!this._isEmptyTag(name)) this._tagStack.push(element);
+	this._tagStack.push(element);
 };
 
-DefaultHandler.prototype.writeText = function(data){
+DefaultHandler.prototype.ontext = function(data){
 	if(this._options.ignoreWhitespace && data.trim() === "") return;
 	this._addDomElement({data:data, type:ElementType.Text});
 };
 
-DefaultHandler.prototype.writeComment = function(data){
+DefaultHandler.prototype.oncomment = function(data){
 	var lastTag = this._tagStack[this._tagStack.length-1], element,
 		lastChild = lastTag && lastTag.children && lastTag.children[lastTag.children.length-1];
 	if(!lastChild || lastChild.type !== ElementType.Comment){
@@ -101,7 +81,7 @@ DefaultHandler.prototype.writeComment = function(data){
 	else lastChild.data += data;
 }
 
-DefaultHandler.prototype.writeDirective = function(name, data){
+DefaultHandler.prototype.onprocessinginstruction = function(name, data){
 	this._addDomElement({name:name, data:data, type:ElementType.Directive});
 };
 
diff --git a/lib/EventedHandler.js b/lib/EventedHandler.js
deleted file mode 100644
index 038f6c7..0000000
--- a/lib/EventedHandler.js
+++ /dev/null
@@ -1,73 +0,0 @@
-var EventedHandler = function(cbs, options){
-	this._cbs = cbs || {};
-	if(options) this._options = options;
-	
-	//privates
-	this._stack = [];
-};
-
-EventedHandler.prototype._options = {
-	enforceEmptyTags: true //auto-close empty tags
-};
-
-var emptyTags = require("./ClosingTags.js").self;
-
-EventedHandler.prototype.isEmptyTag = function(name){
-	return this._options.enforceEmptyTags && emptyTags[name];
-};
-
-EventedHandler.prototype.openTag = function(name, attrs /*, type*/){
-	if(this._cbs.onopentag) this._cbs.onopentag(name, attrs);
-	if(this.isEmptyTag(name)){
-		if(this._cbs.onclosetag) this._cbs.onclosetag(name);
-	}
-	else this._stack.push(name);
-};
-
-EventedHandler.prototype.closeTag = function(name){
-	if(this._stack && !this.isEmptyTag(name)){
-		if(!this._cbs.onclosetag) return; //nothing to do
-		var i = this._stack.length;
-		while(i !== 0 && this._stack[--i] !== name){}
-		if(i !== 0 || this._stack[0] === name)
-			while(i < this._stack.length)
-				this._cbs.onclosetag(this._stack.pop());
-	}
-	//many browsers (eg. Safari, Chrome) convert </br> to <br>
-	else if(name === "br" && this._options.enforceEmptyTags)
-		this.openTag(name, {});
-};
-
-EventedHandler.prototype.done = function(){
-	//close all tags that are still opened
-	this.closeTag(this._stack[0]); //TODO what about self-closing tags?
-	if(this._cbs.onend) this._cbs.onend();
-};
-
-//wrappers for the callbacks
-EventedHandler.prototype.writeComment = function(data){
-	var cb = this._cbs.oncomment;
-	if(cb) cb(data);
-};
-
-EventedHandler.prototype.writeText = function(text){
-	var cb = this._cbs.ontext;
-	if(cb) cb(text);
-};
-
-EventedHandler.prototype.writeDirective = function(name, data){
-	var cb = this._cbs.onprocessinginstruction;
-	if(cb) cb(name, data);
-};
-
-EventedHandler.prototype.reset = function(){
-	if(this._cbs.onreset) this._cbs.onreset();
-};
-
-EventedHandler.prototype.error = function(error){
-	if(this._cbs.onerror) this._cbs.onerror();
-	else throw error;
-};
-
-//export the evented handler
-module.exports = EventedHandler;
\ No newline at end of file
diff --git a/lib/FeedHandler.js b/lib/FeedHandler.js
index 878992c..331611d 100644
--- a/lib/FeedHandler.js
+++ b/lib/FeedHandler.js
@@ -2,9 +2,9 @@ var DefaultHandler = require("./DefaultHandler.js"),
 	DomUtils = require("./DomUtils.js"),
 	inherits = require("util").inherits;
 
-//TODO: make this a trully streamable handler
+//TODO: make this a streamable handler
 function FeedHandler(callback){
-	DefaultHandler.call(this, callback, { ignoreWhitespace: true, verbose: false, enforceEmptyTags: false });
+	DefaultHandler.call(this, callback, { ignoreWhitespace: true });
 }
 
 inherits(FeedHandler, DefaultHandler);
@@ -23,7 +23,7 @@ var isValidFeed = function(value) {
 	return value === "rss" || value === "feed" || value === "rdf:RDF";
 }
 
-FeedHandler.prototype.done = function() {
+FeedHandler.prototype.onend = function() {
 	var feed = {};
 	var feedRoot;
 	var tmp, items, childs;
@@ -99,7 +99,7 @@ FeedHandler.prototype.done = function() {
 		}
 		this.dom = feed;
 	}
-	DefaultHandler.prototype.handleCallback.call(this);
+	DefaultHandler.prototype._handleCallback.call(this);
 };
 
 module.exports = FeedHandler;
\ No newline at end of file
diff --git a/lib/Parser.js b/lib/Parser.js
index d180911..be00b19 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -1,15 +1,12 @@
 var ElementType = require("./ElementType.js");
 
-function Parser(handler, options){
+function Parser(cbs, options){
 	if(options) this._options = options;
-
-	//Parser.validateHandler(handler);
-	//most people will use the given handlers, so a check is not necessary
-	//if you want to check your parser, just call Parser.validateHandler
-	this._handler = handler;
+	if(cbs) this._cbs = cbs;
 
 	this._buffer = "";
 	this._prevTagSep = "";
+	this._stack = [];
 	this._contentFlags = 0;
 	this._done = false;
 	this._parseState = ElementType.Text;
@@ -23,11 +20,6 @@ var _reTagName = /^\s*(\/?)\s*([^\s\/]+)/; //Used to find the tag name for an el
 //Find attributes in a tag
 var _reAttrib = /([^=<>\"\'\s]+)\s*=\s*"([^"]*)"|([^=<>\"\'\s]+)\s*=\s*'([^']*)'|([^=<>\"\'\s]+)\s*=\s*([^'"\s]+)|([^=<>\"\'\s\/]+)/g;//"
 
-var tagTypes = {};
-tagTypes[ ElementType.Script ] = true;
-tagTypes[ ElementType.Style ] = true;
-tagTypes[ ElementType.Tag ] = true;
-
 Parser.prototype._options = {
 	includeLocation: false, //Do not track element position in document by default
 	xmlMode: false //Special behaviour for script/style tags by default
@@ -44,7 +36,7 @@ Parser.prototype.parseComplete = function(data){
 
 //Parses a piece of an HTML document
 Parser.prototype.parseChunk = function(data){
-	if(this._done) this.handleError(Error("Attempted to parse chunk after parsing already done"));
+	if(this._done) this._handleError(Error("Attempted to parse chunk after parsing already done"));
 	this._buffer += data; //FIXME: this can be a bottleneck
 	this.parseTags();
 };
@@ -60,20 +52,23 @@ Parser.prototype.done = function(){
 		if(this._parseState === ElementType.Tag){
 			data = data.trim();
 			var name = parseTagName(data);
-			if(name.charAt(0) === "/") this._handler.closeTag(name.substr(1));
-			else this._handler.openTag(name, parseAttributes(data), ElementType.Tag);
+			if(name.charAt(0) === "/"){
+				if(this._cbs.onclosetag) this._cbs.onclosetag(name.substr(1));
+			} else if(this._cbs.onopentag){
+				this._cbs.onopentag(name, parseAttributes(data), ElementType.Tag);
+			}
 		}
-		else this._handler.writeText(data);
+		else if(this._cbs.ontext) this._cbs.ontext(data);
 		
 		this._buffer = "";
 	}
-	this._handler.done();
+	if(this._cbs.onend) this._cbs.onend();
 };
 
 //Resets the parser to a blank state, ready to parse a new HTML document
 Parser.prototype.reset = function(){
-	Parser.call(this, this._handler);
-	this._handler.reset();
+	Parser.call(this);
+	if(this._cbs.onreset) this._cbs.onreset();
 };
 
 //**Private**//
@@ -168,11 +163,11 @@ Parser.prototype.parseTags = function(){
 		else if(!this._options.xmlMode && rawData.substring(0, 3) !== "!--"){
 			//If the previous element is text, append the last tag sep to element
 			if(this._contentFlags >= SpecialTags.w){
-				this._handler.writeText(this._prevTagSep + rawData);
+				if(this._cbs.ontext) this._cbs.ontext(this._prevTagSep + rawData);
 			}
 			else{ //The previous element was not text
 				this._contentFlags += SpecialTags.w;
-				if(rawData !== "") this._handler.writeText(rawData);
+				if(rawData !== "" && this._cbs.ontext) this._cbs.ontext(rawData);
 			}
 			this._prevTagSep = tagSep;
 			continue;
@@ -189,14 +184,16 @@ Parser.prototype.parseTags = function(){
 			if(rawData.charAt(0) === "!" || rawData.charAt(0) === "?"){
 				//ElementType.Directive
 				//TODO: what about CDATA?
-				this._handler.writeDirective(elementName, elementData);
+				if(this._cbs.onprocessinginstruction){
+					this._cbs.onprocessinginstruction(elementName, elementData);
+				}
 				continue;
 			}
-			
-			this._processTag(elementName, elementData, tagSep, rawData);
+			if(elementName.charAt(0) === "/") this._processCloseTag(elementName.substr(1));
+			else this._processOpenTag(elementName, elementData, tagSep);
 		}
-		else if(elementType === ElementType.Text && rawData !== ""){
-			this._handler.writeText(elementData);
+		else if(elementType === ElementType.Text && rawData !== "" && this._cbs.ontext){
+			this._cbs.ontext(elementData);
 		}
 	}
 
@@ -204,52 +201,63 @@ Parser.prototype.parseTags = function(){
 };
 
 Parser.prototype._processComment = function(rawData, tagSep){
+	this._prevTagSep = tagSep;
+	
 	if(tagSep === ">" && rawData.substr(-2) === "--"){ //comment ends
 		//remove the written flag (also removes the comment flag)
 		this._contentFlags %= SpecialTags.w;
 		rawData = rawData.slice(0, -2);
 	}
 	else rawData += tagSep;
-	this._prevTagSep = tagSep;
 	
-	this._handler.writeComment(rawData);
+	if(this._cbs.oncomment) this._cbs.oncomment(rawData);
 };
 
-Parser.prototype._processTag = function(name, data, tagSep, raw){
-	if(name.charAt(0) === "/"){
-		this._handler.closeTag(name.substring(1));
-		return;
-	}
-	
+var emptyTags = require("./ClosingTags.js").self;
+
+Parser.prototype._isEmptyTag = function(name){
+	return !this._options.xmlMode && emptyTags[name];
+};
+
+Parser.prototype._processCloseTag = function(name){
+	if(this._stack && !this._isEmptyTag(name)){
+		var i = this._stack.length;
+		while(i !== 0 && this._stack[--i] !== name){}
+		if(i !== 0 || this._stack[0] === name)
+			if(this._cbs.onclosetag){
+				while(i < this._stack.length)
+					this._cbs.onclosetag(this._stack.pop());
+			}
+			else this._stack.splice(i);
+		}
+	//many browsers (eg. Safari, Chrome) convert </br> to <br>
+	else if(name === "br" && !this._options.xmlMode)
+		this._processOpenTag(name, "/");
+};
+
+Parser.prototype._processOpenTag = function(name, data, tagSep){
 	var type = ElementType.Tag;
 	if(this._options.xmlMode){ /*do nothing*/ }
 	else if(name === "script") type = ElementType.Script;
 	else if(name === "style")  type = ElementType.Style;
-
-	this._handler.openTag(name, parseAttributes(data), type);
+	
+	if(this._cbs.onopentag){
+		this._cbs.onopentag(name, parseAttributes(data), type);
+	}
 	
 	//If tag self-terminates, add an explicit, separate closing tag
-	if(data.substr(-1) === "/"){
-		this._handler.closeTag(name);
+	if(data.substr(-1) === "/" || this._isEmptyTag(name)){
+		if(this._cbs.onclosetag) this._cbs.onclosetag(name);
 	} else {
 		this._contentFlags += SpecialTags[type];
+		this._stack.push(name);
 		this._prevTagSep = tagSep;
 	}
 };
 
-//Checks the handler to ensure it is an object with the right interface
-Parser.validateHandler = function(handler){
-	if(typeof handler !== "object")
-		throw Error("Handler is not an object");
-	["reset", "done", "openTag", "closeTag", "writeText", "writeComment", "writeDirective"].forEach(function(name){
-		if(typeof handler[name] !== "function")
-			throw Error("Handler method '" + name + "' is invalid");
-	});
-};
-
-Parser.prototype.handleError = function(error){
-	if(typeof this._handler.error === "function")
-		this._handler.error(error);
+Parser.prototype._handleError = function(error){
+	if(this._cbs.onerror)
+		this._cbs.onerror(error);
 	else throw error;
 };
 
diff --git a/tests/03-events.js b/tests/03-events.js
index b76528c..3bbfebe 100644
--- a/tests/03-events.js
+++ b/tests/03-events.js
@@ -26,6 +26,5 @@ exports.test = function(test, cb){
 			cb(null, tokens.splice(0));
 		}
 	};
-	var handler = new EventedHandler(cbs, test.options.handler);
-	helper.writeToParser(handler, test.options.parser, test.html);
+	helper.writeToParser(cbs, test.options.parser, test.html);
 }
\ No newline at end of file
diff --git a/tests/99-benchmark.js b/tests/99-benchmark.js
new file mode 100644
index 0000000..071badc
--- /dev/null
+++ b/tests/99-benchmark.js
@@ -0,0 +1,8 @@
+var xml = Array(5e3).join("<!directive><tag attr='value'> text <!--Comment<>--></tag>"),
+	handler = new (require("../lib/EventedHandler.js")),
+	parser = new (require("../lib/Parser.js"))(handler),
+	ben = require("ben");
+
+console.log("Test took (ms)", ben(1e2, function(){
+	parser.parseComplete(xml);
+}));
\ No newline at end of file
diff --git a/tests/HTML/02-single_tag_1.js b/tests/HTML/02-single_tag_1.js
index 3f4871f..12ecc38 100644
--- a/tests/HTML/02-single_tag_1.js
+++ b/tests/HTML/02-single_tag_1.js
@@ -12,5 +12,9 @@ exports.expected = [
   {
     "data": "text",
     "type": "text"
+  },
+  {
+    "type": "tag",
+    "name": "br"
   }
 ];
\ No newline at end of file
diff --git a/tests/HTML/19-ignore_empty_tags.js b/tests/HTML/19-ignore_empty_tags.js
index b50c086..3445884 100644
--- a/tests/HTML/19-ignore_empty_tags.js
+++ b/tests/HTML/19-ignore_empty_tags.js
@@ -1,7 +1,7 @@
-exports.name = "Ignore empty tags";
+exports.name = "Ignore empty tags (xml mode)";
 exports.options = {
-	  handler: { enforceEmptyTags: false }
-	, parser: {}
+	  handler: {}
+	, parser: {xmlMode:true}
 };
 exports.html = "<link>text</link>";
 exports.expected = [

From 803ca097c84d988ff6d36a9aa68c32df09b59775 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 13 Nov 2011 13:37:43 +0100
Subject: [PATCH 095/450] removed require EventedHandler

---
 tests/03-events.js    | 3 +--
 tests/99-benchmark.js | 3 +--
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/tests/03-events.js b/tests/03-events.js
index 3bbfebe..f30ea99 100644
--- a/tests/03-events.js
+++ b/tests/03-events.js
@@ -1,5 +1,4 @@
-var helper = require("./test-helper.js"),
-	EventedHandler = require("../lib/EventedHandler.js");
+var helper = require("./test-helper.js");
 
 exports.dir = "./Events/";
 
diff --git a/tests/99-benchmark.js b/tests/99-benchmark.js
index 071badc..1b1ac93 100644
--- a/tests/99-benchmark.js
+++ b/tests/99-benchmark.js
@@ -1,6 +1,5 @@
 var xml = Array(5e3).join("<!directive><tag attr='value'> text <!--Comment<>--></tag>"),
-	handler = new (require("../lib/EventedHandler.js")),
-	parser = new (require("../lib/Parser.js"))(handler),
+	parser = new (require("../lib/Parser.js"))({}),
 	ben = require("ben");
 
 console.log("Test took (ms)", ben(1e2, function(){

From 68482a8e9690dac9a972c6f978df0b8f1d9a8183 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 13 Nov 2011 13:47:56 +0100
Subject: [PATCH 096/450] fixed getElementsByTagType

---
 lib/DomUtils.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/DomUtils.js b/lib/DomUtils.js
index 9f4c2b5..10255d7 100644
--- a/lib/DomUtils.js
+++ b/lib/DomUtils.js
@@ -55,7 +55,7 @@ module.exports = {
 	}
 	
 	, getElementsByTagType: function(type, currentElement, recurse, limit){
-		return this.testAttr(function(elem){return elem.type === type;}, currentElement, recurse, limit);
+		return this.getElements({tag_type: type}, currentElement, recurse, limit);
 		//function(elem){return elem.type === type;}
 	}
 	

From 536b157773250f754c47b8cfa28ce3ac7a739e29 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 13 Nov 2011 13:48:08 +0100
Subject: [PATCH 097/450] Added tests for DomUtils

---
 tests/00-runtests.js            |  5 +--
 tests/04-dom_utils.js           | 17 +++++++++++
 tests/DomUtils/01-by_id.js      | 54 +++++++++++++++++++++++++++++++++
 tests/DomUtils/02-by_tagname.js | 22 ++++++++++++++
 tests/DomUtils/03-by_type.js    | 22 ++++++++++++++
 5 files changed, 118 insertions(+), 2 deletions(-)
 create mode 100644 tests/04-dom_utils.js
 create mode 100644 tests/DomUtils/01-by_id.js
 create mode 100644 tests/DomUtils/02-by_tagname.js
 create mode 100644 tests/DomUtils/03-by_type.js

diff --git a/tests/00-runtests.js b/tests/00-runtests.js
index 15147aa..a1ccc67 100644
--- a/tests/00-runtests.js
+++ b/tests/00-runtests.js
@@ -48,8 +48,9 @@ function runTests(test){
 };
 
 //run all tests
-var tests = ["./01-html.js", "./02-feed.js", "./03-events.js"];
-tests.map(require).forEach(runTests);
+["./01-html.js", "./02-feed.js", "./03-events.js", "./04-dom_utils.js"]
+	.map(require)
+	.forEach(runTests);
 
 //log the results
 console.log("Total time:", totalTime);
diff --git a/tests/04-dom_utils.js b/tests/04-dom_utils.js
new file mode 100644
index 0000000..b44c9a0
--- /dev/null
+++ b/tests/04-dom_utils.js
@@ -0,0 +1,17 @@
+var DomUtils = require("../lib/DomUtils.js");
+
+//generate a dom
+var handler = new (require("../lib/DefaultHandler.js"))();
+
+(new (require("../lib/Parser.js"))(handler)).parseComplete(
+	Array(21).join("<?xml><tag1 id='asdf'> <script>text</script> <!-- comment --> <tag2> text </tag1>")
+);
+
+var dom = handler.dom;
+
+exports.dir = "./DomUtils/";
+
+exports.test = function(test, cb){
+	cb(null, test.getElements(dom));
+	cb(null, test.getByFunction(dom));
+};
\ No newline at end of file
diff --git a/tests/DomUtils/01-by_id.js b/tests/DomUtils/01-by_id.js
new file mode 100644
index 0000000..578257f
--- /dev/null
+++ b/tests/DomUtils/01-by_id.js
@@ -0,0 +1,54 @@
+var DomUtils = require("../../lib/DomUtils.js");
+
+exports.name = "Get element by id";
+exports.getElements = function(dom){
+	return DomUtils.getElements({id:"asdf"}, dom, true, 1)[0];
+};
+exports.getByFunction = function(dom){
+	return DomUtils.getElementById("asdf", dom, true);
+};
+exports.expected = {
+  "type": "tag",
+  "name": "tag1",
+  "attribs": {
+    "id": "asdf"
+  },
+  "children": [
+    {
+      "data": " ",
+      "type": "text"
+    },
+    {
+      "type": "script",
+      "name": "script",
+      "children": [
+        {
+          "data": "text",
+          "type": "text"
+        }
+      ]
+    },
+    {
+      "data": " ",
+      "type": "text"
+    },
+    {
+      "data": " comment ",
+      "type": "comment"
+    },
+    {
+      "data": " ",
+      "type": "text"
+    },
+    {
+      "type": "tag",
+      "name": "tag2",
+      "children": [
+        {
+          "data": " text ",
+          "type": "text"
+        }
+      ]
+    }
+  ]
+};
\ No newline at end of file
diff --git a/tests/DomUtils/02-by_tagname.js b/tests/DomUtils/02-by_tagname.js
new file mode 100644
index 0000000..280414e
--- /dev/null
+++ b/tests/DomUtils/02-by_tagname.js
@@ -0,0 +1,22 @@
+var DomUtils = require("../../lib/DomUtils.js");
+
+exports.name = "Get elements by tagName";
+exports.getElements = function(dom){
+	return DomUtils.getElements({tag_name:"tag2"}, dom, true);
+};
+exports.getByFunction = function(dom){
+	return DomUtils.getElementsByTagName("tag2", dom, true);
+};
+exports.expected = [];
+for(var i = 0; i < 20; i++) exports.expected.push(
+  {
+    "type": "tag",
+    "name": "tag2",
+    "children": [
+      {
+        "data": " text ",
+        "type": "text"
+      }
+    ]
+  }
+);
\ No newline at end of file
diff --git a/tests/DomUtils/03-by_type.js b/tests/DomUtils/03-by_type.js
new file mode 100644
index 0000000..16a3971
--- /dev/null
+++ b/tests/DomUtils/03-by_type.js
@@ -0,0 +1,22 @@
+var DomUtils = require("../../lib/DomUtils.js");
+
+exports.name = "Get elements by type";
+exports.getElements = function(dom){
+	return DomUtils.getElements({tag_type:"script"}, dom, true);
+};
+exports.getByFunction = function(dom){
+	return DomUtils.getElementsByTagType("script", dom, true);
+};
+exports.expected = [];
+for(var i = 0; i < 20; i++) exports.expected.push(
+  {
+    "type": "script",
+    "name": "script",
+    "children": [
+      {
+        "data": "text",
+        "type": "text"
+      }
+    ]
+  }
+);
\ No newline at end of file

From 05d1e2ec2c6addc2ce8b480907e3975386603675 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 13 Nov 2011 14:06:52 +0100
Subject: [PATCH 098/450] Added force option to parseTags, removed logic from
 done

---
 lib/Parser.js | 31 ++++++++++++-------------------
 1 file changed, 12 insertions(+), 19 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index be00b19..34d7d54 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -21,7 +21,6 @@ var _reTagName = /^\s*(\/?)\s*([^\s\/]+)/; //Used to find the tag name for an el
 var _reAttrib = /([^=<>\"\'\s]+)\s*=\s*"([^"]*)"|([^=<>\"\'\s]+)\s*=\s*'([^']*)'|([^=<>\"\'\s]+)\s*=\s*([^'"\s]+)|([^=<>\"\'\s\/]+)/g;//"
 
 Parser.prototype._options = {
-	includeLocation: false, //Do not track element position in document by default
 	xmlMode: false //Special behaviour for script/style tags by default
 };
 
@@ -46,22 +45,13 @@ Parser.prototype.done = function(){
 	if(this._done) return;
 	this._done = true;
 
-	//Push any unparsed text into a final element in the element list
-	if(this._buffer){
-		var data = this._buffer;
-		if(this._parseState === ElementType.Tag){
-			data = data.trim();
-			var name = parseTagName(data);
-			if(name.charAt(0) === "/"){
-				if(this._cbs.onclosetag) this._cbs.onclosetag(name.substr(1));
-			} else if(this._cbs.onopentag){
-				this._cbs.onopentag(name, parseAttributes(data), ElementType.Tag);
-			}
-		}
-		else if(this._cbs.ontext) this._cbs.ontext(data);
-		
-		this._buffer = "";
+	//Parse the buffer to its end
+	if(this._buffer) this.parseTags(true);
+	
+	if(this._cbs.onclosetag){
+		while(this._stack.length) this._cbs.onclosetag(this._stack.pop());
 	}
+	
 	if(this._cbs.onend) this._cbs.onend();
 };
 
@@ -107,15 +97,18 @@ SpecialTags.w = 4; //2^2 - if set, append prev tag sep to data
 SpecialTags[ElementType.Comment] = 8; //2^3
 
 //Parses through HTML text and returns an array of found elements
-Parser.prototype.parseTags = function(){
+Parser.prototype.parseTags = function(force){
 	var buffer = this._buffer, current = 0;
 
 	var next, tagSep, rawData, elementName, prevElement, elementType, elementData, attributes;
 	
 	var opening = buffer.indexOf("<"), closing = buffer.indexOf(">");
 
+	//if force is true, parse everything
+	if(force) opening = Infinity;
+
 	while(opening !== closing){ //just false if both are -1
-		if(closing === -1 || (opening !== -1 && opening < closing)){
+		if((opening !== -1 && opening < closing) || closing === -1){
 			next = opening;
 			tagSep = "<";
 			opening = buffer.indexOf(tagSep, next + 1);
@@ -229,7 +222,7 @@ Parser.prototype._processCloseTag = function(name){
 					this._cbs.onclosetag(this._stack.pop());
 			}
 			else this._stack.splice(i);
-		}
+	}
 	//many browsers (eg. Safari, Chrome) convert </br> to <br>
 	else if(name === "br" && !this._options.xmlMode)
 		this._processOpenTag(name, "/");

From 9b1c606c413ad7a625260eae81f63c72ff287a64 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 13 Nov 2011 14:33:32 +0100
Subject: [PATCH 099/450] Added option to convert tag names to lower case

Also renamed parseTags to _parseTags (so it's officially a private
function) and added write as another name for parseChunk
---
 lib/Parser.js | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index 34d7d54..307193c 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -14,14 +14,12 @@ function Parser(cbs, options){
 
 //**"Static"**//
 //Regular expressions used for cleaning up and parsing (stateless)
-var _reWhitespace = /\s/; //Used to find any whitespace to split on
-var _reTagName = /^\s*(\/?)\s*([^\s\/]+)/; //Used to find the tag name for an element
-
-//Find attributes in a tag
+var _reTagName = /^\s*(\/?)\s*([^\s\/]+)/; //matches tagnames
 var _reAttrib = /([^=<>\"\'\s]+)\s*=\s*"([^"]*)"|([^=<>\"\'\s]+)\s*=\s*'([^']*)'|([^=<>\"\'\s]+)\s*=\s*([^'"\s]+)|([^=<>\"\'\s\/]+)/g;//"
 
 Parser.prototype._options = {
-	xmlMode: false //Special behaviour for script/style tags by default
+	xmlMode: false, //Special behaviour for script/style tags by default
+	lowerCaseTags: false //call .toLowerCase for each tagname
 };
 
 //**Public**//
@@ -34,10 +32,11 @@ Parser.prototype.parseComplete = function(data){
 };
 
 //Parses a piece of an HTML document
+Parser.prototype.write =
 Parser.prototype.parseChunk = function(data){
 	if(this._done) this._handleError(Error("Attempted to parse chunk after parsing already done"));
 	this._buffer += data; //FIXME: this can be a bottleneck
-	this.parseTags();
+	this._parseTags();
 };
 
 //Tells the parser that the HTML being parsed is complete
@@ -46,7 +45,7 @@ Parser.prototype.done = function(){
 	this._done = true;
 
 	//Parse the buffer to its end
-	if(this._buffer) this.parseTags(true);
+	if(this._buffer) this._parseTags(true);
 	
 	if(this._cbs.onclosetag){
 		while(this._stack.length) this._cbs.onclosetag(this._stack.pop());
@@ -64,7 +63,7 @@ Parser.prototype.reset = function(){
 //**Private**//
 //Takes an element and adds an "attribs" property for any element attributes found
 var parseAttributes = function(data){
-	var pos = data.search(_reWhitespace);
+	var pos = data.search(/\s/); //Find any whitespace
 	if(pos === -1) return;
 	var attribRaw = data.substr(pos);
 
@@ -82,10 +81,13 @@ var parseAttributes = function(data){
 };
 
 //Extracts the base tag name from the data value of an element
-var parseTagName = function(data){
+Parser.prototype._parseTagName = function(data){
 	var match = data.match(_reTagName);
 	if(match === null) return "";
-	return match[1] + match[2];
+	if(this._options.lowerCaseTags){
+		return match[1] + match[2].toLowerCase();
+	}
+	else return match[1] + match[2];
 };
 
 //Special tags that are threated differently
@@ -97,7 +99,7 @@ SpecialTags.w = 4; //2^2 - if set, append prev tag sep to data
 SpecialTags[ElementType.Comment] = 8; //2^3
 
 //Parses through HTML text and returns an array of found elements
-Parser.prototype.parseTags = function(force){
+Parser.prototype._parseTags = function(force){
 	var buffer = this._buffer, current = 0;
 
 	var next, tagSep, rawData, elementName, prevElement, elementType, elementData, attributes;
@@ -127,7 +129,7 @@ Parser.prototype.parseTags = function(force){
 		
 		if(elementType === ElementType.Tag){
 			elementData = rawData.trim();
-			elementName = parseTagName(elementData);
+			elementName = this._parseTagName(elementData);
 		}
 		else{
 			elementData = rawData;

From dc2a3b4ff89db44674eff16a5f110a36eb477028 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Tue, 22 Nov 2011 19:05:55 +0100
Subject: [PATCH 100/450] Added a prototype for a new FeedHandler (not finished
 yet!)

---
 lib/_FeedHandler.js | 100 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 100 insertions(+)
 create mode 100644 lib/_FeedHandler.js

diff --git a/lib/_FeedHandler.js b/lib/_FeedHandler.js
new file mode 100644
index 0000000..5f8adeb
--- /dev/null
+++ b/lib/_FeedHandler.js
@@ -0,0 +1,100 @@
+// NOT FINISHED YET! DON'T USE IT!
+
+//opening tags
+var searchRoot = function(tagName){
+	if(tagName === "rss" || tagName === "rdf:RDF" || tagName === "feed"){
+		if(tagName === "rdf:RDF") this.feed.type = "rdf";
+		else this.feed.type = tagName;
+		this._map = RssFeedMap;
+		this.onopentag = getChannelElement;
+	}
+	else if(tagName === "feed"){
+		this.feed.type = "atom";
+		this._map = AtomFeedMap;
+		this.onclosetag = getFeedElements;
+		this.ontext = writeText;
+		this.onopentag = getOpenTag;
+	}
+}
+
+var getChannelElement = function(tagName){
+	if(tagName === "channel"){
+		this.onopentag = getOpenTag;
+		this.onclosetag = getFeedElements;
+		this.ontext = writeText;
+	}
+}
+
+var getOpenTag = function(tagName, attribs){
+	this._level += 1;
+	if(tagName === this._childName){
+		if(this._feed.type === "atom"){
+		}
+		else{
+		
+		}
+	} else if(tagName === "link" && this._level === 1 
+		&& this._feed.type === "atom" && attribs.href){
+			this.feed.link = attribs.href;
+	}
+};
+
+//text
+var writeText = function(text){
+	if(this._stack[this._level]){
+		this._stack[this._level] += text;
+	} else this._stack[this._level] = text;
+};
+
+//closing tags
+var getFeedElements = function(tagName){
+	var text = this._stack.pop();
+	if(this._level-- === 1){
+		var elemName = this._map[tagName];
+		if(elemName){
+			if(elemName === "updated") text = Date(text);	
+			this._feed[elemName] = text;
+		}
+	}
+};
+
+//mappings
+var RssFeedMap = {
+	title: "title",
+	link: "link",
+	description: "description",
+	lastBuildDate: "updated",
+	managingEditor: "author"/*,
+	item: "item"*/
+};
+
+var AtomFeedMap = {
+	id: "id",
+	title: "title",
+	subtitle: "description",
+	updated: "updated",
+	email: "author"/*,
+	entry: "item"*/
+};
+
+//TODO: make this a trully streamable handler
+function FeedHandler(callback, onitem){
+	this.onopentag = searchRoot;
+	this.feed = {
+		type: null,
+		id: "",
+		title: null,
+		link: null,
+		description: null,
+		updated: null,
+		author: null,
+		items: []
+	};
+	this._level = 0;
+	this._stack = [];
+	this._map = null;
+	this.onend = callback;
+	this.onitem = onitem; //called when a new item was found
+}
+
+module.exports = FeedHandler;
\ No newline at end of file

From 7ccffcffd97751c97e02c03e3dfffa8c14b589ca Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Tue, 22 Nov 2011 19:06:34 +0100
Subject: [PATCH 101/450] Removed EventedHandler from index.js

---
 lib/index.js | 1 -
 1 file changed, 1 deletion(-)

diff --git a/lib/index.js b/lib/index.js
index 2ceb6eb..85a26a6 100644
--- a/lib/index.js
+++ b/lib/index.js
@@ -2,7 +2,6 @@ module.exports = {
 	Parser: require("./Parser.js"),
 	DefaultHandler: require("./DefaultHandler.js"),
 	FeedHandler: require("./FeedHandler.js"),
-	EventedHandler: require("./EventedHandler.js"),
 	ElementType: require("./ElementType.js"),
 	DomUtils: require("./DomUtils.js")
 }
\ No newline at end of file

From df8f48eedd08a4952246ba9cb9faf9d66d4e9f01 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Tue, 22 Nov 2011 19:06:50 +0100
Subject: [PATCH 102/450] Added callbacks to prototype

---
 lib/Parser.js | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/lib/Parser.js b/lib/Parser.js
index 307193c..47610f0 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -22,6 +22,16 @@ Parser.prototype._options = {
 	lowerCaseTags: false //call .toLowerCase for each tagname
 };
 
+Parser.prototype._cbs = {
+	/*
+	onopentag,
+	onclosetag,
+	ontext,
+	onprocessinginstruction,
+	oncomment
+	*/
+};
+
 //**Public**//
 //Methods//
 //Parses a complete HTML and pushes it to the handler

From 0073cecb3858453678655867dbc3dd2bce0570b7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Tue, 22 Nov 2011 19:07:50 +0100
Subject: [PATCH 103/450] Restructured some code in FeedHandler

---
 lib/FeedHandler.js | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/lib/FeedHandler.js b/lib/FeedHandler.js
index 331611d..c72e18c 100644
--- a/lib/FeedHandler.js
+++ b/lib/FeedHandler.js
@@ -1,13 +1,14 @@
 var DefaultHandler = require("./DefaultHandler.js"),
-	DomUtils = require("./DomUtils.js"),
-	inherits = require("util").inherits;
+	DomUtils = require("./DomUtils.js");
 
 //TODO: make this a streamable handler
 function FeedHandler(callback){
-	DefaultHandler.call(this, callback, { ignoreWhitespace: true });
+	this.init(callback, { ignoreWhitespace: true });
 }
 
-inherits(FeedHandler, DefaultHandler);
+require("util").inherits(FeedHandler, DefaultHandler);
+
+FeedHandler.prototype.init = DefaultHandler;
 
 function getElements(what, where, one, recurse){
 	if(one) return DomUtils.getElementsByTagName(what, where, recurse, 1)[0];

From 3ed5cc4d0d7124c1db8b464d3f0f2e2d9d6380da Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Wed, 23 Nov 2011 20:23:29 +0100
Subject: [PATCH 104/450] Updated readme concerning verbose output

---
 README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/README.md b/README.md
index 7356f86..bc195ff 100644
--- a/README.md
+++ b/README.md
@@ -12,6 +12,8 @@ This is a fork of the project above. The main difference is that this is just in
 
 Besides, it features an additional handler that provides the interface of [sax.js](https://github.com/isaacs/sax-js) (written for my readability port [readabilitySAX](https://github.com/fb55/readabilitysax)). I also fixed a couple of bugs & included some pull requests for the original project (eg. [RDF feed support](https://github.com/tautologistics/node-htmlparser/pull/35)).
 
+The support for location data and verbose output was removed a couple of versions ago. It's still available in [this earlier version](https://github.com/FB55/node-htmlparser/tree/e1ae2b231c66caf75ca9b1328925e0cf95bfecc2) of htmlparser2 (if you really need it, for whatever reason that may be).
+
 ##Usage
 
 	var htmlparser = require("htmlparser");

From 2750286aa5fe8703234098b58df406be4788ee3d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 25 Nov 2011 08:58:17 +0100
Subject: [PATCH 105/450] removed unused vars

---
 lib/Parser.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index 47610f0..b37a78f 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -112,7 +112,7 @@ SpecialTags[ElementType.Comment] = 8; //2^3
 Parser.prototype._parseTags = function(force){
 	var buffer = this._buffer, current = 0;
 
-	var next, tagSep, rawData, elementName, prevElement, elementType, elementData, attributes;
+	var next, tagSep, rawData, elementName, elementType, elementData;
 	
 	var opening = buffer.indexOf("<"), closing = buffer.indexOf(">");
 

From 0ad61f63f81ff7d54a863cbf03bda403f09e52d4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 25 Nov 2011 09:18:36 +0100
Subject: [PATCH 106/450] Restructured some code

---
 lib/DefaultHandler.js | 89 ++++++++++++++++++++++++++++---------------
 1 file changed, 58 insertions(+), 31 deletions(-)

diff --git a/lib/DefaultHandler.js b/lib/DefaultHandler.js
index 77cd08d..8c389d6 100644
--- a/lib/DefaultHandler.js
+++ b/lib/DefaultHandler.js
@@ -5,84 +5,111 @@ function DefaultHandler(callback, options){
 	this._done = false;
 	this._inSpecialTag = false;
 	this._tagStack = [];
-	if(options){ //otherwise, the prototype is used
-		this._options = options;
-	}
+	if(options) this._options = options; //otherwise, the prototype is used
 	if(callback) this._callback = callback;
 }
 
 //default options
 DefaultHandler.prototype._options = {
-	ignoreWhitespace: false,//Keep whitespace-only text nodes
+	ignoreWhitespace: false //Keep whitespace-only text nodes
 };
 
-//**Public**//
-//Methods//
 //Resets the handler back to starting state
 DefaultHandler.prototype.onreset = DefaultHandler;
 
 //Signals the handler that parsing is done
-DefaultHandler.prototype.onend = function() {
+DefaultHandler.prototype.onend = function(){
 	if(this._done) return;
 	this._done = true;
 	this._handleCallback(null);
 };
 
-//Methods//
-DefaultHandler.prototype.onerror =
-DefaultHandler.prototype._handleCallback = function(error){
-		if(typeof this._callback === "function")
-			this._callback(error, this.dom);
-		else if(error) throw error;
+DefaultHandler.prototype.onerror = function(error){
+	if(typeof this._callback === "function"){
+		return this._callback(error, this.dom);
+	} else {
+		if(error) throw error;
+	}
 };
 
+DefaultHandler.prototype._handleCallback = DefaultHandler.prototype.onerror;
+
 DefaultHandler.prototype.onclosetag = function(name){
 	this._tagStack.pop();
 };
 
 DefaultHandler.prototype._addDomElement = function(element){
-	var lastTag = this._tagStack[this._tagStack.length-1], lastChild;
-	if(!lastTag) this.dom.push(element);
-	else{ //There are parent elements
+	var lastChild,
+		lastTag = this._tagStack[this._tagStack.length - 1];
+	
+	if(lastTag){ //There are parent elements
 		if(!lastTag.children){
 			lastTag.children = [element];
 			return;
 		}
-		lastChild = lastTag.children[lastTag.children.length-1];
+		lastChild = lastTag.children[lastTag.children.length - 1];
 		if(this._inSpecialTag && element.type === ElementType.Text && lastChild.type === ElementType.Text){
 			lastChild.data += element.data;
+		} else {
+			lastTag.children.push(element);
 		}
-		else lastTag.children.push(element);
+	}
+	else {
+		this.dom.push(element);
 	}
 };
 
 DefaultHandler.prototype.onopentag = function(name, attribs, type){
-	if(type === ElementType.Script || type === ElementType.Style) this._inSpecialTag = true;
-	
-	var element = {type:type, name:name, attribs:attribs};
+	if(type === ElementType.Script || type === ElementType.Style){
+		this._inSpecialTag = true;
+	}
+	var element = {
+		type: type,
+		name: name,
+		attribs: attribs
+	};
 	this._addDomElement(element);
 	this._tagStack.push(element);
 };
 
 DefaultHandler.prototype.ontext = function(data){
 	if(this._options.ignoreWhitespace && data.trim() === "") return;
-	this._addDomElement({data:data, type:ElementType.Text});
+	this._addDomElement({
+		data: data,
+		type: ElementType.Text
+	});
 };
 
 DefaultHandler.prototype.oncomment = function(data){
-	var lastTag = this._tagStack[this._tagStack.length-1], element,
-		lastChild = lastTag && lastTag.children && lastTag.children[lastTag.children.length-1];
+	var lastTag = this._tagStack[this._tagStack.length - 1];
+	var lastChild = lastTag && lastTag.children && lastTag.children[lastTag.children.length - 1];
+	
+	var element;
 	if(!lastChild || lastChild.type !== ElementType.Comment){
-		element = {data:data, type: ElementType.Comment};
-		if(!lastTag) this.dom.push(element);
-		else if(!lastChild) lastTag.children = [element];
-		else if(lastChild.type !== ElementType.Comment) lastTag.children.push(element);
+		element = {
+			data: data,
+			type: ElementType.Comment
+		};
+		if(!lastTag){
+			return this.dom.push(element);
+		} else if(!lastChild){
+			lastTag.children = [element];
+		} else {
+			if(lastChild.type !== ElementType.Comment){
+				lastTag.children.push(element);
+			}
+		}
+	} else {
+		lastChild.data += data;
 	}
-	else lastChild.data += data;
-}
+};
 
 DefaultHandler.prototype.onprocessinginstruction = function(name, data){
-	this._addDomElement({name:name, data:data, type:ElementType.Directive});
+	this._addDomElement({
+		name: name,
+		data: data,
+		type: ElementType.Directive
+	});
 };
 
 module.exports = DefaultHandler;
\ No newline at end of file

From 3ce24f4c3eae804918fe0b83dcb37885059206f0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 25 Nov 2011 10:56:23 +0100
Subject: [PATCH 107/450] Updated readme

---
 package.json | 45 ++++++++++++++++++++++++++-------------------
 1 file changed, 26 insertions(+), 19 deletions(-)

diff --git a/package.json b/package.json
index e394215..27bb103 100644
--- a/package.json
+++ b/package.json
@@ -1,22 +1,29 @@
 {
-	  "name": "htmlparser2"
-	, "description": "Forgiving HTML/XML/RSS Parser for Node. This version is optimised and cleaned and provides a SAX interface (EventedHandler)."
-	, "version": "1.5.0"
-	, "author": "Felix Boehm <me@feedic.com>"
-	, "contributors": [ "Chris Winberry <chris@winberry.net>" ]
-	, "repository": {
-		  "type": "git"
-		, "url": "git://github.com/fb55/node-htmlparser.git"
-	}
-	, "bugs": {
-		  "mail": "me@feedic.com"
-		, "url": "http://github.com/fb55/node-htmlparser/issues"
-	}
-	, "directories": { "lib": "./lib/" }
-	, "main": "./lib/"
-	, "engines": { "node": ">0" }
-	, "licenses": [{
-		  "type": "MIT"
-		, "url": "http://github.com/tautologistics/node-htmlparser/raw/master/LICENSE"
+	"name": "htmlparser2",
+	"description": "Forgiving HTML/XML/RSS Parser for Node. This version is optimised and cleaned and provides a SAX interface.",
+	"version": "1.9.0",
+	"author": "Felix Boehm <me@feedic.com>",
+	"contributors": ["Chris Winberry <chris@winberry.net>"],
+	"repository": {
+		"type": "git",
+		"url": "git://github.com/fb55/node-htmlparser.git"
+	},
+	"bugs": {
+		"mail": "me@feedic.com",
+		"url": "http://github.com/fb55/node-htmlparser/issues"
+	},
+	"directories": {
+		"lib": "./lib/"
+	},
+	"main": "./lib/",
+	"scripts": {
+		"test": "make test"
+	},
+	"engines": {
+		"node": ">0"
+	},
+	"licenses": [{
+		"type": "MIT",
+		"url": "http://github.com/tautologistics/node-htmlparser/raw/master/LICENSE"
 	}]
 }
\ No newline at end of file

From 2f68f49413cf534f060e36dfd8153cce69718a0b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 25 Nov 2011 10:59:13 +0100
Subject: [PATCH 108/450] Throw if there was an error

---
 tests/00-runtests.js | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tests/00-runtests.js b/tests/00-runtests.js
index a1ccc67..b1fd7f5 100644
--- a/tests/00-runtests.js
+++ b/tests/00-runtests.js
@@ -55,4 +55,8 @@ function runTests(test){
 //log the results
 console.log("Total time:", totalTime);
 console.log("Total tests:", testCount);
-console.log("Failed tests:", failCount);
\ No newline at end of file
+console.log("Failed tests:", failCount);
+
+if(failCount !== 0){
+	throw Error("Encountered " + failCount + " errors!");
+}
\ No newline at end of file

From 51a1370ded8f8416160f4c8657a93e2786750cf3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 25 Nov 2011 11:00:51 +0100
Subject: [PATCH 109/450] Added .travis.yml

required for http://travis-ci.org/
---
 .travis.yml | 4 ++++
 1 file changed, 4 insertions(+)
 create mode 100644 .travis.yml

diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..381c985
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,4 @@
+language: node_js
+node_js:
+  - 0.4
+  - 0.6
\ No newline at end of file

From a813aed7202e1e213dc9d73c94fc79cf96b73307 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 25 Nov 2011 11:04:32 +0100
Subject: [PATCH 110/450] Fixed tests dir in package.json

---
 package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package.json b/package.json
index 27bb103..f6ea609 100644
--- a/package.json
+++ b/package.json
@@ -17,7 +17,7 @@
 	},
 	"main": "./lib/",
 	"scripts": {
-		"test": "make test"
+		"test": "cd tests && node 00-runtests.js"
 	},
 	"engines": {
 		"node": ">0"

From fe24ee7f70cf1833b0128738d793209156cf4287 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 25 Nov 2011 11:12:02 +0100
Subject: [PATCH 111/450] Added Travis status to readme

---
 README.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/README.md b/README.md
index bc195ff..41688df 100644
--- a/README.md
+++ b/README.md
@@ -7,6 +7,10 @@ A forgiving HTML/XML/RSS parser written in JS for NodeJS. The parser can handle
 ##Running Tests
 	node tests/00-runtests.js
 
+This project is linked to [Travis CI](http://travis-ci.org/). The latest builds status is:
+
+[![Build Status](https://secure.travis-ci.org/FB55/node-htmlparser.png)](http://travis-ci.org/FB55/node-htmlparser)
+
 ##How is this different from [node-htmlparser](https://github.com/tautologistics/node-htmlparser)?
 This is a fork of the project above. The main difference is that this is just intended to be used with node. Besides, the code is much better structured, has less duplications and is remarkably faster than the original. 
 

From af9023bc5bf8276b84e5876a8d896f7535aef0ce Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 25 Nov 2011 11:24:04 +0100
Subject: [PATCH 112/450] Updated readme, moved options to wiki

---
 README.md | 98 +++----------------------------------------------------
 1 file changed, 5 insertions(+), 93 deletions(-)

diff --git a/README.md b/README.md
index 41688df..56a1efe 100644
--- a/README.md
+++ b/README.md
@@ -14,9 +14,9 @@ This project is linked to [Travis CI](http://travis-ci.org/). The latest builds
 ##How is this different from [node-htmlparser](https://github.com/tautologistics/node-htmlparser)?
 This is a fork of the project above. The main difference is that this is just intended to be used with node. Besides, the code is much better structured, has less duplications and is remarkably faster than the original. 
 
-Besides, it features an additional handler that provides the interface of [sax.js](https://github.com/isaacs/sax-js) (written for my readability port [readabilitySAX](https://github.com/fb55/readabilitysax)). I also fixed a couple of bugs & included some pull requests for the original project (eg. [RDF feed support](https://github.com/tautologistics/node-htmlparser/pull/35)).
+Besides, the parser now provides the interface of [sax.js](https://github.com/isaacs/sax-js) (originally intended for my readability port [readabilitySAX](https://github.com/fb55/readabilitysax)). I also fixed a couple of bugs & included some pull requests for the original project (eg. [RDF feed support](https://github.com/tautologistics/node-htmlparser/pull/35)).
 
-The support for location data and verbose output was removed a couple of versions ago. It's still available in [this earlier version](https://github.com/FB55/node-htmlparser/tree/e1ae2b231c66caf75ca9b1328925e0cf95bfecc2) of htmlparser2 (if you really need it, for whatever reason that may be).
+The support for location data and verbose output was removed a couple of versions ago. It's still available in the [verbose branch](https://github.com/FB55/node-htmlparser/tree/verbose) (if you really need it, for whatever reason that may be).
 
 ##Usage
 
@@ -64,94 +64,6 @@ The support for location data and verbose output was removed a couple of version
 		...
 	});
 
-##Parser options
-
-###Usage
-	var Parser = new htmlparser.Parser(handler, options);
-
-###Option: includeLocation
-Indicates whether the parser should include the location of a token as part of it. Default: false.
-
-###Option: xmlMode
-Indicates whether `<script>` and `<style>` tags should get special treatment. If false, their content will be text only. For RSS feeds and other XML content (not HTML), set this to true. Default: false.
-
-##DefaultHandler options
-
-###Usage
-	var handler = new htmlparser.DefaultHandler(function (error) {...}, {
-		ignoreWhitespace: true
-	});
-	
-###Option: ignoreWhitespace
-Indicates whether the DOM should exclude text nodes that consists solely of whitespace. The default value is "false". 
-
-The following HTML will be used:
-
-	<font>
-		<br>this is the text
-	<font>
-
-####Example: true
-
-	[{
-		type: 'tag',
-		name: 'font',
-		children: [{
-			type: 'tag',
-			name: 'br'
-		}, {
-			data: 'this is the text\n',
-			type: 'text'
-		}, {
-			type: 'tag',
-			name: 'font'
-		}]
-	}]
-
-####Example: false
-
-	[{
-		type: 'tag',
-		name: 'font',
-		children: [{
-			data: '\n\t',
-			type: 'text'
-		}, {
-			type: 'tag',
-			name: 'br'
-		}, {
-			data: 'this is the text\n',
-			type: 'text'
-		}, {
-			type: 'tag',
-			name: 'font'
-		}]
-	}]
-
-###Option: enforceEmptyTags
-Indicates whether the DOM should prevent children on tags marked as empty in the HTML spec. Typically this should be set to "true" HTML parsing and "false" for XML parsing. The default value is "true".
-
-The following HTML is used:
-
-	<link>text</link>
-
-####Example: true
-
-	[{
-		type: 'tag',
-		name: 'link'
-	}, {
-		data: 'text',
-		type: 'text'
-	}]
-
-####Example: false
-
-	[{
-		type: 'tag',
-		name: 'link',
-		children: [{
-			data: 'text',
-			type: 'text'
-		}]
-	}]
\ No newline at end of file
+##Further reading
+* [Parser options](https://github.com/FB55/node-htmlparser/wiki/Parser-options)
+* [DefaultHandler options](https://github.com/FB55/node-htmlparser/wiki/DefaultHandler-options)
\ No newline at end of file

From ed11136e292b6eb1c37cb695965b1d1a0c438355 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 25 Nov 2011 11:51:03 +0100
Subject: [PATCH 113/450] Updated example code

---
 README.md | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index 56a1efe..23d4a5b 100644
--- a/README.md
+++ b/README.md
@@ -27,12 +27,13 @@ The support for location data and verbose output was removed a couple of version
 			[...do something for errors...]
 		else
 			[...parsing done, do something...]
+            console.log(dom);
 	});
 	var parser = new htmlparser.Parser(handler);
-	parser.parseComplete(rawHtml);
-	sys.puts(sys.inspect(handler.dom, false, null));
+	parser.write(rawHtml);
+    parser.done();
 
-##Example output
+Output:
 
 	[{
 		data: 'Xyz ',
@@ -55,7 +56,7 @@ The support for location data and verbose output was removed a couple of version
 ##Streaming To Parser
 	while (...) {
 		...
-		parser.parseChunk(chunk);
+		parser.write(chunk);
 	}
 	parser.done();
 

From 38d6b91dbcb2054a71143c4705d2bdb6570b9ac9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 25 Nov 2011 22:47:57 +0100
Subject: [PATCH 114/450] Changed formatting

Has no impact on how this works
---
 tests/Feeds/01-rss.js  | 73 ++++++++++++++++++++----------------------
 tests/Feeds/02-atom.js | 38 +++++++++++-----------
 tests/Feeds/03-rdf.js  | 35 +++++++++++++-------
 3 files changed, 77 insertions(+), 69 deletions(-)

diff --git a/tests/Feeds/01-rss.js b/tests/Feeds/01-rss.js
index 1e4af05..8059ed6 100644
--- a/tests/Feeds/01-rss.js
+++ b/tests/Feeds/01-rss.js
@@ -1,7 +1,9 @@
 exports.name = "RSS (2.0)";
 exports.options = {
-	  handler: {}
-	, parser: {xmlMode:true}
+	handler: {},
+	parser: {
+		xmlMode: true
+	}
 };
 exports.type = "rss";
 //http://cyber.law.harvard.edu/rss/examples/rss2sample.xml
@@ -53,39 +55,34 @@ exports.html = '<?xml version="1.0"?>\
    </channel>\
 </rss>';
 exports.expected = {
-	  type: "rss"
-	, id: ""
- 	, title: "Liftoff News"
-	, link: "http://liftoff.msfc.nasa.gov/"
-	, description: "Liftoff to Space Exploration."
-	, updated: new Date("Tue, 10 Jun 2003 09:41:01 GMT")
-	, author: "editor@example.com"
-	, items: [
-		  {
-			  id: "http://liftoff.msfc.nasa.gov/2003/06/03.html#item573"
-			, title: "Star City"
-			, link: "http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp"
-			, description: "How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's &lt;a href=\"http://howe.iki.rssi.ru/GCTC/gctc_e.htm\"&gt;Star City&lt;/a&gt;."
-			, pubDate: new Date("Tue, 03 Jun 2003 09:39:21 GMT")
-			}
-		, {
-			  id: "http://liftoff.msfc.nasa.gov/2003/05/30.html#item572"
-			, description: "Sky watchers in Europe, Asia, and parts of Alaska and Canada will experience a &lt;a href=\"http://science.nasa.gov/headlines/y2003/30may_solareclipse.htm\"&gt;partial eclipse of the Sun&lt;/a&gt; on Saturday, May 31st."
-			, pubDate: new Date("Fri, 30 May 2003 11:06:42 GMT")
-			}
-		, {
-			  id: "http://liftoff.msfc.nasa.gov/2003/05/27.html#item571"
-			, title: "The Engine That Does More"
-			, link: "http://liftoff.msfc.nasa.gov/news/2003/news-VASIMR.asp"
-			, description: "Before man travels to Mars, NASA hopes to design new engines that will let us fly through the Solar System more quickly.  The proposed VASIMR engine would do that."
-			, pubDate: new Date("Tue, 27 May 2003 08:37:32 GMT")
-			}
-		, {
-			 id: "http://liftoff.msfc.nasa.gov/2003/05/20.html#item570"
-			, title: "Astronauts' Dirty Laundry"
-			, link: "http://liftoff.msfc.nasa.gov/news/2003/news-laundry.asp"
-			, description: "Compared to earlier spacecraft, the International Space Station has many luxuries, but laundry facilities are not one of them.  Instead, astronauts have other options."
-			, pubDate: new Date("Tue, 20 May 2003 08:56:02 GMT")
-			}
-		]
-	};
\ No newline at end of file
+	type: "rss",
+	id: "",
+	title: "Liftoff News",
+	link: "http://liftoff.msfc.nasa.gov/",
+	description: "Liftoff to Space Exploration.",
+	updated: new Date("Tue, 10 Jun 2003 09:41:01 GMT"),
+	author: "editor@example.com",
+	items: [{
+		id: "http://liftoff.msfc.nasa.gov/2003/06/03.html#item573",
+		title: "Star City",
+		link: "http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp",
+		description: "How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's &lt;a href=\"http://howe.iki.rssi.ru/GCTC/gctc_e.htm\"&gt;Star City&lt;/a&gt;.",
+		pubDate: new Date("Tue, 03 Jun 2003 09:39:21 GMT")
+	}, {
+		id: "http://liftoff.msfc.nasa.gov/2003/05/30.html#item572",
+		description: "Sky watchers in Europe, Asia, and parts of Alaska and Canada will experience a &lt;a href=\"http://science.nasa.gov/headlines/y2003/30may_solareclipse.htm\"&gt;partial eclipse of the Sun&lt;/a&gt; on Saturday, May 31st.",
+		pubDate: new Date("Fri, 30 May 2003 11:06:42 GMT")
+	}, {
+		id: "http://liftoff.msfc.nasa.gov/2003/05/27.html#item571",
+		title: "The Engine That Does More",
+		link: "http://liftoff.msfc.nasa.gov/news/2003/news-VASIMR.asp",
+		description: "Before man travels to Mars, NASA hopes to design new engines that will let us fly through the Solar System more quickly.  The proposed VASIMR engine would do that.",
+		pubDate: new Date("Tue, 27 May 2003 08:37:32 GMT")
+	}, {
+		id: "http://liftoff.msfc.nasa.gov/2003/05/20.html#item570",
+		title: "Astronauts' Dirty Laundry",
+		link: "http://liftoff.msfc.nasa.gov/news/2003/news-laundry.asp",
+		description: "Compared to earlier spacecraft, the International Space Station has many luxuries, but laundry facilities are not one of them.  Instead, astronauts have other options.",
+		pubDate: new Date("Tue, 20 May 2003 08:56:02 GMT")
+	}]
+};
\ No newline at end of file
diff --git a/tests/Feeds/02-atom.js b/tests/Feeds/02-atom.js
index b067206..48674c9 100644
--- a/tests/Feeds/02-atom.js
+++ b/tests/Feeds/02-atom.js
@@ -1,7 +1,9 @@
 exports.name = "Atom (1.0)";
 exports.options = {
-	  handler: {}
-	, parser: {xmlMode:true}
+	handler: {},
+	parser: {
+		xmlMode: true
+	}
 };
 exports.type = "rss";
 //http://en.wikipedia.org/wiki/Atom_%28standard%29
@@ -32,20 +34,18 @@ exports.html = '<?xml version="1.0" encoding="utf-8"?>\
 \
 </feed>';
 exports.expected = {
-	  type: "atom"
-	, id: "urn:uuid:60a76c80-d399-11d9-b91C-0003939e0af6"
- 	, title: "Example Feed"
-	, link: "http://example.org/feed/"
-	, description: "A subtitle."
-	, updated: new Date("2003-12-13T18:30:02Z")
-	, author: "johndoe@example.com"
-	, items: [
-		  {
-			  id: "urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a"
-			, title: "Atom-Powered Robots Run Amok"
-			, link: "http://example.org/2003/12/13/atom03"
-			, description: "Some text."
-			, pubDate: new Date("2003-12-13T18:30:02Z")
-			}
-		]
-	};
\ No newline at end of file
+	type: "atom",
+	id: "urn:uuid:60a76c80-d399-11d9-b91C-0003939e0af6",
+	title: "Example Feed",
+	link: "http://example.org/feed/",
+	description: "A subtitle.",
+	updated: new Date("2003-12-13T18:30:02Z"),
+	author: "johndoe@example.com",
+	items: [{
+		id: "urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a",
+		title: "Atom-Powered Robots Run Amok",
+		link: "http://example.org/2003/12/13/atom03",
+		description: "Some text.",
+		pubDate: new Date("2003-12-13T18:30:02Z")
+	}]
+};
\ No newline at end of file
diff --git a/tests/Feeds/03-rdf.js b/tests/Feeds/03-rdf.js
index bd4a06e..8d7d385 100644
--- a/tests/Feeds/03-rdf.js
+++ b/tests/Feeds/03-rdf.js
@@ -1,15 +1,26 @@
 exports.name = "RDF test";
 exports.type = "rss";
-exports.options = {handler: {}, parser: {xmlMode:true}};
+exports.options = {
+	handler: {},
+	parser: {
+		xmlMode: true
+	}
+};
+
 exports.html = '<?xml version="1.0" encoding="UTF-8"?>\n<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/" xmlns:ev="http://purl.org/rss/1.0/modules/event/" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:syn="http://purl.org/rss/1.0/modules/syndication/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:admin="http://webns.net/mvcb/">\n\t<channel rdf:about="http://sfbay.craigslist.org/ccc/">\n\t\t<title>craigslist | all community in SF bay area</title>\n\t\t<link>http://sfbay.craigslist.org/ccc/</link>\n\t\t<description/>\n\t\t<dc:language>en-us</dc:language>\n\t\t<dc:rights>Copyright 2011 craigslist, inc.</dc:rights>\n\t\t<dc:publisher>webmaster@craigslist.org</dc:publisher>\n\t\t<dc:creator>webmaster@craigslist.org</dc:creator>\n\t\t<dc:source>http://sfbay.craigslist.org/ccc//</dc:source>\n\t\t<dc:title>craigslist | all community in SF bay area</dc:title>\n\t\t<dc:type>Collection</dc:type>\n\t\t<syn:updateBase>2011-11-04T09:39:10-07:00</syn:updateBase>\n\t\t<syn:updateFrequency>4</syn:updateFrequency>\n\t\t<syn:updatePeriod>hourly</syn:updatePeriod>\n\t\t<items>\n\t\t\t<rdf:Seq>\n\t\t\t\t<rdf:li rdf:resource="http://sfbay.craigslist.org/sby/muc/2681301534.html"/>\n\t\t\t</rdf:Seq>\n\t\t</items>\n\t</channel>\n\t<item rdf:about="http://sfbay.craigslist.org/sby/muc/2681301534.html">\n\t\t<title><![CDATA[ Music Equipment Repair and Consignment ]]></title>\n\t\t<link>\nhttp://sfbay.craigslist.org/sby/muc/2681301534.html\n</link>\n\t\t<description><![CDATA[\nSan Jose Rock Shop offers musical instrument repair and consignment! (408) 215-2065<br> <br> We are pleased to announce our NEW LOCATION: 1199 N 5th st. San Jose, ca 95112. Please call ahead, by appointment only.<br> <br> Recently featured by Metro Newspaper in their 2011 Best of the Silicon Valley edition see it online here:<br> <a href="http://www.metroactive.com/best-of-silicon-valley/2011/music-nightlife/editor-picks.html" rel="nofollow">http://www.metroactive.com/best-of-silicon-valley/2011/music-nightlife/editor-picks.html</a><br> <br> Guitar Set up (acoustic and electronic) $40!<!-- END CLTAGS -->\n]]></description>\n\t\t<dc:date>2011-11-04T09:35:17-07:00</dc:date>\n\t\t<dc:language>en-us</dc:language>\n\t\t<dc:rights>Copyright 2011 craigslist, inc.</dc:rights>\n\t\t<dc:source>\nhttp://sfbay.craigslist.org/sby/muc/2681301534.html\n</dc:source>\n\t\t<dc:title><![CDATA[ Music Equipment Repair and Consignment ]]></dc:title>\n\t\t<dc:type>text</dc:type>\n\t\t<dcterms:issued>2011-11-04T09:35:17-07:00</dcterms:issued>\n\t</item>\n\t<item rdf:about="http://sfbay.craigslist.org/eby/rid/2685010755.html">\n\t\t<title><![CDATA[\nRide Offered - Oakland/BART to LA/SFV - TODAY 3PM 11/04 (oakland north / temescal)\n]]></title>\n\t\t<link>\nhttp://sfbay.craigslist.org/eby/rid/2685010755.html\n</link>\n\t\t<description><![CDATA[\nIm offering a lift for up to two people from Oakland (or near any BART station in the East Bay/580/880 Corridor, or San Jose/Morgan Hill, Gilroy) to the San Fernando Valley / Los Angeles area. Specifically, Im leaving from Oakland between 2:30 and 3:00pm (this is flexible, but if I leave too late my girlfriend will kill me), and heading to Woodland Hills via the 580, I-5, 405, and 101.<!-- END CLTAGS -->\n]]></description>\n\t\t<dc:date>2011-11-04T09:34:54-07:00</dc:date>\n\t\t<dc:language>en-us</dc:language>\n\t\t<dc:rights>Copyright 2011 craigslist, inc.</dc:rights>\n\t\t<dc:source>\nhttp://sfbay.craigslist.org/eby/rid/2685010755.html\n</dc:source>\n\t\t<dc:title><![CDATA[\nRide Offered - Oakland/BART to LA/SFV - TODAY 3PM 11/04 (oakland north / temescal)\n]]></dc:title>\n\t\t<dc:type>text</dc:type>\n\t\t<dcterms:issued>2011-11-04T09:34:54-07:00</dcterms:issued>\n\t</item>\n</rdf:RDF>';
-exports.expected = { type: 'rdf:RDF',
-  id: '',
-  title: 'craigslist | all community in SF bay area',
-  link: 'http://sfbay.craigslist.org/ccc/',
-  items: 
-   [ { title: '![CDATA[ Music Equipment Repair and Consignment ]]',
-       link: '\nhttp://sfbay.craigslist.org/sby/muc/2681301534.html\n',
-       description: '![CDATA[\nSan Jose Rock Shop offers musical instrument repair and consignment! (408) 215-2065' },
-     { title: '![CDATA[\nRide Offered - Oakland/BART to LA/SFV - TODAY 3PM 11/04 (oakland north / temescal)\n]]',
-       link: '\nhttp://sfbay.craigslist.org/eby/rid/2685010755.html\n',
-       description: '![CDATA[\nIm offering a lift for up to two people from Oakland (or near any BART station in the East Bay/580/880 Corridor, or San Jose/Morgan Hill, Gilroy) to the San Fernando Valley / Los Angeles area. Specifically, Im leaving from Oakland between 2:30 and 3:00pm (this is flexible, but if I leave too late my girlfriend will kill me), and heading to Woodland Hills via the 580, I-5, 405, and 101.' } ] };
\ No newline at end of file
+
+exports.expected = {
+	type: 'rdf:RDF',
+	id: '',
+	title: 'craigslist | all community in SF bay area',
+	link: 'http://sfbay.craigslist.org/ccc/',
+	items: [{
+		title: '![CDATA[ Music Equipment Repair and Consignment ]]',
+		link: '\nhttp://sfbay.craigslist.org/sby/muc/2681301534.html\n',
+		description: '![CDATA[\nSan Jose Rock Shop offers musical instrument repair and consignment! (408) 215-2065'
+	}, {
+		title: '![CDATA[\nRide Offered - Oakland/BART to LA/SFV - TODAY 3PM 11/04 (oakland north / temescal)\n]]',
+		link: '\nhttp://sfbay.craigslist.org/eby/rid/2685010755.html\n',
+		description: '![CDATA[\nIm offering a lift for up to two people from Oakland (or near any BART station in the East Bay/580/880 Corridor, or San Jose/Morgan Hill, Gilroy) to the San Fernando Valley / Los Angeles area. Specifically, Im leaving from Oakland between 2:30 and 3:00pm (this is flexible, but if I leave too late my girlfriend will kill me), and heading to Woodland Hills via the 580, I-5, 405, and 101.'
+	}]
+};
\ No newline at end of file

From 3152817ef126d52615fa0a9b5bc4bb0b10fc98df Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 26 Nov 2011 01:35:38 +0100
Subject: [PATCH 115/450] Feedhandler should always export the feed tree

---
 lib/FeedHandler.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/FeedHandler.js b/lib/FeedHandler.js
index c72e18c..8df0c67 100644
--- a/lib/FeedHandler.js
+++ b/lib/FeedHandler.js
@@ -98,8 +98,8 @@ FeedHandler.prototype.onend = function() {
 				feed.items[i] = entry;
 			});
 		}
-		this.dom = feed;
 	}
+	this.dom = feed;
 	DefaultHandler.prototype._handleCallback.call(this);
 };
 

From 89cf35e6e486a90692d87a6b9c8f63005711dc56 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 26 Nov 2011 01:37:42 +0100
Subject: [PATCH 116/450] Rewrote parts of DomUtils

---
 lib/DomUtils.js | 139 ++++++++++++++++++++++++++++--------------------
 1 file changed, 82 insertions(+), 57 deletions(-)

diff --git a/lib/DomUtils.js b/lib/DomUtils.js
index 10255d7..6b3bb7d 100644
--- a/lib/DomUtils.js
+++ b/lib/DomUtils.js
@@ -4,73 +4,98 @@ function getTest (checkVal) {
 	return function (value) { return value === checkVal; };
 }
 
-function testElement(options, element) {
-	if (!element) return false;
-    
-    var type = element.type;
-
-    for (var key in options) {
-    	if (key === "tag_name") {
-    		if (type !== ElementType.Tag && type !== ElementType.Script && type !== ElementType.Style) return false;
-    		if (!options.tag_name(element.name)) return false;
-    	} else if (key === "tag_type") {
-    		if (!options.tag_type(type)) return false;
-    	} else if (key === "tag_contains") {
-    		if (type !== ElementType.Text && type !== ElementType.Comment && type !== ElementType.Directive) return false;
-    		if (!options.tag_contains(element.data)) return false;
-    	} else if (!element.attribs || !options[key](element.attribs[key]))
-    		return false;
-    }
+var arrayPush = Array.prototype.push;
+function filterArray(test, arr, recurse, limit){
+	var result = [], childs;
+	
+	for(var i = 0, j = arr.length; i < j; i++){
+		if(test(arr[i])){
+			result.push(arr[i]);
+			if(--limit <= 0) break;
+		}
+		
+		if(recurse && (childs = arr[i].children)){
+			childs = filterArray(test, childs, limit);
+			arrayPush.apply(result, childs);
+			limit -= childs.length;
+			if(limit <= 0) break;
+		}
+	}
+	return result;
+}
 
-    return true;
+function filter(test, element, recurse, limit){
+	if(recurse !== false) recurse = true;
+	if(isNaN(limit)) limit = Infinity;
+	if(!Array.isArray(element)){
+		element = [element];
+	}
+	return filterArray(test, element, recurse, limit);
 }
 
 module.exports = {
-	testElement: testElement, 
+	testElement: function testElement(options, element) {
+		 var type = element.type;
 	
-	getElements: function(options, currentElement, recurse, limit){
-		recurse = recurse === undefined || recurse === null || recurse;
-		if(isNaN(limit)) limit = -1;
-
+		 for(var key in options){
+		 	if(key === "tag_name"){
+		 		if(type !== ElementType.Tag && type !== ElementType.Script && type !== ElementType.Style) return false;
+		 		if(!options.tag_name(element.name)) return false;
+		 	} else if(key === "tag_type") {
+		 		if(!options.tag_type(type)) return false;
+		 	} else if(key === "tag_contains") {
+		 		if(type !== ElementType.Text && type !== ElementType.Comment && type !== ElementType.Directive) return false;
+		 		if(!options.tag_contains(element.data)) return false;
+		 	} else if(!element.attribs || !options[key](element.attribs[key]))
+		 		return false;
+		 }
+	
+		 return true;
+	}, 
+	
+	getElements: function(options, element, recurse, limit){
 		for(var key in options){
-			if (typeof options[key] !== "function")
+			if(typeof options[key] !== "function"){
 				options[key] = getTest(options[key]);
+			}
+		}
+		
+		return filter(this.testElement.bind(null, options), element, recurse, limit);
+	},
+
+	getElementById: function(id, element, recurse) {
+		var result;
+		if(typeof id === "function"){
+			result = filter(function(elem){
+				return elem.attribs && id(elem.attribs);
+			}, element, recurse, 1);
+		}
+		else{
+			result = filter(function(elem){
+				return elem.attribs && elem.attribs.id === id;
+			}, element, recurse, 1);
 		}
-		return this.testAttr(testElement.bind(null, options), currentElement, recurse, limit);
-	}
-	
-	, getElementById: function(id, currentElement, recurse) {
-		var result = this.getElements({ id: id }, currentElement, recurse, 1);
 		return result.length ? result[0] : null;
-		//function(elem){return elem.attribs && elem.attribs.id === id;}
-	}
-	
-	, getElementsByTagName: function(name, currentElement, recurse, limit) {
-		return this.getElements({ tag_name: name }, currentElement, recurse, limit);
-		/*function(elem){
+	},
+
+	getElementsByTagName: function(name, element, recurse, limit){
+		if(typeof name === "function") return filter(function(elem){
 			var type = elem.type;
 			if(type !== ElementType.Tag && type !== ElementType.Script && type !== ElementType.Style) return false;
-			return elem.name === name;
-		};*/
-	}
-	
-	, getElementsByTagType: function(type, currentElement, recurse, limit){
-		return this.getElements({tag_type: type}, currentElement, recurse, limit);
-		//function(elem){return elem.type === type;}
-	}
-	
-	, testAttr: function(test, element, recurse, limit){
-		var found = [], elementList;
-		if(!element) return found;
-		if(test(element)) found.push(element);
-		
-		if(recurse && element.children) elementList = element.children;
-		else if(Array.isArray(element)) elementList = element;
-		else return found;
+			return name(elem.name);
+		}, element, recurse, limit);
 		
-		for(var i = 0, j = elementList.length; i < j && (limit < 0 || found.length < limit); i++){
-			found = found.concat(this.testAttr(test, elementList[i], recurse, limit));
+		else return filter(function(elem){
+			var type = elem.type;
+			if(type !== ElementType.Tag && type !== ElementType.Script && type !== ElementType.Style) return false;
+			return elem.name === name;
+		}, element, recurse, limit);
+	},
+
+	getElementsByTagType: function(type, element, recurse, limit){
+		if(typeof type === "function"){
+			return filter(function(elem){return type(elem.type);}, element, recurse, limit);
 		}
-		
-		return found;
-	}};
\ No newline at end of file
+		else return filter(function(elem){return elem.type === type;}, element, recurse, limit);
+	}
+};
\ No newline at end of file

From 4873584f99f624a7d5000889e3975726ae6af2e5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 26 Nov 2011 19:22:08 +0100
Subject: [PATCH 117/450] use Parser#write instead of parseChunk

---
 tests/test-helper.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test-helper.js b/tests/test-helper.js
index eaa532a..f533505 100644
--- a/tests/test-helper.js
+++ b/tests/test-helper.js
@@ -5,7 +5,7 @@ exports.writeToParser = function(handler, options, data){
 	var parser = new Parser(handler, options);
 	//first, try to run the test via chunks
 	for(var i = 0; i < data.length; i+=chunkSize){
-		parser.parseChunk(data.substring(i, i + chunkSize));
+		parser.write(data.substring(i, i + chunkSize));
 	}
 	parser.done();
 	//then parse everything

From b3125b170f1cce0cda26fdaed6272c67fb53b29f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 26 Nov 2011 19:22:37 +0100
Subject: [PATCH 118/450] Ensure that there is always an attires object

---
 lib/Parser.js | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index b37a78f..beeef93 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -73,12 +73,12 @@ Parser.prototype.reset = function(){
 //**Private**//
 //Takes an element and adds an "attribs" property for any element attributes found
 var parseAttributes = function(data){
-	var pos = data.search(/\s/); //Find any whitespace
-	if(pos === -1) return;
+	var pos = data.search(/\s/), attrs = {}; //Find any whitespace
+	if(pos === -1) return attrs;
 	var attribRaw = data.substr(pos);
 
 	_reAttrib.lastIndex = 0;
-	var match, attrs = {};
+	var match;
 	
 	while(match = _reAttrib.exec(attribRaw)){
 		if(match[1])		attrs[match[1]] = match[2];

From b1503fe3293c26eb7683ee39ab007510104923a0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 3 Dec 2011 16:25:22 +0100
Subject: [PATCH 119/450] 2.0.0

---
 package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package.json b/package.json
index f6ea609..60ccf7f 100644
--- a/package.json
+++ b/package.json
@@ -1,7 +1,7 @@
 {
 	"name": "htmlparser2",
 	"description": "Forgiving HTML/XML/RSS Parser for Node. This version is optimised and cleaned and provides a SAX interface.",
-	"version": "1.9.0",
+	"version": "2.0.0",
 	"author": "Felix Boehm <me@feedic.com>",
 	"contributors": ["Chris Winberry <chris@winberry.net>"],
 	"repository": {

From 475711db2164fe3fd01663a4f534e9c465c19e5a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 3 Dec 2011 16:25:31 +0100
Subject: [PATCH 120/450] Added syntax highlighting to readme

---
 README.md | 83 ++++++++++++++++++++++++++++++-------------------------
 1 file changed, 46 insertions(+), 37 deletions(-)

diff --git a/README.md b/README.md
index 23d4a5b..d6697af 100644
--- a/README.md
+++ b/README.md
@@ -20,50 +20,59 @@ The support for location data and verbose output was removed a couple of version
 
 ##Usage
 
-	var htmlparser = require("htmlparser");
-	var rawHtml = "Xyz <script language= javascript>var foo = '<<bar>>';< /  script><!--<!-- Waah! -- -->";
-	var handler = new htmlparser.DefaultHandler(function (error, dom) {
-		if (error)
-			[...do something for errors...]
-		else
-			[...parsing done, do something...]
-            console.log(dom);
-	});
-	var parser = new htmlparser.Parser(handler);
-	parser.write(rawHtml);
-    parser.done();
+```javascript
+var htmlparser = require("htmlparser");
+var rawHtml = "Xyz <script language= javascript>var foo = '<<bar>>';< /  script><!--<!-- Waah! -- -->";
+var handler = new htmlparser.DefaultHandler(function (error, dom) {
+    if (error)
+    	[...do something for errors...]
+    else
+    	[...parsing done, do something...]
+        console.log(dom);
+});
+var parser = new htmlparser.Parser(handler);
+parser.write(rawHtml);
+parser.done();
+```
 
 Output:
 
-	[{
-		data: 'Xyz ',
-		type: 'text'
-	}, {
-		type: 'script',
-		name: 'script',
-		attribs: {
-			language: 'javascript'
-		},
-		children: [{
-			data: 'var foo = \'<bar>\';<',
-			type: 'text'
-		}]
-	}, {
-		data: '<!-- Waah! -- ',
-		type: 'comment'
-	}]
+```javascript
+[{
+    data: 'Xyz ',
+    type: 'text'
+}, {
+    type: 'script',
+    name: 'script',
+    attribs: {
+    	language: 'javascript'
+    },
+    children: [{
+    	data: 'var foo = \'<bar>\';<',
+    	type: 'text'
+    }]
+}, {
+    data: '<!-- Waah! -- ',
+    type: 'comment'
+}]
+```
 
 ##Streaming To Parser
-	while (...) {
-		...
-		parser.write(chunk);
-	}
-	parser.done();
+```javascript
+while (...) {
+    ...
+    parser.write(chunk);
+}
+parser.done();
+```
 
 ##Parsing RSS/RDF/Atom Feeds
-	new htmlparser.FeedHandler(function (error, dom) {
-		...
-	});
+
+```javascript
+new htmlparser.FeedHandler(function (error, feed) {
+    ...
+});
+```
 
 ##Further reading
 * [Parser options](https://github.com/FB55/node-htmlparser/wiki/Parser-options)

From 39325e8c5447ffc4ce9ecf5e866ceacadc859478 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 3 Dec 2011 16:34:39 +0100
Subject: [PATCH 121/450] Fixed a bug

Empty attributes weren't recognized as they should
---
 lib/DefaultHandler.js | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/lib/DefaultHandler.js b/lib/DefaultHandler.js
index 8c389d6..89fbebb 100644
--- a/lib/DefaultHandler.js
+++ b/lib/DefaultHandler.js
@@ -65,9 +65,13 @@ DefaultHandler.prototype.onopentag = function(name, attribs, type){
 	}
 	var element = {
 		type: type,
-		name: name,
-		attribs: attribs
+		name: name
 	};
+	//for some reason, an if doesn't work
+	for(var i in attribs){
+		element.attribs = attribs;
+		break;
+	}
 	this._addDomElement(element);
 	this._tagStack.push(element);
 };

From 2ac91201020307e4d11542dc0fb863795371b28d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 3 Dec 2011 16:55:47 +0100
Subject: [PATCH 122/450] cosmetic changes

---
 lib/ElementType.js  | 12 ++++++------
 lib/_FeedHandler.js |  8 ++++++++
 2 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/lib/ElementType.js b/lib/ElementType.js
index 70194d2..204fbf2 100644
--- a/lib/ElementType.js
+++ b/lib/ElementType.js
@@ -1,9 +1,9 @@
 //Types of elements found in the DOM
 module.exports = {
-	Text: "text", /*Plain text*/
-	Directive: "directive", /*Special tag <!...>*/
-	Comment: "comment", /*Special tag <!--...-->*/
-	Script: "script", /*Special tag <script>...</script>*/
-	Style: "style", /*Special tag <style>...</style>*/
-	Tag: "tag" /*Any tag that isn't special*/
+	Text: "text", //Plain text
+	Directive: "directive", //Special tag <!...>
+	Comment: "comment", //Special tag <!--...-->
+	Script: "script", //Special tag <script>...</script>
+	Style: "style", //Special tag <style>...</style>
+	Tag: "tag" //Any tag that isn't special
 };
\ No newline at end of file
diff --git a/lib/_FeedHandler.js b/lib/_FeedHandler.js
index 5f8adeb..73059d4 100644
--- a/lib/_FeedHandler.js
+++ b/lib/_FeedHandler.js
@@ -68,6 +68,10 @@ var RssFeedMap = {
 	item: "item"*/
 };
 
+var RssItemMap = {
+
+};
+
 var AtomFeedMap = {
 	id: "id",
 	title: "title",
@@ -77,6 +81,10 @@ var AtomFeedMap = {
 	entry: "item"*/
 };
 
+var AtomItemMap = {
+
+};
+
 //TODO: make this a trully streamable handler
 function FeedHandler(callback, onitem){
 	this.onopentag = searchRoot;

From 2f66caeca5882b44c25896740ec0b6446feace6f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 9 Dec 2011 09:46:44 +0100
Subject: [PATCH 123/450] Added oncommentend event, fixed #2

+ added test
---
 lib/DefaultHandler.js                 | 38 +++++++++++++--------------
 lib/Parser.js                         |  7 +++--
 tests/HTML/21-conditional_comments.js | 16 +++++++++++
 3 files changed, 38 insertions(+), 23 deletions(-)
 create mode 100644 tests/HTML/21-conditional_comments.js

diff --git a/lib/DefaultHandler.js b/lib/DefaultHandler.js
index 89fbebb..4380e0f 100644
--- a/lib/DefaultHandler.js
+++ b/lib/DefaultHandler.js
@@ -86,26 +86,26 @@ DefaultHandler.prototype.ontext = function(data){
 
 DefaultHandler.prototype.oncomment = function(data){
 	var lastTag = this._tagStack[this._tagStack.length - 1];
-	var lastChild = lastTag && lastTag.children && lastTag.children[lastTag.children.length - 1];
-	
-	var element;
-	if(!lastChild || lastChild.type !== ElementType.Comment){
-		element = {
-			data: data,
-			type: ElementType.Comment
-		};
-		if(!lastTag){
-			return this.dom.push(element);
-		} else if(!lastChild){
-			lastTag.children = [element];
-		} else {
-			if(lastChild.type !== ElementType.Comment){
-				lastTag.children.push(element);
-			}
-		}
-	} else {
-		lastChild.data += data;
+
+	if(lastTag && lastTag.type === ElementType.Comment){
+		lastTag.data += data;
+		return;
 	}
+
+	var element = {
+		data: data,
+	    type: ElementType.Comment
+	};
+
+	if(!lastTag) this.dom.push(element);
+	else if(!lastTag.children) lastTag.children = [element];
+	else lastTag.children.push(element);
+
+	this._tagStack.push(element);
+};
+
+DefaultHandler.prototype.oncommentend = function(){
+	this._tagStack.pop();
 };
 
 DefaultHandler.prototype.onprocessinginstruction = function(name, data){
diff --git a/lib/Parser.js b/lib/Parser.js
index beeef93..323699c 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -211,11 +211,10 @@ Parser.prototype._processComment = function(rawData, tagSep){
 	if(tagSep === ">" && rawData.substr(-2) === "--"){ //comment ends
 		//remove the written flag (also removes the comment flag)
 		this._contentFlags %= SpecialTags.w;
-		rawData = rawData.slice(0, -2);
+		if(this._cbs.oncomment) this._cbs.oncomment(rawData.slice(0, -2));
+		if(this._cbs.oncommentend) this._cbs.oncommentend();
 	}
-	else rawData += tagSep;
-	
-	if(this._cbs.oncomment) this._cbs.oncomment(rawData);
+	else if(this._cbs.oncomment) this._cbs.oncomment(rawData + tagSep);
 };
 
 var emptyTags = require("./ClosingTags.js").self;
diff --git a/tests/HTML/21-conditional_comments.js b/tests/HTML/21-conditional_comments.js
new file mode 100644
index 0000000..583981e
--- /dev/null
+++ b/tests/HTML/21-conditional_comments.js
@@ -0,0 +1,16 @@
+exports.name = "Conditional comments";
+exports.options = {
+	  handler: {}
+	, parser: {}
+};
+exports.html = "<!--[if lt IE 7]> <html class='no-js ie6 oldie' lang='en'> <![endif]--><!--[if lt IE 7]> <html class='no-js ie6 oldie' lang='en'> <![endif]-->";
+exports.expected = [
+  {
+    "data": "[if lt IE 7]> <html class='no-js ie6 oldie' lang='en'> <![endif]",
+    "type": "comment"
+  },
+  {
+    "data": "[if lt IE 7]> <html class='no-js ie6 oldie' lang='en'> <![endif]",
+    "type": "comment"
+  }
+];
\ No newline at end of file

From 5cf9364aa1a0eacedaa1928156f7143727cc7cbc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 11 Dec 2011 13:27:10 +0100
Subject: [PATCH 124/450] Added Parser#end as an alias for #done

---
 lib/Parser.js | 7 ++++---
 package.json  | 2 +-
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index 323699c..30a5aa2 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -37,8 +37,8 @@ Parser.prototype._cbs = {
 //Parses a complete HTML and pushes it to the handler
 Parser.prototype.parseComplete = function(data){
 	this.reset();
-	this.parseChunk(data);
-	this.done();
+	this.write(data);
+	this.end();
 };
 
 //Parses a piece of an HTML document
@@ -50,11 +50,12 @@ Parser.prototype.parseChunk = function(data){
 };
 
 //Tells the parser that the HTML being parsed is complete
-Parser.prototype.done = function(){
+Parser.prototype.end = Parser.prototype.done = function(chunk){
 	if(this._done) return;
 	this._done = true;
 
 	//Parse the buffer to its end
+	if(chunk) this._buffer += chunk;
 	if(this._buffer) this._parseTags(true);
 	
 	if(this._cbs.onclosetag){
diff --git a/package.json b/package.json
index 60ccf7f..cdf5a46 100644
--- a/package.json
+++ b/package.json
@@ -1,7 +1,7 @@
 {
 	"name": "htmlparser2",
 	"description": "Forgiving HTML/XML/RSS Parser for Node. This version is optimised and cleaned and provides a SAX interface.",
-	"version": "2.0.0",
+	"version": "2.0.2",
 	"author": "Felix Boehm <me@feedic.com>",
 	"contributors": ["Chris Winberry <chris@winberry.net>"],
 	"repository": {

From 48cae534208c7009908e96b301b5c8273d34db68 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Thu, 15 Dec 2011 20:47:37 +0100
Subject: [PATCH 125/450] Accept malformed directives

---
 lib/Parser.js | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index 30a5aa2..2db3cff 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -14,7 +14,7 @@ function Parser(cbs, options){
 
 //**"Static"**//
 //Regular expressions used for cleaning up and parsing (stateless)
-var _reTagName = /^\s*(\/?)\s*([^\s\/]+)/; //matches tagnames
+var _reTagName = /^\s*([\/\?\!]?)\s*([^\s\/]+)/; //matches tagnames
 var _reAttrib = /([^=<>\"\'\s]+)\s*=\s*"([^"]*)"|([^=<>\"\'\s]+)\s*=\s*'([^']*)'|([^=<>\"\'\s]+)\s*=\s*([^'"\s]+)|([^=<>\"\'\s\/]+)/g;//"
 
 Parser.prototype._options = {
@@ -74,8 +74,8 @@ Parser.prototype.reset = function(){
 //**Private**//
 //Takes an element and adds an "attribs" property for any element attributes found
 var parseAttributes = function(data){
-	var pos = data.search(/\s/), attrs = {}; //Find any whitespace
-	if(pos === -1) return attrs;
+	var pos = data.search(/\w\s/) + 1, attrs = {}; //Find any whitespace
+	if(pos === 0) return attrs;
 	var attribRaw = data.substr(pos);
 
 	_reAttrib.lastIndex = 0;

From af54c2870d488cfc1a4e7b01916f44568062993d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Thu, 15 Dec 2011 20:48:22 +0100
Subject: [PATCH 126/450] Added getInnerHTML & getOuterHTML methods to DomUtils

For #3
---
 lib/DomUtils.js | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/lib/DomUtils.js b/lib/DomUtils.js
index 6b3bb7d..5ef7e75 100644
--- a/lib/DomUtils.js
+++ b/lib/DomUtils.js
@@ -97,5 +97,38 @@ module.exports = {
 			return filter(function(elem){return type(elem.type);}, element, recurse, limit);
 		}
 		else return filter(function(elem){return elem.type === type;}, element, recurse, limit);
+	},
+	
+	getInnerHTML: function(elem){
+		if(!elem.children) return "";
+		
+		var childs = elem.children,
+			childNum = childs.length,
+			ret = "";
+		
+		for(var i = 0; i < childNum; i++){
+			ret += this.getOuterHTML(childs[i]);
+		}
+		
+		return ret;
+	},
+	
+	getOuterHTML: function(elem){
+		var type = elem.type;
+
+		if(type === "text") return elem.data;
+		if(type === "comment") return "<!--" + elem.data + "-->";
+		
+		var ret = "<" + elem.name;
+		
+		for(var i in elem.attribs){
+			ret += " " + i + "=\"" + elem.attribs[i] + "\"";
+		}
+		
+		ret += ">";
+		
+		if(type === "directive") return ret;
+		
+		return ret + this.getInnerHTML(elem) + "</" + elem.name + ">";
 	}
 };
\ No newline at end of file

From d6469cf0f1650311911092ca2919b85286f226e1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 16 Dec 2011 11:07:29 +0100
Subject: [PATCH 127/450] Make empty tags self-closing, handle attributes
 better in DomUtils

---
 lib/DomUtils.js | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/lib/DomUtils.js b/lib/DomUtils.js
index 5ef7e75..1b55ea9 100644
--- a/lib/DomUtils.js
+++ b/lib/DomUtils.js
@@ -116,19 +116,24 @@ module.exports = {
 	getOuterHTML: function(elem){
 		var type = elem.type;
 
-		if(type === "text") return elem.data;
-		if(type === "comment") return "<!--" + elem.data + "-->";
+		if(type === ElementType.Text) return elem.data;
+		if(type === ElementType.Comment) return "<!--" + elem.data + "-->";
 		
 		var ret = "<" + elem.name;
 		
-		for(var i in elem.attribs){
-			ret += " " + i + "=\"" + elem.attribs[i] + "\"";
+		var value;
+		for(var name in elem.attribs){
+			value = elem.attribs[name];
+			ret += " " + name + "=";
+			
+			if(/^\w+$/.test(value)) ret += value;
+			else if(value.indeOf("\"") !== -1) ret += "'" + value + "'";
+			else ret += "\"" + value + "\"";
 		}
 		
-		ret += ">";
+		if(type === ElementType.Directive) return ret + ">";
+		if(type === ElementType.Tag && !elem.children) return ret + "/>";
 		
-		if(type === "directive") return ret;
-		
-		return ret + this.getInnerHTML(elem) + "</" + elem.name + ">";
+		return ">" + ret + this.getInnerHTML(elem) + "</" + elem.name + ">";
 	}
 };
\ No newline at end of file

From b059a99e021de8b3ddf51b9787ed804848bcd612 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 16 Dec 2011 11:30:20 +0100
Subject: [PATCH 128/450] Added a better regexp to test for unquoted attributes

Is now matching the HTML spec
---
 lib/DomUtils.js | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/DomUtils.js b/lib/DomUtils.js
index 1b55ea9..3285f80 100644
--- a/lib/DomUtils.js
+++ b/lib/DomUtils.js
@@ -126,13 +126,13 @@ module.exports = {
 			value = elem.attribs[name];
 			ret += " " + name + "=";
 			
-			if(/^\w+$/.test(value)) ret += value;
+			if(/^[^\s"\'\`\=\<\>]+$/.test(value)) ret += value;
 			else if(value.indeOf("\"") !== -1) ret += "'" + value + "'";
 			else ret += "\"" + value + "\"";
 		}
 		
 		if(type === ElementType.Directive) return ret + ">";
-		if(type === ElementType.Tag && !elem.children) return ret + "/>";
+		if(type === ElementType.Tag && !elem.children) return ret + " />";
 		
 		return ">" + ret + this.getInnerHTML(elem) + "</" + elem.name + ">";
 	}

From fa7321a6c74245b4650330b2b165ff86bea1c44f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 18 Dec 2011 19:12:54 +0100
Subject: [PATCH 129/450] Restructured Parser#_parseTags

made it much cleaner & compact
---
 lib/Parser.js | 116 +++++++++++++++++++++++++-------------------------
 1 file changed, 58 insertions(+), 58 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index 2db3cff..0e998e5 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -14,8 +14,8 @@ function Parser(cbs, options){
 
 //**"Static"**//
 //Regular expressions used for cleaning up and parsing (stateless)
-var _reTagName = /^\s*([\/\?\!]?)\s*([^\s\/]+)/; //matches tagnames
-var _reAttrib = /([^=<>\"\'\s]+)\s*=\s*"([^"]*)"|([^=<>\"\'\s]+)\s*=\s*'([^']*)'|([^=<>\"\'\s]+)\s*=\s*([^'"\s]+)|([^=<>\"\'\s\/]+)/g;//"
+var _reTagName = /^([\?\!]?)\s*([^\s\/]+)/; //matches tagnames
+var _reAttrib = /([^=<>\"\'\s]+)\s*=\s*(?:"([^"]*)"|'([^']*)'|([^'"\s]+))|([^=<>\"\'\s\/]+)/g;
 
 Parser.prototype._options = {
 	xmlMode: false, //Special behaviour for script/style tags by default
@@ -82,10 +82,8 @@ var parseAttributes = function(data){
 	var match;
 	
 	while(match = _reAttrib.exec(attribRaw)){
-		if(match[1])		attrs[match[1]] = match[2];
-		else if(match[3])	attrs[match[3]] = match[4];
-		else if(match[5])	attrs[match[5]] = match[6];
-		else if(match[7])	attrs[match[7]] = match[7];
+		if(match[1]) attrs[match[1]] = match[2] || match[3] || match[4];
+		else attrs[match[5]] = match[5];
 	}
 	
 	return attrs;
@@ -138,68 +136,58 @@ Parser.prototype._parseTags = function(force){
 		current = next + 1;
 		this._parseState = (tagSep === "<") ? ElementType.Tag : ElementType.Text;
 		
-		if(elementType === ElementType.Tag){
-			elementData = rawData.trim();
-			elementName = this._parseTagName(elementData);
-		}
-		else{
-			elementData = rawData;
-			elementName = "";
-		}
-
-		//This section inspects the current tag stack and modifies the current
-		//element if we're actually parsing a special area (script/comment/style tag)
-		if(this._contentFlags === 0){ /*do nothing*/ }
-		else if(this._contentFlags >= SpecialTags[ElementType.Comment]){
+		if(this._contentFlags >= SpecialTags[ElementType.Comment]){
 			//We're currently in a comment tag
 			this._processComment(rawData, tagSep);
 			continue;
 		}
-		//if it's a closing tag, remove the flag
-		else if(this._contentFlags >= SpecialTags[ElementType.Script] && elementName === "/script"){
-			//remove the script flag (also removes the written flag)
-			this._contentFlags %= SpecialTags[ElementType.Script];
-		}
-		else if(this._contentFlags >= SpecialTags[ElementType.Style] && elementName === "/style"){
-			//remove the style flag (also removes the written flag)
-			this._contentFlags %= SpecialTags[ElementType.Style];
-		}
-		//special behaviour for script & style tags
-		//Make sure we're not in a comment
-		else if(!this._options.xmlMode && rawData.substring(0, 3) !== "!--"){
-			//If the previous element is text, append the last tag sep to element
-			if(this._contentFlags >= SpecialTags.w){
-				if(this._cbs.ontext) this._cbs.ontext(this._prevTagSep + rawData);
-			}
-			else{ //The previous element was not text
-				this._contentFlags += SpecialTags.w;
-				if(rawData !== "" && this._cbs.ontext) this._cbs.ontext(rawData);
-			}
-			this._prevTagSep = tagSep;
-			continue;
-		}
-
-		//Processing of non-special tags
+		
 		if(elementType === ElementType.Tag){
-			if(rawData.substring(0, 3) === "!--"){ //This tag is a comment
-				this._contentFlags += SpecialTags[ElementType.Comment];
-				this._processComment(rawData.substr(3), tagSep);
-				continue;
+			elementData = rawData.trimLeft();
+			if(elementData.charAt(0) === "/"){
+				elementName = this._parseTagName(elementData.substr(1));
+				if(this._contentFlags !== 0){
+					//if it's a closing tag, remove the flag
+					if(this._contentFlags >= SpecialTags[ElementType.Script] && elementName === "script"){
+						//remove the script flag (also removes the written flag)
+						this._contentFlags %= SpecialTags[ElementType.Script];
+					}
+					else if(this._contentFlags >= SpecialTags[ElementType.Style] && elementName === "style"){
+						//remove the style flag (also removes the written flag)
+						this._contentFlags %= SpecialTags[ElementType.Style];
+					}
+					else {
+						this._writeSpecial(rawData, tagSep);
+						continue;
+					}
+				}
+				this._processCloseTag(elementName);
 			}
-			
-			if(rawData.charAt(0) === "!" || rawData.charAt(0) === "?"){
-				//ElementType.Directive
+			else if(elementData.charAt(0) === "!" || elementData.charAt(0) === "?"){
+				if(elementData.substr(0, 3) === "!--"){
+					//This tag is a comment
+					this._contentFlags += SpecialTags[ElementType.Comment];
+					this._processComment(rawData.substr(3), tagSep);
+				}
+				else if(this._contentFlags !== 0){
+					this._writeSpecial(rawData, tagSep);
+				}
+				//This tag is a directive
 				//TODO: what about CDATA?
-				if(this._cbs.onprocessinginstruction){
-					this._cbs.onprocessinginstruction(elementName, elementData);
+				else if(this._cbs.onprocessinginstruction){
+					this._cbs.onprocessinginstruction(this._parseTagName(elementData), elementData);
 				}
-				continue;
 			}
-			if(elementName.charAt(0) === "/") this._processCloseTag(elementName.substr(1));
-			else this._processOpenTag(elementName, elementData, tagSep);
+			else if(this._contentFlags !== 0) this._writeSpecial(rawData, tagSep);
+			else this._processOpenTag(this._parseTagName(elementData), elementData, tagSep);
 		}
-		else if(elementType === ElementType.Text && rawData !== "" && this._cbs.ontext){
-			this._cbs.ontext(elementData);
+		else{
+			if(this._contentFlags !== 0){
+				this._writeSpecial(rawData, tagSep);
+			}
+			else if(rawData !== "" && this._cbs.ontext){
+				this._cbs.ontext(rawData);
+			}
 		}
 	}
 
@@ -218,6 +206,18 @@ Parser.prototype._processComment = function(rawData, tagSep){
 	else if(this._cbs.oncomment) this._cbs.oncomment(rawData + tagSep);
 };
 
+Parser.prototype._writeSpecial = function(rawData, tagSep){
+	//if the previous element is text, append the last tag sep to element
+	if(this._contentFlags >= SpecialTags.w){
+	    if(this._cbs.ontext) this._cbs.ontext(this._prevTagSep + rawData);
+	}
+	else{ //The previous element was not text
+	    this._contentFlags += SpecialTags.w;
+	    if(rawData !== "" && this._cbs.ontext) this._cbs.ontext(rawData);
+	}
+	this._prevTagSep = tagSep;
+};
+
 var emptyTags = require("./ClosingTags.js").self;
 
 Parser.prototype._isEmptyTag = function(name){

From c530ba5298e79a16c1556ace87a9328bab08acce Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 18 Dec 2011 19:31:36 +0100
Subject: [PATCH 130/450] Shortened Parser#_parseTagName, removed elementName
 var

---
 lib/Parser.js | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index 0e998e5..7cca147 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -14,7 +14,7 @@ function Parser(cbs, options){
 
 //**"Static"**//
 //Regular expressions used for cleaning up and parsing (stateless)
-var _reTagName = /^([\?\!]?)\s*([^\s\/]+)/; //matches tagnames
+var _reTagName = /[^\s\/]+/; //matches tagnames
 var _reAttrib = /([^=<>\"\'\s]+)\s*=\s*(?:"([^"]*)"|'([^']*)'|([^'"\s]+))|([^=<>\"\'\s\/]+)/g;
 
 Parser.prototype._options = {
@@ -94,9 +94,9 @@ Parser.prototype._parseTagName = function(data){
 	var match = data.match(_reTagName);
 	if(match === null) return "";
 	if(this._options.lowerCaseTags){
-		return match[1] + match[2].toLowerCase();
+		return match[0].toLowerCase();
 	}
-	else return match[1] + match[2];
+	else return match[0];
 };
 
 //Special tags that are threated differently
@@ -111,7 +111,7 @@ SpecialTags[ElementType.Comment] = 8; //2^3
 Parser.prototype._parseTags = function(force){
 	var buffer = this._buffer, current = 0;
 
-	var next, tagSep, rawData, elementName, elementType, elementData;
+	var next, tagSep, rawData, elementType, elementData;
 	
 	var opening = buffer.indexOf("<"), closing = buffer.indexOf(">");
 
@@ -145,14 +145,15 @@ Parser.prototype._parseTags = function(force){
 		if(elementType === ElementType.Tag){
 			elementData = rawData.trimLeft();
 			if(elementData.charAt(0) === "/"){
-				elementName = this._parseTagName(elementData.substr(1));
+				//elementData = elementData.substr(1).trim();
+				elementData = this._parseTagName(elementData.substr(1));
 				if(this._contentFlags !== 0){
 					//if it's a closing tag, remove the flag
-					if(this._contentFlags >= SpecialTags[ElementType.Script] && elementName === "script"){
+					if(this._contentFlags >= SpecialTags[ElementType.Script] && elementData === "script"){
 						//remove the script flag (also removes the written flag)
 						this._contentFlags %= SpecialTags[ElementType.Script];
 					}
-					else if(this._contentFlags >= SpecialTags[ElementType.Style] && elementName === "style"){
+					else if(this._contentFlags >= SpecialTags[ElementType.Style] && elementData === "style"){
 						//remove the style flag (also removes the written flag)
 						this._contentFlags %= SpecialTags[ElementType.Style];
 					}
@@ -161,7 +162,7 @@ Parser.prototype._parseTags = function(force){
 						continue;
 					}
 				}
-				this._processCloseTag(elementName);
+				this._processCloseTag(elementData);
 			}
 			else if(elementData.charAt(0) === "!" || elementData.charAt(0) === "?"){
 				if(elementData.substr(0, 3) === "!--"){
@@ -175,7 +176,10 @@ Parser.prototype._parseTags = function(force){
 				//This tag is a directive
 				//TODO: what about CDATA?
 				else if(this._cbs.onprocessinginstruction){
-					this._cbs.onprocessinginstruction(this._parseTagName(elementData), elementData);
+					this._cbs.onprocessinginstruction(
+						elementData.charAt(0) + this._parseTagName(elementData.substr(1)), 
+						elementData
+					);
 				}
 			}
 			else if(this._contentFlags !== 0) this._writeSpecial(rawData, tagSep);

From 05abd8f755e2281712f5f3869dab3b28f2da2004 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 18 Dec 2011 19:46:00 +0100
Subject: [PATCH 131/450] Replaced Parser#_parseState with Parser#_tagSep
 instead

Also removed Parser#_prevTagSep
---
 lib/Parser.js | 49 ++++++++++++++++++++++---------------------------
 1 file changed, 22 insertions(+), 27 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index 7cca147..ef4c57e 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -5,11 +5,10 @@ function Parser(cbs, options){
 	if(cbs) this._cbs = cbs;
 
 	this._buffer = "";
-	this._prevTagSep = "";
+	this._tagSep = "";
 	this._stack = [];
 	this._contentFlags = 0;
 	this._done = false;
-	this._parseState = ElementType.Text;
 }
 
 //**"Static"**//
@@ -111,7 +110,7 @@ SpecialTags[ElementType.Comment] = 8; //2^3
 Parser.prototype._parseTags = function(force){
 	var buffer = this._buffer, current = 0;
 
-	var next, tagSep, rawData, elementType, elementData;
+	var next, rawData, elementType, elementData, lastTagSep;
 	
 	var opening = buffer.indexOf("<"), closing = buffer.indexOf(">");
 
@@ -119,30 +118,30 @@ Parser.prototype._parseTags = function(force){
 	if(force) opening = Infinity;
 
 	while(opening !== closing){ //just false if both are -1
+		lastTagSep = this._tagSep;
+		
 		if((opening !== -1 && opening < closing) || closing === -1){
 			next = opening;
-			tagSep = "<";
-			opening = buffer.indexOf(tagSep, next + 1);
+			this._tagSep = "<";
+			opening = buffer.indexOf("<", next + 1);
 		}
 		else{
 			next = closing;
-			tagSep = ">";
-			closing = buffer.indexOf(tagSep, next + 1);
+			this._tagSep = ">";
+			closing = buffer.indexOf(">", next + 1);
 		}
 		rawData = buffer.substring(current, next); //The next chunk of data to parse
-		elementType = this._parseState;
 		
 		//set elements for next run
 		current = next + 1;
-		this._parseState = (tagSep === "<") ? ElementType.Tag : ElementType.Text;
 		
 		if(this._contentFlags >= SpecialTags[ElementType.Comment]){
 			//We're currently in a comment tag
-			this._processComment(rawData, tagSep);
+			this._processComment(rawData);
 			continue;
 		}
 		
-		if(elementType === ElementType.Tag){
+		if(lastTagSep === "<"){
 			elementData = rawData.trimLeft();
 			if(elementData.charAt(0) === "/"){
 				//elementData = elementData.substr(1).trim();
@@ -158,7 +157,7 @@ Parser.prototype._parseTags = function(force){
 						this._contentFlags %= SpecialTags[ElementType.Style];
 					}
 					else {
-						this._writeSpecial(rawData, tagSep);
+						this._writeSpecial(rawData, lastTagSep);
 						continue;
 					}
 				}
@@ -168,10 +167,10 @@ Parser.prototype._parseTags = function(force){
 				if(elementData.substr(0, 3) === "!--"){
 					//This tag is a comment
 					this._contentFlags += SpecialTags[ElementType.Comment];
-					this._processComment(rawData.substr(3), tagSep);
+					this._processComment(rawData.substr(3));
 				}
 				else if(this._contentFlags !== 0){
-					this._writeSpecial(rawData, tagSep);
+					this._writeSpecial(rawData, lastTagSep);
 				}
 				//This tag is a directive
 				//TODO: what about CDATA?
@@ -182,12 +181,12 @@ Parser.prototype._parseTags = function(force){
 					);
 				}
 			}
-			else if(this._contentFlags !== 0) this._writeSpecial(rawData, tagSep);
-			else this._processOpenTag(this._parseTagName(elementData), elementData, tagSep);
+			else if(this._contentFlags !== 0) this._writeSpecial(rawData, lastTagSep);
+			else this._processOpenTag(this._parseTagName(elementData), elementData);
 		}
 		else{
 			if(this._contentFlags !== 0){
-				this._writeSpecial(rawData, tagSep);
+				this._writeSpecial(rawData, lastTagSep);
 			}
 			else if(rawData !== "" && this._cbs.ontext){
 				this._cbs.ontext(rawData);
@@ -198,28 +197,25 @@ Parser.prototype._parseTags = function(force){
 	this._buffer = buffer.substring(current);
 };
 
-Parser.prototype._processComment = function(rawData, tagSep){
-	this._prevTagSep = tagSep;
-	
-	if(tagSep === ">" && rawData.substr(-2) === "--"){ //comment ends
+Parser.prototype._processComment = function(rawData){
+	if(this._tagSep === ">" && rawData.substr(-2) === "--"){ //comment ends
 		//remove the written flag (also removes the comment flag)
 		this._contentFlags %= SpecialTags.w;
 		if(this._cbs.oncomment) this._cbs.oncomment(rawData.slice(0, -2));
 		if(this._cbs.oncommentend) this._cbs.oncommentend();
 	}
-	else if(this._cbs.oncomment) this._cbs.oncomment(rawData + tagSep);
+	else if(this._cbs.oncomment) this._cbs.oncomment(rawData + this._tagSep);
 };
 
-Parser.prototype._writeSpecial = function(rawData, tagSep){
+Parser.prototype._writeSpecial = function(rawData, lastTagSep){
 	//if the previous element is text, append the last tag sep to element
 	if(this._contentFlags >= SpecialTags.w){
-	    if(this._cbs.ontext) this._cbs.ontext(this._prevTagSep + rawData);
+	    if(this._cbs.ontext) this._cbs.ontext(lastTagSep + rawData);
 	}
 	else{ //The previous element was not text
 	    this._contentFlags += SpecialTags.w;
 	    if(rawData !== "" && this._cbs.ontext) this._cbs.ontext(rawData);
 	}
-	this._prevTagSep = tagSep;
 };
 
 var emptyTags = require("./ClosingTags.js").self;
@@ -244,7 +240,7 @@ Parser.prototype._processCloseTag = function(name){
 		this._processOpenTag(name, "/");
 };
 
-Parser.prototype._processOpenTag = function(name, data, tagSep){
+Parser.prototype._processOpenTag = function(name, data){
 	var type = ElementType.Tag;
 	if(this._options.xmlMode){ /*do nothing*/ }
 	else if(name === "script") type = ElementType.Script;
@@ -260,7 +256,6 @@ Parser.prototype._processOpenTag = function(name, data, tagSep){
 	} else {
 		this._contentFlags += SpecialTags[type];
 		this._stack.push(name);
-		this._prevTagSep = tagSep;
 	}
 };
 

From 3c95ad90150ac0089fee788d12d119e73e5d7d18 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 18 Dec 2011 19:51:08 +0100
Subject: [PATCH 132/450] Removed Parser#_isEmptyTag

---
 lib/Parser.js | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index ef4c57e..bf2a11c 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -220,12 +220,8 @@ Parser.prototype._writeSpecial = function(rawData, lastTagSep){
 
 var emptyTags = require("./ClosingTags.js").self;
 
-Parser.prototype._isEmptyTag = function(name){
-	return !this._options.xmlMode && emptyTags[name];
-};
-
 Parser.prototype._processCloseTag = function(name){
-	if(this._stack && !this._isEmptyTag(name)){
+	if(this._stack && (!emptyTags[name] || this._options.xmlMode)){
 		var i = this._stack.length;
 		while(i !== 0 && this._stack[--i] !== name){}
 		if(i !== 0 || this._stack[0] === name)
@@ -251,7 +247,7 @@ Parser.prototype._processOpenTag = function(name, data){
 	}
 	
 	//If tag self-terminates, add an explicit, separate closing tag
-	if(data.substr(-1) === "/" || this._isEmptyTag(name)){
+	if(data.substr(-1) === "/" || (emptyTags[name] && !this._options.xmlMode)){
 		if(this._cbs.onclosetag) this._cbs.onclosetag(name);
 	} else {
 		this._contentFlags += SpecialTags[type];

From 563ea0ef6ac52b3022aa59b7d30b3bbfdc6e6ec5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Mon, 19 Dec 2011 22:27:45 +0100
Subject: [PATCH 133/450] Fixed a bug, introduced Parser#_wroteSpecial

The bug: When a `</script>` occurred inside a `<style>`, it would have
closed it
---
 lib/Parser.js | 42 ++++++++++++++++++++++--------------------
 1 file changed, 22 insertions(+), 20 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index bf2a11c..2419efd 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -7,18 +7,19 @@ function Parser(cbs, options){
 	this._buffer = "";
 	this._tagSep = "";
 	this._stack = [];
+	this._wroteSpecial = false;
 	this._contentFlags = 0;
 	this._done = false;
 }
 
 //**"Static"**//
 //Regular expressions used for cleaning up and parsing (stateless)
-var _reTagName = /[^\s\/]+/; //matches tagnames
+var _reTagName = /[^\s\/]+/; //matches tag names
 var _reAttrib = /([^=<>\"\'\s]+)\s*=\s*(?:"([^"]*)"|'([^']*)'|([^'"\s]+))|([^=<>\"\'\s\/]+)/g;
 
 Parser.prototype._options = {
-	xmlMode: false, //Special behaviour for script/style tags by default
-	lowerCaseTags: false //call .toLowerCase for each tagname
+	xmlMode: false, //Special behavior for script/style tags by default
+	lowerCaseTags: false //call .toLowerCase for each tag name
 };
 
 Parser.prototype._cbs = {
@@ -98,13 +99,12 @@ Parser.prototype._parseTagName = function(data){
 	else return match[0];
 };
 
-//Special tags that are threated differently
+//Special tags that are treated differently
 var SpecialTags = {};
-SpecialTags[ElementType.Tag] = 0;
+//SpecialTags[ElementType.Tag] = 0;
 SpecialTags[ElementType.Style]  = 1; //2^0
 SpecialTags[ElementType.Script] = 2; //2^1
-SpecialTags.w = 4; //2^2 - if set, append prev tag sep to data
-SpecialTags[ElementType.Comment] = 8; //2^3
+SpecialTags[ElementType.Comment] = 4; //2^3
 
 //Parses through HTML text and returns an array of found elements
 Parser.prototype._parseTags = function(force){
@@ -148,13 +148,13 @@ Parser.prototype._parseTags = function(force){
 				elementData = this._parseTagName(elementData.substr(1));
 				if(this._contentFlags !== 0){
 					//if it's a closing tag, remove the flag
-					if(this._contentFlags >= SpecialTags[ElementType.Script] && elementData === "script"){
-						//remove the script flag (also removes the written flag)
-						this._contentFlags %= SpecialTags[ElementType.Script];
+					if(this._contentFlags === SpecialTags[ElementType.Script] && elementData === "script"){
+						//remove the script flag
+						this._contentFlags -= SpecialTags[ElementType.Script];
 					}
-					else if(this._contentFlags >= SpecialTags[ElementType.Style] && elementData === "style"){
-						//remove the style flag (also removes the written flag)
-						this._contentFlags %= SpecialTags[ElementType.Style];
+					else if(this._contentFlags === SpecialTags[ElementType.Style] && elementData === "style"){
+						//remove the style flag
+						this._contentFlags -= SpecialTags[ElementType.Style];
 					}
 					else {
 						this._writeSpecial(rawData, lastTagSep);
@@ -169,9 +169,7 @@ Parser.prototype._parseTags = function(force){
 					this._contentFlags += SpecialTags[ElementType.Comment];
 					this._processComment(rawData.substr(3));
 				}
-				else if(this._contentFlags !== 0){
-					this._writeSpecial(rawData, lastTagSep);
-				}
+				else if(this._contentFlags !== 0) this._writeSpecial(rawData, lastTagSep);
 				//This tag is a directive
 				//TODO: what about CDATA?
 				else if(this._cbs.onprocessinginstruction){
@@ -200,7 +198,8 @@ Parser.prototype._parseTags = function(force){
 Parser.prototype._processComment = function(rawData){
 	if(this._tagSep === ">" && rawData.substr(-2) === "--"){ //comment ends
 		//remove the written flag (also removes the comment flag)
-		this._contentFlags %= SpecialTags.w;
+		this._contentFlags -= SpecialTags[ElementType.Comment];
+		this._wroteSpecial = false;
 		if(this._cbs.oncomment) this._cbs.oncomment(rawData.slice(0, -2));
 		if(this._cbs.oncommentend) this._cbs.oncommentend();
 	}
@@ -209,11 +208,11 @@ Parser.prototype._processComment = function(rawData){
 
 Parser.prototype._writeSpecial = function(rawData, lastTagSep){
 	//if the previous element is text, append the last tag sep to element
-	if(this._contentFlags >= SpecialTags.w){
+	if(this._wroteSpecial){
 	    if(this._cbs.ontext) this._cbs.ontext(lastTagSep + rawData);
 	}
 	else{ //The previous element was not text
-	    this._contentFlags += SpecialTags.w;
+	    this._wroteSpecial = true;
 	    if(rawData !== "" && this._cbs.ontext) this._cbs.ontext(rawData);
 	}
 };
@@ -250,7 +249,10 @@ Parser.prototype._processOpenTag = function(name, data){
 	if(data.substr(-1) === "/" || (emptyTags[name] && !this._options.xmlMode)){
 		if(this._cbs.onclosetag) this._cbs.onclosetag(name);
 	} else {
-		this._contentFlags += SpecialTags[type];
+		if(type !== ElementType.Tag){
+			this._contentFlags += SpecialTags[type];
+			this._wroteSpecial = false;	
+		}
 		this._stack.push(name);
 	}
 };

From 1645174a15e055ac4351c432a340b54c687d1b22 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Mon, 19 Dec 2011 22:29:10 +0100
Subject: [PATCH 134/450] 2.1.1

Also: Now requires node version 0.3.0 or better. (I don't know exactly
when the requires were changed to blocking, but before that, it doesn't
work.)
---
 package.json | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/package.json b/package.json
index cdf5a46..9dc88f5 100644
--- a/package.json
+++ b/package.json
@@ -1,7 +1,7 @@
 {
 	"name": "htmlparser2",
 	"description": "Forgiving HTML/XML/RSS Parser for Node. This version is optimised and cleaned and provides a SAX interface.",
-	"version": "2.0.2",
+	"version": "2.1.1",
 	"author": "Felix Boehm <me@feedic.com>",
 	"contributors": ["Chris Winberry <chris@winberry.net>"],
 	"repository": {
@@ -19,9 +19,7 @@
 	"scripts": {
 		"test": "cd tests && node 00-runtests.js"
 	},
-	"engines": {
-		"node": ">0"
-	},
+	"engines": "node >= 0.3.0",
 	"licenses": [{
 		"type": "MIT",
 		"url": "http://github.com/tautologistics/node-htmlparser/raw/master/LICENSE"

From c9ad2b685f27e1033538a4d962bc08cc80b85376 Mon Sep 17 00:00:00 2001
From: Siddharth Mahendraker <siddharth_mahen@me.com>
Date: Tue, 20 Dec 2011 08:43:14 +0200
Subject: [PATCH 135/450] Makes sure recursion works properly

---
 lib/DomUtils.js | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/DomUtils.js b/lib/DomUtils.js
index 3285f80..47fec5f 100644
--- a/lib/DomUtils.js
+++ b/lib/DomUtils.js
@@ -15,7 +15,7 @@ function filterArray(test, arr, recurse, limit){
 		}
 		
 		if(recurse && (childs = arr[i].children)){
-			childs = filterArray(test, childs, limit);
+			childs = filterArray(test, childs, recurse, limit);
 			arrayPush.apply(result, childs);
 			limit -= childs.length;
 			if(limit <= 0) break;
@@ -136,4 +136,4 @@ module.exports = {
 		
 		return ">" + ret + this.getInnerHTML(elem) + "</" + elem.name + ">";
 	}
-};
\ No newline at end of file
+};

From 7f6e6a6aca0e12265b53ad3378c9ff1d3d98bb3b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Tue, 20 Dec 2011 08:36:08 +0100
Subject: [PATCH 136/450] Added test for upper case case tags

---
 tests/HTML/22-lowercase_tags.js | 39 +++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)
 create mode 100644 tests/HTML/22-lowercase_tags.js

diff --git a/tests/HTML/22-lowercase_tags.js b/tests/HTML/22-lowercase_tags.js
new file mode 100644
index 0000000..0bf07c8
--- /dev/null
+++ b/tests/HTML/22-lowercase_tags.js
@@ -0,0 +1,39 @@
+exports.name = "Basic test";
+exports.options = {
+	  handler: {}
+	, parser: {lowerCaseTags:true}
+};
+exports.html = "<!DOCTYPE html><HTML><TITLE>The Title</title><BODY>Hello world</body></html>";
+exports.expected = [
+  {
+    "name": "!doctype",
+    "data": "!DOCTYPE html",
+    "type": "directive"
+  },
+  {
+    "type": "tag",
+    "name": "html",
+    "children": [
+      {
+        "type": "tag",
+        "name": "title",
+        "children": [
+          {
+            "data": "The Title",
+            "type": "text"
+          }
+        ]
+      },
+      {
+        "type": "tag",
+        "name": "body",
+        "children": [
+          {
+            "data": "Hello world",
+            "type": "text"
+          }
+        ]
+      }
+    ]
+  }
+]

From f3943a5e4f8fb85a629535d8ecccc69eb9228010 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Tue, 20 Dec 2011 09:00:33 +0100
Subject: [PATCH 137/450] Dynamically load files when requested in index.js

---
 lib/index.js | 25 ++++++++++++++++++++-----
 1 file changed, 20 insertions(+), 5 deletions(-)

diff --git a/lib/index.js b/lib/index.js
index 85a26a6..dfb51c7 100644
--- a/lib/index.js
+++ b/lib/index.js
@@ -1,7 +1,22 @@
 module.exports = {
-	Parser: require("./Parser.js"),
-	DefaultHandler: require("./DefaultHandler.js"),
-	FeedHandler: require("./FeedHandler.js"),
-	ElementType: require("./ElementType.js"),
-	DomUtils: require("./DomUtils.js")
+	get Parser(){
+		Object.defineProperty(this, "Parser", {value:require("./Parser.js")});
+		return this.Parser;
+	},
+	get DefaultHandler(){
+		Object.defineProperty(this, "DefaultHandler", {value:require("./DefaultHandler.js")});
+		return this.DefaultHandler;
+	},
+	get FeedHandler(){
+		Object.defineProperty(this, "FeedHandler", {value:require("./FeedHandler.js")});
+		return this.FeedHandler;
+	},
+	get ElementType(){
+		Object.defineProperty(this, "ElementType", {value:require("./ElementType.js")});
+		return ElementType;
+	},
+	get DomUtils(){
+		Object.defineProperty(this, "DomUtils", {value:require("./DomUtils.js")});
+		return this.DomUtils;
+	}
 }
\ No newline at end of file

From 52c7e32f0347774386a31477c6a894e6a82097ad Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Tue, 20 Dec 2011 14:38:17 +0100
Subject: [PATCH 138/450] Moved self-closing tags to parser, removed
 ClosingTags.js

---
 lib/ClosingTags.js | 40 ----------------------------------------
 1 file changed, 40 deletions(-)
 delete mode 100644 lib/ClosingTags.js

diff --git a/lib/ClosingTags.js b/lib/ClosingTags.js
deleted file mode 100644
index a65ab62..0000000
--- a/lib/ClosingTags.js
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
-*	List of tags that close others / are self-closing
-*/
-
-//Tags that close others
-exports.others = {
-	body: "head",
-	p: "p",
-	li: {
-		close: "li",
-		not: "ul"
-	},
-	tr: {
-		close: "tr",
-		not: "table"
-	},
-	td: {
-		close: "td",
-		not: "table"
-	}
-	//... TODO
-};
-
-//HTML Tags that shouldn't contain child nodes
-exports.self = {
-	area: true,
-	base: true,
-	basefont: true,
-	br: true,
-	col: true,
-	frame: true,
-	hr: true,
-	img: true,
-	input: true,
-	isindex: true,
-	link: true,
-	meta: true,
-	param: true,
-	embed: true
-};
\ No newline at end of file

From 539cdb872e7baef379df423d081f71d90c88bf2e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Tue, 20 Dec 2011 14:38:35 +0100
Subject: [PATCH 139/450] Moved self-closing tags to parser

---
 lib/Parser.js | 32 ++++++++++++++++++++++----------
 1 file changed, 22 insertions(+), 10 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index 2419efd..0f54cbd 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -12,7 +12,6 @@ function Parser(cbs, options){
 	this._done = false;
 }
 
-//**"Static"**//
 //Regular expressions used for cleaning up and parsing (stateless)
 var _reTagName = /[^\s\/]+/; //matches tag names
 var _reAttrib = /([^=<>\"\'\s]+)\s*=\s*(?:"([^"]*)"|'([^']*)'|([^'"\s]+))|([^=<>\"\'\s\/]+)/g;
@@ -32,8 +31,6 @@ Parser.prototype._cbs = {
 	*/
 };
 
-//**Public**//
-//Methods//
 //Parses a complete HTML and pushes it to the handler
 Parser.prototype.parseComplete = function(data){
 	this.reset();
@@ -52,10 +49,11 @@ Parser.prototype.parseChunk = function(data){
 //Tells the parser that the HTML being parsed is complete
 Parser.prototype.end = Parser.prototype.done = function(chunk){
 	if(this._done) return;
+
+	if(chunk) this.write(chunk);
 	this._done = true;
 
 	//Parse the buffer to its end
-	if(chunk) this._buffer += chunk;
 	if(this._buffer) this._parseTags(true);
 	
 	if(this._cbs.onclosetag){
@@ -71,21 +69,20 @@ Parser.prototype.reset = function(){
 	if(this._cbs.onreset) this._cbs.onreset();
 };
 
-//**Private**//
-//Takes an element and adds an "attribs" property for any element attributes found
+//parses the attribute string
 var parseAttributes = function(data){
 	var pos = data.search(/\w\s/) + 1, attrs = {}; //Find any whitespace
 	if(pos === 0) return attrs;
 	var attribRaw = data.substr(pos);
 
 	_reAttrib.lastIndex = 0;
+
 	var match;
-	
 	while(match = _reAttrib.exec(attribRaw)){
 		if(match[1]) attrs[match[1]] = match[2] || match[3] || match[4];
 		else attrs[match[5]] = match[5];
 	}
-	
+
 	return attrs;
 };
 
@@ -102,7 +99,7 @@ Parser.prototype._parseTagName = function(data){
 //Special tags that are treated differently
 var SpecialTags = {};
 //SpecialTags[ElementType.Tag] = 0;
-SpecialTags[ElementType.Style]  = 1; //2^0
+SpecialTags[ElementType.Style] = 1; //2^0
 SpecialTags[ElementType.Script] = 2; //2^1
 SpecialTags[ElementType.Comment] = 4; //2^3
 
@@ -217,7 +214,22 @@ Parser.prototype._writeSpecial = function(rawData, lastTagSep){
 	}
 };
 
-var emptyTags = require("./ClosingTags.js").self;
+var emptyTags = {
+	area: true,
+	base: true,
+	basefont: true,
+	br: true,
+	col: true,
+	frame: true,
+	hr: true,
+	img: true,
+	input: true,
+	isindex: true,
+	link: true,
+	meta: true,
+	param: true,
+	embed: true
+};
 
 Parser.prototype._processCloseTag = function(name){
 	if(this._stack && (!emptyTags[name] || this._options.xmlMode)){

From 1289e0abb0762bb2b1e1a9ee34a266b36c93af38 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Tue, 20 Dec 2011 20:38:57 +0100
Subject: [PATCH 140/450] Made the callback optional

As suggested in #8
---
 lib/DefaultHandler.js | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/lib/DefaultHandler.js b/lib/DefaultHandler.js
index 4380e0f..3632d40 100644
--- a/lib/DefaultHandler.js
+++ b/lib/DefaultHandler.js
@@ -5,8 +5,11 @@ function DefaultHandler(callback, options){
 	this._done = false;
 	this._inSpecialTag = false;
 	this._tagStack = [];
-	if(options) this._options = options; //otherwise, the prototype is used
-	if(callback) this._callback = callback;
+	if(typeof callback === "object") this._options = callback;
+	else {
+		if(options) this._options = options; //otherwise, the prototype is used
+		if(callback) this._callback = callback;	
+	}
 }
 
 //default options

From b8e94452182471d6db47be6db43975757c11053b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20B=C3=B6hm?= <felixboehm55@googlemail.com>
Date: Wed, 21 Dec 2011 15:06:01 +0000
Subject: [PATCH 141/450] Some minor changes

---
 lib/Parser.js         | 4 ++--
 tests/00-runtests.js  | 6 +++---
 tests/01-html.js      | 2 +-
 tests/02-feed.js      | 4 ++--
 tests/03-events.js    | 4 ++--
 tests/04-dom_utils.js | 4 +---
 6 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index 0f54cbd..0115e69 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -107,12 +107,12 @@ SpecialTags[ElementType.Comment] = 4; //2^3
 Parser.prototype._parseTags = function(force){
 	var buffer = this._buffer, current = 0;
 
-	var next, rawData, elementType, elementData, lastTagSep;
+	var next, rawData, elementData, lastTagSep;
 	
 	var opening = buffer.indexOf("<"), closing = buffer.indexOf(">");
 
 	//if force is true, parse everything
-	if(force) opening = Infinity;
+	if(force) opening = 1/0;
 
 	while(opening !== closing){ //just false if both are -1
 		lastTagSep = this._tagSep;
diff --git a/tests/00-runtests.js b/tests/00-runtests.js
index b1fd7f5..af716b9 100644
--- a/tests/00-runtests.js
+++ b/tests/00-runtests.js
@@ -7,15 +7,15 @@ var testCount = 0,
 function runTests(test){
 	var begin = Date.now();
 	//read files, load them, run them
-	fs.readdirSync(test.dir
+	fs.readdirSync(__dirname + test.dir
 	).map(function(file){
 		if(file[0] === ".") return false;
-		return require(test.dir + file);
+		return require(__dirname + test.dir + file);
 	}).forEach(function(file){
 		if(file === false) return;
 		var second = false,
 			failed = false,
-			start = Date.now()
+			start = Date.now(),
 			took = 0;
 		
 		console.log("Testing:", file.name);
diff --git a/tests/01-html.js b/tests/01-html.js
index 3d995a5..f20d59c 100644
--- a/tests/01-html.js
+++ b/tests/01-html.js
@@ -3,7 +3,7 @@
 var helper = require("./test-helper.js"),
 	DefaultHandler = require("../lib/DefaultHandler.js");
 
-exports.dir = "./HTML/";
+exports.dir = "/HTML/";
 
 /*
 	function test()
diff --git a/tests/02-feed.js b/tests/02-feed.js
index 26d85b5..e2b0911 100644
--- a/tests/02-feed.js
+++ b/tests/02-feed.js
@@ -3,7 +3,7 @@
 var helper = require("./test-helper.js"),
 	FeedHandler = require("../lib/FeedHandler.js");
 
-exports.dir = "./Feeds/";
+exports.dir = "/Feeds/";
 
 exports.test = function(test, cb){
 	var handler = new FeedHandler(function(err, dom){
@@ -11,4 +11,4 @@ exports.test = function(test, cb){
 		else cb(null, dom);
 	}, test.options.handler);
 	helper.writeToParser(handler, test.options.parser, test.html);
-}
\ No newline at end of file
+};
\ No newline at end of file
diff --git a/tests/03-events.js b/tests/03-events.js
index f30ea99..0cbf54a 100644
--- a/tests/03-events.js
+++ b/tests/03-events.js
@@ -1,6 +1,6 @@
 var helper = require("./test-helper.js");
 
-exports.dir = "./Events/";
+exports.dir = "/Events/";
 
 exports.test = function(test, cb){
 	var tokens = [];
@@ -26,4 +26,4 @@ exports.test = function(test, cb){
 		}
 	};
 	helper.writeToParser(cbs, test.options.parser, test.html);
-}
\ No newline at end of file
+};
\ No newline at end of file
diff --git a/tests/04-dom_utils.js b/tests/04-dom_utils.js
index b44c9a0..1c8809b 100644
--- a/tests/04-dom_utils.js
+++ b/tests/04-dom_utils.js
@@ -1,5 +1,3 @@
-var DomUtils = require("../lib/DomUtils.js");
-
 //generate a dom
 var handler = new (require("../lib/DefaultHandler.js"))();
 
@@ -9,7 +7,7 @@ var handler = new (require("../lib/DefaultHandler.js"))();
 
 var dom = handler.dom;
 
-exports.dir = "./DomUtils/";
+exports.dir = "/DomUtils/";
 
 exports.test = function(test, cb){
 	cb(null, test.getElements(dom));

From 0136f11b34d2639419b91c35103bbcaa1bd3dc0a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Wed, 21 Dec 2011 17:09:26 +0100
Subject: [PATCH 142/450] 2.1.2

---
 package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package.json b/package.json
index 9dc88f5..18fb42b 100644
--- a/package.json
+++ b/package.json
@@ -1,7 +1,7 @@
 {
 	"name": "htmlparser2",
 	"description": "Forgiving HTML/XML/RSS Parser for Node. This version is optimised and cleaned and provides a SAX interface.",
-	"version": "2.1.1",
+	"version": "2.1.2",
 	"author": "Felix Boehm <me@feedic.com>",
 	"contributors": ["Chris Winberry <chris@winberry.net>"],
 	"repository": {

From 280a9acf3e473998c700645789f24d922c35da57 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Wed, 21 Dec 2011 17:12:42 +0100
Subject: [PATCH 143/450] Some fixes & improvements

---
 lib/DefaultHandler.js |  9 ++++-----
 lib/DomUtils.js       | 30 +++++++++++++++---------------
 lib/index.js          |  2 +-
 package.json          |  2 +-
 tests/99-benchmark.js |  9 ++++++++-
 5 files changed, 29 insertions(+), 23 deletions(-)

diff --git a/lib/DefaultHandler.js b/lib/DefaultHandler.js
index 3632d40..bcb1c05 100644
--- a/lib/DefaultHandler.js
+++ b/lib/DefaultHandler.js
@@ -27,18 +27,17 @@ DefaultHandler.prototype.onend = function(){
 	this._handleCallback(null);
 };
 
+DefaultHandler.prototype._handleCallback = 
 DefaultHandler.prototype.onerror = function(error){
 	if(typeof this._callback === "function"){
-		return this._callback(error, this.dom);
+		this._callback(error, this.dom);
 	} else {
 		if(error) throw error;
 	}
 };
 
-DefaultHandler.prototype._handleCallback = DefaultHandler.prototype.onerror;
-
 DefaultHandler.prototype.onclosetag = function(name){
-	this._tagStack.pop();
+	if(this._tagStack.pop().name !== name) this._handleCallback(Error("tagname didn't match!"));
 };
 
 DefaultHandler.prototype._addDomElement = function(element){
@@ -97,7 +96,7 @@ DefaultHandler.prototype.oncomment = function(data){
 
 	var element = {
 		data: data,
-	    type: ElementType.Comment
+		 type: ElementType.Comment
 	};
 
 	if(!lastTag) this.dom.push(element);
diff --git a/lib/DomUtils.js b/lib/DomUtils.js
index 47fec5f..6fd8a77 100644
--- a/lib/DomUtils.js
+++ b/lib/DomUtils.js
@@ -26,7 +26,7 @@ function filterArray(test, arr, recurse, limit){
 
 function filter(test, element, recurse, limit){
 	if(recurse !== false) recurse = true;
-	if(isNaN(limit)) limit = Infinity;
+	if(isNaN(limit)) limit = 1/0;
 	if(!Array.isArray(element)){
 		element = [element];
 	}
@@ -37,18 +37,18 @@ module.exports = {
 	testElement: function testElement(options, element) {
 		 var type = element.type;
 	
-		 for(var key in options){
-		 	if(key === "tag_name"){
-		 		if(type !== ElementType.Tag && type !== ElementType.Script && type !== ElementType.Style) return false;
-		 		if(!options.tag_name(element.name)) return false;
-		 	} else if(key === "tag_type") {
-		 		if(!options.tag_type(type)) return false;
-		 	} else if(key === "tag_contains") {
-		 		if(type !== ElementType.Text && type !== ElementType.Comment && type !== ElementType.Directive) return false;
-		 		if(!options.tag_contains(element.data)) return false;
-		 	} else if(!element.attribs || !options[key](element.attribs[key]))
-		 		return false;
-		 }
+		for(var key in options){
+			if(key === "tag_name"){
+				if(type !== ElementType.Tag && type !== ElementType.Script && type !== ElementType.Style) return false;
+				if(!options.tag_name(element.name)) return false;
+			} else if(key === "tag_type") {
+				if(!options.tag_type(type)) return false;
+			} else if(key === "tag_contains") {
+				if(type !== ElementType.Text && type !== ElementType.Comment && type !== ElementType.Directive) return false;
+				if(!options.tag_contains(element.data)) return false;
+			} else if(!element.attribs || !options[key](element.attribs[key]))
+				return false;
+		}
 	
 		 return true;
 	}, 
@@ -85,7 +85,7 @@ module.exports = {
 			return name(elem.name);
 		}, element, recurse, limit);
 		
-		else return filter(function(elem){
+		return filter(function(elem){
 			var type = elem.type;
 			if(type !== ElementType.Tag && type !== ElementType.Script && type !== ElementType.Style) return false;
 			return elem.name === name;
@@ -136,4 +136,4 @@ module.exports = {
 		
 		return ">" + ret + this.getInnerHTML(elem) + "</" + elem.name + ">";
 	}
-};
+};
\ No newline at end of file
diff --git a/lib/index.js b/lib/index.js
index dfb51c7..45b07cb 100644
--- a/lib/index.js
+++ b/lib/index.js
@@ -13,7 +13,7 @@ module.exports = {
 	},
 	get ElementType(){
 		Object.defineProperty(this, "ElementType", {value:require("./ElementType.js")});
-		return ElementType;
+		return this.ElementType;
 	},
 	get DomUtils(){
 		Object.defineProperty(this, "DomUtils", {value:require("./DomUtils.js")});
diff --git a/package.json b/package.json
index 18fb42b..6869a4f 100644
--- a/package.json
+++ b/package.json
@@ -17,7 +17,7 @@
 	},
 	"main": "./lib/",
 	"scripts": {
-		"test": "cd tests && node 00-runtests.js"
+		"test": "node tests/00-runtests.js"
 	},
 	"engines": "node >= 0.3.0",
 	"licenses": [{
diff --git a/tests/99-benchmark.js b/tests/99-benchmark.js
index 1b1ac93..373a2fd 100644
--- a/tests/99-benchmark.js
+++ b/tests/99-benchmark.js
@@ -1,5 +1,12 @@
 var xml = Array(5e3).join("<!directive><tag attr='value'> text <!--Comment<>--></tag>"),
-	parser = new (require("../lib/Parser.js"))({}),
+	empty = function(){},
+	parser = new (require("../lib/Parser.js"))({
+		onopentag: empty,
+		onclosetag: empty,
+		oncomment: empty,
+		oncommentend: empty,
+		onprocessinginstruction: empty
+	}),
 	ben = require("ben");
 
 console.log("Test took (ms)", ben(1e2, function(){

From 0564b7e5e4aadd73467618278f2b4b136b81ed4b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Thu, 22 Dec 2011 14:36:25 +0100
Subject: [PATCH 144/450] Added another test for #6

---
 tests/Events/03-lowercase_tags.js | 8 ++++++++
 1 file changed, 8 insertions(+)
 create mode 100644 tests/Events/03-lowercase_tags.js

diff --git a/tests/Events/03-lowercase_tags.js b/tests/Events/03-lowercase_tags.js
new file mode 100644
index 0000000..1ef2ad5
--- /dev/null
+++ b/tests/Events/03-lowercase_tags.js
@@ -0,0 +1,8 @@
+exports.name = "simple";
+exports.options = {handler: {}, parser: {lowerCaseTags:true}};
+exports.html = "<H1 class=test>adsf</H1>";
+exports.expected = [ { event: 'open',
+    name: 'h1',
+    attributes: { class: 'test' } },
+  { event: 'text', text: 'adsf' },
+  { event: 'close', name: 'h1' } ];
\ No newline at end of file

From 447b52f38d14c6084baa2abed52ca8a8cdc677af Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Thu, 22 Dec 2011 19:55:14 +0100
Subject: [PATCH 145/450] Added support for CDATA, optimized regexps

---
 lib/ElementType.js       |  3 +-
 lib/Parser.js            | 88 +++++++++++++++++++++++++++-------------
 tests/Events/04-cdata.js | 75 ++++++++++++++++++++++++++++++++++
 tests/Feeds/03-rdf.js    | 30 +++++++-------
 4 files changed, 152 insertions(+), 44 deletions(-)
 create mode 100644 tests/Events/04-cdata.js

diff --git a/lib/ElementType.js b/lib/ElementType.js
index 204fbf2..b2c8a10 100644
--- a/lib/ElementType.js
+++ b/lib/ElementType.js
@@ -5,5 +5,6 @@ module.exports = {
 	Comment: "comment", //Special tag <!--...-->
 	Script: "script", //Special tag <script>...</script>
 	Style: "style", //Special tag <style>...</style>
-	Tag: "tag" //Any tag that isn't special
+	Tag: "tag", //Any tag that isn't special
+	CDATA: "cdata"
 };
\ No newline at end of file
diff --git a/lib/Parser.js b/lib/Parser.js
index 0115e69..082f8b4 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -13,8 +13,9 @@ function Parser(cbs, options){
 }
 
 //Regular expressions used for cleaning up and parsing (stateless)
-var _reTagName = /[^\s\/]+/; //matches tag names
-var _reAttrib = /([^=<>\"\'\s]+)\s*=\s*(?:"([^"]*)"|'([^']*)'|([^'"\s]+))|([^=<>\"\'\s\/]+)/g;
+var _reAttrib = /\s([^=\"\'\s\/]+)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S+)))?/g,
+	_reAttribStart = /\s+[^=\"\'\s\/]/,
+	_reTail = /\s|\//;
 
 Parser.prototype._options = {
 	xmlMode: false, //Special behavior for script/style tags by default
@@ -23,11 +24,16 @@ Parser.prototype._options = {
 
 Parser.prototype._cbs = {
 	/*
-	onopentag,
-	onclosetag,
-	ontext,
-	onprocessinginstruction,
-	oncomment
+		oncdatastart,
+		oncdataend,
+		ontext,
+		onprocessinginstruction,
+		oncomment,
+		oncommentend,
+		onclosetag,
+		onopentag,
+		onerror,
+		onreset
 	*/
 };
 
@@ -71,16 +77,13 @@ Parser.prototype.reset = function(){
 
 //parses the attribute string
 var parseAttributes = function(data){
-	var pos = data.search(/\w\s/) + 1, attrs = {}; //Find any whitespace
-	if(pos === 0) return attrs;
-	var attribRaw = data.substr(pos);
-
-	_reAttrib.lastIndex = 0;
+	var pos = data.search(_reAttribStart), attrs = {};
+	if(pos === -1) return attrs;
+	var attribRaw = data.substring(pos);
 
 	var match;
 	while(match = _reAttrib.exec(attribRaw)){
-		if(match[1]) attrs[match[1]] = match[2] || match[3] || match[4];
-		else attrs[match[5]] = match[5];
+		attrs[match[1]] = match[2] || match[3] || match[4] || match[1];
 	}
 
 	return attrs;
@@ -88,12 +91,12 @@ var parseAttributes = function(data){
 
 //Extracts the base tag name from the data value of an element
 Parser.prototype._parseTagName = function(data){
-	var match = data.match(_reTagName);
-	if(match === null) return "";
-	if(this._options.lowerCaseTags){
-		return match[0].toLowerCase();
-	}
-	else return match[0];
+	var pos = data.search(_reTail), match;
+	if(pos === -1) match = data;
+	else match = data.substr(0, pos);
+	
+	if(!this._options.lowerCaseTags) return match;
+	return match.toLowerCase();
 };
 
 //Special tags that are treated differently
@@ -101,7 +104,8 @@ var SpecialTags = {};
 //SpecialTags[ElementType.Tag] = 0;
 SpecialTags[ElementType.Style] = 1; //2^0
 SpecialTags[ElementType.Script] = 2; //2^1
-SpecialTags[ElementType.Comment] = 4; //2^3
+SpecialTags[ElementType.Comment] = 4; //2^2
+SpecialTags[ElementType.CDATA] = 8; //2^3
 
 //Parses through HTML text and returns an array of found elements
 Parser.prototype._parseTags = function(force){
@@ -132,13 +136,21 @@ Parser.prototype._parseTags = function(force){
 		//set elements for next run
 		current = next + 1;
 		
-		if(this._contentFlags >= SpecialTags[ElementType.Comment]){
+		if(this._contentFlags >= SpecialTags[ElementType.CDATA]){
+			if(this._tagSep === ">" && rawData.substr(-2) === "]]"){
+				if(rawData.length !== 2 && this._cbs.ontext){
+					this._cbs.ontext(rawData.slice(0,-2));
+				}
+				this._contentFlags -= SpecialTags[ElementType.CDATA];
+				if(this._cbs.oncdataend) this._cbs.oncdataend();
+			}
+			else if(this._cbs.ontext) this._cbs.ontext(rawData + this._tagSep);
+		}
+		else if(this._contentFlags >= SpecialTags[ElementType.Comment]){
 			//We're currently in a comment tag
 			this._processComment(rawData);
-			continue;
 		}
-		
-		if(lastTagSep === "<"){
+		else if(lastTagSep === "<"){
 			elementData = rawData.trimLeft();
 			if(elementData.charAt(0) === "/"){
 				//elementData = elementData.substr(1).trim();
@@ -160,23 +172,41 @@ Parser.prototype._parseTags = function(force){
 				}
 				this._processCloseTag(elementData);
 			}
-			else if(elementData.charAt(0) === "!" || elementData.charAt(0) === "?"){
-				if(elementData.substr(0, 3) === "!--"){
+			else if(elementData.charAt(0) === "!"){
+				if(elementData.substr(1, 2) === "--"){
 					//This tag is a comment
 					this._contentFlags += SpecialTags[ElementType.Comment];
 					this._processComment(rawData.substr(3));
 				}
+				else if(elementData.substr(1, 7) === "[CDATA["){
+					if(this._cbs.oncdatastart) this._cbs.oncdatastart();
+					if(this._tagSep === ">" && elementData.substr(-2) === "]]"){
+						if(this._cbs.oncdataend) this._cbs.oncdataend();
+						if(this._cbs.ontext) this._cbs.ontext(elementData.slice(8, -2));
+					}
+					else{
+						if(this._cbs.ontext) this._cbs.ontext(elementData.substr(8));
+						this._contentFlags += SpecialTags[ElementType.CDATA];
+					}
+				}
 				else if(this._contentFlags !== 0) this._writeSpecial(rawData, lastTagSep);
 				//This tag is a directive
-				//TODO: what about CDATA?
 				else if(this._cbs.onprocessinginstruction){
 					this._cbs.onprocessinginstruction(
-						elementData.charAt(0) + this._parseTagName(elementData.substr(1)), 
+						"!" + this._parseTagName(elementData.substr(1)), 
 						elementData
 					);
 				}
 			}
 			else if(this._contentFlags !== 0) this._writeSpecial(rawData, lastTagSep);
+			else if(elementData.charAt(0) === "?"){
+				if(this._cbs.onprocessinginstruction){
+					this._cbs.onprocessinginstruction(
+						"?" + this._parseTagName(elementData.substr(1)), 
+						elementData
+					);
+				}
+			}
 			else this._processOpenTag(this._parseTagName(elementData), elementData);
 		}
 		else{
diff --git a/tests/Events/04-cdata.js b/tests/Events/04-cdata.js
new file mode 100644
index 0000000..197235d
--- /dev/null
+++ b/tests/Events/04-cdata.js
@@ -0,0 +1,75 @@
+exports.name = "simple";
+exports.options = {handler: {}, parser: {}};
+exports.html = "<tag><![CDATA[ asdf ><asdf></adsf><> fo]]></tag>";
+exports.expected = [
+  {
+    "event": "opentag",
+    "data": [
+      "tag",
+      {},
+      "tag"
+    ]
+  },
+  {
+    "event": "cdatastart",
+    "data": []
+  },
+  {
+    "event": "text",
+    "data": [
+      " asdf "
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "<"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "asdf>"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "<"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "/adsf>"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "<"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      ">"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      " fo"
+    ]
+  },
+  {
+    "event": "cdataend",
+    "data": []
+  },
+  {
+    "event": "closetag",
+    "data": [
+      "tag"
+    ]
+  }
+];
\ No newline at end of file
diff --git a/tests/Feeds/03-rdf.js b/tests/Feeds/03-rdf.js
index 8d7d385..96e4c89 100644
--- a/tests/Feeds/03-rdf.js
+++ b/tests/Feeds/03-rdf.js
@@ -1,5 +1,4 @@
 exports.name = "RDF test";
-exports.type = "rss";
 exports.options = {
 	handler: {},
 	parser: {
@@ -10,17 +9,20 @@ exports.options = {
 exports.html = '<?xml version="1.0" encoding="UTF-8"?>\n<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/" xmlns:ev="http://purl.org/rss/1.0/modules/event/" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:syn="http://purl.org/rss/1.0/modules/syndication/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:admin="http://webns.net/mvcb/">\n\t<channel rdf:about="http://sfbay.craigslist.org/ccc/">\n\t\t<title>craigslist | all community in SF bay area</title>\n\t\t<link>http://sfbay.craigslist.org/ccc/</link>\n\t\t<description/>\n\t\t<dc:language>en-us</dc:language>\n\t\t<dc:rights>Copyright 2011 craigslist, inc.</dc:rights>\n\t\t<dc:publisher>webmaster@craigslist.org</dc:publisher>\n\t\t<dc:creator>webmaster@craigslist.org</dc:creator>\n\t\t<dc:source>http://sfbay.craigslist.org/ccc//</dc:source>\n\t\t<dc:title>craigslist | all community in SF bay area</dc:title>\n\t\t<dc:type>Collection</dc:type>\n\t\t<syn:updateBase>2011-11-04T09:39:10-07:00</syn:updateBase>\n\t\t<syn:updateFrequency>4</syn:updateFrequency>\n\t\t<syn:updatePeriod>hourly</syn:updatePeriod>\n\t\t<items>\n\t\t\t<rdf:Seq>\n\t\t\t\t<rdf:li rdf:resource="http://sfbay.craigslist.org/sby/muc/2681301534.html"/>\n\t\t\t</rdf:Seq>\n\t\t</items>\n\t</channel>\n\t<item rdf:about="http://sfbay.craigslist.org/sby/muc/2681301534.html">\n\t\t<title><![CDATA[ Music Equipment Repair and Consignment ]]></title>\n\t\t<link>\nhttp://sfbay.craigslist.org/sby/muc/2681301534.html\n</link>\n\t\t<description><![CDATA[\nSan Jose Rock Shop offers musical instrument repair and consignment! (408) 215-2065<br> <br> We are pleased to announce our NEW LOCATION: 1199 N 5th st. San Jose, ca 95112. Please call ahead, by appointment only.<br> <br> Recently featured by Metro Newspaper in their 2011 Best of the Silicon Valley edition see it online here:<br> <a href="http://www.metroactive.com/best-of-silicon-valley/2011/music-nightlife/editor-picks.html" rel="nofollow">http://www.metroactive.com/best-of-silicon-valley/2011/music-nightlife/editor-picks.html</a><br> <br> Guitar Set up (acoustic and electronic) $40!<!-- END CLTAGS -->\n]]></description>\n\t\t<dc:date>2011-11-04T09:35:17-07:00</dc:date>\n\t\t<dc:language>en-us</dc:language>\n\t\t<dc:rights>Copyright 2011 craigslist, inc.</dc:rights>\n\t\t<dc:source>\nhttp://sfbay.craigslist.org/sby/muc/2681301534.html\n</dc:source>\n\t\t<dc:title><![CDATA[ Music Equipment Repair and Consignment ]]></dc:title>\n\t\t<dc:type>text</dc:type>\n\t\t<dcterms:issued>2011-11-04T09:35:17-07:00</dcterms:issued>\n\t</item>\n\t<item rdf:about="http://sfbay.craigslist.org/eby/rid/2685010755.html">\n\t\t<title><![CDATA[\nRide Offered - Oakland/BART to LA/SFV - TODAY 3PM 11/04 (oakland north / temescal)\n]]></title>\n\t\t<link>\nhttp://sfbay.craigslist.org/eby/rid/2685010755.html\n</link>\n\t\t<description><![CDATA[\nIm offering a lift for up to two people from Oakland (or near any BART station in the East Bay/580/880 Corridor, or San Jose/Morgan Hill, Gilroy) to the San Fernando Valley / Los Angeles area. Specifically, Im leaving from Oakland between 2:30 and 3:00pm (this is flexible, but if I leave too late my girlfriend will kill me), and heading to Woodland Hills via the 580, I-5, 405, and 101.<!-- END CLTAGS -->\n]]></description>\n\t\t<dc:date>2011-11-04T09:34:54-07:00</dc:date>\n\t\t<dc:language>en-us</dc:language>\n\t\t<dc:rights>Copyright 2011 craigslist, inc.</dc:rights>\n\t\t<dc:source>\nhttp://sfbay.craigslist.org/eby/rid/2685010755.html\n</dc:source>\n\t\t<dc:title><![CDATA[\nRide Offered - Oakland/BART to LA/SFV - TODAY 3PM 11/04 (oakland north / temescal)\n]]></dc:title>\n\t\t<dc:type>text</dc:type>\n\t\t<dcterms:issued>2011-11-04T09:34:54-07:00</dcterms:issued>\n\t</item>\n</rdf:RDF>';
 
 exports.expected = {
-	type: 'rdf:RDF',
-	id: '',
-	title: 'craigslist | all community in SF bay area',
-	link: 'http://sfbay.craigslist.org/ccc/',
-	items: [{
-		title: '![CDATA[ Music Equipment Repair and Consignment ]]',
-		link: '\nhttp://sfbay.craigslist.org/sby/muc/2681301534.html\n',
-		description: '![CDATA[\nSan Jose Rock Shop offers musical instrument repair and consignment! (408) 215-2065'
-	}, {
-		title: '![CDATA[\nRide Offered - Oakland/BART to LA/SFV - TODAY 3PM 11/04 (oakland north / temescal)\n]]',
-		link: '\nhttp://sfbay.craigslist.org/eby/rid/2685010755.html\n',
-		description: '![CDATA[\nIm offering a lift for up to two people from Oakland (or near any BART station in the East Bay/580/880 Corridor, or San Jose/Morgan Hill, Gilroy) to the San Fernando Valley / Los Angeles area. Specifically, Im leaving from Oakland between 2:30 and 3:00pm (this is flexible, but if I leave too late my girlfriend will kill me), and heading to Woodland Hills via the 580, I-5, 405, and 101.'
-	}]
+  "type": "rdf:RDF",
+  "id": "",
+  "title": "craigslist | all community in SF bay area",
+  "link": "http://sfbay.craigslist.org/ccc/",
+  "items": [
+    {
+      "title": " Music Equipment Repair and Consignment ",
+      "link": "\nhttp://sfbay.craigslist.org/sby/muc/2681301534.html\n",
+      "description": "\nSan Jose Rock Shop offers musical instrument repair and consignment! (408) 215-2065"
+    },
+    {
+      "title": "\nRide Offered - Oakland/BART to LA/SFV - TODAY 3PM 11/04 (oakland north / temescal)\n",
+      "link": "\nhttp://sfbay.craigslist.org/eby/rid/2685010755.html\n",
+      "description": "\nIm offering a lift for up to two people from Oakland (or near any BART station in the East Bay/580/880 Corridor, or San Jose/Morgan Hill, Gilroy) to the San Fernando Valley / Los Angeles area. Specifically, Im leaving from Oakland between 2:30 and 3:00pm (this is flexible, but if I leave too late my girlfriend will kill me), and heading to Woodland Hills via the 580, I-5, 405, and 101."
+    }
+  ]
 };
\ No newline at end of file

From b927498cc4cfcc7e796814cc0b3f7bb7cc89da2a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Thu, 22 Dec 2011 19:58:20 +0100
Subject: [PATCH 146/450] Switched to ES6 proxies inside events test

If they are not available, use a static object
---
 package.json                      |  2 +-
 tests/03-events.js                | 61 +++++++++++++++++++------------
 tests/Events/01-simple.js         | 29 ++++++++++++---
 tests/Events/02-template.js       | 35 ++++++++++++------
 tests/Events/03-lowercase_tags.js | 29 ++++++++++++---
 5 files changed, 111 insertions(+), 45 deletions(-)

diff --git a/package.json b/package.json
index 6869a4f..b875cfb 100644
--- a/package.json
+++ b/package.json
@@ -17,7 +17,7 @@
 	},
 	"main": "./lib/",
 	"scripts": {
-		"test": "node tests/00-runtests.js"
+		"test": "node --harmony_proxies tests/00-runtests.js"
 	},
 	"engines": "node >= 0.3.0",
 	"licenses": [{
diff --git a/tests/03-events.js b/tests/03-events.js
index 0cbf54a..196db95 100644
--- a/tests/03-events.js
+++ b/tests/03-events.js
@@ -1,29 +1,44 @@
-var helper = require("./test-helper.js");
+var helper = require("./test-helper.js"),
+	sliceArr = Array.prototype.slice;
 
 exports.dir = "/Events/";
 
 exports.test = function(test, cb){
-	var tokens = [];
-	var cbs = {
-		onopentag: function(name, attributes){
-			tokens.push({event:"open", name: name, attributes: attributes});
-		},
-		onclosetag: function(name){
-			tokens.push({event:"close", name: name});
-		},
-		ontext: function(text){
-			tokens.push({event:"text", text: text});
-		},
-		oncomment: function(data){
-			tokens.push({event:"comment", data:data});
-		},
-		onprocessinginstruction: function(name, data){
-			tokens.push({event:"processing", name:name, data:data});
-		},
-		onend: function(){
-			//deletes all tokens
-			cb(null, tokens.splice(0));
-		}
-	};
+	var tokens = [], cbs;
+	if(typeof Proxy !== "undefined"){
+		cbs = Proxy.create({ get: function(a, name){
+			if(name === "onend"){
+				return function(){
+					cb(null, tokens.splice(0));
+				}
+			}
+			if(name === "onreset") return function(){};
+			return function(){
+				tokens.push({
+					event: name.substr(2),
+					data: sliceArr.apply(arguments)
+				});
+			}
+		}});
+	}
+	else{
+		cbs = {
+			onerror: cb,
+			onend: function(){
+				cb(null, tokens.splice(0));
+			}
+		};
+		["cdatastart", "cdataend", "text"
+		, "processinginstruction", "comment"
+		, "commentend", "closetag"
+		, "opentag"].forEach(function(name){
+			cbs["on" + name] = function(){
+				tokens.push({
+					event: name,
+					data: sliceArr.apply(arguments)
+				});
+			}
+		});
+	}
 	helper.writeToParser(cbs, test.options.parser, test.html);
 };
\ No newline at end of file
diff --git a/tests/Events/01-simple.js b/tests/Events/01-simple.js
index 2cf35f7..cfecf9c 100644
--- a/tests/Events/01-simple.js
+++ b/tests/Events/01-simple.js
@@ -1,8 +1,27 @@
 exports.name = "simple";
 exports.options = {handler: {}, parser: {}};
 exports.html = "<h1 class=test>adsf</h1>";
-exports.expected = [ { event: 'open',
-    name: 'h1',
-    attributes: { class: 'test' } },
-  { event: 'text', text: 'adsf' },
-  { event: 'close', name: 'h1' } ];
\ No newline at end of file
+exports.expected = [
+  {
+    "event": "opentag",
+    "data": [
+      "h1",
+      {
+        "class": "test"
+      },
+      "tag"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "adsf"
+    ]
+  },
+  {
+    "event": "closetag",
+    "data": [
+      "h1"
+    ]
+  }
+];
\ No newline at end of file
diff --git a/tests/Events/02-template.js b/tests/Events/02-template.js
index 76a29ab..6f68857 100644
--- a/tests/Events/02-template.js
+++ b/tests/Events/02-template.js
@@ -3,30 +3,43 @@ exports.options = {handler: {}, parser: {}};
 exports.html = "<script type=\"text/template\"><h1>Heading1</h1></script>";
 exports.expected = [
   {
-    "event": "open",
-    "name": "script",
-    "attributes": {
-      "type": "text/template"
-    }
+    "event": "opentag",
+    "data": [
+      "script",
+      {
+        "type": "text/template"
+      },
+      "script"
+    ]
   },
   {
     "event": "text",
-    "text": "<h1"
+    "data": [
+      "<h1"
+    ]
   },
   {
     "event": "text",
-    "text": ">Heading1"
+    "data": [
+      ">Heading1"
+    ]
   },
   {
     "event": "text",
-    "text": "</h1"
+    "data": [
+      "</h1"
+    ]
   },
   {
     "event": "text",
-    "text": ">"
+    "data": [
+      ">"
+    ]
   },
   {
-    "event": "close",
-    "name": "script"
+    "event": "closetag",
+    "data": [
+      "script"
+    ]
   }
 ];
\ No newline at end of file
diff --git a/tests/Events/03-lowercase_tags.js b/tests/Events/03-lowercase_tags.js
index 1ef2ad5..42348f9 100644
--- a/tests/Events/03-lowercase_tags.js
+++ b/tests/Events/03-lowercase_tags.js
@@ -1,8 +1,27 @@
 exports.name = "simple";
 exports.options = {handler: {}, parser: {lowerCaseTags:true}};
 exports.html = "<H1 class=test>adsf</H1>";
-exports.expected = [ { event: 'open',
-    name: 'h1',
-    attributes: { class: 'test' } },
-  { event: 'text', text: 'adsf' },
-  { event: 'close', name: 'h1' } ];
\ No newline at end of file
+exports.expected = [
+  {
+    "event": "opentag",
+    "data": [
+      "h1",
+      {
+        "class": "test"
+      },
+      "tag"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "adsf"
+    ]
+  },
+  {
+    "event": "closetag",
+    "data": [
+      "h1"
+    ]
+  }
+];
\ No newline at end of file

From 8c304017bb326151cbade4d64f185f73ab2073a4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 23 Dec 2011 18:27:15 +0100
Subject: [PATCH 147/450] Improved attrib parsing (again)

---
 lib/Parser.js | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index 082f8b4..25d01cb 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -13,8 +13,7 @@ function Parser(cbs, options){
 }
 
 //Regular expressions used for cleaning up and parsing (stateless)
-var _reAttrib = /\s([^=\"\'\s\/]+)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S+)))?/g,
-	_reAttribStart = /\s+[^=\"\'\s\/]/,
+var _reAttrib = /\s(\S+?)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S+))|\s|\/|$)/g,
 	_reTail = /\s|\//;
 
 Parser.prototype._options = {
@@ -77,12 +76,9 @@ Parser.prototype.reset = function(){
 
 //parses the attribute string
 var parseAttributes = function(data){
-	var pos = data.search(_reAttribStart), attrs = {};
-	if(pos === -1) return attrs;
-	var attribRaw = data.substring(pos);
-
-	var match;
-	while(match = _reAttrib.exec(attribRaw)){
+	var attrs = {}, match;
+	
+	while(match = _reAttrib.exec(data)){
 		attrs[match[1]] = match[2] || match[3] || match[4] || match[1];
 	}
 

From e70304ac6e9dd29ae81d8964a798f7f3ab24a34d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 24 Dec 2011 13:00:55 +0100
Subject: [PATCH 148/450] Shortened Parser#_parseTagName

---
 lib/Parser.js | 19 ++++++++-----------
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index 25d01cb..b44fbfb 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -14,7 +14,7 @@ function Parser(cbs, options){
 
 //Regular expressions used for cleaning up and parsing (stateless)
 var _reAttrib = /\s(\S+?)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S+))|\s|\/|$)/g,
-	_reTail = /\s|\//;
+	_reTail = /\s|\/|$/;
 
 Parser.prototype._options = {
 	xmlMode: false, //Special behavior for script/style tags by default
@@ -23,16 +23,16 @@ Parser.prototype._options = {
 
 Parser.prototype._cbs = {
 	/*
-		oncdatastart,
 		oncdataend,
-		ontext,
-		onprocessinginstruction,
+		oncdatastart,
+		onclosetag,
 		oncomment,
 		oncommentend,
-		onclosetag,
-		onopentag,
 		onerror,
-		onreset
+		onopentag,
+		onprocessinginstruction,
+		onreset,
+		ontext
 	*/
 };
 
@@ -87,10 +87,7 @@ var parseAttributes = function(data){
 
 //Extracts the base tag name from the data value of an element
 Parser.prototype._parseTagName = function(data){
-	var pos = data.search(_reTail), match;
-	if(pos === -1) match = data;
-	else match = data.substr(0, pos);
-	
+	var match = data.substr(0, data.search(_reTail));
 	if(!this._options.lowerCaseTags) return match;
 	return match.toLowerCase();
 };

From 0f0d7561449d19335ff2f4513786c42674a60404 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 24 Dec 2011 13:01:43 +0100
Subject: [PATCH 149/450] Moved cb names to tests/test-helper, added additional
 tests to benchmark

---
 tests/03-events.js    |  5 +----
 tests/99-benchmark.js | 34 +++++++++++++++++++++++-----------
 tests/test-helper.js  |  4 +++-
 3 files changed, 27 insertions(+), 16 deletions(-)

diff --git a/tests/03-events.js b/tests/03-events.js
index 196db95..3552a7e 100644
--- a/tests/03-events.js
+++ b/tests/03-events.js
@@ -28,10 +28,7 @@ exports.test = function(test, cb){
 				cb(null, tokens.splice(0));
 			}
 		};
-		["cdatastart", "cdataend", "text"
-		, "processinginstruction", "comment"
-		, "commentend", "closetag"
-		, "opentag"].forEach(function(name){
+		helper.EVENTS.forEach(function(name){
 			cbs["on" + name] = function(){
 				tokens.push({
 					event: name,
diff --git a/tests/99-benchmark.js b/tests/99-benchmark.js
index 373a2fd..ed70b02 100644
--- a/tests/99-benchmark.js
+++ b/tests/99-benchmark.js
@@ -1,14 +1,26 @@
-var xml = Array(5e3).join("<!directive><tag attr='value'> text <!--Comment<>--></tag>"),
+var multiply = function(text){
+		return Array(5e3+1).join(text);
+	},
+	tests = {
+		self_closing: multiply("<br/>"),
+		tag: multiply("<tag foo=bar foobar> Text </tag>"),
+		comment: multiply("<!-- this is <<a> comment -->"),
+		directive: multiply("<?foo bar?>"),
+		special: multiply("<script> THIS IS <SPECIAL> </script>"),
+		xml: multiply("<!directive><tag attr='value'> text <!--Comment<>--></tag>")
+	}
 	empty = function(){},
-	parser = new (require("../lib/Parser.js"))({
-		onopentag: empty,
-		onclosetag: empty,
-		oncomment: empty,
-		oncommentend: empty,
-		onprocessinginstruction: empty
-	}),
+	cbs = {};
+
+require("./test-helper.js").EVENTS.forEach(function(name){
+    cbs["on" + name] = empty;
+});
+
+var parser = new (require("../lib/Parser.js"))(cbs),
 	ben = require("ben");
 
-console.log("Test took (ms)", ben(1e2, function(){
-	parser.parseComplete(xml);
-}));
\ No newline at end of file
+Object.keys(tests).forEach(function(name){
+	console.log("Test", name, "took", ben(150, function(){
+		parser.parseComplete(tests[name]);
+	}));
+});
\ No newline at end of file
diff --git a/tests/test-helper.js b/tests/test-helper.js
index f533505..8fc24fc 100644
--- a/tests/test-helper.js
+++ b/tests/test-helper.js
@@ -10,4 +10,6 @@ exports.writeToParser = function(handler, options, data){
 	parser.done();
 	//then parse everything
 	parser.parseComplete(data);
-}
\ No newline at end of file
+}
+
+exports.EVENTS = ["cdatastart", "cdataend", "text", "processinginstruction", "comment", "commentend", "closetag", "opentag"/*, "error", "end"*/];
\ No newline at end of file

From c36eed8af100b17f2563c11378b2b8294ce37721 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 24 Dec 2011 13:09:29 +0100
Subject: [PATCH 150/450] Fixed a bug with boolean attributes

`<tag foo bar>` just returned `{foo:"foo"}` as an attribute
---
 lib/Parser.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index b44fbfb..7b580e7 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -13,7 +13,7 @@ function Parser(cbs, options){
 }
 
 //Regular expressions used for cleaning up and parsing (stateless)
-var _reAttrib = /\s(\S+?)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S+))|\s|\/|$)/g,
+var _reAttrib = /\s(\S+?)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S+))|(?=\s)|\/|$)/g,
 	_reTail = /\s|\/|$/;
 
 Parser.prototype._options = {

From 9c8a5fb9cc97103f80f03291dafbe6df6eef338d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Wed, 28 Dec 2011 11:29:39 +0100
Subject: [PATCH 151/450] Use Array#lastIndexOf inside Parser#_processCloseTag

Might be a performance plus in future versions of node (currently it
doesn't have much impact, it's just more readable)
---
 lib/Parser.js | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index 7b580e7..7acc5de 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -256,14 +256,13 @@ var emptyTags = {
 
 Parser.prototype._processCloseTag = function(name){
 	if(this._stack && (!emptyTags[name] || this._options.xmlMode)){
-		var i = this._stack.length;
-		while(i !== 0 && this._stack[--i] !== name){}
-		if(i !== 0 || this._stack[0] === name)
+		var pos = this._stack.lastIndexOf(name);
+		if(pos !== -1)
 			if(this._cbs.onclosetag){
-				while(i < this._stack.length)
-					this._cbs.onclosetag(this._stack.pop());
+				pos = this._stack.length - pos;
+				while(pos--) this._cbs.onclosetag(this._stack.pop());
 			}
-			else this._stack.splice(i);
+			else this._stack.splice(pos);
 	}
 	//many browsers (eg. Safari, Chrome) convert </br> to <br>
 	else if(name === "br" && !this._options.xmlMode)

From de99728f32af00253d7577e63f27aa41f95d01c9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 31 Dec 2011 14:05:57 +0100
Subject: [PATCH 152/450] 2.1.3

---
 package.json | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/package.json b/package.json
index b875cfb..4b1ca20 100644
--- a/package.json
+++ b/package.json
@@ -1,8 +1,9 @@
 {
 	"name": "htmlparser2",
 	"description": "Forgiving HTML/XML/RSS Parser for Node. This version is optimised and cleaned and provides a SAX interface.",
-	"version": "2.1.2",
+	"version": "2.1.3",
 	"author": "Felix Boehm <me@feedic.com>",
+	"keywords": ["html", "parser", "xml", "dom", "rss", "feed", "atom"],
 	"contributors": ["Chris Winberry <chris@winberry.net>"],
 	"repository": {
 		"type": "git",

From 0eb23186ecfda26a132213ffde65e69bb819ae30 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 31 Dec 2011 14:06:28 +0100
Subject: [PATCH 153/450] Removed FeedHandler prototype

It's currently not finished
---
 lib/_FeedHandler.js | 108 --------------------------------------------
 1 file changed, 108 deletions(-)
 delete mode 100644 lib/_FeedHandler.js

diff --git a/lib/_FeedHandler.js b/lib/_FeedHandler.js
deleted file mode 100644
index 73059d4..0000000
--- a/lib/_FeedHandler.js
+++ /dev/null
@@ -1,108 +0,0 @@
-// NOT FINISHED YET! DON'T USE IT!
-
-//opening tags
-var searchRoot = function(tagName){
-	if(tagName === "rss" || tagName === "rdf:RDF" || tagName === "feed"){
-		if(tagName === "rdf:RDF") this.feed.type = "rdf";
-		else this.feed.type = tagName;
-		this._map = RssFeedMap;
-		this.onopentag = getChannelElement;
-	}
-	else if(tagName === "feed"){
-		this.feed.type = "atom";
-		this._map = AtomFeedMap;
-		this.onclosetag = getFeedElements;
-		this.ontext = writeText;
-		this.onopentag = getOpenTag;
-	}
-}
-
-var getChannelElement = function(tagName){
-	if(tagName === "channel"){
-		this.onopentag = getOpenTag;
-		this.onclosetag = getFeedElements;
-		this.ontext = writeText;
-	}
-}
-
-var getOpenTag = function(tagName, attribs){
-	this._level += 1;
-	if(tagName === this._childName){
-		if(this._feed.type === "atom"){
-		}
-		else{
-		
-		}
-	} else if(tagName === "link" && this._level === 1 
-		&& this._feed.type === "atom" && attribs.href){
-			this.feed.link = attribs.href;
-	}
-};
-
-//text
-var writeText = function(text){
-	if(this._stack[this._level]){
-		this._stack[this._level] += text;
-	} else this._stack[this._level] = text;
-};
-
-//closing tags
-var getFeedElements = function(tagName){
-	var text = this._stack.pop();
-	if(this._level-- === 1){
-		var elemName = this._map[tagName];
-		if(elemName){
-			if(elemName === "updated") text = Date(text);	
-			this._feed[elemName] = text;
-		}
-	}
-};
-
-//mappings
-var RssFeedMap = {
-	title: "title",
-	link: "link",
-	description: "description",
-	lastBuildDate: "updated",
-	managingEditor: "author"/*,
-	item: "item"*/
-};
-
-var RssItemMap = {
-
-};
-
-var AtomFeedMap = {
-	id: "id",
-	title: "title",
-	subtitle: "description",
-	updated: "updated",
-	email: "author"/*,
-	entry: "item"*/
-};
-
-var AtomItemMap = {
-
-};
-
-//TODO: make this a trully streamable handler
-function FeedHandler(callback, onitem){
-	this.onopentag = searchRoot;
-	this.feed = {
-		type: null,
-		id: "",
-		title: null,
-		link: null,
-		description: null,
-		updated: null,
-		author: null,
-		items: []
-	};
-	this._level = 0;
-	this._stack = [];
-	this._map = null;
-	this.onend = callback;
-	this.onitem = onitem; //called when a new item was found
-}
-
-module.exports = FeedHandler;
\ No newline at end of file

From c5d1bd3f969f19fbfdece36bf860e64a95d76eab Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 31 Dec 2011 15:00:25 +0100
Subject: [PATCH 154/450] Added a Stream interface

might be useful when working with other streams (eg. generated by
request)
---
 lib/Stream.js | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++
 lib/index.js  |  4 ++++
 2 files changed, 62 insertions(+)
 create mode 100644 lib/Stream.js

diff --git a/lib/Stream.js b/lib/Stream.js
new file mode 100644
index 0000000..bab2969
--- /dev/null
+++ b/lib/Stream.js
@@ -0,0 +1,58 @@
+var Parser = require("./Parser.js");
+
+var Stream = function(options){
+	Parser.call(this, new cbs(this), options);
+};
+
+require("util").inherits(Stream, require("stream"));
+
+//util.inherits would overwrite the prototype when called twice,
+//so we need a different approach
+Object.getOwnPropertyNames(Parser.prototype).forEach(function(name){
+	Stream.prototype[name] = Parser.prototype[name];
+});
+
+Stream.prototype.writable = true;
+Stream.prototype.readable = true;
+
+var cbs = function(scope){
+	this.scope = scope;
+};
+
+cbs.prototype = {
+	oncdataend: function(){
+		this.scope.emit("cdataend");
+	},
+	oncdatastart: function(){
+		this.scope.emit("cdatastart");
+	},
+	onclosetag: function(name){
+    	this.scope.emit("closetag", name);
+    },
+	oncomment: function(text){
+    	this.scope.emit("comment", text);
+    },
+	oncommentend: function(){
+		this.scope.emit("commentend");
+	},
+	onerror: function(err){
+    	this.scope.emit("error", err);
+    },
+	onopentag: function(name, attribs, type){
+    	this.scope.emit("opentag", name, attribs, type);
+    },
+	onprocessinginstruction: function(name, data){
+		this.scope.emit("processinginstruction", name, data);
+	},
+	onreset: function(){
+		this.scope.emit("reset");
+	},
+    ontext: function(text){
+    	this.scope.emit("text", text);
+    	//let the 'pipe' function do something useful
+    	//this.scope.emit("data", text);
+    }
+};
+
+module.exports = Stream;
+Stream.cbs = cbs;
\ No newline at end of file
diff --git a/lib/index.js b/lib/index.js
index 45b07cb..df40916 100644
--- a/lib/index.js
+++ b/lib/index.js
@@ -15,6 +15,10 @@ module.exports = {
 		Object.defineProperty(this, "ElementType", {value:require("./ElementType.js")});
 		return this.ElementType;
 	},
+	get Stream(){
+		Object.defineProperty(this, "Stream", {value:require("./Stream.js")});
+		return this.Stream;
+	},
 	get DomUtils(){
 		Object.defineProperty(this, "DomUtils", {value:require("./DomUtils.js")});
 		return this.DomUtils;

From d62f465d55e2c103ccddfaeb4b8197bbbcb2a0b8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 31 Dec 2011 15:34:54 +0100
Subject: [PATCH 155/450] Minor changes

---
 lib/Parser.js | 2 +-
 lib/Stream.js | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index 7acc5de..e73230a 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -183,7 +183,7 @@ Parser.prototype._parseTags = function(force){
 					}
 				}
 				else if(this._contentFlags !== 0) this._writeSpecial(rawData, lastTagSep);
-				//This tag is a directive
+				//TODO: This isn't a processing instruction, needs a new name
 				else if(this._cbs.onprocessinginstruction){
 					this._cbs.onprocessinginstruction(
 						"!" + this._parseTagName(elementData.substr(1)), 
diff --git a/lib/Stream.js b/lib/Stream.js
index bab2969..350dfe1 100644
--- a/lib/Stream.js
+++ b/lib/Stream.js
@@ -54,5 +54,4 @@ cbs.prototype = {
     }
 };
 
-module.exports = Stream;
-Stream.cbs = cbs;
\ No newline at end of file
+module.exports = Stream;
\ No newline at end of file

From 2469a0d6fc272b320bff7601fc983286bba8a3ec Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 6 Jan 2012 13:44:00 +0100
Subject: [PATCH 156/450] Added two new events, `onopentagname` and `on
 attribut`

They should allow lighter functions
---
 lib/Parser.js                     | 12 ++++++++++++
 lib/Stream.js                     |  6 ++++++
 tests/Events/01-simple.js         | 13 +++++++++++++
 tests/Events/02-template.js       | 13 +++++++++++++
 tests/Events/03-lowercase_tags.js | 15 ++++++++++++++-
 tests/Events/04-cdata.js          |  8 +++++++-
 tests/test-helper.js              |  2 +-
 7 files changed, 66 insertions(+), 3 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index e73230a..741cc64 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -269,15 +269,27 @@ Parser.prototype._processCloseTag = function(name){
 		this._processOpenTag(name, "/");
 };
 
+Parser.prototype._parseAttributes = function(data){
+	for(var match; match = _reAttrib.exec(data);){
+		this._cbs.onattribute(match[1], match[2] || match[3] || match[4] || match[1]);
+	}
+};
+
 Parser.prototype._processOpenTag = function(name, data){
 	var type = ElementType.Tag;
 	if(this._options.xmlMode){ /*do nothing*/ }
 	else if(name === "script") type = ElementType.Script;
 	else if(name === "style")  type = ElementType.Style;
 	
+	if(this._cbs.onopentagname){
+		this._cbs.onopentagname(name);
+	}
 	if(this._cbs.onopentag){
 		this._cbs.onopentag(name, parseAttributes(data), type);
 	}
+	if(this._cbs.onattribute){
+		this._parseAttributes(data);
+	}
 	
 	//If tag self-terminates, add an explicit, separate closing tag
 	if(data.substr(-1) === "/" || (emptyTags[name] && !this._options.xmlMode)){
diff --git a/lib/Stream.js b/lib/Stream.js
index 350dfe1..f02abbf 100644
--- a/lib/Stream.js
+++ b/lib/Stream.js
@@ -40,6 +40,12 @@ cbs.prototype = {
     },
 	onopentag: function(name, attribs, type){
     	this.scope.emit("opentag", name, attribs, type);
+    },
+    onopentagname: function(name){
+    	this.scope.emit("opentagname", name);
+    },
+    onattribute: function(name, value){
+    	this.scope.emit("attribute", name, value);
     },
 	onprocessinginstruction: function(name, data){
 		this.scope.emit("processinginstruction", name, data);
diff --git a/tests/Events/01-simple.js b/tests/Events/01-simple.js
index cfecf9c..2307b05 100644
--- a/tests/Events/01-simple.js
+++ b/tests/Events/01-simple.js
@@ -2,6 +2,12 @@ exports.name = "simple";
 exports.options = {handler: {}, parser: {}};
 exports.html = "<h1 class=test>adsf</h1>";
 exports.expected = [
+  {
+    "event": "opentagname",
+    "data": [
+      "h1"
+    ]
+  },
   {
     "event": "opentag",
     "data": [
@@ -12,6 +18,13 @@ exports.expected = [
       "tag"
     ]
   },
+  {
+    "event": "attribute",
+    "data": [
+      "class",
+      "test"
+    ]
+  },
   {
     "event": "text",
     "data": [
diff --git a/tests/Events/02-template.js b/tests/Events/02-template.js
index 6f68857..467f02e 100644
--- a/tests/Events/02-template.js
+++ b/tests/Events/02-template.js
@@ -2,6 +2,12 @@ exports.name = "Template script tags";
 exports.options = {handler: {}, parser: {}};
 exports.html = "<script type=\"text/template\"><h1>Heading1</h1></script>";
 exports.expected = [
+  {
+    "event": "opentagname",
+    "data": [
+      "script"
+    ]
+  },
   {
     "event": "opentag",
     "data": [
@@ -12,6 +18,13 @@ exports.expected = [
       "script"
     ]
   },
+  {
+    "event": "attribute",
+    "data": [
+      "type",
+      "text/template"
+    ]
+  },
   {
     "event": "text",
     "data": [
diff --git a/tests/Events/03-lowercase_tags.js b/tests/Events/03-lowercase_tags.js
index 42348f9..55e8ac5 100644
--- a/tests/Events/03-lowercase_tags.js
+++ b/tests/Events/03-lowercase_tags.js
@@ -1,7 +1,13 @@
-exports.name = "simple";
+exports.name = "Lowercase tags";
 exports.options = {handler: {}, parser: {lowerCaseTags:true}};
 exports.html = "<H1 class=test>adsf</H1>";
 exports.expected = [
+  {
+    "event": "opentagname",
+    "data": [
+      "h1"
+    ]
+  },
   {
     "event": "opentag",
     "data": [
@@ -12,6 +18,13 @@ exports.expected = [
       "tag"
     ]
   },
+  {
+    "event": "attribute",
+    "data": [
+      "class",
+      "test"
+    ]
+  },
   {
     "event": "text",
     "data": [
diff --git a/tests/Events/04-cdata.js b/tests/Events/04-cdata.js
index 197235d..b0d18be 100644
--- a/tests/Events/04-cdata.js
+++ b/tests/Events/04-cdata.js
@@ -1,7 +1,13 @@
-exports.name = "simple";
+exports.name = "CDATA";
 exports.options = {handler: {}, parser: {}};
 exports.html = "<tag><![CDATA[ asdf ><asdf></adsf><> fo]]></tag>";
 exports.expected = [
+  {
+    "event": "opentagname",
+    "data": [
+      "tag"
+    ]
+  },
   {
     "event": "opentag",
     "data": [
diff --git a/tests/test-helper.js b/tests/test-helper.js
index 8fc24fc..07af5c9 100644
--- a/tests/test-helper.js
+++ b/tests/test-helper.js
@@ -12,4 +12,4 @@ exports.writeToParser = function(handler, options, data){
 	parser.parseComplete(data);
 }
 
-exports.EVENTS = ["cdatastart", "cdataend", "text", "processinginstruction", "comment", "commentend", "closetag", "opentag"/*, "error", "end"*/];
\ No newline at end of file
+exports.EVENTS = ["attribute", "cdatastart", "cdataend", "text", "processinginstruction", "comment", "commentend", "closetag", "opentag", "opentagname"/*, "error", "end"*/];
\ No newline at end of file

From 3ea9838e43abf3daaedbc292669dac26e821e74a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 6 Jan 2012 13:44:49 +0100
Subject: [PATCH 157/450] Moved feeds to /tests/Documents

They shouldn't be inside the test files
---
 tests/Documents/Atom_Example.xml | 25 +++++++++++++
 tests/Documents/RDF_Example.xml  | 63 ++++++++++++++++++++++++++++++++
 tests/Documents/RSS_Example.xml  | 48 ++++++++++++++++++++++++
 tests/Feeds/01-rss.js            | 49 +------------------------
 tests/Feeds/02-atom.js           | 28 +-------------
 tests/Feeds/03-rdf.js            |  2 +-
 6 files changed, 139 insertions(+), 76 deletions(-)
 create mode 100644 tests/Documents/Atom_Example.xml
 create mode 100644 tests/Documents/RDF_Example.xml
 create mode 100644 tests/Documents/RSS_Example.xml

diff --git a/tests/Documents/Atom_Example.xml b/tests/Documents/Atom_Example.xml
new file mode 100644
index 0000000..7349745
--- /dev/null
+++ b/tests/Documents/Atom_Example.xml
@@ -0,0 +1,25 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!-- http://en.wikipedia.org/wiki/Atom_%28standard%29 -->
+<feed xmlns="http://www.w3.org/2005/Atom">
+	<title>Example Feed</title>
+	<subtitle>A subtitle.</subtitle>
+	<link href="http://example.org/feed/" rel="self" />
+	<link href="http://example.org/" />
+	<id>urn:uuid:60a76c80-d399-11d9-b91C-0003939e0af6</id>
+	<updated>2003-12-13T18:30:02Z</updated>
+	<author>
+		<name>John Doe</name>
+		<email>johndoe@example.com</email>
+	</author>
+
+	<entry>
+		<title>Atom-Powered Robots Run Amok</title>
+		<link href="http://example.org/2003/12/13/atom03" />
+		<link rel="alternate" type="text/html" href="http://example.org/2003/12/13/atom03.html"/>
+		<link rel="edit" href="http://example.org/2003/12/13/atom03/edit"/>
+		<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
+		<updated>2003-12-13T18:30:02Z</updated>
+		<summary>Some text.</summary>
+	</entry>
+
+</feed>
\ No newline at end of file
diff --git a/tests/Documents/RDF_Example.xml b/tests/Documents/RDF_Example.xml
new file mode 100644
index 0000000..068da17
--- /dev/null
+++ b/tests/Documents/RDF_Example.xml
@@ -0,0 +1,63 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/" xmlns:ev="http://purl.org/rss/1.0/modules/event/" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:syn="http://purl.org/rss/1.0/modules/syndication/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:admin="http://webns.net/mvcb/">
+	<channel rdf:about="http://sfbay.craigslist.org/ccc/">
+		<title>craigslist | all community in SF bay area</title>
+		<link>http://sfbay.craigslist.org/ccc/</link>
+		<description/>
+		<dc:language>en-us</dc:language>
+		<dc:rights>Copyright 2011 craigslist, inc.</dc:rights>
+		<dc:publisher>webmaster@craigslist.org</dc:publisher>
+		<dc:creator>webmaster@craigslist.org</dc:creator>
+		<dc:source>http://sfbay.craigslist.org/ccc//</dc:source>
+		<dc:title>craigslist | all community in SF bay area</dc:title>
+		<dc:type>Collection</dc:type>
+		<syn:updateBase>2011-11-04T09:39:10-07:00</syn:updateBase>
+		<syn:updateFrequency>4</syn:updateFrequency>
+		<syn:updatePeriod>hourly</syn:updatePeriod>
+		<items>
+			<rdf:Seq>
+				<rdf:li rdf:resource="http://sfbay.craigslist.org/sby/muc/2681301534.html"/>
+			</rdf:Seq>
+		</items>
+	</channel>
+	<item rdf:about="http://sfbay.craigslist.org/sby/muc/2681301534.html">
+		<title><![CDATA[ Music Equipment Repair and Consignment ]]></title>
+		<link>
+http://sfbay.craigslist.org/sby/muc/2681301534.html
+</link>
+		<description><![CDATA[
+San Jose Rock Shop offers musical instrument repair and consignment! (408) 215-2065<br> <br> We are pleased to announce our NEW LOCATION: 1199 N 5th st. San Jose, ca 95112. Please call ahead, by appointment only.<br> <br> Recently featured by Metro Newspaper in their 2011 Best of the Silicon Valley edition see it online here:<br> <a href="http://www.metroactive.com/best-of-silicon-valley/2011/music-nightlife/editor-picks.html" rel="nofollow">http://www.metroactive.com/best-of-silicon-valley/2011/music-nightlife/editor-picks.html</a><br> <br> Guitar Set up (acoustic and electronic) $40!<!-- END CLTAGS -->
+]]></description>
+		<dc:date>2011-11-04T09:35:17-07:00</dc:date>
+		<dc:language>en-us</dc:language>
+		<dc:rights>Copyright 2011 craigslist, inc.</dc:rights>
+		<dc:source>
+http://sfbay.craigslist.org/sby/muc/2681301534.html
+</dc:source>
+		<dc:title><![CDATA[ Music Equipment Repair and Consignment ]]></dc:title>
+		<dc:type>text</dc:type>
+		<dcterms:issued>2011-11-04T09:35:17-07:00</dcterms:issued>
+	</item>
+	<item rdf:about="http://sfbay.craigslist.org/eby/rid/2685010755.html">
+		<title><![CDATA[
+Ride Offered - Oakland/BART to LA/SFV - TODAY 3PM 11/04 (oakland north / temescal)
+]]></title>
+		<link>
+http://sfbay.craigslist.org/eby/rid/2685010755.html
+</link>
+		<description><![CDATA[
+Im offering a lift for up to two people from Oakland (or near any BART station in the East Bay/580/880 Corridor, or San Jose/Morgan Hill, Gilroy) to the San Fernando Valley / Los Angeles area. Specifically, Im leaving from Oakland between 2:30 and 3:00pm (this is flexible, but if I leave too late my girlfriend will kill me), and heading to Woodland Hills via the 580, I-5, 405, and 101.<!-- END CLTAGS -->
+]]></description>
+		<dc:date>2011-11-04T09:34:54-07:00</dc:date>
+		<dc:language>en-us</dc:language>
+		<dc:rights>Copyright 2011 craigslist, inc.</dc:rights>
+		<dc:source>
+http://sfbay.craigslist.org/eby/rid/2685010755.html
+</dc:source>
+		<dc:title><![CDATA[
+Ride Offered - Oakland/BART to LA/SFV - TODAY 3PM 11/04 (oakland north / temescal)
+]]></dc:title>
+		<dc:type>text</dc:type>
+		<dcterms:issued>2011-11-04T09:34:54-07:00</dcterms:issued>
+	</item>
+</rdf:RDF>
\ No newline at end of file
diff --git a/tests/Documents/RSS_Example.xml b/tests/Documents/RSS_Example.xml
new file mode 100644
index 0000000..0d1fde8
--- /dev/null
+++ b/tests/Documents/RSS_Example.xml
@@ -0,0 +1,48 @@
+<?xml version="1.0"?>
+<!-- http://cyber.law.harvard.edu/rss/examples/rss2sample.xml -->
+<rss version="2.0">
+   <channel>
+      <title>Liftoff News</title>
+      <link>http://liftoff.msfc.nasa.gov/</link>
+      <description>Liftoff to Space Exploration.</description>
+      <language>en-us</language>
+      <pubDate>Tue, 10 Jun 2003 04:00:00 GMT</pubDate>
+
+      <lastBuildDate>Tue, 10 Jun 2003 09:41:01 GMT</lastBuildDate>
+      <docs>http://blogs.law.harvard.edu/tech/rss</docs>
+      <generator>Weblog Editor 2.0</generator>
+      <managingEditor>editor@example.com</managingEditor>
+      <webMaster>webmaster@example.com</webMaster>
+      <item>
+
+         <title>Star City</title>
+         <link>http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp</link>
+         <description>How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's &lt;a href="http://howe.iki.rssi.ru/GCTC/gctc_e.htm"&gt;Star City&lt;/a&gt;.</description>
+         <pubDate>Tue, 03 Jun 2003 09:39:21 GMT</pubDate>
+         <guid>http://liftoff.msfc.nasa.gov/2003/06/03.html#item573</guid>
+
+      </item>
+      <item>
+         <description>Sky watchers in Europe, Asia, and parts of Alaska and Canada will experience a &lt;a href="http://science.nasa.gov/headlines/y2003/30may_solareclipse.htm"&gt;partial eclipse of the Sun&lt;/a&gt; on Saturday, May 31st.</description>
+         <pubDate>Fri, 30 May 2003 11:06:42 GMT</pubDate>
+         <guid>http://liftoff.msfc.nasa.gov/2003/05/30.html#item572</guid>
+
+      </item>
+      <item>
+         <title>The Engine That Does More</title>
+         <link>http://liftoff.msfc.nasa.gov/news/2003/news-VASIMR.asp</link>
+         <description>Before man travels to Mars, NASA hopes to design new engines that will let us fly through the Solar System more quickly.  The proposed VASIMR engine would do that.</description>
+         <pubDate>Tue, 27 May 2003 08:37:32 GMT</pubDate>
+         <guid>http://liftoff.msfc.nasa.gov/2003/05/27.html#item571</guid>
+
+      </item>
+      <item>
+         <title>Astronauts' Dirty Laundry</title>
+         <link>http://liftoff.msfc.nasa.gov/news/2003/news-laundry.asp</link>
+         <description>Compared to earlier spacecraft, the International Space Station has many luxuries, but laundry facilities are not one of them.  Instead, astronauts have other options.</description>
+         <pubDate>Tue, 20 May 2003 08:56:02 GMT</pubDate>
+         <guid>http://liftoff.msfc.nasa.gov/2003/05/20.html#item570</guid>
+
+      </item>
+   </channel>
+</rss>
\ No newline at end of file
diff --git a/tests/Feeds/01-rss.js b/tests/Feeds/01-rss.js
index 8059ed6..0af3e1a 100644
--- a/tests/Feeds/01-rss.js
+++ b/tests/Feeds/01-rss.js
@@ -6,54 +6,7 @@ exports.options = {
 	}
 };
 exports.type = "rss";
-//http://cyber.law.harvard.edu/rss/examples/rss2sample.xml
-exports.html = '<?xml version="1.0"?>\
-<rss version="2.0">\
-   <channel>\
-      <title>Liftoff News</title>\
-      <link>http://liftoff.msfc.nasa.gov/</link>\
-      <description>Liftoff to Space Exploration.</description>\
-      <language>en-us</language>\
-      <pubDate>Tue, 10 Jun 2003 04:00:00 GMT</pubDate>\
-\
-      <lastBuildDate>Tue, 10 Jun 2003 09:41:01 GMT</lastBuildDate>\
-      <docs>http://blogs.law.harvard.edu/tech/rss</docs>\
-      <generator>Weblog Editor 2.0</generator>\
-      <managingEditor>editor@example.com</managingEditor>\
-      <webMaster>webmaster@example.com</webMaster>\
-      <item>\
-\
-         <title>Star City</title>\
-         <link>http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp</link>\
-         <description>How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia\'s &lt;a href="http://howe.iki.rssi.ru/GCTC/gctc_e.htm"&gt;Star City&lt;/a&gt;.</description>\
-         <pubDate>Tue, 03 Jun 2003 09:39:21 GMT</pubDate>\
-         <guid>http://liftoff.msfc.nasa.gov/2003/06/03.html#item573</guid>\
-\
-      </item>\
-      <item>\
-         <description>Sky watchers in Europe, Asia, and parts of Alaska and Canada will experience a &lt;a href="http://science.nasa.gov/headlines/y2003/30may_solareclipse.htm"&gt;partial eclipse of the Sun&lt;/a&gt; on Saturday, May 31st.</description>\
-         <pubDate>Fri, 30 May 2003 11:06:42 GMT</pubDate>\
-         <guid>http://liftoff.msfc.nasa.gov/2003/05/30.html#item572</guid>\
-\
-      </item>\
-      <item>\
-         <title>The Engine That Does More</title>\
-         <link>http://liftoff.msfc.nasa.gov/news/2003/news-VASIMR.asp</link>\
-         <description>Before man travels to Mars, NASA hopes to design new engines that will let us fly through the Solar System more quickly.  The proposed VASIMR engine would do that.</description>\
-         <pubDate>Tue, 27 May 2003 08:37:32 GMT</pubDate>\
-         <guid>http://liftoff.msfc.nasa.gov/2003/05/27.html#item571</guid>\
-\
-      </item>\
-      <item>\
-         <title>Astronauts\' Dirty Laundry</title>\
-         <link>http://liftoff.msfc.nasa.gov/news/2003/news-laundry.asp</link>\
-         <description>Compared to earlier spacecraft, the International Space Station has many luxuries, but laundry facilities are not one of them.  Instead, astronauts have other options.</description>\
-         <pubDate>Tue, 20 May 2003 08:56:02 GMT</pubDate>\
-         <guid>http://liftoff.msfc.nasa.gov/2003/05/20.html#item570</guid>\
-\
-      </item>\
-   </channel>\
-</rss>';
+exports.html = require("fs").readFileSync(__dirname+"/../Documents/RSS_Example.xml").toString();
 exports.expected = {
 	type: "rss",
 	id: "",
diff --git a/tests/Feeds/02-atom.js b/tests/Feeds/02-atom.js
index 48674c9..a9c5c47 100644
--- a/tests/Feeds/02-atom.js
+++ b/tests/Feeds/02-atom.js
@@ -6,33 +6,7 @@ exports.options = {
 	}
 };
 exports.type = "rss";
-//http://en.wikipedia.org/wiki/Atom_%28standard%29
-exports.html = '<?xml version="1.0" encoding="utf-8"?>\
-\
-<feed xmlns="http://www.w3.org/2005/Atom">\
-\
-	<title>Example Feed</title>\
-	<subtitle>A subtitle.</subtitle>\
-	<link href="http://example.org/feed/" rel="self" />\
-	<link href="http://example.org/" />\
-	<id>urn:uuid:60a76c80-d399-11d9-b91C-0003939e0af6</id>\
-	<updated>2003-12-13T18:30:02Z</updated>\
-	<author>\
-		<name>John Doe</name>\
-		<email>johndoe@example.com</email>\
-	</author>\
-\
-	<entry>\
-		<title>Atom-Powered Robots Run Amok</title>\
-		<link href="http://example.org/2003/12/13/atom03" />\
-		<link rel="alternate" type="text/html" href="http://example.org/2003/12/13/atom03.html"/>\
-		<link rel="edit" href="http://example.org/2003/12/13/atom03/edit"/>\
-		<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>\
-		<updated>2003-12-13T18:30:02Z</updated>\
-		<summary>Some text.</summary>\
-	</entry>\
-\
-</feed>';
+exports.html = require("fs").readFileSync(__dirname+"/../Documents/Atom_Example.xml").toString();
 exports.expected = {
 	type: "atom",
 	id: "urn:uuid:60a76c80-d399-11d9-b91C-0003939e0af6",
diff --git a/tests/Feeds/03-rdf.js b/tests/Feeds/03-rdf.js
index 96e4c89..90c05cb 100644
--- a/tests/Feeds/03-rdf.js
+++ b/tests/Feeds/03-rdf.js
@@ -6,7 +6,7 @@ exports.options = {
 	}
 };
 
-exports.html = '<?xml version="1.0" encoding="UTF-8"?>\n<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/" xmlns:ev="http://purl.org/rss/1.0/modules/event/" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:syn="http://purl.org/rss/1.0/modules/syndication/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:admin="http://webns.net/mvcb/">\n\t<channel rdf:about="http://sfbay.craigslist.org/ccc/">\n\t\t<title>craigslist | all community in SF bay area</title>\n\t\t<link>http://sfbay.craigslist.org/ccc/</link>\n\t\t<description/>\n\t\t<dc:language>en-us</dc:language>\n\t\t<dc:rights>Copyright 2011 craigslist, inc.</dc:rights>\n\t\t<dc:publisher>webmaster@craigslist.org</dc:publisher>\n\t\t<dc:creator>webmaster@craigslist.org</dc:creator>\n\t\t<dc:source>http://sfbay.craigslist.org/ccc//</dc:source>\n\t\t<dc:title>craigslist | all community in SF bay area</dc:title>\n\t\t<dc:type>Collection</dc:type>\n\t\t<syn:updateBase>2011-11-04T09:39:10-07:00</syn:updateBase>\n\t\t<syn:updateFrequency>4</syn:updateFrequency>\n\t\t<syn:updatePeriod>hourly</syn:updatePeriod>\n\t\t<items>\n\t\t\t<rdf:Seq>\n\t\t\t\t<rdf:li rdf:resource="http://sfbay.craigslist.org/sby/muc/2681301534.html"/>\n\t\t\t</rdf:Seq>\n\t\t</items>\n\t</channel>\n\t<item rdf:about="http://sfbay.craigslist.org/sby/muc/2681301534.html">\n\t\t<title><![CDATA[ Music Equipment Repair and Consignment ]]></title>\n\t\t<link>\nhttp://sfbay.craigslist.org/sby/muc/2681301534.html\n</link>\n\t\t<description><![CDATA[\nSan Jose Rock Shop offers musical instrument repair and consignment! (408) 215-2065<br> <br> We are pleased to announce our NEW LOCATION: 1199 N 5th st. San Jose, ca 95112. Please call ahead, by appointment only.<br> <br> Recently featured by Metro Newspaper in their 2011 Best of the Silicon Valley edition see it online here:<br> <a href="http://www.metroactive.com/best-of-silicon-valley/2011/music-nightlife/editor-picks.html" rel="nofollow">http://www.metroactive.com/best-of-silicon-valley/2011/music-nightlife/editor-picks.html</a><br> <br> Guitar Set up (acoustic and electronic) $40!<!-- END CLTAGS -->\n]]></description>\n\t\t<dc:date>2011-11-04T09:35:17-07:00</dc:date>\n\t\t<dc:language>en-us</dc:language>\n\t\t<dc:rights>Copyright 2011 craigslist, inc.</dc:rights>\n\t\t<dc:source>\nhttp://sfbay.craigslist.org/sby/muc/2681301534.html\n</dc:source>\n\t\t<dc:title><![CDATA[ Music Equipment Repair and Consignment ]]></dc:title>\n\t\t<dc:type>text</dc:type>\n\t\t<dcterms:issued>2011-11-04T09:35:17-07:00</dcterms:issued>\n\t</item>\n\t<item rdf:about="http://sfbay.craigslist.org/eby/rid/2685010755.html">\n\t\t<title><![CDATA[\nRide Offered - Oakland/BART to LA/SFV - TODAY 3PM 11/04 (oakland north / temescal)\n]]></title>\n\t\t<link>\nhttp://sfbay.craigslist.org/eby/rid/2685010755.html\n</link>\n\t\t<description><![CDATA[\nIm offering a lift for up to two people from Oakland (or near any BART station in the East Bay/580/880 Corridor, or San Jose/Morgan Hill, Gilroy) to the San Fernando Valley / Los Angeles area. Specifically, Im leaving from Oakland between 2:30 and 3:00pm (this is flexible, but if I leave too late my girlfriend will kill me), and heading to Woodland Hills via the 580, I-5, 405, and 101.<!-- END CLTAGS -->\n]]></description>\n\t\t<dc:date>2011-11-04T09:34:54-07:00</dc:date>\n\t\t<dc:language>en-us</dc:language>\n\t\t<dc:rights>Copyright 2011 craigslist, inc.</dc:rights>\n\t\t<dc:source>\nhttp://sfbay.craigslist.org/eby/rid/2685010755.html\n</dc:source>\n\t\t<dc:title><![CDATA[\nRide Offered - Oakland/BART to LA/SFV - TODAY 3PM 11/04 (oakland north / temescal)\n]]></dc:title>\n\t\t<dc:type>text</dc:type>\n\t\t<dcterms:issued>2011-11-04T09:34:54-07:00</dcterms:issued>\n\t</item>\n</rdf:RDF>';
+exports.html = require("fs").readFileSync(__dirname+"/../Documents/RDF_Example.xml").toString();
 
 exports.expected = {
   "type": "rdf:RDF",

From 34dd1b9389276a7d7279298d2e6d663276ba3098 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 6 Jan 2012 14:24:09 +0100
Subject: [PATCH 158/450] Added missing `end` event to Stream

---
 lib/Stream.js | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/lib/Stream.js b/lib/Stream.js
index f02abbf..c4443f9 100644
--- a/lib/Stream.js
+++ b/lib/Stream.js
@@ -50,6 +50,9 @@ cbs.prototype = {
 	onprocessinginstruction: function(name, data){
 		this.scope.emit("processinginstruction", name, data);
 	},
+	onend: function(){
+		this.scope.emit("end");
+	},
 	onreset: function(){
 		this.scope.emit("reset");
 	},

From fbc29a9b8c48a7c34c91651cd118e0cf291c3d85 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 6 Jan 2012 14:34:52 +0100
Subject: [PATCH 159/450] Added a test for Streams, made runtests async

---
 tests/00-runtests.js       |   54 +-
 tests/05-stream.js         |   53 ++
 tests/Documents/Basic.html |    1 +
 tests/HTML/01-basic.js     |    2 +-
 tests/Stream/01-basic.js   |   85 +++
 tests/Stream/02-RSS.js     | 1126 ++++++++++++++++++++++++++++++++++++
 6 files changed, 1295 insertions(+), 26 deletions(-)
 create mode 100644 tests/05-stream.js
 create mode 100644 tests/Documents/Basic.html
 create mode 100644 tests/Stream/01-basic.js
 create mode 100644 tests/Stream/02-RSS.js

diff --git a/tests/00-runtests.js b/tests/00-runtests.js
index af716b9..a8c8eb0 100644
--- a/tests/00-runtests.js
+++ b/tests/00-runtests.js
@@ -1,8 +1,8 @@
 var fs = require("fs");
 
-var testCount = 0,
-	failCount = 0,
-	totalTime = 0;
+var runCount = 0,
+	testCount = 0,
+	failCount = 0;
 
 function runTests(test){
 	var begin = Date.now();
@@ -14,15 +14,14 @@ function runTests(test){
 	}).forEach(function(file){
 		if(file === false) return;
 		var second = false,
-			failed = false,
-			start = Date.now(),
-			took = 0;
+			failed = false;
+		
+		runCount++;
 		
 		console.log("Testing:", file.name);
 		
 		test.test(file, function(err, dom){
 			if(err) console.log("Handler error:", err);
-			took += Date.now() - start;
 			
 			var expected = JSON.stringify(file.expected, null, 2),
 				got = JSON.stringify(dom, null, 2);
@@ -31,32 +30,37 @@ function runTests(test){
 				console.log("Expected", expected, "Got", got, second);
 			}
 			
-			start = Date.now();
-			
 			if(second){
-				testCount+=1;
-				if(failed) failCount+=1;
+				runCount--;
+				testCount++;
+				if(failed) failCount++;
 				
-				console.log("["+file.name+"]:",failed?"failed":"passed","(took",took,"ms)"); 
+				console.log("["+file.name+"]:", failed ? "failed":"passed"); 
 			}
 			else second = true;
 		});
 	});
-	var took = Date.now()-begin;
-	totalTime+=took;
-	console.log(test.dir,"took",took);
+	console.log("->", test.dir.slice(1, -1), "iterated");
 };
 
 //run all tests
-["./01-html.js", "./02-feed.js", "./03-events.js", "./04-dom_utils.js"]
-	.map(require)
-	.forEach(runTests);
+[
+ "./01-html.js",
+ "./02-feed.js",
+ "./03-events.js",
+ "./04-dom_utils.js",
+ "./05-stream.js"
+].map(require).forEach(runTests);
 
 //log the results
-console.log("Total time:", totalTime);
-console.log("Total tests:", testCount);
-console.log("Failed tests:", failCount);
-
-if(failCount !== 0){
-	throw Error("Encountered " + failCount + " errors!");
-}
\ No newline at end of file
+(function check(){
+	if(runCount !== 0){
+		return setTimeout(check, 50);
+	}
+	console.log("Total tests:", testCount);
+	console.log("Failed tests:", failCount);
+	
+	if(failCount !== 0){
+		throw Error("Encountered " + failCount + " errors!");
+	}
+})();
\ No newline at end of file
diff --git a/tests/05-stream.js b/tests/05-stream.js
new file mode 100644
index 0000000..92f45bb
--- /dev/null
+++ b/tests/05-stream.js
@@ -0,0 +1,53 @@
+var helper = require("./test-helper.js"),
+	Stream = require("..").Stream,
+	sliceArr = Array.prototype.slice,
+	fs = require("fs");
+
+exports.dir = "/Stream/";
+
+exports.test = function(test, cb){
+	var tokens = [],
+		stream = new Stream(test.options),
+		second = false;
+	
+	if(typeof Proxy !== "undefined"){
+		stream._events = Proxy.create({ get: function(a, name){
+			if(name === "end"){
+				return function(){
+					cb(null, tokens.splice(0));
+					if(!second){
+						second = true;
+						stream.parseComplete(fs.readFileSync(__dirname + test.file).toString());
+					}
+				};
+			}
+			if(helper.EVENTS.indexOf(name) !== -1) return function(){
+				tokens.push({
+					event: name,
+					data: sliceArr.apply(arguments)
+				});
+			}
+		}});
+	}
+	else {
+		stream._events = {
+			error: cb,
+			end: function(){
+				cb(null, tokens.splice(0));
+				if(!second){
+					second = true;
+					stream.parseComplete(fs.readFileSync(__dirname + test.file).toString());
+				}
+			}
+		};
+		helper.EVENTS.forEach(function(name){
+			stream._events[name] = function(){
+				tokens.push({
+					event: name,
+					data: sliceArr.apply(arguments)
+				});
+			}
+		});
+	}
+	fs.createReadStream(__dirname + test.file).pipe(stream);
+};
\ No newline at end of file
diff --git a/tests/Documents/Basic.html b/tests/Documents/Basic.html
new file mode 100644
index 0000000..65957a2
--- /dev/null
+++ b/tests/Documents/Basic.html
@@ -0,0 +1 @@
+<!DOCTYPE html><html><title>The Title</title><body>Hello world</body></html>
\ No newline at end of file
diff --git a/tests/HTML/01-basic.js b/tests/HTML/01-basic.js
index cba7edd..beaa970 100644
--- a/tests/HTML/01-basic.js
+++ b/tests/HTML/01-basic.js
@@ -3,7 +3,7 @@ exports.options = {
 	  handler: {}
 	, parser: {}
 };
-exports.html = "<!DOCTYPE html><html><title>The Title</title><body>Hello world</body></html>";
+exports.html = require("fs").readFileSync(__dirname + "/../Documents/Basic.html").toString();
 exports.expected = [
   {
     "name": "!DOCTYPE",
diff --git a/tests/Stream/01-basic.js b/tests/Stream/01-basic.js
new file mode 100644
index 0000000..eada9ab
--- /dev/null
+++ b/tests/Stream/01-basic.js
@@ -0,0 +1,85 @@
+exports.name = "Basic html";
+exports.options = {};
+
+exports.file = "/Documents/Basic.html";
+exports.expected = [
+  {
+    "event": "processinginstruction",
+    "data": [
+      "!DOCTYPE",
+      "!DOCTYPE html"
+    ]
+  },
+  {
+    "event": "opentagname",
+    "data": [
+      "html"
+    ]
+  },
+  {
+    "event": "opentag",
+    "data": [
+      "html",
+      {},
+      "tag"
+    ]
+  },
+  {
+    "event": "opentagname",
+    "data": [
+      "title"
+    ]
+  },
+  {
+    "event": "opentag",
+    "data": [
+      "title",
+      {},
+      "tag"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "The Title"
+    ]
+  },
+  {
+    "event": "closetag",
+    "data": [
+      "title"
+    ]
+  },
+  {
+    "event": "opentagname",
+    "data": [
+      "body"
+    ]
+  },
+  {
+    "event": "opentag",
+    "data": [
+      "body",
+      {},
+      "tag"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "Hello world"
+    ]
+  },
+  {
+    "event": "closetag",
+    "data": [
+      "body"
+    ]
+  },
+  {
+    "event": "closetag",
+    "data": [
+      "html"
+    ]
+  }
+];
\ No newline at end of file
diff --git a/tests/Stream/02-RSS.js b/tests/Stream/02-RSS.js
new file mode 100644
index 0000000..8accd12
--- /dev/null
+++ b/tests/Stream/02-RSS.js
@@ -0,0 +1,1126 @@
+exports.name = "RSS feed";
+exports.options = {};
+
+exports.file = "/Documents/RSS_Example.xml";
+exports.expected = [
+  {
+    "event": "processinginstruction",
+    "data": [
+      "?xml",
+      "?xml version=\"1.0\"?"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "\n"
+    ]
+  },
+  {
+    "event": "comment",
+    "data": [
+      " http://cyber.law.harvard.edu/rss/examples/rss2sample.xml "
+    ]
+  },
+  {
+    "event": "commentend",
+    "data": []
+  },
+  {
+    "event": "text",
+    "data": [
+      "\n"
+    ]
+  },
+  {
+    "event": "opentagname",
+    "data": [
+      "rss"
+    ]
+  },
+  {
+    "event": "opentag",
+    "data": [
+      "rss",
+      {
+        "version": "2.0"
+      },
+      "tag"
+    ]
+  },
+  {
+    "event": "attribute",
+    "data": [
+      "version",
+      "2.0"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "\n   "
+    ]
+  },
+  {
+    "event": "opentagname",
+    "data": [
+      "channel"
+    ]
+  },
+  {
+    "event": "opentag",
+    "data": [
+      "channel",
+      {},
+      "tag"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "\n      "
+    ]
+  },
+  {
+    "event": "opentagname",
+    "data": [
+      "title"
+    ]
+  },
+  {
+    "event": "opentag",
+    "data": [
+      "title",
+      {},
+      "tag"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "Liftoff News"
+    ]
+  },
+  {
+    "event": "closetag",
+    "data": [
+      "title"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "\n      "
+    ]
+  },
+  {
+    "event": "opentagname",
+    "data": [
+      "link"
+    ]
+  },
+  {
+    "event": "opentag",
+    "data": [
+      "link",
+      {},
+      "tag"
+    ]
+  },
+  {
+    "event": "closetag",
+    "data": [
+      "link"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "http://liftoff.msfc.nasa.gov/"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "\n      "
+    ]
+  },
+  {
+    "event": "opentagname",
+    "data": [
+      "description"
+    ]
+  },
+  {
+    "event": "opentag",
+    "data": [
+      "description",
+      {},
+      "tag"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "Liftoff to Space Exploration."
+    ]
+  },
+  {
+    "event": "closetag",
+    "data": [
+      "description"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "\n      "
+    ]
+  },
+  {
+    "event": "opentagname",
+    "data": [
+      "language"
+    ]
+  },
+  {
+    "event": "opentag",
+    "data": [
+      "language",
+      {},
+      "tag"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "en-us"
+    ]
+  },
+  {
+    "event": "closetag",
+    "data": [
+      "language"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "\n      "
+    ]
+  },
+  {
+    "event": "opentagname",
+    "data": [
+      "pubDate"
+    ]
+  },
+  {
+    "event": "opentag",
+    "data": [
+      "pubDate",
+      {},
+      "tag"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "Tue, 10 Jun 2003 04:00:00 GMT"
+    ]
+  },
+  {
+    "event": "closetag",
+    "data": [
+      "pubDate"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "\n\n      "
+    ]
+  },
+  {
+    "event": "opentagname",
+    "data": [
+      "lastBuildDate"
+    ]
+  },
+  {
+    "event": "opentag",
+    "data": [
+      "lastBuildDate",
+      {},
+      "tag"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "Tue, 10 Jun 2003 09:41:01 GMT"
+    ]
+  },
+  {
+    "event": "closetag",
+    "data": [
+      "lastBuildDate"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "\n      "
+    ]
+  },
+  {
+    "event": "opentagname",
+    "data": [
+      "docs"
+    ]
+  },
+  {
+    "event": "opentag",
+    "data": [
+      "docs",
+      {},
+      "tag"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "http://blogs.law.harvard.edu/tech/rss"
+    ]
+  },
+  {
+    "event": "closetag",
+    "data": [
+      "docs"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "\n      "
+    ]
+  },
+  {
+    "event": "opentagname",
+    "data": [
+      "generator"
+    ]
+  },
+  {
+    "event": "opentag",
+    "data": [
+      "generator",
+      {},
+      "tag"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "Weblog Editor 2.0"
+    ]
+  },
+  {
+    "event": "closetag",
+    "data": [
+      "generator"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "\n      "
+    ]
+  },
+  {
+    "event": "opentagname",
+    "data": [
+      "managingEditor"
+    ]
+  },
+  {
+    "event": "opentag",
+    "data": [
+      "managingEditor",
+      {},
+      "tag"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "editor@example.com"
+    ]
+  },
+  {
+    "event": "closetag",
+    "data": [
+      "managingEditor"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "\n      "
+    ]
+  },
+  {
+    "event": "opentagname",
+    "data": [
+      "webMaster"
+    ]
+  },
+  {
+    "event": "opentag",
+    "data": [
+      "webMaster",
+      {},
+      "tag"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "webmaster@example.com"
+    ]
+  },
+  {
+    "event": "closetag",
+    "data": [
+      "webMaster"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "\n      "
+    ]
+  },
+  {
+    "event": "opentagname",
+    "data": [
+      "item"
+    ]
+  },
+  {
+    "event": "opentag",
+    "data": [
+      "item",
+      {},
+      "tag"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "\n\n         "
+    ]
+  },
+  {
+    "event": "opentagname",
+    "data": [
+      "title"
+    ]
+  },
+  {
+    "event": "opentag",
+    "data": [
+      "title",
+      {},
+      "tag"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "Star City"
+    ]
+  },
+  {
+    "event": "closetag",
+    "data": [
+      "title"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "\n         "
+    ]
+  },
+  {
+    "event": "opentagname",
+    "data": [
+      "link"
+    ]
+  },
+  {
+    "event": "opentag",
+    "data": [
+      "link",
+      {},
+      "tag"
+    ]
+  },
+  {
+    "event": "closetag",
+    "data": [
+      "link"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "\n         "
+    ]
+  },
+  {
+    "event": "opentagname",
+    "data": [
+      "description"
+    ]
+  },
+  {
+    "event": "opentag",
+    "data": [
+      "description",
+      {},
+      "tag"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's &lt;a href=\"http://howe.iki.rssi.ru/GCTC/gctc_e.htm\"&gt;Star City&lt;/a&gt;."
+    ]
+  },
+  {
+    "event": "closetag",
+    "data": [
+      "description"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "\n         "
+    ]
+  },
+  {
+    "event": "opentagname",
+    "data": [
+      "pubDate"
+    ]
+  },
+  {
+    "event": "opentag",
+    "data": [
+      "pubDate",
+      {},
+      "tag"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "Tue, 03 Jun 2003 09:39:21 GMT"
+    ]
+  },
+  {
+    "event": "closetag",
+    "data": [
+      "pubDate"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "\n         "
+    ]
+  },
+  {
+    "event": "opentagname",
+    "data": [
+      "guid"
+    ]
+  },
+  {
+    "event": "opentag",
+    "data": [
+      "guid",
+      {},
+      "tag"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "http://liftoff.msfc.nasa.gov/2003/06/03.html#item573"
+    ]
+  },
+  {
+    "event": "closetag",
+    "data": [
+      "guid"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "\n\n      "
+    ]
+  },
+  {
+    "event": "closetag",
+    "data": [
+      "item"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "\n      "
+    ]
+  },
+  {
+    "event": "opentagname",
+    "data": [
+      "item"
+    ]
+  },
+  {
+    "event": "opentag",
+    "data": [
+      "item",
+      {},
+      "tag"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "\n         "
+    ]
+  },
+  {
+    "event": "opentagname",
+    "data": [
+      "description"
+    ]
+  },
+  {
+    "event": "opentag",
+    "data": [
+      "description",
+      {},
+      "tag"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "Sky watchers in Europe, Asia, and parts of Alaska and Canada will experience a &lt;a href=\"http://science.nasa.gov/headlines/y2003/30may_solareclipse.htm\"&gt;partial eclipse of the Sun&lt;/a&gt; on Saturday, May 31st."
+    ]
+  },
+  {
+    "event": "closetag",
+    "data": [
+      "description"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "\n         "
+    ]
+  },
+  {
+    "event": "opentagname",
+    "data": [
+      "pubDate"
+    ]
+  },
+  {
+    "event": "opentag",
+    "data": [
+      "pubDate",
+      {},
+      "tag"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "Fri, 30 May 2003 11:06:42 GMT"
+    ]
+  },
+  {
+    "event": "closetag",
+    "data": [
+      "pubDate"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "\n         "
+    ]
+  },
+  {
+    "event": "opentagname",
+    "data": [
+      "guid"
+    ]
+  },
+  {
+    "event": "opentag",
+    "data": [
+      "guid",
+      {},
+      "tag"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "http://liftoff.msfc.nasa.gov/2003/05/30.html#item572"
+    ]
+  },
+  {
+    "event": "closetag",
+    "data": [
+      "guid"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "\n\n      "
+    ]
+  },
+  {
+    "event": "closetag",
+    "data": [
+      "item"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "\n      "
+    ]
+  },
+  {
+    "event": "opentagname",
+    "data": [
+      "item"
+    ]
+  },
+  {
+    "event": "opentag",
+    "data": [
+      "item",
+      {},
+      "tag"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "\n         "
+    ]
+  },
+  {
+    "event": "opentagname",
+    "data": [
+      "title"
+    ]
+  },
+  {
+    "event": "opentag",
+    "data": [
+      "title",
+      {},
+      "tag"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "The Engine That Does More"
+    ]
+  },
+  {
+    "event": "closetag",
+    "data": [
+      "title"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "\n         "
+    ]
+  },
+  {
+    "event": "opentagname",
+    "data": [
+      "link"
+    ]
+  },
+  {
+    "event": "opentag",
+    "data": [
+      "link",
+      {},
+      "tag"
+    ]
+  },
+  {
+    "event": "closetag",
+    "data": [
+      "link"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "http://liftoff.msfc.nasa.gov/news/2003/news-VASIMR.asp"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "\n         "
+    ]
+  },
+  {
+    "event": "opentagname",
+    "data": [
+      "description"
+    ]
+  },
+  {
+    "event": "opentag",
+    "data": [
+      "description",
+      {},
+      "tag"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "Before man travels to Mars, NASA hopes to design new engines that will let us fly through the Solar System more quickly.  The proposed VASIMR engine would do that."
+    ]
+  },
+  {
+    "event": "closetag",
+    "data": [
+      "description"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "\n         "
+    ]
+  },
+  {
+    "event": "opentagname",
+    "data": [
+      "pubDate"
+    ]
+  },
+  {
+    "event": "opentag",
+    "data": [
+      "pubDate",
+      {},
+      "tag"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "Tue, 27 May 2003 08:37:32 GMT"
+    ]
+  },
+  {
+    "event": "closetag",
+    "data": [
+      "pubDate"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "\n         "
+    ]
+  },
+  {
+    "event": "opentagname",
+    "data": [
+      "guid"
+    ]
+  },
+  {
+    "event": "opentag",
+    "data": [
+      "guid",
+      {},
+      "tag"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "http://liftoff.msfc.nasa.gov/2003/05/27.html#item571"
+    ]
+  },
+  {
+    "event": "closetag",
+    "data": [
+      "guid"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "\n\n      "
+    ]
+  },
+  {
+    "event": "closetag",
+    "data": [
+      "item"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "\n      "
+    ]
+  },
+  {
+    "event": "opentagname",
+    "data": [
+      "item"
+    ]
+  },
+  {
+    "event": "opentag",
+    "data": [
+      "item",
+      {},
+      "tag"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "\n         "
+    ]
+  },
+  {
+    "event": "opentagname",
+    "data": [
+      "title"
+    ]
+  },
+  {
+    "event": "opentag",
+    "data": [
+      "title",
+      {},
+      "tag"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "Astronauts' Dirty Laundry"
+    ]
+  },
+  {
+    "event": "closetag",
+    "data": [
+      "title"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "\n         "
+    ]
+  },
+  {
+    "event": "opentagname",
+    "data": [
+      "link"
+    ]
+  },
+  {
+    "event": "opentag",
+    "data": [
+      "link",
+      {},
+      "tag"
+    ]
+  },
+  {
+    "event": "closetag",
+    "data": [
+      "link"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "http://liftoff.msfc.nasa.gov/news/2003/news-laundry.asp"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "\n         "
+    ]
+  },
+  {
+    "event": "opentagname",
+    "data": [
+      "description"
+    ]
+  },
+  {
+    "event": "opentag",
+    "data": [
+      "description",
+      {},
+      "tag"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "Compared to earlier spacecraft, the International Space Station has many luxuries, but laundry facilities are not one of them.  Instead, astronauts have other options."
+    ]
+  },
+  {
+    "event": "closetag",
+    "data": [
+      "description"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "\n         "
+    ]
+  },
+  {
+    "event": "opentagname",
+    "data": [
+      "pubDate"
+    ]
+  },
+  {
+    "event": "opentag",
+    "data": [
+      "pubDate",
+      {},
+      "tag"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "Tue, 20 May 2003 08:56:02 GMT"
+    ]
+  },
+  {
+    "event": "closetag",
+    "data": [
+      "pubDate"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "\n         "
+    ]
+  },
+  {
+    "event": "opentagname",
+    "data": [
+      "guid"
+    ]
+  },
+  {
+    "event": "opentag",
+    "data": [
+      "guid",
+      {},
+      "tag"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "http://liftoff.msfc.nasa.gov/2003/05/20.html#item570"
+    ]
+  },
+  {
+    "event": "closetag",
+    "data": [
+      "guid"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "\n\n      "
+    ]
+  },
+  {
+    "event": "closetag",
+    "data": [
+      "item"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "\n   "
+    ]
+  },
+  {
+    "event": "closetag",
+    "data": [
+      "channel"
+    ]
+  },
+  {
+    "event": "text",
+    "data": [
+      "\n"
+    ]
+  },
+  {
+    "event": "closetag",
+    "data": [
+      "rss"
+    ]
+  }
+];
\ No newline at end of file

From 98098ed5880dc651f42f6d83e6152048c8303fe6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 7 Jan 2012 22:33:50 +0100
Subject: [PATCH 160/450] Changed comments

---
 lib/ElementType.js | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/lib/ElementType.js b/lib/ElementType.js
index b2c8a10..618465e 100644
--- a/lib/ElementType.js
+++ b/lib/ElementType.js
@@ -1,10 +1,10 @@
 //Types of elements found in the DOM
 module.exports = {
-	Text: "text", //Plain text
-	Directive: "directive", //Special tag <!...>
-	Comment: "comment", //Special tag <!--...-->
-	Script: "script", //Special tag <script>...</script>
-	Style: "style", //Special tag <style>...</style>
-	Tag: "tag", //Any tag that isn't special
-	CDATA: "cdata"
+	Text: "text", //Text
+	Directive: "directive", //<? ... ?>
+	Comment: "comment", //<!-- ... -->
+	Script: "script", //<script> tags
+	Style: "style", //<style> tags
+	Tag: "tag", //Any tag
+	CDATA: "cdata" //<![CDATA[ ... ]]>
 };
\ No newline at end of file

From 4a3a310e2ac5e3cfb4cd327d55826c701b4cb6d9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 7 Jan 2012 22:34:23 +0100
Subject: [PATCH 161/450] Introduced TagValues in Parser

To simplify logic inside #_parseTags
---
 lib/Parser.js | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index 741cc64..0916fab 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -100,6 +100,11 @@ SpecialTags[ElementType.Script] = 2; //2^1
 SpecialTags[ElementType.Comment] = 4; //2^2
 SpecialTags[ElementType.CDATA] = 8; //2^3
 
+var TagValues = {
+	style: 1,
+	script: 2
+};
+
 //Parses through HTML text and returns an array of found elements
 Parser.prototype._parseTags = function(force){
 	var buffer = this._buffer, current = 0;
@@ -150,13 +155,9 @@ Parser.prototype._parseTags = function(force){
 				elementData = this._parseTagName(elementData.substr(1));
 				if(this._contentFlags !== 0){
 					//if it's a closing tag, remove the flag
-					if(this._contentFlags === SpecialTags[ElementType.Script] && elementData === "script"){
-						//remove the script flag
-						this._contentFlags -= SpecialTags[ElementType.Script];
-					}
-					else if(this._contentFlags === SpecialTags[ElementType.Style] && elementData === "style"){
-						//remove the style flag
-						this._contentFlags -= SpecialTags[ElementType.Style];
+					if(this._contentFlags >= TagValues[elementData]){
+						//remove the flag
+						this._contentFlags -= TagValues[elementData];
 					}
 					else {
 						this._writeSpecial(rawData, lastTagSep);

From 29cac6debbd37f4a5862651880ea67b80afaa7e2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 7 Jan 2012 23:52:16 +0100
Subject: [PATCH 162/450] Removed proxy flag from tests

Failed on node 0.4
---
 package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package.json b/package.json
index 4b1ca20..46266d9 100644
--- a/package.json
+++ b/package.json
@@ -18,7 +18,7 @@
 	},
 	"main": "./lib/",
 	"scripts": {
-		"test": "node --harmony_proxies tests/00-runtests.js"
+		"test": "node tests/00-runtests.js"
 	},
 	"engines": "node >= 0.3.0",
 	"licenses": [{

From 0c678c30e2f09912d6e22828685a754eb431786c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 7 Jan 2012 23:57:26 +0100
Subject: [PATCH 163/450] Fix for `stream` module in node 0.4.x

---
 lib/Stream.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/Stream.js b/lib/Stream.js
index c4443f9..6b03e2c 100644
--- a/lib/Stream.js
+++ b/lib/Stream.js
@@ -4,7 +4,7 @@ var Stream = function(options){
 	Parser.call(this, new cbs(this), options);
 };
 
-require("util").inherits(Stream, require("stream"));
+require("util").inherits(Stream, require("stream").Stream);
 
 //util.inherits would overwrite the prototype when called twice,
 //so we need a different approach

From 74de18a4bee677fe40136872a195eec9eeef7cde Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Mon, 9 Jan 2012 15:25:34 +0100
Subject: [PATCH 164/450] Renamed DefaultHandler to DomHandler

The new name expresses better what it does.

The old name is still available via
`require("htmlparser2").DefaultHandler`, so support for code written
for `htmlparser` didn't break.
---
 lib/{DefaultHandler.js => DomHandler.js} | 28 ++++++++++++------------
 lib/FeedHandler.js                       |  8 +++----
 lib/index.js                             | 26 +++++++++++++++-------
 tests/01-html.js                         |  2 +-
 tests/04-dom_utils.js                    |  2 +-
 5 files changed, 38 insertions(+), 28 deletions(-)
 rename lib/{DefaultHandler.js => DomHandler.js} (75%)

diff --git a/lib/DefaultHandler.js b/lib/DomHandler.js
similarity index 75%
rename from lib/DefaultHandler.js
rename to lib/DomHandler.js
index bcb1c05..2e53103 100644
--- a/lib/DefaultHandler.js
+++ b/lib/DomHandler.js
@@ -1,6 +1,6 @@
 var ElementType = require("./ElementType.js");
 
-function DefaultHandler(callback, options){
+function DomHandler(callback, options){
 	this.dom = [];
 	this._done = false;
 	this._inSpecialTag = false;
@@ -13,22 +13,22 @@ function DefaultHandler(callback, options){
 }
 
 //default options
-DefaultHandler.prototype._options = {
+DomHandler.prototype._options = {
 	ignoreWhitespace: false //Keep whitespace-only text nodes
 };
 
 //Resets the handler back to starting state
-DefaultHandler.prototype.onreset = DefaultHandler;
+DomHandler.prototype.onreset = DomHandler;
 
 //Signals the handler that parsing is done
-DefaultHandler.prototype.onend = function(){
+DomHandler.prototype.onend = function(){
 	if(this._done) return;
 	this._done = true;
 	this._handleCallback(null);
 };
 
-DefaultHandler.prototype._handleCallback = 
-DefaultHandler.prototype.onerror = function(error){
+DomHandler.prototype._handleCallback = 
+DomHandler.prototype.onerror = function(error){
 	if(typeof this._callback === "function"){
 		this._callback(error, this.dom);
 	} else {
@@ -36,11 +36,11 @@ DefaultHandler.prototype.onerror = function(error){
 	}
 };
 
-DefaultHandler.prototype.onclosetag = function(name){
+DomHandler.prototype.onclosetag = function(name){
 	if(this._tagStack.pop().name !== name) this._handleCallback(Error("tagname didn't match!"));
 };
 
-DefaultHandler.prototype._addDomElement = function(element){
+DomHandler.prototype._addDomElement = function(element){
 	var lastChild,
 		lastTag = this._tagStack[this._tagStack.length - 1];
 	
@@ -61,7 +61,7 @@ DefaultHandler.prototype._addDomElement = function(element){
 	}
 };
 
-DefaultHandler.prototype.onopentag = function(name, attribs, type){
+DomHandler.prototype.onopentag = function(name, attribs, type){
 	if(type === ElementType.Script || type === ElementType.Style){
 		this._inSpecialTag = true;
 	}
@@ -78,7 +78,7 @@ DefaultHandler.prototype.onopentag = function(name, attribs, type){
 	this._tagStack.push(element);
 };
 
-DefaultHandler.prototype.ontext = function(data){
+DomHandler.prototype.ontext = function(data){
 	if(this._options.ignoreWhitespace && data.trim() === "") return;
 	this._addDomElement({
 		data: data,
@@ -86,7 +86,7 @@ DefaultHandler.prototype.ontext = function(data){
 	});
 };
 
-DefaultHandler.prototype.oncomment = function(data){
+DomHandler.prototype.oncomment = function(data){
 	var lastTag = this._tagStack[this._tagStack.length - 1];
 
 	if(lastTag && lastTag.type === ElementType.Comment){
@@ -106,11 +106,11 @@ DefaultHandler.prototype.oncomment = function(data){
 	this._tagStack.push(element);
 };
 
-DefaultHandler.prototype.oncommentend = function(){
+DomHandler.prototype.oncommentend = function(){
 	this._tagStack.pop();
 };
 
-DefaultHandler.prototype.onprocessinginstruction = function(name, data){
+DomHandler.prototype.onprocessinginstruction = function(name, data){
 	this._addDomElement({
 		name: name,
 		data: data,
@@ -118,4 +118,4 @@ DefaultHandler.prototype.onprocessinginstruction = function(name, data){
 	});
 };
 
-module.exports = DefaultHandler;
\ No newline at end of file
+module.exports = DomHandler;
\ No newline at end of file
diff --git a/lib/FeedHandler.js b/lib/FeedHandler.js
index 8df0c67..8c2720e 100644
--- a/lib/FeedHandler.js
+++ b/lib/FeedHandler.js
@@ -1,4 +1,4 @@
-var DefaultHandler = require("./DefaultHandler.js"),
+var DomHandler = require("./DomHandler.js"),
 	DomUtils = require("./DomUtils.js");
 
 //TODO: make this a streamable handler
@@ -6,9 +6,9 @@ function FeedHandler(callback){
 	this.init(callback, { ignoreWhitespace: true });
 }
 
-require("util").inherits(FeedHandler, DefaultHandler);
+require("util").inherits(FeedHandler, DomHandler);
 
-FeedHandler.prototype.init = DefaultHandler;
+FeedHandler.prototype.init = DomHandler;
 
 function getElements(what, where, one, recurse){
 	if(one) return DomUtils.getElementsByTagName(what, where, recurse, 1)[0];
@@ -100,7 +100,7 @@ FeedHandler.prototype.onend = function() {
 		}
 	}
 	this.dom = feed;
-	DefaultHandler.prototype._handleCallback.call(this);
+	DomHandler.prototype._handleCallback.call(this);
 };
 
 module.exports = FeedHandler;
\ No newline at end of file
diff --git a/lib/index.js b/lib/index.js
index df40916..46add8d 100644
--- a/lib/index.js
+++ b/lib/index.js
@@ -1,26 +1,36 @@
+var defineProp = Object.defineProperty;
+
 module.exports = {
 	get Parser(){
-		Object.defineProperty(this, "Parser", {value:require("./Parser.js")});
+		defineProp(this, "Parser", {value:require("./Parser.js")});
 		return this.Parser;
 	},
-	get DefaultHandler(){
-		Object.defineProperty(this, "DefaultHandler", {value:require("./DefaultHandler.js")});
-		return this.DefaultHandler;
+	get DomHandler(){
+		defineProp(this, "DomHandler", {value:require("./DomHandler.js")});
+		return this.DomHandler;
 	},
 	get FeedHandler(){
-		Object.defineProperty(this, "FeedHandler", {value:require("./FeedHandler.js")});
+		defineProp(this, "FeedHandler", {value:require("./FeedHandler.js")});
 		return this.FeedHandler;
 	},
 	get ElementType(){
-		Object.defineProperty(this, "ElementType", {value:require("./ElementType.js")});
+		defineProp(this, "ElementType", {value:require("./ElementType.js")});
 		return this.ElementType;
 	},
 	get Stream(){
-		Object.defineProperty(this, "Stream", {value:require("./Stream.js")});
+		defineProp(this, "Stream", {value:require("./Stream.js")});
 		return this.Stream;
 	},
 	get DomUtils(){
-		Object.defineProperty(this, "DomUtils", {value:require("./DomUtils.js")});
+		defineProp(this, "DomUtils", {value:require("./DomUtils.js")});
 		return this.DomUtils;
+	},
+	get DefaultHandler(){
+		defineProp(this, "DefaultHandler", {value: this.DomHandler});
+		return this.DefaultHandler;
+	},
+	get RssHandler(){
+		defineProp(this, "RssHandler", {value: this.FeedHandler});
+		return this.FeedHandler;
 	}
 }
\ No newline at end of file
diff --git a/tests/01-html.js b/tests/01-html.js
index f20d59c..578669d 100644
--- a/tests/01-html.js
+++ b/tests/01-html.js
@@ -1,7 +1,7 @@
 //Runs tests for HTML
 
 var helper = require("./test-helper.js"),
-	DefaultHandler = require("../lib/DefaultHandler.js");
+	DefaultHandler = require("../lib/DomHandler.js");
 
 exports.dir = "/HTML/";
 
diff --git a/tests/04-dom_utils.js b/tests/04-dom_utils.js
index 1c8809b..0d14c6c 100644
--- a/tests/04-dom_utils.js
+++ b/tests/04-dom_utils.js
@@ -1,5 +1,5 @@
 //generate a dom
-var handler = new (require("../lib/DefaultHandler.js"))();
+var handler = new (require("../lib/DomHandler.js"))();
 
 (new (require("../lib/Parser.js"))(handler)).parseComplete(
 	Array(21).join("<?xml><tag1 id='asdf'> <script>text</script> <!-- comment --> <tag2> text </tag1>")

From c3259d3bb7e4fb4898538e31cd5f0c537de6d6e6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Mon, 9 Jan 2012 15:27:37 +0100
Subject: [PATCH 165/450] Use the new name inside the readme

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index d6697af..1a4fb12 100644
--- a/README.md
+++ b/README.md
@@ -23,7 +23,7 @@ The support for location data and verbose output was removed a couple of version
 ```javascript
 var htmlparser = require("htmlparser");
 var rawHtml = "Xyz <script language= javascript>var foo = '<<bar>>';< /  script><!--<!-- Waah! -- -->";
-var handler = new htmlparser.DefaultHandler(function (error, dom) {
+var handler = new htmlparser.DomHandler(function (error, dom) {
     if (error)
     	[...do something for errors...]
     else

From 6b61835fcae3716bb3d0333463ed04716df013fc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Mon, 9 Jan 2012 15:53:22 +0100
Subject: [PATCH 166/450] Updated readme, changed examples from DomHandler to
 just the parser

---
 README.md | 75 ++++++++++++++++++++++++-------------------------------
 1 file changed, 32 insertions(+), 43 deletions(-)

diff --git a/README.md b/README.md
index 1a4fb12..ce08800 100644
--- a/README.md
+++ b/README.md
@@ -12,59 +12,52 @@ This project is linked to [Travis CI](http://travis-ci.org/). The latest builds
 [![Build Status](https://secure.travis-ci.org/FB55/node-htmlparser.png)](http://travis-ci.org/FB55/node-htmlparser)
 
 ##How is this different from [node-htmlparser](https://github.com/tautologistics/node-htmlparser)?
-This is a fork of the project above. The main difference is that this is just intended to be used with node. Besides, the code is much better structured, has less duplications and is remarkably faster than the original. 
+This is a fork of the project above. The main difference is that this is just intended to be used with node (it runs on other platforms using [browserify](https://github.com/substack/node-browserify)). Besides, the code is much better structured, has less duplications and is remarkably faster than the original. 
 
-Besides, the parser now provides the interface of [sax.js](https://github.com/isaacs/sax-js) (originally intended for my readability port [readabilitySAX](https://github.com/fb55/readabilitysax)). I also fixed a couple of bugs & included some pull requests for the original project (eg. [RDF feed support](https://github.com/tautologistics/node-htmlparser/pull/35)).
+The parser now provides a callback interface close to [sax.js](https://github.com/isaacs/sax-js) (originally intended for [readabilitySAX](https://github.com/fb55/readabilitysax)). I also fixed a couple of bugs & included some pull requests for the original project (eg. [RDF feed support](https://github.com/tautologistics/node-htmlparser/pull/35)).
 
-The support for location data and verbose output was removed a couple of versions ago. It's still available in the [verbose branch](https://github.com/FB55/node-htmlparser/tree/verbose) (if you really need it, for whatever reason that may be).
+The support for location data and verbose output was removed a couple of versions ago. It's still available in the [verbose branch](https://github.com/FB55/node-htmlparser/tree/verbose) (if you really need it, for whatever reason that may be). 
+
+The `DefaultHandler` and the `RssHandler` were renamed to clarify their purpose (to `DomHandler` and `FeedHandler`). The old names are still available when requiring `htmlparser2`, so your code should work as expected.
 
 ##Usage
 
 ```javascript
-var htmlparser = require("htmlparser");
-var rawHtml = "Xyz <script language= javascript>var foo = '<<bar>>';< /  script><!--<!-- Waah! -- -->";
-var handler = new htmlparser.DomHandler(function (error, dom) {
-    if (error)
-    	[...do something for errors...]
-    else
-    	[...parsing done, do something...]
-        console.log(dom);
+var htmlparser = require("htmlparser2");
+var parser = new htmlparser.Parser({
+	onopentag: function(name, attribs){
+		if(name === "script" && attribs["language"] === "javascript"){
+			console.log("JS! Hooray!");
+		}
+	},
+	ontext: function(text){
+		console.log(text);
+	},
+	onclosetag: function(tagname){
+		if(name === "script"){
+			console.log("That's it?!");
+		}
+	}
 });
-var parser = new htmlparser.Parser(handler);
-parser.write(rawHtml);
+parser.write("Xyz <script language= javascript>var foo = '<<bar>>';< /  script>");
 parser.done();
 ```
 
 Output:
 
 ```javascript
-[{
-    data: 'Xyz ',
-    type: 'text'
-}, {
-    type: 'script',
-    name: 'script',
-    attribs: {
-    	language: 'javascript'
-    },
-    children: [{
-    	data: 'var foo = \'<bar>\';<',
-    	type: 'text'
-    }]
-}, {
-    data: '<!-- Waah! -- ',
-    type: 'comment'
-}]
+--> Xyz 
+JS! Hooray!
+--> var foo = '<<bar>>';
+That's it?!
 ```
 
-##Streaming To Parser
-```javascript
-while (...) {
-    ...
-    parser.write(chunk);
-}
-parser.done();
-```
+Read more about the parser in the [wiki](https://github.com/FB55/node-htmlparser/wiki/Parser-options).
+
+##Get a DOM
+The `DomHandler` (known as `DefaultHandler` in the original `htmlparser` module) produces a DOM (document object model) that may be manipulated using the `DomUtils` helper.
+
+Read more about the DomHandler in the [wiki](https://github.com/FB55/node-htmlparser/wiki/DomHandler).
 
 ##Parsing RSS/RDF/Atom Feeds
 
@@ -72,8 +65,4 @@ parser.done();
 new htmlparser.FeedHandler(function (error, feed) {
     ...
 });
-```
-
-##Further reading
-* [Parser options](https://github.com/FB55/node-htmlparser/wiki/Parser-options)
-* [DefaultHandler options](https://github.com/FB55/node-htmlparser/wiki/DefaultHandler-options)
\ No newline at end of file
+```
\ No newline at end of file

From 17a6f9edbaf70be9f88c1de5d4ab54a13c4d8c9f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Mon, 9 Jan 2012 15:55:12 +0100
Subject: [PATCH 167/450] Removed information about running test

moved travis badge
---
 README.md | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/README.md b/README.md
index ce08800..c9728c0 100644
--- a/README.md
+++ b/README.md
@@ -1,16 +1,10 @@
-#htmlparser2
+#htmlparser2 [![Build Status](https://secure.travis-ci.org/FB55/node-htmlparser.png)](http://travis-ci.org/FB55/node-htmlparser)
+
 A forgiving HTML/XML/RSS parser written in JS for NodeJS. The parser can handle streams (chunked data) and supports custom handlers for writing custom DOMs/output.
 
 ##Installing
 	npm install htmlparser2
 
-##Running Tests
-	node tests/00-runtests.js
-
-This project is linked to [Travis CI](http://travis-ci.org/). The latest builds status is:
-
-[![Build Status](https://secure.travis-ci.org/FB55/node-htmlparser.png)](http://travis-ci.org/FB55/node-htmlparser)
-
 ##How is this different from [node-htmlparser](https://github.com/tautologistics/node-htmlparser)?
 This is a fork of the project above. The main difference is that this is just intended to be used with node (it runs on other platforms using [browserify](https://github.com/substack/node-browserify)). Besides, the code is much better structured, has less duplications and is remarkably faster than the original. 
 

From ebaadb716c2fd812de010e9b5265e74b9c51f119 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Mon, 9 Jan 2012 16:03:45 +0100
Subject: [PATCH 168/450] Fixed bug in example

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index c9728c0..ce0064a 100644
--- a/README.md
+++ b/README.md
@@ -25,10 +25,10 @@ var parser = new htmlparser.Parser({
 		}
 	},
 	ontext: function(text){
-		console.log(text);
+		console.log("-->", text);
 	},
 	onclosetag: function(tagname){
-		if(name === "script"){
+		if(tagname === "script"){
 			console.log("That's it?!");
 		}
 	}

From 6960af42d843a386dd22f6bed9aa6040a190adc8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 13 Jan 2012 17:17:15 +0100
Subject: [PATCH 169/450] Changed compare

---
 lib/FeedHandler.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/FeedHandler.js b/lib/FeedHandler.js
index 8c2720e..9a9b019 100644
--- a/lib/FeedHandler.js
+++ b/lib/FeedHandler.js
@@ -16,7 +16,7 @@ function getElements(what, where, one, recurse){
 }
 function fetch(what, where, recurse){
 	var ret = getElements(what, where, true, recurse);
-	if(ret && (ret = ret.children) && ret.length > 0) return ret[0].data;
+	if(ret && (ret = ret.children) && ret.length !== 0) return ret[0].data;
 	else return false;
 }
 

From f4650ffa94b9acd6322efdade5b9a5e9e7900ced Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 13 Jan 2012 17:19:16 +0100
Subject: [PATCH 170/450] Added Parser#pause and Parser#resume

---
 lib/Parser.js | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index 0916fab..aefd476 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -10,6 +10,7 @@ function Parser(cbs, options){
 	this._wroteSpecial = false;
 	this._contentFlags = 0;
 	this._done = false;
+	this._paused = false;
 }
 
 //Regular expressions used for cleaning up and parsing (stateless)
@@ -48,7 +49,7 @@ Parser.prototype.write =
 Parser.prototype.parseChunk = function(data){
 	if(this._done) this._handleError(Error("Attempted to parse chunk after parsing already done"));
 	this._buffer += data; //FIXME: this can be a bottleneck
-	this._parseTags();
+	if(!this._paused) this._parseTags();
 };
 
 //Tells the parser that the HTML being parsed is complete
@@ -57,7 +58,11 @@ Parser.prototype.end = Parser.prototype.done = function(chunk){
 
 	if(chunk) this.write(chunk);
 	this._done = true;
+	
+	if(!this._paused) this._finishParsing();
+};
 
+Parser.prototype._finishParsing = function(){
 	//Parse the buffer to its end
 	if(this._buffer) this._parseTags(true);
 	
@@ -68,6 +73,16 @@ Parser.prototype.end = Parser.prototype.done = function(chunk){
 	if(this._cbs.onend) this._cbs.onend();
 };
 
+Parser.prototype.pause = function(){
+	if(!this._done) this._paused = true;
+};
+
+Parser.prototype.resume = function(){
+	this._paused = false;
+	this._parseTags();
+	if(this._done) this._finishParsing();
+};
+
 //Resets the parser to a blank state, ready to parse a new HTML document
 Parser.prototype.reset = function(){
 	Parser.call(this);
@@ -116,7 +131,8 @@ Parser.prototype._parseTags = function(force){
 	//if force is true, parse everything
 	if(force) opening = 1/0;
 
-	while(opening !== closing){ //just false if both are -1
+	//opening !== closing is just false if both are -1
+	while(opening !== closing && !this._paused){
 		lastTagSep = this._tagSep;
 		
 		if((opening !== -1 && opening < closing) || closing === -1){

From 2d723258e5d92ef3d7ae9363f7ea486a26048e29 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 13 Jan 2012 19:39:51 +0100
Subject: [PATCH 171/450] Updated package.json

---
 package.json | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/package.json b/package.json
index 46266d9..4ddd2cf 100644
--- a/package.json
+++ b/package.json
@@ -1,9 +1,9 @@
 {
 	"name": "htmlparser2",
-	"description": "Forgiving HTML/XML/RSS Parser for Node. This version is optimised and cleaned and provides a SAX interface.",
-	"version": "2.1.3",
+	"description": "Performance-optimized forgiving HTML/XML/RSS parser",
+	"version": "2.2.0",
 	"author": "Felix Boehm <me@feedic.com>",
-	"keywords": ["html", "parser", "xml", "dom", "rss", "feed", "atom"],
+	"keywords": ["html", "parser", "streams", "xml", "dom", "rss", "feed", "atom"],
 	"contributors": ["Chris Winberry <chris@winberry.net>"],
 	"repository": {
 		"type": "git",

From 387b2097d3579b37e7b9d12c07ee4057118fb7ae Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 13 Jan 2012 19:44:31 +0100
Subject: [PATCH 172/450] Renamed Parser#_paused to _running, added check in
 Parser#continue, added logic to write ">" inside text fields

The new name for paused doesn't require negating it in every check.
If anyone wrote >s inside their code without any context, they
shouldn't be ignored
---
 lib/Parser.js | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index aefd476..95a0763 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -10,7 +10,7 @@ function Parser(cbs, options){
 	this._wroteSpecial = false;
 	this._contentFlags = 0;
 	this._done = false;
-	this._paused = false;
+	this._running = true; //false if paused
 }
 
 //Regular expressions used for cleaning up and parsing (stateless)
@@ -45,21 +45,22 @@ Parser.prototype.parseComplete = function(data){
 };
 
 //Parses a piece of an HTML document
-Parser.prototype.write =
-Parser.prototype.parseChunk = function(data){
+Parser.prototype.parseChunk =
+Parser.prototype.write = function(data){
 	if(this._done) this._handleError(Error("Attempted to parse chunk after parsing already done"));
 	this._buffer += data; //FIXME: this can be a bottleneck
-	if(!this._paused) this._parseTags();
+	if(this._running) this._parseTags();
 };
 
 //Tells the parser that the HTML being parsed is complete
-Parser.prototype.end = Parser.prototype.done = function(chunk){
+Parser.prototype.done =
+Parser.prototype.end = function(chunk){
 	if(this._done) return;
 
 	if(chunk) this.write(chunk);
 	this._done = true;
 	
-	if(!this._paused) this._finishParsing();
+	if(this._running) this._finishParsing();
 };
 
 Parser.prototype._finishParsing = function(){
@@ -74,11 +75,12 @@ Parser.prototype._finishParsing = function(){
 };
 
 Parser.prototype.pause = function(){
-	if(!this._done) this._paused = true;
+	if(!this._done) this._running = false;
 };
 
 Parser.prototype.resume = function(){
-	this._paused = false;
+	if(this._running) return;
+	this._running = true;
 	this._parseTags();
 	if(this._done) this._finishParsing();
 };
@@ -132,7 +134,7 @@ Parser.prototype._parseTags = function(force){
 	if(force) opening = 1/0;
 
 	//opening !== closing is just false if both are -1
-	while(opening !== closing && !this._paused){
+	while(opening !== closing && this._running){
 		lastTagSep = this._tagSep;
 		
 		if((opening !== -1 && opening < closing) || closing === -1){
@@ -195,7 +197,7 @@ Parser.prototype._parseTags = function(force){
 						if(this._cbs.ontext) this._cbs.ontext(elementData.slice(8, -2));
 					}
 					else{
-						if(this._cbs.ontext) this._cbs.ontext(elementData.substr(8));
+						if(this._cbs.ontext) this._cbs.ontext(elementData.substr(8) + this._tagSep);
 						this._contentFlags += SpecialTags[ElementType.CDATA];
 					}
 				}
@@ -221,9 +223,10 @@ Parser.prototype._parseTags = function(force){
 		}
 		else{
 			if(this._contentFlags !== 0){
-				this._writeSpecial(rawData, lastTagSep);
+				this._writeSpecial(rawData, ">");
 			}
 			else if(rawData !== "" && this._cbs.ontext){
+				if(this._tagSep === ">") rawData += ">"; //it's the second > in a row
 				this._cbs.ontext(rawData);
 			}
 		}

From b0f6f985fbf0874a7b3ed1b1370001b31e04bc45 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 14 Jan 2012 14:55:54 +0100
Subject: [PATCH 173/450] Remonde DomHandler#_inSpecialTag

It wasn't required at all and just lead to a bug
---
 lib/DomHandler.js | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/lib/DomHandler.js b/lib/DomHandler.js
index 2e53103..f69f06e 100644
--- a/lib/DomHandler.js
+++ b/lib/DomHandler.js
@@ -3,7 +3,6 @@ var ElementType = require("./ElementType.js");
 function DomHandler(callback, options){
 	this.dom = [];
 	this._done = false;
-	this._inSpecialTag = false;
 	this._tagStack = [];
 	if(typeof callback === "object") this._options = callback;
 	else {
@@ -50,7 +49,7 @@ DomHandler.prototype._addDomElement = function(element){
 			return;
 		}
 		lastChild = lastTag.children[lastTag.children.length - 1];
-		if(this._inSpecialTag && element.type === ElementType.Text && lastChild.type === ElementType.Text){
+		if(element.type === ElementType.Text && lastChild.type === ElementType.Text){
 			lastChild.data += element.data;
 		} else {
 			lastTag.children.push(element);
@@ -62,9 +61,6 @@ DomHandler.prototype._addDomElement = function(element){
 };
 
 DomHandler.prototype.onopentag = function(name, attribs, type){
-	if(type === ElementType.Script || type === ElementType.Style){
-		this._inSpecialTag = true;
-	}
 	var element = {
 		type: type,
 		name: name

From 7d7345ffd46184c91f2de4adb152ecbcff727d22 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 14 Jan 2012 14:57:11 +0100
Subject: [PATCH 174/450] Fixed tests

They were simply wrong and included errors. Their failure was just a
result of a bug in previous versions.
---
 tests/Events/04-cdata.js | 2 +-
 tests/Feeds/03-rdf.js    | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/Events/04-cdata.js b/tests/Events/04-cdata.js
index b0d18be..3ffc8c5 100644
--- a/tests/Events/04-cdata.js
+++ b/tests/Events/04-cdata.js
@@ -23,7 +23,7 @@ exports.expected = [
   {
     "event": "text",
     "data": [
-      " asdf "
+      " asdf >"
     ]
   },
   {
diff --git a/tests/Feeds/03-rdf.js b/tests/Feeds/03-rdf.js
index 90c05cb..3460444 100644
--- a/tests/Feeds/03-rdf.js
+++ b/tests/Feeds/03-rdf.js
@@ -17,12 +17,12 @@ exports.expected = {
     {
       "title": " Music Equipment Repair and Consignment ",
       "link": "\nhttp://sfbay.craigslist.org/sby/muc/2681301534.html\n",
-      "description": "\nSan Jose Rock Shop offers musical instrument repair and consignment! (408) 215-2065"
+      "description": "\nSan Jose Rock Shop offers musical instrument repair and consignment! (408) 215-2065<br> <br> We are pleased to announce our NEW LOCATION: 1199 N 5th st. San Jose, ca 95112. Please call ahead, by appointment only.<br> <br> Recently featured by Metro Newspaper in their 2011 Best of the Silicon Valley edition see it online here:<br> <a href=\"http://www.metroactive.com/best-of-silicon-valley/2011/music-nightlife/editor-picks.html\" rel=\"nofollow\">http://www.metroactive.com/best-of-silicon-valley/2011/music-nightlife/editor-picks.html</a><br> <br> Guitar Set up (acoustic and electronic) $40!<!-- END CLTAGS -->"
     },
     {
       "title": "\nRide Offered - Oakland/BART to LA/SFV - TODAY 3PM 11/04 (oakland north / temescal)\n",
       "link": "\nhttp://sfbay.craigslist.org/eby/rid/2685010755.html\n",
-      "description": "\nIm offering a lift for up to two people from Oakland (or near any BART station in the East Bay/580/880 Corridor, or San Jose/Morgan Hill, Gilroy) to the San Fernando Valley / Los Angeles area. Specifically, Im leaving from Oakland between 2:30 and 3:00pm (this is flexible, but if I leave too late my girlfriend will kill me), and heading to Woodland Hills via the 580, I-5, 405, and 101."
+      "description": "\nIm offering a lift for up to two people from Oakland (or near any BART station in the East Bay/580/880 Corridor, or San Jose/Morgan Hill, Gilroy) to the San Fernando Valley / Los Angeles area. Specifically, Im leaving from Oakland between 2:30 and 3:00pm (this is flexible, but if I leave too late my girlfriend will kill me), and heading to Woodland Hills via the 580, I-5, 405, and 101.<!-- END CLTAGS -->"
     }
   ]
 };
\ No newline at end of file

From 7c1cbc646bc5534aa3bba542260b007c5df05d47 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 14 Jan 2012 14:59:46 +0100
Subject: [PATCH 175/450] 2.2.1

---
 package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package.json b/package.json
index 4ddd2cf..1af97a6 100644
--- a/package.json
+++ b/package.json
@@ -1,7 +1,7 @@
 {
 	"name": "htmlparser2",
 	"description": "Performance-optimized forgiving HTML/XML/RSS parser",
-	"version": "2.2.0",
+	"version": "2.2.1",
 	"author": "Felix Boehm <me@feedic.com>",
 	"keywords": ["html", "parser", "streams", "xml", "dom", "rss", "feed", "atom"],
 	"contributors": ["Chris Winberry <chris@winberry.net>"],

From c9e4b4306c941fe5b7197e243ce83004a6e11346 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Tue, 17 Jan 2012 09:50:31 +0100
Subject: [PATCH 176/450] Minor changes

Use `in` inside Parser, added note that the output is simplified in
readme
---
 README.md     | 2 +-
 lib/Parser.js | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index ce0064a..f1aad76 100644
--- a/README.md
+++ b/README.md
@@ -37,7 +37,7 @@ parser.write("Xyz <script language= javascript>var foo = '<<bar>>';< /  script>"
 parser.done();
 ```
 
-Output:
+Output (simplified):
 
 ```javascript
 --> Xyz 
diff --git a/lib/Parser.js b/lib/Parser.js
index 95a0763..c377ade 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -275,7 +275,7 @@ var emptyTags = {
 };
 
 Parser.prototype._processCloseTag = function(name){
-	if(this._stack && (!emptyTags[name] || this._options.xmlMode)){
+	if(this._stack && (!(name in emptyTags) || this._options.xmlMode)){
 		var pos = this._stack.lastIndexOf(name);
 		if(pos !== -1)
 			if(this._cbs.onclosetag){
@@ -312,7 +312,7 @@ Parser.prototype._processOpenTag = function(name, data){
 	}
 	
 	//If tag self-terminates, add an explicit, separate closing tag
-	if(data.substr(-1) === "/" || (emptyTags[name] && !this._options.xmlMode)){
+	if(data.substr(-1) === "/" || (name in emptyTags && !this._options.xmlMode)){
 		if(this._cbs.onclosetag) this._cbs.onclosetag(name);
 	} else {
 		if(type !== ElementType.Tag){

From ae762e7b6ccc37f199c9f52c80d799f0d992eafb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Tue, 17 Jan 2012 09:51:32 +0100
Subject: [PATCH 177/450] 2.2.2

Fixes #11
---
 package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package.json b/package.json
index 1af97a6..8deb4b5 100644
--- a/package.json
+++ b/package.json
@@ -1,7 +1,7 @@
 {
 	"name": "htmlparser2",
 	"description": "Performance-optimized forgiving HTML/XML/RSS parser",
-	"version": "2.2.1",
+	"version": "2.2.2",
 	"author": "Felix Boehm <me@feedic.com>",
 	"keywords": ["html", "parser", "streams", "xml", "dom", "rss", "feed", "atom"],
 	"contributors": ["Chris Winberry <chris@winberry.net>"],

From b2e7fb422965313d910d1e818b855e807962e04d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Thu, 19 Jan 2012 15:47:13 +0100
Subject: [PATCH 178/450] Removed unnecessary code inside FeedHandler

Removed if/else, use Array#map instead of forEach
---
 lib/FeedHandler.js | 21 ++++++++-------------
 1 file changed, 8 insertions(+), 13 deletions(-)

diff --git a/lib/FeedHandler.js b/lib/FeedHandler.js
index 9a9b019..8bf4075 100644
--- a/lib/FeedHandler.js
+++ b/lib/FeedHandler.js
@@ -34,13 +34,7 @@ FeedHandler.prototype.onend = function() {
 		if(feedRoot.name === "feed"){
 			childs = feedRoot.children;
 			items = getElements("entry", childs);
-		}
-		else{
-			items = getElements("item", feedRoot.children);
-			childs = getElements("channel", feedRoot.children, true).children;
-		}
-		
-		if (feedRoot.name === "feed"){
+
 			feed.type = "atom";
 			if(tmp = fetch("id", childs))
 				feed.id = tmp;
@@ -54,8 +48,7 @@ FeedHandler.prototype.onend = function() {
 				feed.updated = new Date(tmp);
 			if(tmp = fetch("email", childs, true))
 				feed.author = tmp;
-			feed.items = Array(items.length);
-			items.forEach(function(item, i){
+			feed.items = items.map(function(item){
 				var entry = {}, tmp;
 				if(tmp = fetch("id", item.children))
 					entry.id = tmp;
@@ -67,9 +60,12 @@ FeedHandler.prototype.onend = function() {
 					entry.description = tmp;
 				if(tmp = fetch("updated", item.children))
 					entry.pubDate = new Date(tmp);
-				feed.items[i] = entry;
+				return entry;
 			});
 		} else {
+			items = getElements("item", feedRoot.children);
+			childs = getElements("channel", feedRoot.children, true).children;
+
 			feed.type = feedRoot.name;
 			feed.id = "";
 			if(tmp = fetch("title", childs))
@@ -82,8 +78,7 @@ FeedHandler.prototype.onend = function() {
 				feed.updated = new Date(tmp);
 			if(tmp = fetch("managingEditor", childs))
 				feed.author = tmp;
-			feed.items = Array(items.length);
-			items.forEach(function(item, i){
+			feed.items = items.map(function(item){
 				var entry = {}, tmp;
 				if(tmp = fetch("guid", item.children))
 					entry.id = tmp;
@@ -95,7 +90,7 @@ FeedHandler.prototype.onend = function() {
 					entry.description = tmp;
 				if(tmp = fetch("pubDate", item.children))
 					entry.pubDate = new Date(tmp);
-				feed.items[i] = entry;
+				return entry;
 			});
 		}
 	}

From 9508e80a25f290cc82f3019fddff1d540f638e95 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Thu, 19 Jan 2012 15:54:00 +0100
Subject: [PATCH 179/450] Removed newlines, items var from FeedHandler

---
 lib/FeedHandler.js | 78 +++++++++++++++++-----------------------------
 1 file changed, 28 insertions(+), 50 deletions(-)

diff --git a/lib/FeedHandler.js b/lib/FeedHandler.js
index 8bf4075..5bcda02 100644
--- a/lib/FeedHandler.js
+++ b/lib/FeedHandler.js
@@ -25,71 +25,49 @@ var isValidFeed = function(value) {
 }
 
 FeedHandler.prototype.onend = function() {
-	var feed = {};
-	var feedRoot;
-	var tmp, items, childs;
+	var feed = {},
+		feedRoot = getElements(isValidFeed, this.dom, true),
+		tmp, childs;
 
-	feedRoot = getElements(isValidFeed, this.dom, true);
 	if (feedRoot) {
 		if(feedRoot.name === "feed"){
 			childs = feedRoot.children;
-			items = getElements("entry", childs);
 
 			feed.type = "atom";
-			if(tmp = fetch("id", childs))
-				feed.id = tmp;
-			if(tmp = fetch("title", childs))
-				feed.title = tmp;
-			if((tmp = getElements("link", childs, true)) && (tmp = tmp.attribs) && (tmp = tmp.href))
-				feed.link = tmp;
-			if(tmp = fetch("subtitle", childs))
-				feed.description = tmp;
-			if(tmp = fetch("updated", childs))
-				feed.updated = new Date(tmp);
-			if(tmp = fetch("email", childs, true))
-				feed.author = tmp;
-			feed.items = items.map(function(item){
+			if(tmp = fetch("id", childs)) feed.id = tmp;
+			if(tmp = fetch("title", childs)) feed.title = tmp;
+			if((tmp = getElements("link", childs, true)) && (tmp = tmp.attribs) && (tmp = tmp.href)) feed.link = tmp;
+			if(tmp = fetch("subtitle", childs)) feed.description = tmp;
+			if(tmp = fetch("updated", childs)) feed.updated = new Date(tmp);
+			if(tmp = fetch("email", childs, true)) feed.author = tmp;
+
+			feed.items = getElements("entry", childs).map(function(item){
 				var entry = {}, tmp;
-				if(tmp = fetch("id", item.children))
-					entry.id = tmp;
-				if(tmp = fetch("title", item.children))
-					entry.title = tmp;
-				if((tmp = getElements("link", item.children, true)) && (tmp = tmp.attribs) && (tmp = tmp.href))
-					entry.link = tmp;
-				if(tmp = fetch("summary", item.children))
-					entry.description = tmp;
-				if(tmp = fetch("updated", item.children))
-					entry.pubDate = new Date(tmp);
+				if(tmp = fetch("id", item.children)) entry.id = tmp;
+				if(tmp = fetch("title", item.children)) entry.title = tmp;
+				if((tmp = getElements("link", item.children, true)) && (tmp = tmp.attribs) && (tmp = tmp.href)) entry.link = tmp;
+				if(tmp = fetch("summary", item.children)) entry.description = tmp;
+				if(tmp = fetch("updated", item.children)) entry.pubDate = new Date(tmp);
 				return entry;
 			});
 		} else {
-			items = getElements("item", feedRoot.children);
 			childs = getElements("channel", feedRoot.children, true).children;
 
 			feed.type = feedRoot.name;
 			feed.id = "";
-			if(tmp = fetch("title", childs))
-				feed.title = tmp;
-			if(tmp = fetch("link", childs))
-				feed.link = tmp;
-			if(tmp = fetch("description", childs))
-				feed.description = tmp;
-			if(tmp = fetch("lastBuildDate", childs))
-				feed.updated = new Date(tmp);
-			if(tmp = fetch("managingEditor", childs))
-				feed.author = tmp;
-			feed.items = items.map(function(item){
+			if(tmp = fetch("title", childs)) feed.title = tmp;
+			if(tmp = fetch("link", childs)) feed.link = tmp;
+			if(tmp = fetch("description", childs)) feed.description = tmp;
+			if(tmp = fetch("lastBuildDate", childs)) feed.updated = new Date(tmp);
+			if(tmp = fetch("managingEditor", childs)) feed.author = tmp;
+
+			feed.items = getElements("item", feedRoot.children).map(function(item){
 				var entry = {}, tmp;
-				if(tmp = fetch("guid", item.children))
-					entry.id = tmp;
-				if(tmp = fetch("title", item.children))
-					entry.title = tmp;
-				if(tmp = fetch("link", item.children))
-					entry.link = tmp;
-				if(tmp = fetch("description", item.children))
-					entry.description = tmp;
-				if(tmp = fetch("pubDate", item.children))
-					entry.pubDate = new Date(tmp);
+				if(tmp = fetch("guid", item.children)) entry.id = tmp;
+				if(tmp = fetch("title", item.children)) entry.title = tmp;
+				if(tmp = fetch("link", item.children)) entry.link = tmp;
+				if(tmp = fetch("description", item.children)) entry.description = tmp;
+				if(tmp = fetch("pubDate", item.children)) entry.pubDate = new Date(tmp);
 				return entry;
 			});
 		}

From 11feb538c0523f37924efb9bd690e7d1c173cabe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Thu, 19 Jan 2012 16:18:53 +0100
Subject: [PATCH 180/450] Cache item.children inside FeedHandler

---
 lib/FeedHandler.js | 28 +++++++++++++++++-----------
 1 file changed, 17 insertions(+), 11 deletions(-)

diff --git a/lib/FeedHandler.js b/lib/FeedHandler.js
index 5bcda02..6ec480b 100644
--- a/lib/FeedHandler.js
+++ b/lib/FeedHandler.js
@@ -43,14 +43,17 @@ FeedHandler.prototype.onend = function() {
 
 			feed.items = getElements("entry", childs).map(function(item){
 				var entry = {}, tmp;
-				if(tmp = fetch("id", item.children)) entry.id = tmp;
-				if(tmp = fetch("title", item.children)) entry.title = tmp;
-				if((tmp = getElements("link", item.children, true)) && (tmp = tmp.attribs) && (tmp = tmp.href)) entry.link = tmp;
-				if(tmp = fetch("summary", item.children)) entry.description = tmp;
-				if(tmp = fetch("updated", item.children)) entry.pubDate = new Date(tmp);
+
+				item = item.children;
+
+				if(tmp = fetch("id", item)) entry.id = tmp;
+				if(tmp = fetch("title", item)) entry.title = tmp;
+				if((tmp = getElements("link", item, true)) && (tmp = tmp.attribs) && (tmp = tmp.href)) entry.link = tmp;
+				if(tmp = fetch("summary", item)) entry.description = tmp;
+				if(tmp = fetch("updated", item)) entry.pubDate = new Date(tmp);
 				return entry;
 			});
-		} else {
+		} else{
 			childs = getElements("channel", feedRoot.children, true).children;
 
 			feed.type = feedRoot.name;
@@ -63,11 +66,14 @@ FeedHandler.prototype.onend = function() {
 
 			feed.items = getElements("item", feedRoot.children).map(function(item){
 				var entry = {}, tmp;
-				if(tmp = fetch("guid", item.children)) entry.id = tmp;
-				if(tmp = fetch("title", item.children)) entry.title = tmp;
-				if(tmp = fetch("link", item.children)) entry.link = tmp;
-				if(tmp = fetch("description", item.children)) entry.description = tmp;
-				if(tmp = fetch("pubDate", item.children)) entry.pubDate = new Date(tmp);
+
+				item = item.children;
+
+				if(tmp = fetch("guid", item)) entry.id = tmp;
+				if(tmp = fetch("title", item)) entry.title = tmp;
+				if(tmp = fetch("link", item)) entry.link = tmp;
+				if(tmp = fetch("description", item)) entry.description = tmp;
+				if(tmp = fetch("pubDate", item)) entry.pubDate = new Date(tmp);
 				return entry;
 			});
 		}

From 528d73b37706ab91907c924525f25ed340201564 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 21 Jan 2012 13:32:52 +0100
Subject: [PATCH 181/450] Removed `type` attribute from cbs.onopentag

---
 lib/DomHandler.js                 |   4 +-
 lib/Parser.js                     |   2 +-
 tests/Events/01-simple.js         |   3 +-
 tests/Events/02-template.js       |   3 +-
 tests/Events/03-lowercase_tags.js |   3 +-
 tests/Events/04-cdata.js          |   3 +-
 tests/Stream/01-basic.js          |   9 +--
 tests/Stream/02-RSS.js            | 102 ++++++++++--------------------
 8 files changed, 44 insertions(+), 85 deletions(-)

diff --git a/lib/DomHandler.js b/lib/DomHandler.js
index f69f06e..3000afd 100644
--- a/lib/DomHandler.js
+++ b/lib/DomHandler.js
@@ -60,9 +60,9 @@ DomHandler.prototype._addDomElement = function(element){
 	}
 };
 
-DomHandler.prototype.onopentag = function(name, attribs, type){
+DomHandler.prototype.onopentag = function(name, attribs){
 	var element = {
-		type: type,
+		type: name === "script" ? ElementType.Script : name === "style" ? ElementType.Style : ElementType.Tag,
 		name: name
 	};
 	//for some reason, an if doesn't work
diff --git a/lib/Parser.js b/lib/Parser.js
index c377ade..ee3153f 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -305,7 +305,7 @@ Parser.prototype._processOpenTag = function(name, data){
 		this._cbs.onopentagname(name);
 	}
 	if(this._cbs.onopentag){
-		this._cbs.onopentag(name, parseAttributes(data), type);
+		this._cbs.onopentag(name, parseAttributes(data));
 	}
 	if(this._cbs.onattribute){
 		this._parseAttributes(data);
diff --git a/tests/Events/01-simple.js b/tests/Events/01-simple.js
index 2307b05..e66aace 100644
--- a/tests/Events/01-simple.js
+++ b/tests/Events/01-simple.js
@@ -14,8 +14,7 @@ exports.expected = [
       "h1",
       {
         "class": "test"
-      },
-      "tag"
+      }
     ]
   },
   {
diff --git a/tests/Events/02-template.js b/tests/Events/02-template.js
index 467f02e..0210fe0 100644
--- a/tests/Events/02-template.js
+++ b/tests/Events/02-template.js
@@ -14,8 +14,7 @@ exports.expected = [
       "script",
       {
         "type": "text/template"
-      },
-      "script"
+      }
     ]
   },
   {
diff --git a/tests/Events/03-lowercase_tags.js b/tests/Events/03-lowercase_tags.js
index 55e8ac5..8e8670b 100644
--- a/tests/Events/03-lowercase_tags.js
+++ b/tests/Events/03-lowercase_tags.js
@@ -14,8 +14,7 @@ exports.expected = [
       "h1",
       {
         "class": "test"
-      },
-      "tag"
+      }
     ]
   },
   {
diff --git a/tests/Events/04-cdata.js b/tests/Events/04-cdata.js
index 3ffc8c5..b338a69 100644
--- a/tests/Events/04-cdata.js
+++ b/tests/Events/04-cdata.js
@@ -12,8 +12,7 @@ exports.expected = [
     "event": "opentag",
     "data": [
       "tag",
-      {},
-      "tag"
+      {}
     ]
   },
   {
diff --git a/tests/Stream/01-basic.js b/tests/Stream/01-basic.js
index eada9ab..c86137b 100644
--- a/tests/Stream/01-basic.js
+++ b/tests/Stream/01-basic.js
@@ -20,8 +20,7 @@ exports.expected = [
     "event": "opentag",
     "data": [
       "html",
-      {},
-      "tag"
+      {}
     ]
   },
   {
@@ -34,8 +33,7 @@ exports.expected = [
     "event": "opentag",
     "data": [
       "title",
-      {},
-      "tag"
+      {}
     ]
   },
   {
@@ -60,8 +58,7 @@ exports.expected = [
     "event": "opentag",
     "data": [
       "body",
-      {},
-      "tag"
+      {}
     ]
   },
   {
diff --git a/tests/Stream/02-RSS.js b/tests/Stream/02-RSS.js
index 8accd12..68f1fdb 100644
--- a/tests/Stream/02-RSS.js
+++ b/tests/Stream/02-RSS.js
@@ -44,8 +44,7 @@ exports.expected = [
       "rss",
       {
         "version": "2.0"
-      },
-      "tag"
+      }
     ]
   },
   {
@@ -71,8 +70,7 @@ exports.expected = [
     "event": "opentag",
     "data": [
       "channel",
-      {},
-      "tag"
+      {}
     ]
   },
   {
@@ -91,8 +89,7 @@ exports.expected = [
     "event": "opentag",
     "data": [
       "title",
-      {},
-      "tag"
+      {}
     ]
   },
   {
@@ -123,8 +120,7 @@ exports.expected = [
     "event": "opentag",
     "data": [
       "link",
-      {},
-      "tag"
+      {}
     ]
   },
   {
@@ -155,8 +151,7 @@ exports.expected = [
     "event": "opentag",
     "data": [
       "description",
-      {},
-      "tag"
+      {}
     ]
   },
   {
@@ -187,8 +182,7 @@ exports.expected = [
     "event": "opentag",
     "data": [
       "language",
-      {},
-      "tag"
+      {}
     ]
   },
   {
@@ -219,8 +213,7 @@ exports.expected = [
     "event": "opentag",
     "data": [
       "pubDate",
-      {},
-      "tag"
+      {}
     ]
   },
   {
@@ -251,8 +244,7 @@ exports.expected = [
     "event": "opentag",
     "data": [
       "lastBuildDate",
-      {},
-      "tag"
+      {}
     ]
   },
   {
@@ -283,8 +275,7 @@ exports.expected = [
     "event": "opentag",
     "data": [
       "docs",
-      {},
-      "tag"
+      {}
     ]
   },
   {
@@ -315,8 +306,7 @@ exports.expected = [
     "event": "opentag",
     "data": [
       "generator",
-      {},
-      "tag"
+      {}
     ]
   },
   {
@@ -347,8 +337,7 @@ exports.expected = [
     "event": "opentag",
     "data": [
       "managingEditor",
-      {},
-      "tag"
+      {}
     ]
   },
   {
@@ -379,8 +368,7 @@ exports.expected = [
     "event": "opentag",
     "data": [
       "webMaster",
-      {},
-      "tag"
+      {}
     ]
   },
   {
@@ -411,8 +399,7 @@ exports.expected = [
     "event": "opentag",
     "data": [
       "item",
-      {},
-      "tag"
+      {}
     ]
   },
   {
@@ -431,8 +418,7 @@ exports.expected = [
     "event": "opentag",
     "data": [
       "title",
-      {},
-      "tag"
+      {}
     ]
   },
   {
@@ -463,8 +449,7 @@ exports.expected = [
     "event": "opentag",
     "data": [
       "link",
-      {},
-      "tag"
+      {}
     ]
   },
   {
@@ -495,8 +480,7 @@ exports.expected = [
     "event": "opentag",
     "data": [
       "description",
-      {},
-      "tag"
+      {}
     ]
   },
   {
@@ -527,8 +511,7 @@ exports.expected = [
     "event": "opentag",
     "data": [
       "pubDate",
-      {},
-      "tag"
+      {}
     ]
   },
   {
@@ -559,8 +542,7 @@ exports.expected = [
     "event": "opentag",
     "data": [
       "guid",
-      {},
-      "tag"
+      {}
     ]
   },
   {
@@ -603,8 +585,7 @@ exports.expected = [
     "event": "opentag",
     "data": [
       "item",
-      {},
-      "tag"
+      {}
     ]
   },
   {
@@ -623,8 +604,7 @@ exports.expected = [
     "event": "opentag",
     "data": [
       "description",
-      {},
-      "tag"
+      {}
     ]
   },
   {
@@ -655,8 +635,7 @@ exports.expected = [
     "event": "opentag",
     "data": [
       "pubDate",
-      {},
-      "tag"
+      {}
     ]
   },
   {
@@ -687,8 +666,7 @@ exports.expected = [
     "event": "opentag",
     "data": [
       "guid",
-      {},
-      "tag"
+      {}
     ]
   },
   {
@@ -731,8 +709,7 @@ exports.expected = [
     "event": "opentag",
     "data": [
       "item",
-      {},
-      "tag"
+      {}
     ]
   },
   {
@@ -751,8 +728,7 @@ exports.expected = [
     "event": "opentag",
     "data": [
       "title",
-      {},
-      "tag"
+      {}
     ]
   },
   {
@@ -783,8 +759,7 @@ exports.expected = [
     "event": "opentag",
     "data": [
       "link",
-      {},
-      "tag"
+      {}
     ]
   },
   {
@@ -815,8 +790,7 @@ exports.expected = [
     "event": "opentag",
     "data": [
       "description",
-      {},
-      "tag"
+      {}
     ]
   },
   {
@@ -847,8 +821,7 @@ exports.expected = [
     "event": "opentag",
     "data": [
       "pubDate",
-      {},
-      "tag"
+      {}
     ]
   },
   {
@@ -879,8 +852,7 @@ exports.expected = [
     "event": "opentag",
     "data": [
       "guid",
-      {},
-      "tag"
+      {}
     ]
   },
   {
@@ -923,8 +895,7 @@ exports.expected = [
     "event": "opentag",
     "data": [
       "item",
-      {},
-      "tag"
+      {}
     ]
   },
   {
@@ -943,8 +914,7 @@ exports.expected = [
     "event": "opentag",
     "data": [
       "title",
-      {},
-      "tag"
+      {}
     ]
   },
   {
@@ -975,8 +945,7 @@ exports.expected = [
     "event": "opentag",
     "data": [
       "link",
-      {},
-      "tag"
+      {}
     ]
   },
   {
@@ -1007,8 +976,7 @@ exports.expected = [
     "event": "opentag",
     "data": [
       "description",
-      {},
-      "tag"
+      {}
     ]
   },
   {
@@ -1039,8 +1007,7 @@ exports.expected = [
     "event": "opentag",
     "data": [
       "pubDate",
-      {},
-      "tag"
+      {}
     ]
   },
   {
@@ -1071,8 +1038,7 @@ exports.expected = [
     "event": "opentag",
     "data": [
       "guid",
-      {},
-      "tag"
+      {}
     ]
   },
   {

From e5748418dd7980940038af1aacbf33655e05c06e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 21 Jan 2012 13:34:35 +0100
Subject: [PATCH 182/450] Moved event names to index.js as `EVENTS

---
 lib/Stream.js        | 68 ++++++++++++++++----------------------------
 lib/index.js         | 14 +++++++++
 tests/test-helper.js | 11 +++++--
 3 files changed, 48 insertions(+), 45 deletions(-)

diff --git a/lib/Stream.js b/lib/Stream.js
index 6b03e2c..1f66c71 100644
--- a/lib/Stream.js
+++ b/lib/Stream.js
@@ -19,48 +19,30 @@ var cbs = function(scope){
 	this.scope = scope;
 };
 
-cbs.prototype = {
-	oncdataend: function(){
-		this.scope.emit("cdataend");
-	},
-	oncdatastart: function(){
-		this.scope.emit("cdatastart");
-	},
-	onclosetag: function(name){
-    	this.scope.emit("closetag", name);
-    },
-	oncomment: function(text){
-    	this.scope.emit("comment", text);
-    },
-	oncommentend: function(){
-		this.scope.emit("commentend");
-	},
-	onerror: function(err){
-    	this.scope.emit("error", err);
-    },
-	onopentag: function(name, attribs, type){
-    	this.scope.emit("opentag", name, attribs, type);
-    },
-    onopentagname: function(name){
-    	this.scope.emit("opentagname", name);
-    },
-    onattribute: function(name, value){
-    	this.scope.emit("attribute", name, value);
-    },
-	onprocessinginstruction: function(name, data){
-		this.scope.emit("processinginstruction", name, data);
-	},
-	onend: function(){
-		this.scope.emit("end");
-	},
-	onreset: function(){
-		this.scope.emit("reset");
-	},
-    ontext: function(text){
-    	this.scope.emit("text", text);
-    	//let the 'pipe' function do something useful
-    	//this.scope.emit("data", text);
-    }
-};
+var EVENTS = require("..").EVENTS;
+
+Object.keys(EVENTS).forEach(function(name){
+	switch(EVENTS[name]){
+		case 0:{
+			cbs.prototype["on" + name] = function(){
+				this.scope.emit(name);
+			};
+			break;
+		}
+		case 1:{
+			cbs.prototype["on" + name] = function(a){
+				this.scope.emit(name, a);
+			};
+			break;
+		}
+		case 2:{
+			cbs.prototype["on" + name] = function(a, b){
+				this.scope.emit(name, a, b);
+			};
+			break;
+		}
+		default: throw Error("wrong number of arguments!");
+	}
+});
 
 module.exports = Stream;
\ No newline at end of file
diff --git a/lib/index.js b/lib/index.js
index 46add8d..7bf87b0 100644
--- a/lib/index.js
+++ b/lib/index.js
@@ -32,5 +32,19 @@ module.exports = {
 	get RssHandler(){
 		defineProp(this, "RssHandler", {value: this.FeedHandler});
 		return this.FeedHandler;
+	},
+	EVENTS: { /* Format: eventname: number of arguments */
+		attribute: 2,
+		cdatastart: 0,
+		cdataend: 0,
+		text: 1,
+		processinginstruction: 2,
+		comment: 1,
+		commentend: 0,
+		closetag: 1,
+		opentag: 2,
+		opentagname: 1,
+		error: 1,
+		end: 0
 	}
 }
\ No newline at end of file
diff --git a/tests/test-helper.js b/tests/test-helper.js
index 07af5c9..31c83fe 100644
--- a/tests/test-helper.js
+++ b/tests/test-helper.js
@@ -1,4 +1,5 @@
-var Parser = require("../lib/Parser.js"),
+var htmlparser = require(".."),
+	Parser = htmlparser.Parser,
 	chunkSize = 5;
 
 exports.writeToParser = function(handler, options, data){
@@ -12,4 +13,10 @@ exports.writeToParser = function(handler, options, data){
 	parser.parseComplete(data);
 }
 
-exports.EVENTS = ["attribute", "cdatastart", "cdataend", "text", "processinginstruction", "comment", "commentend", "closetag", "opentag", "opentagname"/*, "error", "end"*/];
\ No newline at end of file
+var EVENTS = Object.keys(htmlparser.EVENTS);
+
+//remove onend and onerror from events
+EVENTS.splice(EVENTS.indexOf("end"), 1);
+EVENTS.splice(EVENTS.indexOf("error"), 1);
+
+exports.EVENTS = EVENTS;
\ No newline at end of file

From ab48db45e135797aa094418da23c6c990b48fdb5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 21 Jan 2012 13:47:34 +0100
Subject: [PATCH 183/450] Added WritableStream interface

Has the same interface as the parser, but you may pipe to it.
---
 lib/Stream.js         | 15 ++++-----------
 lib/WritableStream.js | 19 +++++++++++++++++++
 lib/index.js          |  6 ++++++
 3 files changed, 29 insertions(+), 11 deletions(-)
 create mode 100644 lib/WritableStream.js

diff --git a/lib/Stream.js b/lib/Stream.js
index 1f66c71..fd3abcc 100644
--- a/lib/Stream.js
+++ b/lib/Stream.js
@@ -1,25 +1,18 @@
-var Parser = require("./Parser.js");
+var WritableStream = require("./WritableStream.js");
 
 var Stream = function(options){
-	Parser.call(this, new cbs(this), options);
+	WritableStream.call(this, new cbs(this), options);
 };
 
-require("util").inherits(Stream, require("stream").Stream);
+require("util").inherits(Stream, WritableStream);
 
-//util.inherits would overwrite the prototype when called twice,
-//so we need a different approach
-Object.getOwnPropertyNames(Parser.prototype).forEach(function(name){
-	Stream.prototype[name] = Parser.prototype[name];
-});
-
-Stream.prototype.writable = true;
 Stream.prototype.readable = true;
 
 var cbs = function(scope){
 	this.scope = scope;
 };
 
-var EVENTS = require("..").EVENTS;
+var EVENTS = require("../").EVENTS;
 
 Object.keys(EVENTS).forEach(function(name){
 	switch(EVENTS[name]){
diff --git a/lib/WritableStream.js b/lib/WritableStream.js
new file mode 100644
index 0000000..b9e5c3e
--- /dev/null
+++ b/lib/WritableStream.js
@@ -0,0 +1,19 @@
+var Parser = require("./Parser.js");
+
+var WritableStream = function(cbs, options){
+	Parser.call(this, cbs, options);
+};
+
+require("util").inherits(WritableStream, require("stream").Stream);
+
+//util.inherits would overwrite the prototype when called twice,
+//so we need a different approach
+Object.getOwnPropertyNames(Parser.prototype).forEach(function(name){
+	WritableStream.prototype[name] = Parser.prototype[name];
+});
+
+WritableStream.prototype.writable = true;
+
+// TODO improve support for Parser#pause and Parser#continue
+
+module.exports = WritableStream;
\ No newline at end of file
diff --git a/lib/index.js b/lib/index.js
index 7bf87b0..f6e788e 100644
--- a/lib/index.js
+++ b/lib/index.js
@@ -21,10 +21,15 @@ module.exports = {
 		defineProp(this, "Stream", {value:require("./Stream.js")});
 		return this.Stream;
 	},
+	get WritableStream(){
+		defineProp(this, "WritableStream", {value:require("./WritableStream.js")});
+		return this.WritableStream;
+	},
 	get DomUtils(){
 		defineProp(this, "DomUtils", {value:require("./DomUtils.js")});
 		return this.DomUtils;
 	},
+	// For legacy support
 	get DefaultHandler(){
 		defineProp(this, "DefaultHandler", {value: this.DomHandler});
 		return this.DefaultHandler;
@@ -33,6 +38,7 @@ module.exports = {
 		defineProp(this, "RssHandler", {value: this.FeedHandler});
 		return this.FeedHandler;
 	},
+	// List of all events that the parser emits
 	EVENTS: { /* Format: eventname: number of arguments */
 		attribute: 2,
 		cdatastart: 0,

From 5c0fa09a37397889b08c8541c43aba89bd56e5dd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 21 Jan 2012 14:01:03 +0100
Subject: [PATCH 184/450] Renamed Parser#_processComment to _writeComment,
 moved logic for CDATA to #_writeCDATA

That fixed a problem where an empty CDATA section would call ontext
with an empty string
---
 lib/Parser.js | 47 +++++++++++++++++++++++++++--------------------
 1 file changed, 27 insertions(+), 20 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index ee3153f..51f4a0b 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -23,6 +23,12 @@ Parser.prototype._options = {
 };
 
 Parser.prototype._cbs = {
+	/*
+		This is just a plain object
+		so that the parser doesn't
+		throw if no arguments were
+		provided.
+	*/
 	/*
 		oncdataend,
 		oncdatastart,
@@ -153,18 +159,13 @@ Parser.prototype._parseTags = function(force){
 		current = next + 1;
 		
 		if(this._contentFlags >= SpecialTags[ElementType.CDATA]){
-			if(this._tagSep === ">" && rawData.substr(-2) === "]]"){
-				if(rawData.length !== 2 && this._cbs.ontext){
-					this._cbs.ontext(rawData.slice(0,-2));
-				}
-				this._contentFlags -= SpecialTags[ElementType.CDATA];
-				if(this._cbs.oncdataend) this._cbs.oncdataend();
-			}
-			else if(this._cbs.ontext) this._cbs.ontext(rawData + this._tagSep);
+			// We're inside a CDATA section
+			this._writeCDATA(rawData);
+
 		}
 		else if(this._contentFlags >= SpecialTags[ElementType.Comment]){
-			//We're currently in a comment tag
-			this._processComment(rawData);
+			//We're in a comment tag
+			this._writeComment(rawData);
 		}
 		else if(lastTagSep === "<"){
 			elementData = rawData.trimLeft();
@@ -188,18 +189,12 @@ Parser.prototype._parseTags = function(force){
 				if(elementData.substr(1, 2) === "--"){
 					//This tag is a comment
 					this._contentFlags += SpecialTags[ElementType.Comment];
-					this._processComment(rawData.substr(3));
+					this._writeComment(rawData.substr(3));
 				}
 				else if(elementData.substr(1, 7) === "[CDATA["){
+					this._contentFlags += SpecialTags[ElementType.CDATA];
 					if(this._cbs.oncdatastart) this._cbs.oncdatastart();
-					if(this._tagSep === ">" && elementData.substr(-2) === "]]"){
-						if(this._cbs.oncdataend) this._cbs.oncdataend();
-						if(this._cbs.ontext) this._cbs.ontext(elementData.slice(8, -2));
-					}
-					else{
-						if(this._cbs.ontext) this._cbs.ontext(elementData.substr(8) + this._tagSep);
-						this._contentFlags += SpecialTags[ElementType.CDATA];
-					}
+					this._writeCDATA(elementData.substr(8));
 				}
 				else if(this._contentFlags !== 0) this._writeSpecial(rawData, lastTagSep);
 				//TODO: This isn't a processing instruction, needs a new name
@@ -235,7 +230,19 @@ Parser.prototype._parseTags = function(force){
 	this._buffer = buffer.substring(current);
 };
 
-Parser.prototype._processComment = function(rawData){
+Parser.prototype._writeCDATA = function(data){
+	if(this._tagSep === ">" && data.substr(-2) === "]]"){
+		// CDATA ends
+    	if(data.length !== 2 && this._cbs.ontext){
+    		this._cbs.ontext(data.slice(0,-2));
+    	}
+    	this._contentFlags -= SpecialTags[ElementType.CDATA];
+    	if(this._cbs.oncdataend) this._cbs.oncdataend();
+    }
+    else if(this._cbs.ontext) this._cbs.ontext(data + this._tagSep);
+};
+
+Parser.prototype._writeComment = function(rawData){
 	if(this._tagSep === ">" && rawData.substr(-2) === "--"){ //comment ends
 		//remove the written flag (also removes the comment flag)
 		this._contentFlags -= SpecialTags[ElementType.Comment];

From e40e0a44c3c89ba27ea55ab148dcbc9f2210db22 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 21 Jan 2012 14:16:05 +0100
Subject: [PATCH 185/450] Singular attributes should just return an empty
 string, not their names

That's a task for the consumer, not the parser
---
 lib/Parser.js                       | 24 +++++++++++-------------
 tests/HTML/10-singular_attribute.js |  2 +-
 2 files changed, 12 insertions(+), 14 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index 51f4a0b..a243cc7 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -97,17 +97,6 @@ Parser.prototype.reset = function(){
 	if(this._cbs.onreset) this._cbs.onreset();
 };
 
-//parses the attribute string
-var parseAttributes = function(data){
-	var attrs = {}, match;
-	
-	while(match = _reAttrib.exec(data)){
-		attrs[match[1]] = match[2] || match[3] || match[4] || match[1];
-	}
-
-	return attrs;
-};
-
 //Extracts the base tag name from the data value of an element
 Parser.prototype._parseTagName = function(data){
 	var match = data.substr(0, data.search(_reTail));
@@ -137,7 +126,7 @@ Parser.prototype._parseTags = function(force){
 	var opening = buffer.indexOf("<"), closing = buffer.indexOf(">");
 
 	//if force is true, parse everything
-	if(force) opening = 1/0;
+	if(force) opening = Infinity;
 
 	//opening !== closing is just false if both are -1
 	while(opening !== closing && this._running){
@@ -298,10 +287,19 @@ Parser.prototype._processCloseTag = function(name){
 
 Parser.prototype._parseAttributes = function(data){
 	for(var match; match = _reAttrib.exec(data);){
-		this._cbs.onattribute(match[1], match[2] || match[3] || match[4] || match[1]);
+		this._cbs.onattribute(match[1], match[2] || match[3] || match[4] || "");
 	}
 };
 
+//parses the attribute string
+var parseAttributes = function(data){
+	var attrs = {}, match;
+	while(match = _reAttrib.exec(data)){
+		attrs[match[1]] = match[2] || match[3] || match[4] || "";
+	}
+	return attrs;
+};
+
 Parser.prototype._processOpenTag = function(name, data){
 	var type = ElementType.Tag;
 	if(this._options.xmlMode){ /*do nothing*/ }
diff --git a/tests/HTML/10-singular_attribute.js b/tests/HTML/10-singular_attribute.js
index ca978e2..af10053 100644
--- a/tests/HTML/10-singular_attribute.js
+++ b/tests/HTML/10-singular_attribute.js
@@ -10,7 +10,7 @@ exports.expected = [
     "name": "option",
     "attribs": {
       "value": "foo",
-      "selected": "selected"
+      "selected": ""
     }
   }
 ];
\ No newline at end of file

From b321265f13b4700c0cc2b5dab4d082fee489ef80 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 21 Jan 2012 14:29:37 +0100
Subject: [PATCH 186/450] Cosmetic changes

---
 lib/Parser.js | 18 ++++++------------
 1 file changed, 6 insertions(+), 12 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index a243cc7..21ca835 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -293,8 +293,8 @@ Parser.prototype._parseAttributes = function(data){
 
 //parses the attribute string
 var parseAttributes = function(data){
-	var attrs = {}, match;
-	while(match = _reAttrib.exec(data)){
+	var attrs = {};
+	for(var match; match = _reAttrib.exec(data);){
 		attrs[match[1]] = match[2] || match[3] || match[4] || "";
 	}
 	return attrs;
@@ -305,16 +305,10 @@ Parser.prototype._processOpenTag = function(name, data){
 	if(this._options.xmlMode){ /*do nothing*/ }
 	else if(name === "script") type = ElementType.Script;
 	else if(name === "style")  type = ElementType.Style;
-	
-	if(this._cbs.onopentagname){
-		this._cbs.onopentagname(name);
-	}
-	if(this._cbs.onopentag){
-		this._cbs.onopentag(name, parseAttributes(data));
-	}
-	if(this._cbs.onattribute){
-		this._parseAttributes(data);
-	}
+
+	if(this._cbs.onopentagname)	this._cbs.onopentagname(name);
+	if(this._cbs.onopentag)		this._cbs.onopentag(name, parseAttributes(data));
+	if(this._cbs.onattribute)	this._parseAttributes(data);
 	
 	//If tag self-terminates, add an explicit, separate closing tag
 	if(data.substr(-1) === "/" || (name in emptyTags && !this._options.xmlMode)){

From 9620a2809fdc6615798b58981911aef24825376e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 21 Jan 2012 14:31:02 +0100
Subject: [PATCH 187/450] Renamed HTML dir to `DOM`

---
 tests/01-html.js                                    | 4 ++--
 tests/{HTML => DOM}/01-basic.js                     | 0
 tests/{HTML => DOM}/02-single_tag_1.js              | 0
 tests/{HTML => DOM}/03-single_tag_2.js              | 0
 tests/{HTML => DOM}/04-unescaped_in_script.js       | 0
 tests/{HTML => DOM}/05-tags_in_comment.js           | 0
 tests/{HTML => DOM}/06-comment_in_script.js         | 0
 tests/{HTML => DOM}/07-unescaped_in_style.js        | 0
 tests/{HTML => DOM}/08-extra_spaces_in_tag.js       | 0
 tests/{HTML => DOM}/09-unquoted_attrib.js           | 0
 tests/{HTML => DOM}/10-singular_attribute.js        | 0
 tests/{HTML => DOM}/11-text_outside_tags.js         | 0
 tests/{HTML => DOM}/12-text_only.js                 | 0
 tests/{HTML => DOM}/13-comment_in_text.js           | 0
 tests/{HTML => DOM}/14-comment_in_text_in_script.js | 0
 tests/{HTML => DOM}/15-non-verbose.js               | 0
 tests/{HTML => DOM}/16-ignore_whitespace.js         | 0
 tests/{HTML => DOM}/17-xml_namespace.js             | 0
 tests/{HTML => DOM}/18-enforce_empty_tags.js        | 0
 tests/{HTML => DOM}/19-ignore_empty_tags.js         | 0
 tests/{HTML => DOM}/20-template_script_tags.js      | 0
 tests/{HTML => DOM}/21-conditional_comments.js      | 0
 tests/{HTML => DOM}/22-lowercase_tags.js            | 0
 23 files changed, 2 insertions(+), 2 deletions(-)
 rename tests/{HTML => DOM}/01-basic.js (100%)
 rename tests/{HTML => DOM}/02-single_tag_1.js (100%)
 rename tests/{HTML => DOM}/03-single_tag_2.js (100%)
 rename tests/{HTML => DOM}/04-unescaped_in_script.js (100%)
 rename tests/{HTML => DOM}/05-tags_in_comment.js (100%)
 rename tests/{HTML => DOM}/06-comment_in_script.js (100%)
 rename tests/{HTML => DOM}/07-unescaped_in_style.js (100%)
 rename tests/{HTML => DOM}/08-extra_spaces_in_tag.js (100%)
 rename tests/{HTML => DOM}/09-unquoted_attrib.js (100%)
 rename tests/{HTML => DOM}/10-singular_attribute.js (100%)
 rename tests/{HTML => DOM}/11-text_outside_tags.js (100%)
 rename tests/{HTML => DOM}/12-text_only.js (100%)
 rename tests/{HTML => DOM}/13-comment_in_text.js (100%)
 rename tests/{HTML => DOM}/14-comment_in_text_in_script.js (100%)
 rename tests/{HTML => DOM}/15-non-verbose.js (100%)
 rename tests/{HTML => DOM}/16-ignore_whitespace.js (100%)
 rename tests/{HTML => DOM}/17-xml_namespace.js (100%)
 rename tests/{HTML => DOM}/18-enforce_empty_tags.js (100%)
 rename tests/{HTML => DOM}/19-ignore_empty_tags.js (100%)
 rename tests/{HTML => DOM}/20-template_script_tags.js (100%)
 rename tests/{HTML => DOM}/21-conditional_comments.js (100%)
 rename tests/{HTML => DOM}/22-lowercase_tags.js (100%)

diff --git a/tests/01-html.js b/tests/01-html.js
index 578669d..f4ce1fb 100644
--- a/tests/01-html.js
+++ b/tests/01-html.js
@@ -1,9 +1,9 @@
-//Runs tests for HTML
+//Runs tests for the DOM handler
 
 var helper = require("./test-helper.js"),
 	DefaultHandler = require("../lib/DomHandler.js");
 
-exports.dir = "/HTML/";
+exports.dir = "/DOM/";
 
 /*
 	function test()
diff --git a/tests/HTML/01-basic.js b/tests/DOM/01-basic.js
similarity index 100%
rename from tests/HTML/01-basic.js
rename to tests/DOM/01-basic.js
diff --git a/tests/HTML/02-single_tag_1.js b/tests/DOM/02-single_tag_1.js
similarity index 100%
rename from tests/HTML/02-single_tag_1.js
rename to tests/DOM/02-single_tag_1.js
diff --git a/tests/HTML/03-single_tag_2.js b/tests/DOM/03-single_tag_2.js
similarity index 100%
rename from tests/HTML/03-single_tag_2.js
rename to tests/DOM/03-single_tag_2.js
diff --git a/tests/HTML/04-unescaped_in_script.js b/tests/DOM/04-unescaped_in_script.js
similarity index 100%
rename from tests/HTML/04-unescaped_in_script.js
rename to tests/DOM/04-unescaped_in_script.js
diff --git a/tests/HTML/05-tags_in_comment.js b/tests/DOM/05-tags_in_comment.js
similarity index 100%
rename from tests/HTML/05-tags_in_comment.js
rename to tests/DOM/05-tags_in_comment.js
diff --git a/tests/HTML/06-comment_in_script.js b/tests/DOM/06-comment_in_script.js
similarity index 100%
rename from tests/HTML/06-comment_in_script.js
rename to tests/DOM/06-comment_in_script.js
diff --git a/tests/HTML/07-unescaped_in_style.js b/tests/DOM/07-unescaped_in_style.js
similarity index 100%
rename from tests/HTML/07-unescaped_in_style.js
rename to tests/DOM/07-unescaped_in_style.js
diff --git a/tests/HTML/08-extra_spaces_in_tag.js b/tests/DOM/08-extra_spaces_in_tag.js
similarity index 100%
rename from tests/HTML/08-extra_spaces_in_tag.js
rename to tests/DOM/08-extra_spaces_in_tag.js
diff --git a/tests/HTML/09-unquoted_attrib.js b/tests/DOM/09-unquoted_attrib.js
similarity index 100%
rename from tests/HTML/09-unquoted_attrib.js
rename to tests/DOM/09-unquoted_attrib.js
diff --git a/tests/HTML/10-singular_attribute.js b/tests/DOM/10-singular_attribute.js
similarity index 100%
rename from tests/HTML/10-singular_attribute.js
rename to tests/DOM/10-singular_attribute.js
diff --git a/tests/HTML/11-text_outside_tags.js b/tests/DOM/11-text_outside_tags.js
similarity index 100%
rename from tests/HTML/11-text_outside_tags.js
rename to tests/DOM/11-text_outside_tags.js
diff --git a/tests/HTML/12-text_only.js b/tests/DOM/12-text_only.js
similarity index 100%
rename from tests/HTML/12-text_only.js
rename to tests/DOM/12-text_only.js
diff --git a/tests/HTML/13-comment_in_text.js b/tests/DOM/13-comment_in_text.js
similarity index 100%
rename from tests/HTML/13-comment_in_text.js
rename to tests/DOM/13-comment_in_text.js
diff --git a/tests/HTML/14-comment_in_text_in_script.js b/tests/DOM/14-comment_in_text_in_script.js
similarity index 100%
rename from tests/HTML/14-comment_in_text_in_script.js
rename to tests/DOM/14-comment_in_text_in_script.js
diff --git a/tests/HTML/15-non-verbose.js b/tests/DOM/15-non-verbose.js
similarity index 100%
rename from tests/HTML/15-non-verbose.js
rename to tests/DOM/15-non-verbose.js
diff --git a/tests/HTML/16-ignore_whitespace.js b/tests/DOM/16-ignore_whitespace.js
similarity index 100%
rename from tests/HTML/16-ignore_whitespace.js
rename to tests/DOM/16-ignore_whitespace.js
diff --git a/tests/HTML/17-xml_namespace.js b/tests/DOM/17-xml_namespace.js
similarity index 100%
rename from tests/HTML/17-xml_namespace.js
rename to tests/DOM/17-xml_namespace.js
diff --git a/tests/HTML/18-enforce_empty_tags.js b/tests/DOM/18-enforce_empty_tags.js
similarity index 100%
rename from tests/HTML/18-enforce_empty_tags.js
rename to tests/DOM/18-enforce_empty_tags.js
diff --git a/tests/HTML/19-ignore_empty_tags.js b/tests/DOM/19-ignore_empty_tags.js
similarity index 100%
rename from tests/HTML/19-ignore_empty_tags.js
rename to tests/DOM/19-ignore_empty_tags.js
diff --git a/tests/HTML/20-template_script_tags.js b/tests/DOM/20-template_script_tags.js
similarity index 100%
rename from tests/HTML/20-template_script_tags.js
rename to tests/DOM/20-template_script_tags.js
diff --git a/tests/HTML/21-conditional_comments.js b/tests/DOM/21-conditional_comments.js
similarity index 100%
rename from tests/HTML/21-conditional_comments.js
rename to tests/DOM/21-conditional_comments.js
diff --git a/tests/HTML/22-lowercase_tags.js b/tests/DOM/22-lowercase_tags.js
similarity index 100%
rename from tests/HTML/22-lowercase_tags.js
rename to tests/DOM/22-lowercase_tags.js

From 82e3ab4f81b581954b9d90ae7eff48ea01a4feb4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 21 Jan 2012 14:32:35 +0100
Subject: [PATCH 188/450] Renamed `01-html.js` to `01-dom.js

It tests the DOMHandler
---
 tests/00-runtests.js            | 2 +-
 tests/{01-html.js => 01-dom.js} | 0
 2 files changed, 1 insertion(+), 1 deletion(-)
 rename tests/{01-html.js => 01-dom.js} (100%)

diff --git a/tests/00-runtests.js b/tests/00-runtests.js
index a8c8eb0..03f5e33 100644
--- a/tests/00-runtests.js
+++ b/tests/00-runtests.js
@@ -45,7 +45,7 @@ function runTests(test){
 
 //run all tests
 [
- "./01-html.js",
+ "./01-dom.js",
  "./02-feed.js",
  "./03-events.js",
  "./04-dom_utils.js",
diff --git a/tests/01-html.js b/tests/01-dom.js
similarity index 100%
rename from tests/01-html.js
rename to tests/01-dom.js

From 1a0be3ce41099d543a86bf9cea1a25207ea9a383 Mon Sep 17 00:00:00 2001
From: Siddharth Mahendraker <siddharth_mahen@me.com>
Date: Fri, 20 Jan 2012 23:45:13 +0200
Subject: [PATCH 189/450] Cleaned up DomUtils

* Attempted to perform some optimization
* Fixed some style inconsistencies
* Added test for DomUtils HTML methods
---
 lib/DomUtils.js                 | 288 +++++++++++++++++---------------
 tests/DomUtils/04-outer_html.js |  10 ++
 tests/DomUtils/05-inner_html.js |  10 ++
 3 files changed, 176 insertions(+), 132 deletions(-)
 create mode 100644 tests/DomUtils/04-outer_html.js
 create mode 100644 tests/DomUtils/05-inner_html.js

diff --git a/lib/DomUtils.js b/lib/DomUtils.js
index 6fd8a77..02e2de5 100644
--- a/lib/DomUtils.js
+++ b/lib/DomUtils.js
@@ -1,139 +1,163 @@
-var ElementType = require("./ElementType.js");
+var ElementType = require("./ElementType.js"),
+    arrayPush = Array.prototype.push,
+    DomUtils = module.exports;
 
-function getTest (checkVal) {
-	return function (value) { return value === checkVal; };
-}
-
-var arrayPush = Array.prototype.push;
 function filterArray(test, arr, recurse, limit){
-	var result = [], childs;
-	
-	for(var i = 0, j = arr.length; i < j; i++){
-		if(test(arr[i])){
-			result.push(arr[i]);
-			if(--limit <= 0) break;
-		}
-		
-		if(recurse && (childs = arr[i].children)){
-			childs = filterArray(test, childs, recurse, limit);
-			arrayPush.apply(result, childs);
-			limit -= childs.length;
-			if(limit <= 0) break;
-		}
-	}
-	return result;
+    var result = [], childs;
+
+    for(var i = 0, j = arr.length; i < j; i++){
+        if(test(arr[i])){
+            result.push(arr[i]);
+            if(--limit <= 0) break;
+        }
+
+        childs = arr[i].children;
+        if(recurse && childs){
+            childs = filterArray(test, childs, recurse, limit);
+            arrayPush.apply(result, childs);
+            limit -= childs.length;
+            if(limit <= 0) break;
+        }
+    }
+
+    return result;
 }
 
 function filter(test, element, recurse, limit){
-	if(recurse !== false) recurse = true;
-	if(isNaN(limit)) limit = 1/0;
-	if(!Array.isArray(element)){
-		element = [element];
-	}
-	return filterArray(test, element, recurse, limit);
+    if(recurse !== false) recurse = true;
+    if(isNaN(limit)) limit = Infinity;
+    if(!Array.isArray(element)) element = [element];
+
+    return filterArray(test, element, recurse, limit);
+}
+
+DomUtils.testElement = function(options, element){
+    var type = element.type,
+        keys = Object.keys(options),
+        len = keys.length;
+
+    for(var i = 0; i < len; i++){
+        var key = keys[i];
+
+        switch(key){
+            case "tag_name":
+                if(type !== ElementType.Tag && type !== ElementType.Script && type !== ElementType.Style) return false;
+                if(!options.tag_name(element.name)) return false;
+                break;
+            case "tag_type":
+                if(!options.tag_type(type)) return false;
+                break;
+            case "tag_contains":
+                if(type !== ElementType.Text && type !== ElementType.Comment && type !== ElementType.Directive) return false;
+                if(!options.tag_contains(element.data)) return false;
+                break;
+            default:
+                if(!element.attribs || !options[key](element.attribs[key])) return false;
+                break;
+        }
+    }
+
+    return true;
+}
+
+DomUtils.getElements = function(options, element, recurse, limit){
+    var keys = Object.keys(options),
+        len = keys.length;
+
+    for(var i = 0; i < len; i++){
+        var key = keys[i];
+
+        if(typeof options[key] !== "function"){
+            var checker = options[key];
+            options[key] = function(val){ return val === checker };
+        }
+    }
+
+    return filter(this.testElement.bind(null, options), element, recurse, limit);
+}
+
+DomUtils.getElementById = function(id, element, recurse){
+    var result;
+
+    if(typeof id === "function"){
+        result = filter(function(elem){ return elem.attribs && id(elem.attribs) }, element, recurse, 1);
+    }else{
+        result = filter(function(elem){ return elem.attribs && elem.attribs.id === id }, element, recurse, 1);
+    }
+
+    return result.length ? result[0] : null;
 }
 
-module.exports = {
-	testElement: function testElement(options, element) {
-		 var type = element.type;
-	
-		for(var key in options){
-			if(key === "tag_name"){
-				if(type !== ElementType.Tag && type !== ElementType.Script && type !== ElementType.Style) return false;
-				if(!options.tag_name(element.name)) return false;
-			} else if(key === "tag_type") {
-				if(!options.tag_type(type)) return false;
-			} else if(key === "tag_contains") {
-				if(type !== ElementType.Text && type !== ElementType.Comment && type !== ElementType.Directive) return false;
-				if(!options.tag_contains(element.data)) return false;
-			} else if(!element.attribs || !options[key](element.attribs[key]))
-				return false;
-		}
-	
-		 return true;
-	}, 
-	
-	getElements: function(options, element, recurse, limit){
-		for(var key in options){
-			if(typeof options[key] !== "function"){
-				options[key] = getTest(options[key]);
-			}
-		}
-		
-		return filter(this.testElement.bind(null, options), element, recurse, limit);
-	},
-
-	getElementById: function(id, element, recurse) {
-		var result;
-		if(typeof id === "function"){
-			result = filter(function(elem){
-				return elem.attribs && id(elem.attribs);
-			}, element, recurse, 1);
-		}
-		else{
-			result = filter(function(elem){
-				return elem.attribs && elem.attribs.id === id;
-			}, element, recurse, 1);
-		}
-		return result.length ? result[0] : null;
-	},
-
-	getElementsByTagName: function(name, element, recurse, limit){
-		if(typeof name === "function") return filter(function(elem){
-			var type = elem.type;
-			if(type !== ElementType.Tag && type !== ElementType.Script && type !== ElementType.Style) return false;
-			return name(elem.name);
-		}, element, recurse, limit);
-		
-		return filter(function(elem){
-			var type = elem.type;
-			if(type !== ElementType.Tag && type !== ElementType.Script && type !== ElementType.Style) return false;
-			return elem.name === name;
-		}, element, recurse, limit);
-	},
-
-	getElementsByTagType: function(type, element, recurse, limit){
-		if(typeof type === "function"){
-			return filter(function(elem){return type(elem.type);}, element, recurse, limit);
-		}
-		else return filter(function(elem){return elem.type === type;}, element, recurse, limit);
-	},
-	
-	getInnerHTML: function(elem){
-		if(!elem.children) return "";
-		
-		var childs = elem.children,
-			childNum = childs.length,
-			ret = "";
-		
-		for(var i = 0; i < childNum; i++){
-			ret += this.getOuterHTML(childs[i]);
-		}
-		
-		return ret;
-	},
-	
-	getOuterHTML: function(elem){
-		var type = elem.type;
-
-		if(type === ElementType.Text) return elem.data;
-		if(type === ElementType.Comment) return "<!--" + elem.data + "-->";
-		
-		var ret = "<" + elem.name;
-		
-		var value;
-		for(var name in elem.attribs){
-			value = elem.attribs[name];
-			ret += " " + name + "=";
-			
-			if(/^[^\s"\'\`\=\<\>]+$/.test(value)) ret += value;
-			else if(value.indeOf("\"") !== -1) ret += "'" + value + "'";
-			else ret += "\"" + value + "\"";
-		}
-		
-		if(type === ElementType.Directive) return ret + ">";
-		if(type === ElementType.Tag && !elem.children) return ret + " />";
-		
-		return ">" + ret + this.getInnerHTML(elem) + "</" + elem.name + ">";
-	}
-};
\ No newline at end of file
+DomUtils.getElementsByTagName = function(name, element, recurse, limit){
+    if(typeof name === "function") return filter(function(elem){
+        var type = elem.type;
+        if(type !== ElementType.Tag && type !== ElementType.Script && type !== ElementType.Style)
+            return false;
+        return name(elem.name);
+    }, element, recurse, limit);
+
+    return filter(function(elem){
+        var type = elem.type;
+        if(type !== ElementType.Tag && type !== ElementType.Script && type !== ElementType.Style)
+            return false;
+        return elem.name === name;
+    }, element, recurse, limit);
+}
+
+DomUtils.getElementsByTagType = function(type, element, recurse, limit){
+    if(typeof type === "function")
+        return filter(function(elem){ return type(elem.type) }, element, recurse, limit);
+    else
+        return filter(function(elem){ return elem.type === type }, element, recurse, limit);
+}
+
+DomUtils.getInnerHTML = function(elem){
+    if(!elem.children) return "";
+
+    var childs = elem.children,
+        childNum = childs.length,
+        ret = "";
+
+    for(var i = 0; i < childNum; i++){
+        ret += this.getOuterHTML(childs[i]);
+    }
+
+    return ret;
+}
+
+DomUtils.getOuterHTML = function(elem){
+    var type = elem.type,
+        name = elem.name;
+
+    if(type === ElementType.Text) return elem.data;
+    if(type === ElementType.Comment) return "<!--" + elem.data + "-->";
+
+    var attrStr = "";
+    if(elem.attribs){
+        var attrs = Object.keys(elem.attribs),
+            len = attrs.length;
+
+        for(var i = 0; i < len; i++){
+            var attr = attrs[i],
+                val = elem.attribs[attr];
+
+            attrStr += " " + attr + "=\"" + val + "\"";
+
+            /* Is this required? Method forgets quotes
+            if(/^[^\s"\'\`\=\<\>]+$/.test(val))
+                attrStr += val;
+            else if(val.indeOf("\"") !== -1)
+                attrStr += "'" + val + "'";
+            else
+                attrStr += "\"" + val + "\"";
+            */
+        }
+    }
+
+    var ret = "<" + name + attrStr + ">";
+
+    if(type === ElementType.Directive) return ret;
+
+    ret += this.getInnerHTML(elem) + "</" + name + ">"
+    return ret;
+}
diff --git a/tests/DomUtils/04-outer_html.js b/tests/DomUtils/04-outer_html.js
new file mode 100644
index 0000000..0ed8374
--- /dev/null
+++ b/tests/DomUtils/04-outer_html.js
@@ -0,0 +1,10 @@
+var DomUtils = require("../../lib/DomUtils.js");
+
+exports.name = "Get outer HTML";
+exports.getElements = function(dom){
+    return '<tag1 id="asdf"> <script>text</script> <!-- comment --> <tag2> text </tag2></tag1>';
+};
+exports.getByFunction = function(dom){
+    return DomUtils.getOuterHTML(DomUtils.getElementById("asdf", dom, true));
+};
+exports.expected = '<tag1 id="asdf"> <script>text</script> <!-- comment --> <tag2> text </tag2></tag1>';
diff --git a/tests/DomUtils/05-inner_html.js b/tests/DomUtils/05-inner_html.js
new file mode 100644
index 0000000..72dba0e
--- /dev/null
+++ b/tests/DomUtils/05-inner_html.js
@@ -0,0 +1,10 @@
+var DomUtils = require("../../lib/DomUtils.js");
+
+exports.name = "Get inner HTML";
+exports.getElements = function(dom){
+    return ' <script>text</script> <!-- comment --> <tag2> text </tag2>';
+};
+exports.getByFunction = function(dom){
+    return DomUtils.getInnerHTML(DomUtils.getElementById("asdf", dom, true));
+};
+exports.expected = ' <script>text</script> <!-- comment --> <tag2> text </tag2>';

From 862e0b73a3c69ae42abfbd2c2b47f644502a9648 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 28 Jan 2012 17:00:46 +0100
Subject: [PATCH 190/450] Fixed a bug

`<tag />` got `/` as an attribute
---
 lib/Parser.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index 21ca835..7827f52 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -14,7 +14,7 @@ function Parser(cbs, options){
 }
 
 //Regular expressions used for cleaning up and parsing (stateless)
-var _reAttrib = /\s(\S+?)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S+))|(?=\s)|\/|$)/g,
+var _reAttrib = /\s([^\s\/]+?)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S+))|(?=\s)|\/|$)/g,
 	_reTail = /\s|\/|$/;
 
 Parser.prototype._options = {

From a6ce1f43ed4bf9123d920b59e635e8d873569590 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 29 Jan 2012 13:27:12 +0100
Subject: [PATCH 191/450] 2.2.3

---
 package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package.json b/package.json
index 8deb4b5..624215b 100644
--- a/package.json
+++ b/package.json
@@ -1,7 +1,7 @@
 {
 	"name": "htmlparser2",
 	"description": "Performance-optimized forgiving HTML/XML/RSS parser",
-	"version": "2.2.2",
+	"version": "2.2.3",
 	"author": "Felix Boehm <me@feedic.com>",
 	"keywords": ["html", "parser", "streams", "xml", "dom", "rss", "feed", "atom"],
 	"contributors": ["Chris Winberry <chris@winberry.net>"],

From 1ec1578c8c5857b91fba9413fb5dc74a67fb7778 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 29 Jan 2012 13:29:15 +0100
Subject: [PATCH 192/450] Sort properties before stringify in runtests

---
 tests/00-runtests.js | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/tests/00-runtests.js b/tests/00-runtests.js
index 03f5e33..ab997ed 100644
--- a/tests/00-runtests.js
+++ b/tests/00-runtests.js
@@ -4,6 +4,14 @@ var runCount = 0,
 	testCount = 0,
 	failCount = 0;
 
+function getSortedObject(obj){
+	if(typeof obj !== "object" || Array.isArray(obj)) return obj;
+	return Object.keys(obj).sort().reduce(function(o, name){
+		o[name] = obj[name];
+		return o;
+	}, {});
+};
+
 function runTests(test){
 	var begin = Date.now();
 	//read files, load them, run them
@@ -23,8 +31,8 @@ function runTests(test){
 		test.test(file, function(err, dom){
 			if(err) console.log("Handler error:", err);
 			
-			var expected = JSON.stringify(file.expected, null, 2),
-				got = JSON.stringify(dom, null, 2);
+			var expected = JSON.stringify(getSortedObject(file.expected), null, 2),
+				got = JSON.stringify(getSortedObject(dom), null, 2);
 			if(expected !== got){
 				failed = true;
 				console.log("Expected", expected, "Got", got, second);

From d2bdcbc046a6d3eb63d946fbd008d258d57ae6a1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 29 Jan 2012 13:29:59 +0100
Subject: [PATCH 193/450] RDF feeds now have a type of `rdf`

---
 lib/FeedHandler.js    | 2 +-
 tests/Feeds/03-rdf.js | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/FeedHandler.js b/lib/FeedHandler.js
index 6ec480b..ca6c856 100644
--- a/lib/FeedHandler.js
+++ b/lib/FeedHandler.js
@@ -56,7 +56,7 @@ FeedHandler.prototype.onend = function() {
 		} else{
 			childs = getElements("channel", feedRoot.children, true).children;
 
-			feed.type = feedRoot.name;
+			feed.type = feedRoot.name.substr(0, 3);
 			feed.id = "";
 			if(tmp = fetch("title", childs)) feed.title = tmp;
 			if(tmp = fetch("link", childs)) feed.link = tmp;
diff --git a/tests/Feeds/03-rdf.js b/tests/Feeds/03-rdf.js
index 3460444..96065b1 100644
--- a/tests/Feeds/03-rdf.js
+++ b/tests/Feeds/03-rdf.js
@@ -9,7 +9,7 @@ exports.options = {
 exports.html = require("fs").readFileSync(__dirname+"/../Documents/RDF_Example.xml").toString();
 
 exports.expected = {
-  "type": "rdf:RDF",
+  "type": "rdf",
   "id": "",
   "title": "craigslist | all community in SF bay area",
   "link": "http://sfbay.craigslist.org/ccc/",

From 9eebc264c4b99f1b3434b9491cdf7f385040dae4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 29 Jan 2012 14:13:54 +0100
Subject: [PATCH 194/450] Removed clutter from tests

---
 tests/00-runtests.js   | 1 -
 tests/02-feed.js       | 9 ++++++---
 tests/Feeds/01-rss.js  | 7 -------
 tests/Feeds/02-atom.js | 7 -------
 tests/Feeds/03-rdf.js  | 8 --------
 5 files changed, 6 insertions(+), 26 deletions(-)

diff --git a/tests/00-runtests.js b/tests/00-runtests.js
index ab997ed..54f7fe0 100644
--- a/tests/00-runtests.js
+++ b/tests/00-runtests.js
@@ -13,7 +13,6 @@ function getSortedObject(obj){
 };
 
 function runTests(test){
-	var begin = Date.now();
 	//read files, load them, run them
 	fs.readdirSync(__dirname + test.dir
 	).map(function(file){
diff --git a/tests/02-feed.js b/tests/02-feed.js
index e2b0911..cd5dc56 100644
--- a/tests/02-feed.js
+++ b/tests/02-feed.js
@@ -1,7 +1,10 @@
 //Runs tests for feeds
 
 var helper = require("./test-helper.js"),
-	FeedHandler = require("../lib/FeedHandler.js");
+	FeedHandler = require("../lib/FeedHandler.js"),
+	parserOpts = {
+		xmlMode: true
+	};
 
 exports.dir = "/Feeds/";
 
@@ -9,6 +12,6 @@ exports.test = function(test, cb){
 	var handler = new FeedHandler(function(err, dom){
 		if(err) cb(err, 0); //return the error
 		else cb(null, dom);
-	}, test.options.handler);
-	helper.writeToParser(handler, test.options.parser, test.html);
+	});
+	helper.writeToParser(handler, parserOpts, test.html);
 };
\ No newline at end of file
diff --git a/tests/Feeds/01-rss.js b/tests/Feeds/01-rss.js
index 0af3e1a..b50a468 100644
--- a/tests/Feeds/01-rss.js
+++ b/tests/Feeds/01-rss.js
@@ -1,11 +1,4 @@
 exports.name = "RSS (2.0)";
-exports.options = {
-	handler: {},
-	parser: {
-		xmlMode: true
-	}
-};
-exports.type = "rss";
 exports.html = require("fs").readFileSync(__dirname+"/../Documents/RSS_Example.xml").toString();
 exports.expected = {
 	type: "rss",
diff --git a/tests/Feeds/02-atom.js b/tests/Feeds/02-atom.js
index a9c5c47..c986d73 100644
--- a/tests/Feeds/02-atom.js
+++ b/tests/Feeds/02-atom.js
@@ -1,11 +1,4 @@
 exports.name = "Atom (1.0)";
-exports.options = {
-	handler: {},
-	parser: {
-		xmlMode: true
-	}
-};
-exports.type = "rss";
 exports.html = require("fs").readFileSync(__dirname+"/../Documents/Atom_Example.xml").toString();
 exports.expected = {
 	type: "atom",
diff --git a/tests/Feeds/03-rdf.js b/tests/Feeds/03-rdf.js
index 96065b1..2c7383c 100644
--- a/tests/Feeds/03-rdf.js
+++ b/tests/Feeds/03-rdf.js
@@ -1,13 +1,5 @@
 exports.name = "RDF test";
-exports.options = {
-	handler: {},
-	parser: {
-		xmlMode: true
-	}
-};
-
 exports.html = require("fs").readFileSync(__dirname+"/../Documents/RDF_Example.xml").toString();
-
 exports.expected = {
   "type": "rdf",
   "id": "",

From e7cb57f87043b358b24a8193d9720c7e6ef3413c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Tue, 7 Feb 2012 19:53:44 +0100
Subject: [PATCH 195/450] Set prototype of emptyTags to null

Otherwise, tags named after properties of the object prototype would
have been found.
---
 lib/Parser.js | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lib/Parser.js b/lib/Parser.js
index 7827f52..b2033aa 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -254,6 +254,7 @@ Parser.prototype._writeSpecial = function(rawData, lastTagSep){
 };
 
 var emptyTags = {
+	__proto__: null,
 	area: true,
 	base: true,
 	basefont: true,

From f406be9731952c436ebfe9721ea4c44fb87a32a6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Wed, 8 Feb 2012 20:05:09 +0100
Subject: [PATCH 196/450] Added bench.js

taken from astro/node-expat
---
 README.md      |  32 ++++++++++------
 tests/bench.js | 100 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 121 insertions(+), 11 deletions(-)
 create mode 100644 tests/bench.js

diff --git a/README.md b/README.md
index f1aad76..cb32994 100644
--- a/README.md
+++ b/README.md
@@ -5,15 +5,6 @@ A forgiving HTML/XML/RSS parser written in JS for NodeJS. The parser can handle
 ##Installing
 	npm install htmlparser2
 
-##How is this different from [node-htmlparser](https://github.com/tautologistics/node-htmlparser)?
-This is a fork of the project above. The main difference is that this is just intended to be used with node (it runs on other platforms using [browserify](https://github.com/substack/node-browserify)). Besides, the code is much better structured, has less duplications and is remarkably faster than the original. 
-
-The parser now provides a callback interface close to [sax.js](https://github.com/isaacs/sax-js) (originally intended for [readabilitySAX](https://github.com/fb55/readabilitysax)). I also fixed a couple of bugs & included some pull requests for the original project (eg. [RDF feed support](https://github.com/tautologistics/node-htmlparser/pull/35)).
-
-The support for location data and verbose output was removed a couple of versions ago. It's still available in the [verbose branch](https://github.com/FB55/node-htmlparser/tree/verbose) (if you really need it, for whatever reason that may be). 
-
-The `DefaultHandler` and the `RssHandler` were renamed to clarify their purpose (to `DomHandler` and `FeedHandler`). The old names are still available when requiring `htmlparser2`, so your code should work as expected.
-
 ##Usage
 
 ```javascript
@@ -56,7 +47,26 @@ Read more about the DomHandler in the [wiki](https://github.com/FB55/node-htmlpa
 ##Parsing RSS/RDF/Atom Feeds
 
 ```javascript
-new htmlparser.FeedHandler(function (error, feed) {
+new htmlparser.FeedHandler(function(<error> error, <object> feed){
     ...
 });
-```
\ No newline at end of file
+```
+
+##Performance
+Using a slightly modified version of [node-expat](https://github.com/astro/node-expat)s `bench.js`, I received the following results (on a MacBook (late 2010):
+
+* [htmlparser](https://github.com/tautologistics/node-htmlparser): 51779 el/s
+* [sax.js](https://github.com/isaacs/sax-js): 53169 el/s
+* [node-expat](https://github.com/astro/node-expat): 103388 el/s
+* [htmlparser2](https://github.com/fb55/node-htmlparser): 118614 el/s
+
+The test may be found in `tests/bench.js`.
+
+##How is this different from [node-htmlparser](https://github.com/tautologistics/node-htmlparser)?
+This is a fork of the project above. The main difference is that this is just intended to be used with node (it runs on other platforms using [browserify](https://github.com/substack/node-browserify)). Besides, the code is much better structured, has less duplications and is remarkably faster than the original. 
+
+The parser now provides a callback interface close to [sax.js](https://github.com/isaacs/sax-js) (originally intended for [readabilitySAX](https://github.com/fb55/readabilitysax)). I also fixed a couple of bugs & included some pull requests for the original project (eg. [RDF feed support](https://github.com/tautologistics/node-htmlparser/pull/35)).
+
+The support for location data and verbose output was removed a couple of versions ago. It's still available in the [verbose branch](https://github.com/FB55/node-htmlparser/tree/verbose). 
+
+The `DefaultHandler` and the `RssHandler` were renamed to clarify their purpose (to `DomHandler` and `FeedHandler`). The old names are still available when requiring `htmlparser2`, so your code should work as expected.
\ No newline at end of file
diff --git a/tests/bench.js b/tests/bench.js
new file mode 100644
index 0000000..1e7b599
--- /dev/null
+++ b/tests/bench.js
@@ -0,0 +1,100 @@
+/*
+var node_xml = require("node-xml");
+
+function NodeXmlParser() {
+    var parser = new node_xml.SaxParser(function(cb) { });
+    this.parse = function(s) {
+	parser.parseString(s);
+    };
+}
+
+var p = new NodeXmlParser();
+*//*
+var libxml = require("libxmljs");
+
+function LibXmlJsParser() {
+    var parser = new libxml.SaxPushParser(function(cb) { });
+    this.parse = function(s) {
+	parser.push(s, false);
+    };
+}
+
+var p = new LibXmlJsParser();
+*//*
+var sax = require('sax');
+
+function SaxParser() {
+    var parser = sax.parser();
+	this.parse = function(s) {
+	parser.write(s);
+	}
+}
+
+var p = new SaxParser();
+*//*
+var expat = require('node-expat');
+
+function ExpatParser() {
+    var parser = new expat.Parser();
+    this.parse = function(s) {
+	parser.parse(s, false);
+    };
+}
+
+var p = new ExpatParser();
+*//*
+var htmlparser = require('htmlparser');
+
+function HtmlParser() {
+    var handler = new htmlparser.DefaultHandler();
+    var parser = new htmlparser.Parser(handler);
+    this.parse = function(s) {
+    parser.parseComplete(s);
+    };
+}
+
+var p = new HtmlParser();
+*/
+var htmlparser2 = require('htmlparser2/lib/Parser.js');
+
+// provide callbacks
+// otherwise, parsing would be optimized
+var emptyCBs = {
+    onopentagname: function(){},
+    onattribute: function(){},
+    ontext: function(){},
+    onclosetag: function(){}
+};
+
+function HtmlParser2() {
+    var parser = new htmlparser2(emptyCBs);
+    this.parse = function(s) {
+    parser.write(s);
+    };
+}
+
+var p = new HtmlParser2();
+
+
+p.parse("<r>");
+var nEl = 0;
+(function d() {
+    p.parse("<foo bar='baz'>quux</foo>");
+    nEl++;
+    process.nextTick(d);
+})();
+
+var its =[];
+setInterval(function() {
+    console.log(nEl + " el/s");
+	its.push(nEl);
+    nEl = 0;
+}, 1e3);
+
+process.on('SIGINT', function () {
+	var average = its.reduce(function(average, v){
+		return average+v;
+	}) / its.length;
+	console.log("Average:", average, "el/s");
+	process.exit(0);
+});
\ No newline at end of file

From ebaf3a739fb33d9dc7ae875220abafdfe9cff38c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Thu, 16 Feb 2012 16:59:41 +0100
Subject: [PATCH 197/450] Added ProxyHandler

---
 lib/ProxyHandler.js | 17 +++++++++++++++++
 lib/index.js        |  4 ++++
 2 files changed, 21 insertions(+)
 create mode 100644 lib/ProxyHandler.js

diff --git a/lib/ProxyHandler.js b/lib/ProxyHandler.js
new file mode 100644
index 0000000..e67f36d
--- /dev/null
+++ b/lib/ProxyHandler.js
@@ -0,0 +1,17 @@
+var ProxyHandler = function(cbs){
+	if(cbs) this._cbs = cbs;
+};
+
+ProxyHandler.prototype._cbs = {};
+
+Object.keys(require("./").EVENTS).forEach(function(name){
+	ProxyHandler.prototype.__defineGetter__(name, function(){
+		return this._cbs[name];
+	});
+	ProxyHandler.prototype.__defineSetter__(name, function(value){
+		//allow functions to be overwritten
+		Object.defineProperty(this, name, {value: value});
+	});
+});
+
+module.exports = ProxyHandler;
\ No newline at end of file
diff --git a/lib/index.js b/lib/index.js
index f6e788e..621a06b 100644
--- a/lib/index.js
+++ b/lib/index.js
@@ -25,6 +25,10 @@ module.exports = {
 		defineProp(this, "WritableStream", {value:require("./WritableStream.js")});
 		return this.WritableStream;
 	},
+	get ProxyHandler(){
+		defineProp(this, "ProxyHandler", {value:require("./ProxyHandler.js")});
+		return this.ProxyHandler;
+	},
 	get DomUtils(){
 		defineProp(this, "DomUtils", {value:require("./DomUtils.js")});
 		return this.DomUtils;

From 6c3fbf2a748145a3426ee64b18223f07dfffb5a6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Tue, 28 Feb 2012 21:19:36 +0100
Subject: [PATCH 198/450] Parser#_processOpenTag now takes a single argument

It does the name parsing itself
---
 lib/Parser.js | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index b2033aa..27a8a95 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -15,7 +15,7 @@ function Parser(cbs, options){
 
 //Regular expressions used for cleaning up and parsing (stateless)
 var _reAttrib = /\s([^\s\/]+?)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S+))|(?=\s)|\/|$)/g,
-	_reTail = /\s|\/|$/;
+    _reTail = /\s|\/|$/;
 
 Parser.prototype._options = {
 	xmlMode: false, //Special behavior for script/style tags by default
@@ -203,7 +203,7 @@ Parser.prototype._parseTags = function(force){
 					);
 				}
 			}
-			else this._processOpenTag(this._parseTagName(elementData), elementData);
+			else this._processOpenTag(elementData);
 		}
 		else{
 			if(this._contentFlags !== 0){
@@ -283,7 +283,7 @@ Parser.prototype._processCloseTag = function(name){
 	}
 	//many browsers (eg. Safari, Chrome) convert </br> to <br>
 	else if(name === "br" && !this._options.xmlMode)
-		this._processOpenTag(name, "/");
+		this._processOpenTag(name + "/");
 };
 
 Parser.prototype._parseAttributes = function(data){
@@ -301,8 +301,10 @@ var parseAttributes = function(data){
 	return attrs;
 };
 
-Parser.prototype._processOpenTag = function(name, data){
-	var type = ElementType.Tag;
+Parser.prototype._processOpenTag = function(data){
+	var name = this._parseTagName(data),
+	    type = ElementType.Tag;
+	
 	if(this._options.xmlMode){ /*do nothing*/ }
 	else if(name === "script") type = ElementType.Script;
 	else if(name === "style")  type = ElementType.Style;

From 6ebf630eb763d4838bf865404db6bd64ba759f35 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Tue, 28 Feb 2012 21:24:09 +0100
Subject: [PATCH 199/450] Use onopentagname and onattribute events in
 DomHandler

That ridiculous for-in-loop was removed!
---
 lib/DomHandler.js | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/lib/DomHandler.js b/lib/DomHandler.js
index 3000afd..1f6eea3 100644
--- a/lib/DomHandler.js
+++ b/lib/DomHandler.js
@@ -60,20 +60,21 @@ DomHandler.prototype._addDomElement = function(element){
 	}
 };
 
-DomHandler.prototype.onopentag = function(name, attribs){
+DomHandler.prototype.onopentagname = function(name){
 	var element = {
 		type: name === "script" ? ElementType.Script : name === "style" ? ElementType.Style : ElementType.Tag,
 		name: name
 	};
-	//for some reason, an if doesn't work
-	for(var i in attribs){
-		element.attribs = attribs;
-		break;
-	}
 	this._addDomElement(element);
 	this._tagStack.push(element);
 };
 
+DomHandler.prototype.onattribute = function(name, value){
+	var element = this._tagStack[this._tagStack.length-1];
+	if(!("attribs" in element)) element.attribs = {};
+	element.attribs[name] = value;
+};
+
 DomHandler.prototype.ontext = function(data){
 	if(this._options.ignoreWhitespace && data.trim() === "") return;
 	this._addDomElement({

From 3f3c030945fc845fac5c8abe3221776a271d18cb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 4 Mar 2012 15:50:54 +0100
Subject: [PATCH 200/450] Use JSON instead of JS files for describing tests

---
 tests/DOM/01-basic.js                       |   39 -
 tests/DOM/01-basic.json                     |   41 +
 tests/DOM/02-single_tag_1.js                |   20 -
 tests/DOM/02-single_tag_1.json              |   22 +
 tests/DOM/03-single_tag_2.js                |   20 -
 tests/DOM/03-single_tag_2.json              |   22 +
 tests/DOM/04-unescaped_in_script.js         |   27 -
 tests/DOM/04-unescaped_in_script.json       |   29 +
 tests/DOM/05-tags_in_comment.js             |   18 -
 tests/DOM/05-tags_in_comment.json           |   20 +
 tests/DOM/06-comment_in_script.js           |   18 -
 tests/DOM/06-comment_in_script.json         |   20 +
 tests/DOM/07-unescaped_in_style.js          |   17 -
 tests/DOM/07-unescaped_in_style.json        |   23 +
 tests/DOM/08-extra_spaces_in_tag.js         |   21 -
 tests/DOM/08-extra_spaces_in_tag.json       |   23 +
 tests/DOM/09-unquoted_attrib.js             |   21 -
 tests/DOM/09-unquoted_attrib.json           |   23 +
 tests/DOM/10-singular_attribute.js          |   16 -
 tests/DOM/10-singular_attribute.json        |   18 +
 tests/DOM/11-text_outside_tags.js           |   20 -
 tests/DOM/11-text_outside_tags.json         |   22 +
 tests/DOM/12-text_only.js                   |   12 -
 tests/DOM/12-text_only.json                 |   14 +
 tests/DOM/13-comment_in_text.js             |   20 -
 tests/DOM/13-comment_in_text.json           |   22 +
 tests/DOM/14-comment_in_text_in_script.js   |   26 -
 tests/DOM/14-comment_in_text_in_script.json |   28 +
 tests/DOM/15-non-verbose.js                 |   17 -
 tests/DOM/15-non-verbose.json               |   25 +
 tests/DOM/16-ignore_whitespace.js           |   38 -
 tests/DOM/16-ignore_whitespace.json         |   42 +
 tests/DOM/17-xml_namespace.js               |   18 -
 tests/DOM/17-xml_namespace.json             |   20 +
 tests/DOM/18-enforce_empty_tags.js          |   16 -
 tests/DOM/18-enforce_empty_tags.json        |   18 +
 tests/DOM/19-ignore_empty_tags.js           |   18 -
 tests/DOM/19-ignore_empty_tags.json         |   22 +
 tests/DOM/20-template_script_tags.js        |   13 -
 tests/DOM/20-template_script_tags.json      |   23 +
 tests/DOM/21-conditional_comments.js        |   16 -
 tests/DOM/21-conditional_comments.json      |   18 +
 tests/DOM/22-lowercase_tags.js              |   39 -
 tests/DOM/22-lowercase_tags.json            |   43 +
 tests/Events/01-simple.js                   |   39 -
 tests/Events/01-simple.json                 |   44 +
 tests/Events/02-template.js                 |   57 -
 tests/Events/02-template.json               |   62 ++
 tests/Events/03-lowercase_tags.js           |   39 -
 tests/Events/03-lowercase_tags.json         |   46 +
 tests/Events/04-cdata.js                    |   80 --
 tests/Events/04-cdata.json                  |   85 ++
 tests/Stream/01-basic.j.json                |   83 ++
 tests/Stream/01-basic.js                    |   82 --
 tests/Stream/02-RSS.j.json                  | 1093 +++++++++++++++++++
 tests/Stream/02-RSS.js                      | 1092 ------------------
 56 files changed, 1951 insertions(+), 1859 deletions(-)
 delete mode 100644 tests/DOM/01-basic.js
 create mode 100644 tests/DOM/01-basic.json
 delete mode 100644 tests/DOM/02-single_tag_1.js
 create mode 100644 tests/DOM/02-single_tag_1.json
 delete mode 100644 tests/DOM/03-single_tag_2.js
 create mode 100644 tests/DOM/03-single_tag_2.json
 delete mode 100644 tests/DOM/04-unescaped_in_script.js
 create mode 100644 tests/DOM/04-unescaped_in_script.json
 delete mode 100644 tests/DOM/05-tags_in_comment.js
 create mode 100644 tests/DOM/05-tags_in_comment.json
 delete mode 100644 tests/DOM/06-comment_in_script.js
 create mode 100644 tests/DOM/06-comment_in_script.json
 delete mode 100644 tests/DOM/07-unescaped_in_style.js
 create mode 100644 tests/DOM/07-unescaped_in_style.json
 delete mode 100644 tests/DOM/08-extra_spaces_in_tag.js
 create mode 100644 tests/DOM/08-extra_spaces_in_tag.json
 delete mode 100644 tests/DOM/09-unquoted_attrib.js
 create mode 100644 tests/DOM/09-unquoted_attrib.json
 delete mode 100644 tests/DOM/10-singular_attribute.js
 create mode 100644 tests/DOM/10-singular_attribute.json
 delete mode 100644 tests/DOM/11-text_outside_tags.js
 create mode 100644 tests/DOM/11-text_outside_tags.json
 delete mode 100644 tests/DOM/12-text_only.js
 create mode 100644 tests/DOM/12-text_only.json
 delete mode 100644 tests/DOM/13-comment_in_text.js
 create mode 100644 tests/DOM/13-comment_in_text.json
 delete mode 100644 tests/DOM/14-comment_in_text_in_script.js
 create mode 100644 tests/DOM/14-comment_in_text_in_script.json
 delete mode 100644 tests/DOM/15-non-verbose.js
 create mode 100644 tests/DOM/15-non-verbose.json
 delete mode 100644 tests/DOM/16-ignore_whitespace.js
 create mode 100644 tests/DOM/16-ignore_whitespace.json
 delete mode 100644 tests/DOM/17-xml_namespace.js
 create mode 100644 tests/DOM/17-xml_namespace.json
 delete mode 100644 tests/DOM/18-enforce_empty_tags.js
 create mode 100644 tests/DOM/18-enforce_empty_tags.json
 delete mode 100644 tests/DOM/19-ignore_empty_tags.js
 create mode 100644 tests/DOM/19-ignore_empty_tags.json
 delete mode 100644 tests/DOM/20-template_script_tags.js
 create mode 100644 tests/DOM/20-template_script_tags.json
 delete mode 100644 tests/DOM/21-conditional_comments.js
 create mode 100644 tests/DOM/21-conditional_comments.json
 delete mode 100644 tests/DOM/22-lowercase_tags.js
 create mode 100644 tests/DOM/22-lowercase_tags.json
 delete mode 100644 tests/Events/01-simple.js
 create mode 100644 tests/Events/01-simple.json
 delete mode 100644 tests/Events/02-template.js
 create mode 100644 tests/Events/02-template.json
 delete mode 100644 tests/Events/03-lowercase_tags.js
 create mode 100644 tests/Events/03-lowercase_tags.json
 delete mode 100644 tests/Events/04-cdata.js
 create mode 100644 tests/Events/04-cdata.json
 create mode 100644 tests/Stream/01-basic.j.json
 delete mode 100644 tests/Stream/01-basic.js
 create mode 100644 tests/Stream/02-RSS.j.json
 delete mode 100644 tests/Stream/02-RSS.js

diff --git a/tests/DOM/01-basic.js b/tests/DOM/01-basic.js
deleted file mode 100644
index beaa970..0000000
--- a/tests/DOM/01-basic.js
+++ /dev/null
@@ -1,39 +0,0 @@
-exports.name = "Basic test";
-exports.options = {
-	  handler: {}
-	, parser: {}
-};
-exports.html = require("fs").readFileSync(__dirname + "/../Documents/Basic.html").toString();
-exports.expected = [
-  {
-    "name": "!DOCTYPE",
-    "data": "!DOCTYPE html",
-    "type": "directive"
-  },
-  {
-    "type": "tag",
-    "name": "html",
-    "children": [
-      {
-        "type": "tag",
-        "name": "title",
-        "children": [
-          {
-            "data": "The Title",
-            "type": "text"
-          }
-        ]
-      },
-      {
-        "type": "tag",
-        "name": "body",
-        "children": [
-          {
-            "data": "Hello world",
-            "type": "text"
-          }
-        ]
-      }
-    ]
-  }
-]
diff --git a/tests/DOM/01-basic.json b/tests/DOM/01-basic.json
new file mode 100644
index 0000000..7453c30
--- /dev/null
+++ b/tests/DOM/01-basic.json
@@ -0,0 +1,41 @@
+{
+  "name": "Basic test",
+  "options": {
+    "handler": {},
+    "parser": {}
+  },
+  "html": "<!DOCTYPE html><html><title>The Title</title><body>Hello world</body></html>",
+  "expected": [
+    {
+      "name": "!DOCTYPE",
+      "data": "!DOCTYPE html",
+      "type": "directive"
+    },
+    {
+      "type": "tag",
+      "name": "html",
+      "children": [
+        {
+          "type": "tag",
+          "name": "title",
+          "children": [
+            {
+              "data": "The Title",
+              "type": "text"
+            }
+          ]
+        },
+        {
+          "type": "tag",
+          "name": "body",
+          "children": [
+            {
+              "data": "Hello world",
+              "type": "text"
+            }
+          ]
+        }
+      ]
+    }
+  ]
+}
\ No newline at end of file
diff --git a/tests/DOM/02-single_tag_1.js b/tests/DOM/02-single_tag_1.js
deleted file mode 100644
index 12ecc38..0000000
--- a/tests/DOM/02-single_tag_1.js
+++ /dev/null
@@ -1,20 +0,0 @@
-exports.name = "Single Tag 1";
-exports.options = {
-	  handler: {}
-	, parser: {}
-};
-exports.html = "<br>text</br>";
-exports.expected = [
-  {
-    "type": "tag",
-    "name": "br"
-  },
-  {
-    "data": "text",
-    "type": "text"
-  },
-  {
-    "type": "tag",
-    "name": "br"
-  }
-];
\ No newline at end of file
diff --git a/tests/DOM/02-single_tag_1.json b/tests/DOM/02-single_tag_1.json
new file mode 100644
index 0000000..4efff6a
--- /dev/null
+++ b/tests/DOM/02-single_tag_1.json
@@ -0,0 +1,22 @@
+{
+  "name": "Single Tag 1",
+  "options": {
+    "handler": {},
+    "parser": {}
+  },
+  "html": "<br>text</br>",
+  "expected": [
+    {
+      "type": "tag",
+      "name": "br"
+    },
+    {
+      "data": "text",
+      "type": "text"
+    },
+    {
+      "type": "tag",
+      "name": "br"
+    }
+  ]
+}
\ No newline at end of file
diff --git a/tests/DOM/03-single_tag_2.js b/tests/DOM/03-single_tag_2.js
deleted file mode 100644
index eaeec64..0000000
--- a/tests/DOM/03-single_tag_2.js
+++ /dev/null
@@ -1,20 +0,0 @@
-exports.name = "Single Tag 2";
-exports.options = {
-	  handler: {}
-	, parser: {}
-};
-exports.html = "<br>text<br>";
-exports.expected = [
-  {
-    "type": "tag",
-    "name": "br"
-  },
-  {
-    "data": "text",
-    "type": "text"
-  },
-  {
-    "type": "tag",
-    "name": "br"
-  }
-];
\ No newline at end of file
diff --git a/tests/DOM/03-single_tag_2.json b/tests/DOM/03-single_tag_2.json
new file mode 100644
index 0000000..e7b23b8
--- /dev/null
+++ b/tests/DOM/03-single_tag_2.json
@@ -0,0 +1,22 @@
+{
+  "name": "Single Tag 2",
+  "options": {
+    "handler": {},
+    "parser": {}
+  },
+  "html": "<br>text<br>",
+  "expected": [
+    {
+      "type": "tag",
+      "name": "br"
+    },
+    {
+      "data": "text",
+      "type": "text"
+    },
+    {
+      "type": "tag",
+      "name": "br"
+    }
+  ]
+}
\ No newline at end of file
diff --git a/tests/DOM/04-unescaped_in_script.js b/tests/DOM/04-unescaped_in_script.js
deleted file mode 100644
index 5fdefc4..0000000
--- a/tests/DOM/04-unescaped_in_script.js
+++ /dev/null
@@ -1,27 +0,0 @@
-exports.name = "Unescaped chars in script";
-exports.options = {
-	  handler: {}
-	, parser: {}
-};
-exports.html = "<head><script language=\"Javascript\">var foo = \"<bar>\"; alert(2 > foo); var baz = 10 << 2; var zip = 10 >> 1; var yap = \"<<>>>><<\";</script></head>";
-exports.expected = [
-  {
-    'type': 'tag',
-    'name': 'head',
-    'children': [
-      {
-        'type': 'script',
-        'name': 'script',
-        'attribs': {
-          'language': 'Javascript'
-        },
-        'children': [
-          {
-            'data': 'var foo = "<bar>"; alert(2 > foo); var baz = 10 << 2; var zip = 10 >> 1; var yap = "<<>>>><<";',
-            'type': 'text'
-          }
-        ]
-      }
-    ]
-  }
-];
\ No newline at end of file
diff --git a/tests/DOM/04-unescaped_in_script.json b/tests/DOM/04-unescaped_in_script.json
new file mode 100644
index 0000000..029d202
--- /dev/null
+++ b/tests/DOM/04-unescaped_in_script.json
@@ -0,0 +1,29 @@
+{
+  "name": "Unescaped chars in script",
+  "options": {
+    "handler": {},
+    "parser": {}
+  },
+  "html": "<head><script language=\"Javascript\">var foo = \"<bar>\"; alert(2 > foo); var baz = 10 << 2; var zip = 10 >> 1; var yap = \"<<>>>><<\";</script></head>",
+  "expected": [
+    {
+      "type": "tag",
+      "name": "head",
+      "children": [
+        {
+          "type": "script",
+          "name": "script",
+          "attribs": {
+            "language": "Javascript"
+          },
+          "children": [
+            {
+              "data": "var foo = \"<bar>\"; alert(2 > foo); var baz = 10 << 2; var zip = 10 >> 1; var yap = \"<<>>>><<\";",
+              "type": "text"
+            }
+          ]
+        }
+      ]
+    }
+  ]
+}
\ No newline at end of file
diff --git a/tests/DOM/05-tags_in_comment.js b/tests/DOM/05-tags_in_comment.js
deleted file mode 100644
index e0c770e..0000000
--- a/tests/DOM/05-tags_in_comment.js
+++ /dev/null
@@ -1,18 +0,0 @@
-exports.name = "Special char in comment";
-exports.options = {
-	  handler: {}
-	, parser: {}
-};
-exports.html = "<head><!-- commented out tags <title>Test</title>--></head>";
-exports.expected = [
-  {
-    "type": "tag",
-    "name": "head",
-    "children": [
-      {
-        "data": " commented out tags <title>Test</title>",
-        "type": "comment"
-      }
-    ]
-  }
-];
\ No newline at end of file
diff --git a/tests/DOM/05-tags_in_comment.json b/tests/DOM/05-tags_in_comment.json
new file mode 100644
index 0000000..577d23b
--- /dev/null
+++ b/tests/DOM/05-tags_in_comment.json
@@ -0,0 +1,20 @@
+{
+  "name": "Special char in comment",
+  "options": {
+    "handler": {},
+    "parser": {}
+  },
+  "html": "<head><!-- commented out tags <title>Test</title>--></head>",
+  "expected": [
+    {
+      "type": "tag",
+      "name": "head",
+      "children": [
+        {
+          "data": " commented out tags <title>Test</title>",
+          "type": "comment"
+        }
+      ]
+    }
+  ]
+}
\ No newline at end of file
diff --git a/tests/DOM/06-comment_in_script.js b/tests/DOM/06-comment_in_script.js
deleted file mode 100644
index 6022b91..0000000
--- a/tests/DOM/06-comment_in_script.js
+++ /dev/null
@@ -1,18 +0,0 @@
-exports.name = "Script source in comment";
-exports.options = {
-	  handler: {}
-	, parser: {}
-};
-exports.html = "<script><!--var foo = 1;--></script>";
-exports.expected = [
-  {
-    "type": "script",
-    "name": "script",
-    "children": [
-      {
-        "data": "var foo = 1;",
-        "type": "comment"
-      }
-    ]
-  }
-];
\ No newline at end of file
diff --git a/tests/DOM/06-comment_in_script.json b/tests/DOM/06-comment_in_script.json
new file mode 100644
index 0000000..a4246f4
--- /dev/null
+++ b/tests/DOM/06-comment_in_script.json
@@ -0,0 +1,20 @@
+{
+  "name": "Script source in comment",
+  "options": {
+    "handler": {},
+    "parser": {}
+  },
+  "html": "<script><!--var foo = 1;--></script>",
+  "expected": [
+    {
+      "type": "script",
+      "name": "script",
+      "children": [
+        {
+          "data": "var foo = 1;",
+          "type": "comment"
+        }
+      ]
+    }
+  ]
+}
\ No newline at end of file
diff --git a/tests/DOM/07-unescaped_in_style.js b/tests/DOM/07-unescaped_in_style.js
deleted file mode 100644
index 3784336..0000000
--- a/tests/DOM/07-unescaped_in_style.js
+++ /dev/null
@@ -1,17 +0,0 @@
-exports.name = "Unescaped chars in style";
-exports.options = {
-	  handler: {}
-	, parser: {}
-};
-exports.html = "<style type=\"text/css\">\n body > p\n	{ font-weight: bold; }</style>";
-exports.expected =
-[ { type: 'style'
-  , name: 'style'
-  , attribs: { type: 'text/css' }
-  , children:
-     [ { data: '\n body > p\n	{ font-weight: bold; }'
-       , type: 'text'
-       }
-     ]
-  }
-];
\ No newline at end of file
diff --git a/tests/DOM/07-unescaped_in_style.json b/tests/DOM/07-unescaped_in_style.json
new file mode 100644
index 0000000..d6bf9fb
--- /dev/null
+++ b/tests/DOM/07-unescaped_in_style.json
@@ -0,0 +1,23 @@
+{
+  "name": "Unescaped chars in style",
+  "options": {
+    "handler": {},
+    "parser": {}
+  },
+  "html": "<style type=\"text/css\">\n body > p\n\t{ font-weight: bold; }</style>",
+  "expected": [
+    {
+      "type": "style",
+      "name": "style",
+      "attribs": {
+        "type": "text/css"
+      },
+      "children": [
+        {
+          "data": "\n body > p\n\t{ font-weight: bold; }",
+          "type": "text"
+        }
+      ]
+    }
+  ]
+}
\ No newline at end of file
diff --git a/tests/DOM/08-extra_spaces_in_tag.js b/tests/DOM/08-extra_spaces_in_tag.js
deleted file mode 100644
index 8b6cda5..0000000
--- a/tests/DOM/08-extra_spaces_in_tag.js
+++ /dev/null
@@ -1,21 +0,0 @@
-exports.name = "Extra spaces in tag";
-exports.options = {
-	  handler: {}
-	, parser: {}
-};
-exports.html = "<\n font\t\n size='14' \n>the text<\n /	\nfont	 \n>";
-exports.expected = [
-  {
-    "type": "tag",
-    "name": "font",
-    "attribs": {
-      "size": "14"
-    },
-    "children": [
-      {
-        "data": "the text",
-        "type": "text"
-      }
-    ]
-  }
-];
diff --git a/tests/DOM/08-extra_spaces_in_tag.json b/tests/DOM/08-extra_spaces_in_tag.json
new file mode 100644
index 0000000..78b30f4
--- /dev/null
+++ b/tests/DOM/08-extra_spaces_in_tag.json
@@ -0,0 +1,23 @@
+{
+  "name": "Extra spaces in tag",
+  "options": {
+    "handler": {},
+    "parser": {}
+  },
+  "html": "<\n font\t\n size='14' \n>the text<\n /\t\nfont\t \n>",
+  "expected": [
+    {
+      "type": "tag",
+      "name": "font",
+      "attribs": {
+        "size": "14"
+      },
+      "children": [
+        {
+          "data": "the text",
+          "type": "text"
+        }
+      ]
+    }
+  ]
+}
\ No newline at end of file
diff --git a/tests/DOM/09-unquoted_attrib.js b/tests/DOM/09-unquoted_attrib.js
deleted file mode 100644
index c787422..0000000
--- a/tests/DOM/09-unquoted_attrib.js
+++ /dev/null
@@ -1,21 +0,0 @@
-exports.name = "Unquoted attributes";
-exports.options = {
-	  handler: {}
-	, parser: {}
-};
-exports.html = "<font size= 14>the text</font>";
-exports.expected = [
-  {
-    "type": "tag",
-    "name": "font",
-    "attribs": {
-      "size": "14"
-    },
-    "children": [
-      {
-        "data": "the text",
-        "type": "text"
-      }
-    ]
-  }
-];
\ No newline at end of file
diff --git a/tests/DOM/09-unquoted_attrib.json b/tests/DOM/09-unquoted_attrib.json
new file mode 100644
index 0000000..ae5f44c
--- /dev/null
+++ b/tests/DOM/09-unquoted_attrib.json
@@ -0,0 +1,23 @@
+{
+  "name": "Unquoted attributes",
+  "options": {
+    "handler": {},
+    "parser": {}
+  },
+  "html": "<font size= 14>the text</font>",
+  "expected": [
+    {
+      "type": "tag",
+      "name": "font",
+      "attribs": {
+        "size": "14"
+      },
+      "children": [
+        {
+          "data": "the text",
+          "type": "text"
+        }
+      ]
+    }
+  ]
+}
\ No newline at end of file
diff --git a/tests/DOM/10-singular_attribute.js b/tests/DOM/10-singular_attribute.js
deleted file mode 100644
index af10053..0000000
--- a/tests/DOM/10-singular_attribute.js
+++ /dev/null
@@ -1,16 +0,0 @@
-exports.name = "Singular attribute";
-exports.options = {
-	  handler: {}
-	, parser: {}
-};
-exports.html = "<option value='foo' selected>";
-exports.expected = [
-  {
-    "type": "tag",
-    "name": "option",
-    "attribs": {
-      "value": "foo",
-      "selected": ""
-    }
-  }
-];
\ No newline at end of file
diff --git a/tests/DOM/10-singular_attribute.json b/tests/DOM/10-singular_attribute.json
new file mode 100644
index 0000000..6de1ef2
--- /dev/null
+++ b/tests/DOM/10-singular_attribute.json
@@ -0,0 +1,18 @@
+{
+  "name": "Singular attribute",
+  "options": {
+    "handler": {},
+    "parser": {}
+  },
+  "html": "<option value='foo' selected>",
+  "expected": [
+    {
+      "type": "tag",
+      "name": "option",
+      "attribs": {
+        "value": "foo",
+        "selected": ""
+      }
+    }
+  ]
+}
\ No newline at end of file
diff --git a/tests/DOM/11-text_outside_tags.js b/tests/DOM/11-text_outside_tags.js
deleted file mode 100644
index d544b23..0000000
--- a/tests/DOM/11-text_outside_tags.js
+++ /dev/null
@@ -1,20 +0,0 @@
-exports.name = "Text outside tags";
-exports.options = {
-	  handler: {}
-	, parser: {}
-};
-exports.html = "Line one\n<br>\nline two";
-exports.expected = [
-  {
-    "data": "Line one\n",
-    "type": "text"
-  },
-  {
-    "type": "tag",
-    "name": "br"
-  },
-  {
-    "data": "\nline two",
-    "type": "text"
-  }
-];
\ No newline at end of file
diff --git a/tests/DOM/11-text_outside_tags.json b/tests/DOM/11-text_outside_tags.json
new file mode 100644
index 0000000..234821f
--- /dev/null
+++ b/tests/DOM/11-text_outside_tags.json
@@ -0,0 +1,22 @@
+{
+  "name": "Text outside tags",
+  "options": {
+    "handler": {},
+    "parser": {}
+  },
+  "html": "Line one\n<br>\nline two",
+  "expected": [
+    {
+      "data": "Line one\n",
+      "type": "text"
+    },
+    {
+      "type": "tag",
+      "name": "br"
+    },
+    {
+      "data": "\nline two",
+      "type": "text"
+    }
+  ]
+}
\ No newline at end of file
diff --git a/tests/DOM/12-text_only.js b/tests/DOM/12-text_only.js
deleted file mode 100644
index 45d774f..0000000
--- a/tests/DOM/12-text_only.js
+++ /dev/null
@@ -1,12 +0,0 @@
-exports.name = "Only text";
-exports.options = {
-	  handler: {}
-	, parser: {}
-};
-exports.html = "this is the text";
-exports.expected = [
-  {
-    "data": "this is the text",
-    "type": "text"
-  }
-];
\ No newline at end of file
diff --git a/tests/DOM/12-text_only.json b/tests/DOM/12-text_only.json
new file mode 100644
index 0000000..f2f8610
--- /dev/null
+++ b/tests/DOM/12-text_only.json
@@ -0,0 +1,14 @@
+{
+  "name": "Only text",
+  "options": {
+    "handler": {},
+    "parser": {}
+  },
+  "html": "this is the text",
+  "expected": [
+    {
+      "data": "this is the text",
+      "type": "text"
+    }
+  ]
+}
\ No newline at end of file
diff --git a/tests/DOM/13-comment_in_text.js b/tests/DOM/13-comment_in_text.js
deleted file mode 100644
index 46bd94d..0000000
--- a/tests/DOM/13-comment_in_text.js
+++ /dev/null
@@ -1,20 +0,0 @@
-exports.name = "Comment within text";
-exports.options = {
-	  handler: {}
-	, parser: {}
-};
-exports.html = "this is <!-- the comment --> the text";
-exports.expected = [
-  {
-    "data": "this is ",
-    "type": "text"
-  },
-  {
-    "data": " the comment ",
-    "type": "comment"
-  },
-  {
-    "data": " the text",
-    "type": "text"
-  }
-];
\ No newline at end of file
diff --git a/tests/DOM/13-comment_in_text.json b/tests/DOM/13-comment_in_text.json
new file mode 100644
index 0000000..bc47305
--- /dev/null
+++ b/tests/DOM/13-comment_in_text.json
@@ -0,0 +1,22 @@
+{
+  "name": "Comment within text",
+  "options": {
+    "handler": {},
+    "parser": {}
+  },
+  "html": "this is <!-- the comment --> the text",
+  "expected": [
+    {
+      "data": "this is ",
+      "type": "text"
+    },
+    {
+      "data": " the comment ",
+      "type": "comment"
+    },
+    {
+      "data": " the text",
+      "type": "text"
+    }
+  ]
+}
\ No newline at end of file
diff --git a/tests/DOM/14-comment_in_text_in_script.js b/tests/DOM/14-comment_in_text_in_script.js
deleted file mode 100644
index c4fff65..0000000
--- a/tests/DOM/14-comment_in_text_in_script.js
+++ /dev/null
@@ -1,26 +0,0 @@
-exports.name = "Comment within text within script";
-exports.options = {
-	  handler: {}
-	, parser: {}
-};
-exports.html = "<script>this is <!-- the comment --> the text</script>";
-exports.expected = [
-  {
-    "type": "script",
-    "name": "script",
-    "children": [
-      {
-        "data": "this is ",
-        "type": "text"
-      },
-      {
-        "data": " the comment ",
-        "type": "comment"
-      },
-      {
-        "data": " the text",
-        "type": "text"
-      }
-    ]
-  }
-];
\ No newline at end of file
diff --git a/tests/DOM/14-comment_in_text_in_script.json b/tests/DOM/14-comment_in_text_in_script.json
new file mode 100644
index 0000000..e573881
--- /dev/null
+++ b/tests/DOM/14-comment_in_text_in_script.json
@@ -0,0 +1,28 @@
+{
+  "name": "Comment within text within script",
+  "options": {
+    "handler": {},
+    "parser": {}
+  },
+  "html": "<script>this is <!-- the comment --> the text</script>",
+  "expected": [
+    {
+      "type": "script",
+      "name": "script",
+      "children": [
+        {
+          "data": "this is ",
+          "type": "text"
+        },
+        {
+          "data": " the comment ",
+          "type": "comment"
+        },
+        {
+          "data": " the text",
+          "type": "text"
+        }
+      ]
+    }
+  ]
+}
\ No newline at end of file
diff --git a/tests/DOM/15-non-verbose.js b/tests/DOM/15-non-verbose.js
deleted file mode 100644
index 9d5a30f..0000000
--- a/tests/DOM/15-non-verbose.js
+++ /dev/null
@@ -1,17 +0,0 @@
-exports.name = "Option 'verbose' set to 'false'";
-exports.options = {
-	  handler: { verbose: false }
-	, parser: {}
-};
-exports.html = "<\n font	\n size='14' \n>the text<\n /	\nfont	 \n>";
-exports.expected =
-[ { type: 'tag'
-  , name: 'font'
-  , attribs: { size: '14' }
-  , children:
-     [ { data: 'the text'
-       , type: 'text'
-       }
-     ]
-  }
-];
\ No newline at end of file
diff --git a/tests/DOM/15-non-verbose.json b/tests/DOM/15-non-verbose.json
new file mode 100644
index 0000000..e887368
--- /dev/null
+++ b/tests/DOM/15-non-verbose.json
@@ -0,0 +1,25 @@
+{
+  "name": "Option 'verbose' set to 'false'",
+  "options": {
+    "handler": {
+      "verbose": false
+    },
+    "parser": {}
+  },
+  "html": "<\n font\t\n size='14' \n>the text<\n /\t\nfont\t \n>",
+  "expected": [
+    {
+      "type": "tag",
+      "name": "font",
+      "attribs": {
+        "size": "14"
+      },
+      "children": [
+        {
+          "data": "the text",
+          "type": "text"
+        }
+      ]
+    }
+  ]
+}
\ No newline at end of file
diff --git a/tests/DOM/16-ignore_whitespace.js b/tests/DOM/16-ignore_whitespace.js
deleted file mode 100644
index 9049bd8..0000000
--- a/tests/DOM/16-ignore_whitespace.js
+++ /dev/null
@@ -1,38 +0,0 @@
-exports.name = "Options 'ignoreWhitespace' set to 'true'";
-exports.options = {
-	  handler: { ignoreWhitespace: true }
-	, parser: {}
-};
-exports.html = "Line one\n<br> \t\n<br>\nline two<font>\n <br> x </font>";
-exports.expected = [
-  {
-    "data": "Line one\n",
-    "type": "text"
-  },
-  {
-    "type": "tag",
-    "name": "br"
-  },
-  {
-    "type": "tag",
-    "name": "br"
-  },
-  {
-    "data": "\nline two",
-    "type": "text"
-  },
-  {
-    "type": "tag",
-    "name": "font",
-    "children": [
-      {
-        "type": "tag",
-        "name": "br"
-      },
-      {
-        "data": " x ",
-        "type": "text"
-      }
-    ]
-  }
-];
\ No newline at end of file
diff --git a/tests/DOM/16-ignore_whitespace.json b/tests/DOM/16-ignore_whitespace.json
new file mode 100644
index 0000000..ade32f3
--- /dev/null
+++ b/tests/DOM/16-ignore_whitespace.json
@@ -0,0 +1,42 @@
+{
+  "name": "Options 'ignoreWhitespace' set to 'true'",
+  "options": {
+    "handler": {
+      "ignoreWhitespace": true
+    },
+    "parser": {}
+  },
+  "html": "Line one\n<br> \t\n<br>\nline two<font>\n <br> x </font>",
+  "expected": [
+    {
+      "data": "Line one\n",
+      "type": "text"
+    },
+    {
+      "type": "tag",
+      "name": "br"
+    },
+    {
+      "type": "tag",
+      "name": "br"
+    },
+    {
+      "data": "\nline two",
+      "type": "text"
+    },
+    {
+      "type": "tag",
+      "name": "font",
+      "children": [
+        {
+          "type": "tag",
+          "name": "br"
+        },
+        {
+          "data": " x ",
+          "type": "text"
+        }
+      ]
+    }
+  ]
+}
\ No newline at end of file
diff --git a/tests/DOM/17-xml_namespace.js b/tests/DOM/17-xml_namespace.js
deleted file mode 100644
index 2789a6e..0000000
--- a/tests/DOM/17-xml_namespace.js
+++ /dev/null
@@ -1,18 +0,0 @@
-exports.name = "XML Namespace";
-exports.options = {
-	  handler: {}
-	, parser: {}
-};
-exports.html = "<ns:tag>text</ns:tag>";
-exports.expected = [
-  {
-    "type": "tag",
-    "name": "ns:tag",
-    "children": [
-      {
-        "data": "text",
-        "type": "text"
-      }
-    ]
-  }
-];
\ No newline at end of file
diff --git a/tests/DOM/17-xml_namespace.json b/tests/DOM/17-xml_namespace.json
new file mode 100644
index 0000000..4302144
--- /dev/null
+++ b/tests/DOM/17-xml_namespace.json
@@ -0,0 +1,20 @@
+{
+  "name": "XML Namespace",
+  "options": {
+    "handler": {},
+    "parser": {}
+  },
+  "html": "<ns:tag>text</ns:tag>",
+  "expected": [
+    {
+      "type": "tag",
+      "name": "ns:tag",
+      "children": [
+        {
+          "data": "text",
+          "type": "text"
+        }
+      ]
+    }
+  ]
+}
\ No newline at end of file
diff --git a/tests/DOM/18-enforce_empty_tags.js b/tests/DOM/18-enforce_empty_tags.js
deleted file mode 100644
index 131a353..0000000
--- a/tests/DOM/18-enforce_empty_tags.js
+++ /dev/null
@@ -1,16 +0,0 @@
-exports.name = "Enforce empty tags";
-exports.options = {
-	  handler: {}
-	, parser: {}
-};
-exports.html = "<link>text</link>";
-exports.expected = [
-  {
-    "type": "tag",
-    "name": "link"
-  },
-  {
-    "data": "text",
-    "type": "text"
-  }
-];
\ No newline at end of file
diff --git a/tests/DOM/18-enforce_empty_tags.json b/tests/DOM/18-enforce_empty_tags.json
new file mode 100644
index 0000000..ed5c44c
--- /dev/null
+++ b/tests/DOM/18-enforce_empty_tags.json
@@ -0,0 +1,18 @@
+{
+  "name": "Enforce empty tags",
+  "options": {
+    "handler": {},
+    "parser": {}
+  },
+  "html": "<link>text</link>",
+  "expected": [
+    {
+      "type": "tag",
+      "name": "link"
+    },
+    {
+      "data": "text",
+      "type": "text"
+    }
+  ]
+}
\ No newline at end of file
diff --git a/tests/DOM/19-ignore_empty_tags.js b/tests/DOM/19-ignore_empty_tags.js
deleted file mode 100644
index 3445884..0000000
--- a/tests/DOM/19-ignore_empty_tags.js
+++ /dev/null
@@ -1,18 +0,0 @@
-exports.name = "Ignore empty tags (xml mode)";
-exports.options = {
-	  handler: {}
-	, parser: {xmlMode:true}
-};
-exports.html = "<link>text</link>";
-exports.expected = [
-  {
-    "type": "tag",
-    "name": "link",
-    "children": [
-      {
-        "data": "text",
-        "type": "text"
-      }
-    ]
-  }
-];
\ No newline at end of file
diff --git a/tests/DOM/19-ignore_empty_tags.json b/tests/DOM/19-ignore_empty_tags.json
new file mode 100644
index 0000000..97270eb
--- /dev/null
+++ b/tests/DOM/19-ignore_empty_tags.json
@@ -0,0 +1,22 @@
+{
+  "name": "Ignore empty tags (xml mode)",
+  "options": {
+    "handler": {},
+    "parser": {
+      "xmlMode": true
+    }
+  },
+  "html": "<link>text</link>",
+  "expected": [
+    {
+      "type": "tag",
+      "name": "link",
+      "children": [
+        {
+          "data": "text",
+          "type": "text"
+        }
+      ]
+    }
+  ]
+}
\ No newline at end of file
diff --git a/tests/DOM/20-template_script_tags.js b/tests/DOM/20-template_script_tags.js
deleted file mode 100644
index e6b7b74..0000000
--- a/tests/DOM/20-template_script_tags.js
+++ /dev/null
@@ -1,13 +0,0 @@
-exports.name = "Template script tags";
-exports.options = {
-	  handler: {}
-	, parser: {}
-};
-exports.html = "<script type=\"text/template\"><h1>Heading1</h1></script>";
-exports.expected = [ {
-    type: 'script',
-    name: 'script',
-    attribs: { type: 'text/template' },
-    children: 
-     [ { data: '<h1>Heading1</h1>',
-         type: 'text' } ] } ];
\ No newline at end of file
diff --git a/tests/DOM/20-template_script_tags.json b/tests/DOM/20-template_script_tags.json
new file mode 100644
index 0000000..90a3709
--- /dev/null
+++ b/tests/DOM/20-template_script_tags.json
@@ -0,0 +1,23 @@
+{
+  "name": "Template script tags",
+  "options": {
+    "handler": {},
+    "parser": {}
+  },
+  "html": "<script type=\"text/template\"><h1>Heading1</h1></script>",
+  "expected": [
+    {
+      "type": "script",
+      "name": "script",
+      "attribs": {
+        "type": "text/template"
+      },
+      "children": [
+        {
+          "data": "<h1>Heading1</h1>",
+          "type": "text"
+        }
+      ]
+    }
+  ]
+}
\ No newline at end of file
diff --git a/tests/DOM/21-conditional_comments.js b/tests/DOM/21-conditional_comments.js
deleted file mode 100644
index 583981e..0000000
--- a/tests/DOM/21-conditional_comments.js
+++ /dev/null
@@ -1,16 +0,0 @@
-exports.name = "Conditional comments";
-exports.options = {
-	  handler: {}
-	, parser: {}
-};
-exports.html = "<!--[if lt IE 7]> <html class='no-js ie6 oldie' lang='en'> <![endif]--><!--[if lt IE 7]> <html class='no-js ie6 oldie' lang='en'> <![endif]-->";
-exports.expected = [
-  {
-    "data": "[if lt IE 7]> <html class='no-js ie6 oldie' lang='en'> <![endif]",
-    "type": "comment"
-  },
-  {
-    "data": "[if lt IE 7]> <html class='no-js ie6 oldie' lang='en'> <![endif]",
-    "type": "comment"
-  }
-];
\ No newline at end of file
diff --git a/tests/DOM/21-conditional_comments.json b/tests/DOM/21-conditional_comments.json
new file mode 100644
index 0000000..56cf232
--- /dev/null
+++ b/tests/DOM/21-conditional_comments.json
@@ -0,0 +1,18 @@
+{
+  "name": "Conditional comments",
+  "options": {
+    "handler": {},
+    "parser": {}
+  },
+  "html": "<!--[if lt IE 7]> <html class='no-js ie6 oldie' lang='en'> <![endif]--><!--[if lt IE 7]> <html class='no-js ie6 oldie' lang='en'> <![endif]-->",
+  "expected": [
+    {
+      "data": "[if lt IE 7]> <html class='no-js ie6 oldie' lang='en'> <![endif]",
+      "type": "comment"
+    },
+    {
+      "data": "[if lt IE 7]> <html class='no-js ie6 oldie' lang='en'> <![endif]",
+      "type": "comment"
+    }
+  ]
+}
\ No newline at end of file
diff --git a/tests/DOM/22-lowercase_tags.js b/tests/DOM/22-lowercase_tags.js
deleted file mode 100644
index 0bf07c8..0000000
--- a/tests/DOM/22-lowercase_tags.js
+++ /dev/null
@@ -1,39 +0,0 @@
-exports.name = "Basic test";
-exports.options = {
-	  handler: {}
-	, parser: {lowerCaseTags:true}
-};
-exports.html = "<!DOCTYPE html><HTML><TITLE>The Title</title><BODY>Hello world</body></html>";
-exports.expected = [
-  {
-    "name": "!doctype",
-    "data": "!DOCTYPE html",
-    "type": "directive"
-  },
-  {
-    "type": "tag",
-    "name": "html",
-    "children": [
-      {
-        "type": "tag",
-        "name": "title",
-        "children": [
-          {
-            "data": "The Title",
-            "type": "text"
-          }
-        ]
-      },
-      {
-        "type": "tag",
-        "name": "body",
-        "children": [
-          {
-            "data": "Hello world",
-            "type": "text"
-          }
-        ]
-      }
-    ]
-  }
-]
diff --git a/tests/DOM/22-lowercase_tags.json b/tests/DOM/22-lowercase_tags.json
new file mode 100644
index 0000000..b619ffc
--- /dev/null
+++ b/tests/DOM/22-lowercase_tags.json
@@ -0,0 +1,43 @@
+{
+  "name": "Basic test",
+  "options": {
+    "handler": {},
+    "parser": {
+      "lowerCaseTags": true
+    }
+  },
+  "html": "<!DOCTYPE html><HTML><TITLE>The Title</title><BODY>Hello world</body></html>",
+  "expected": [
+    {
+      "name": "!doctype",
+      "data": "!DOCTYPE html",
+      "type": "directive"
+    },
+    {
+      "type": "tag",
+      "name": "html",
+      "children": [
+        {
+          "type": "tag",
+          "name": "title",
+          "children": [
+            {
+              "data": "The Title",
+              "type": "text"
+            }
+          ]
+        },
+        {
+          "type": "tag",
+          "name": "body",
+          "children": [
+            {
+              "data": "Hello world",
+              "type": "text"
+            }
+          ]
+        }
+      ]
+    }
+  ]
+}
\ No newline at end of file
diff --git a/tests/Events/01-simple.js b/tests/Events/01-simple.js
deleted file mode 100644
index e66aace..0000000
--- a/tests/Events/01-simple.js
+++ /dev/null
@@ -1,39 +0,0 @@
-exports.name = "simple";
-exports.options = {handler: {}, parser: {}};
-exports.html = "<h1 class=test>adsf</h1>";
-exports.expected = [
-  {
-    "event": "opentagname",
-    "data": [
-      "h1"
-    ]
-  },
-  {
-    "event": "opentag",
-    "data": [
-      "h1",
-      {
-        "class": "test"
-      }
-    ]
-  },
-  {
-    "event": "attribute",
-    "data": [
-      "class",
-      "test"
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "adsf"
-    ]
-  },
-  {
-    "event": "closetag",
-    "data": [
-      "h1"
-    ]
-  }
-];
\ No newline at end of file
diff --git a/tests/Events/01-simple.json b/tests/Events/01-simple.json
new file mode 100644
index 0000000..ff469e7
--- /dev/null
+++ b/tests/Events/01-simple.json
@@ -0,0 +1,44 @@
+{
+  "name": "simple",
+  "options": {
+    "handler": {},
+    "parser": {}
+  },
+  "html": "<h1 class=test>adsf</h1>",
+  "expected": [
+    {
+      "event": "opentagname",
+      "data": [
+        "h1"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "h1",
+        {
+          "class": "test"
+        }
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "class",
+        "test"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "adsf"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "h1"
+      ]
+    }
+  ]
+}
\ No newline at end of file
diff --git a/tests/Events/02-template.js b/tests/Events/02-template.js
deleted file mode 100644
index 0210fe0..0000000
--- a/tests/Events/02-template.js
+++ /dev/null
@@ -1,57 +0,0 @@
-exports.name = "Template script tags";
-exports.options = {handler: {}, parser: {}};
-exports.html = "<script type=\"text/template\"><h1>Heading1</h1></script>";
-exports.expected = [
-  {
-    "event": "opentagname",
-    "data": [
-      "script"
-    ]
-  },
-  {
-    "event": "opentag",
-    "data": [
-      "script",
-      {
-        "type": "text/template"
-      }
-    ]
-  },
-  {
-    "event": "attribute",
-    "data": [
-      "type",
-      "text/template"
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "<h1"
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      ">Heading1"
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "</h1"
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      ">"
-    ]
-  },
-  {
-    "event": "closetag",
-    "data": [
-      "script"
-    ]
-  }
-];
\ No newline at end of file
diff --git a/tests/Events/02-template.json b/tests/Events/02-template.json
new file mode 100644
index 0000000..37348da
--- /dev/null
+++ b/tests/Events/02-template.json
@@ -0,0 +1,62 @@
+{
+  "name": "Template script tags",
+  "options": {
+    "handler": {},
+    "parser": {}
+  },
+  "html": "<script type=\"text/template\"><h1>Heading1</h1></script>",
+  "expected": [
+    {
+      "event": "opentagname",
+      "data": [
+        "script"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "script",
+        {
+          "type": "text/template"
+        }
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "type",
+        "text/template"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "<h1"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        ">Heading1"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "</h1"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        ">"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "script"
+      ]
+    }
+  ]
+}
\ No newline at end of file
diff --git a/tests/Events/03-lowercase_tags.js b/tests/Events/03-lowercase_tags.js
deleted file mode 100644
index 8e8670b..0000000
--- a/tests/Events/03-lowercase_tags.js
+++ /dev/null
@@ -1,39 +0,0 @@
-exports.name = "Lowercase tags";
-exports.options = {handler: {}, parser: {lowerCaseTags:true}};
-exports.html = "<H1 class=test>adsf</H1>";
-exports.expected = [
-  {
-    "event": "opentagname",
-    "data": [
-      "h1"
-    ]
-  },
-  {
-    "event": "opentag",
-    "data": [
-      "h1",
-      {
-        "class": "test"
-      }
-    ]
-  },
-  {
-    "event": "attribute",
-    "data": [
-      "class",
-      "test"
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "adsf"
-    ]
-  },
-  {
-    "event": "closetag",
-    "data": [
-      "h1"
-    ]
-  }
-];
\ No newline at end of file
diff --git a/tests/Events/03-lowercase_tags.json b/tests/Events/03-lowercase_tags.json
new file mode 100644
index 0000000..56763ad
--- /dev/null
+++ b/tests/Events/03-lowercase_tags.json
@@ -0,0 +1,46 @@
+{
+  "name": "Lowercase tags",
+  "options": {
+    "handler": {},
+    "parser": {
+      "lowerCaseTags": true
+    }
+  },
+  "html": "<H1 class=test>adsf</H1>",
+  "expected": [
+    {
+      "event": "opentagname",
+      "data": [
+        "h1"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "h1",
+        {
+          "class": "test"
+        }
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "class",
+        "test"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "adsf"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "h1"
+      ]
+    }
+  ]
+}
\ No newline at end of file
diff --git a/tests/Events/04-cdata.js b/tests/Events/04-cdata.js
deleted file mode 100644
index b338a69..0000000
--- a/tests/Events/04-cdata.js
+++ /dev/null
@@ -1,80 +0,0 @@
-exports.name = "CDATA";
-exports.options = {handler: {}, parser: {}};
-exports.html = "<tag><![CDATA[ asdf ><asdf></adsf><> fo]]></tag>";
-exports.expected = [
-  {
-    "event": "opentagname",
-    "data": [
-      "tag"
-    ]
-  },
-  {
-    "event": "opentag",
-    "data": [
-      "tag",
-      {}
-    ]
-  },
-  {
-    "event": "cdatastart",
-    "data": []
-  },
-  {
-    "event": "text",
-    "data": [
-      " asdf >"
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "<"
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "asdf>"
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "<"
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "/adsf>"
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "<"
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      ">"
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      " fo"
-    ]
-  },
-  {
-    "event": "cdataend",
-    "data": []
-  },
-  {
-    "event": "closetag",
-    "data": [
-      "tag"
-    ]
-  }
-];
\ No newline at end of file
diff --git a/tests/Events/04-cdata.json b/tests/Events/04-cdata.json
new file mode 100644
index 0000000..1ea9f47
--- /dev/null
+++ b/tests/Events/04-cdata.json
@@ -0,0 +1,85 @@
+{
+  "name": "CDATA",
+  "options": {
+    "handler": {},
+    "parser": {}
+  },
+  "html": "<tag><![CDATA[ asdf ><asdf></adsf><> fo]]></tag>",
+  "expected": [
+    {
+      "event": "opentagname",
+      "data": [
+        "tag"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "tag",
+        {}
+      ]
+    },
+    {
+      "event": "cdatastart",
+      "data": []
+    },
+    {
+      "event": "text",
+      "data": [
+        " asdf >"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "<"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "asdf>"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "<"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "/adsf>"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "<"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        ">"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        " fo"
+      ]
+    },
+    {
+      "event": "cdataend",
+      "data": []
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "tag"
+      ]
+    }
+  ]
+}
\ No newline at end of file
diff --git a/tests/Stream/01-basic.j.json b/tests/Stream/01-basic.j.json
new file mode 100644
index 0000000..9fbe1eb
--- /dev/null
+++ b/tests/Stream/01-basic.j.json
@@ -0,0 +1,83 @@
+{
+  "name": "Basic html",
+  "options": {},
+  "file": "/Documents/Basic.html",
+  "expected": [
+    {
+      "event": "processinginstruction",
+      "data": [
+        "!DOCTYPE",
+        "!DOCTYPE html"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "html"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "html",
+        {}
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "title"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "title",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "The Title"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "title"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "body"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "body",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "Hello world"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "body"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "html"
+      ]
+    }
+  ]
+}
\ No newline at end of file
diff --git a/tests/Stream/01-basic.js b/tests/Stream/01-basic.js
deleted file mode 100644
index c86137b..0000000
--- a/tests/Stream/01-basic.js
+++ /dev/null
@@ -1,82 +0,0 @@
-exports.name = "Basic html";
-exports.options = {};
-
-exports.file = "/Documents/Basic.html";
-exports.expected = [
-  {
-    "event": "processinginstruction",
-    "data": [
-      "!DOCTYPE",
-      "!DOCTYPE html"
-    ]
-  },
-  {
-    "event": "opentagname",
-    "data": [
-      "html"
-    ]
-  },
-  {
-    "event": "opentag",
-    "data": [
-      "html",
-      {}
-    ]
-  },
-  {
-    "event": "opentagname",
-    "data": [
-      "title"
-    ]
-  },
-  {
-    "event": "opentag",
-    "data": [
-      "title",
-      {}
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "The Title"
-    ]
-  },
-  {
-    "event": "closetag",
-    "data": [
-      "title"
-    ]
-  },
-  {
-    "event": "opentagname",
-    "data": [
-      "body"
-    ]
-  },
-  {
-    "event": "opentag",
-    "data": [
-      "body",
-      {}
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "Hello world"
-    ]
-  },
-  {
-    "event": "closetag",
-    "data": [
-      "body"
-    ]
-  },
-  {
-    "event": "closetag",
-    "data": [
-      "html"
-    ]
-  }
-];
\ No newline at end of file
diff --git a/tests/Stream/02-RSS.j.json b/tests/Stream/02-RSS.j.json
new file mode 100644
index 0000000..aa04734
--- /dev/null
+++ b/tests/Stream/02-RSS.j.json
@@ -0,0 +1,1093 @@
+{
+  "name": "RSS feed",
+  "options": {},
+  "file": "/Documents/RSS_Example.xml",
+  "expected": [
+    {
+      "event": "processinginstruction",
+      "data": [
+        "?xml",
+        "?xml version=\"1.0\"?"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n"
+      ]
+    },
+    {
+      "event": "comment",
+      "data": [
+        " http://cyber.law.harvard.edu/rss/examples/rss2sample.xml "
+      ]
+    },
+    {
+      "event": "commentend",
+      "data": []
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "rss"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "rss",
+        {
+          "version": "2.0"
+        }
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "version",
+        "2.0"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n   "
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "channel"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "channel",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n      "
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "title"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "title",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "Liftoff News"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "title"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n      "
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "link"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "link",
+        {}
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "link"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "http://liftoff.msfc.nasa.gov/"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n      "
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "description"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "description",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "Liftoff to Space Exploration."
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "description"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n      "
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "language"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "language",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "en-us"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "language"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n      "
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "pubDate"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "pubDate",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "Tue, 10 Jun 2003 04:00:00 GMT"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "pubDate"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\n      "
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "lastBuildDate"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "lastBuildDate",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "Tue, 10 Jun 2003 09:41:01 GMT"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "lastBuildDate"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n      "
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "docs"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "docs",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "http://blogs.law.harvard.edu/tech/rss"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "docs"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n      "
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "generator"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "generator",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "Weblog Editor 2.0"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "generator"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n      "
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "managingEditor"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "managingEditor",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "editor@example.com"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "managingEditor"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n      "
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "webMaster"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "webMaster",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "webmaster@example.com"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "webMaster"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n      "
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "item"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "item",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\n         "
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "title"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "title",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "Star City"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "title"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n         "
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "link"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "link",
+        {}
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "link"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n         "
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "description"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "description",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's &lt;a href=\"http://howe.iki.rssi.ru/GCTC/gctc_e.htm\"&gt;Star City&lt;/a&gt;."
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "description"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n         "
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "pubDate"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "pubDate",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "Tue, 03 Jun 2003 09:39:21 GMT"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "pubDate"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n         "
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "guid"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "guid",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "http://liftoff.msfc.nasa.gov/2003/06/03.html#item573"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "guid"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\n      "
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "item"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n      "
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "item"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "item",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n         "
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "description"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "description",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "Sky watchers in Europe, Asia, and parts of Alaska and Canada will experience a &lt;a href=\"http://science.nasa.gov/headlines/y2003/30may_solareclipse.htm\"&gt;partial eclipse of the Sun&lt;/a&gt; on Saturday, May 31st."
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "description"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n         "
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "pubDate"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "pubDate",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "Fri, 30 May 2003 11:06:42 GMT"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "pubDate"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n         "
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "guid"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "guid",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "http://liftoff.msfc.nasa.gov/2003/05/30.html#item572"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "guid"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\n      "
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "item"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n      "
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "item"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "item",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n         "
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "title"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "title",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "The Engine That Does More"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "title"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n         "
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "link"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "link",
+        {}
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "link"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "http://liftoff.msfc.nasa.gov/news/2003/news-VASIMR.asp"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n         "
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "description"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "description",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "Before man travels to Mars, NASA hopes to design new engines that will let us fly through the Solar System more quickly.  The proposed VASIMR engine would do that."
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "description"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n         "
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "pubDate"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "pubDate",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "Tue, 27 May 2003 08:37:32 GMT"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "pubDate"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n         "
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "guid"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "guid",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "http://liftoff.msfc.nasa.gov/2003/05/27.html#item571"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "guid"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\n      "
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "item"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n      "
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "item"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "item",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n         "
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "title"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "title",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "Astronauts' Dirty Laundry"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "title"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n         "
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "link"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "link",
+        {}
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "link"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "http://liftoff.msfc.nasa.gov/news/2003/news-laundry.asp"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n         "
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "description"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "description",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "Compared to earlier spacecraft, the International Space Station has many luxuries, but laundry facilities are not one of them.  Instead, astronauts have other options."
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "description"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n         "
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "pubDate"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "pubDate",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "Tue, 20 May 2003 08:56:02 GMT"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "pubDate"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n         "
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "guid"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "guid",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "http://liftoff.msfc.nasa.gov/2003/05/20.html#item570"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "guid"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\n      "
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "item"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n   "
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "channel"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "rss"
+      ]
+    }
+  ]
+}
\ No newline at end of file
diff --git a/tests/Stream/02-RSS.js b/tests/Stream/02-RSS.js
deleted file mode 100644
index 68f1fdb..0000000
--- a/tests/Stream/02-RSS.js
+++ /dev/null
@@ -1,1092 +0,0 @@
-exports.name = "RSS feed";
-exports.options = {};
-
-exports.file = "/Documents/RSS_Example.xml";
-exports.expected = [
-  {
-    "event": "processinginstruction",
-    "data": [
-      "?xml",
-      "?xml version=\"1.0\"?"
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "\n"
-    ]
-  },
-  {
-    "event": "comment",
-    "data": [
-      " http://cyber.law.harvard.edu/rss/examples/rss2sample.xml "
-    ]
-  },
-  {
-    "event": "commentend",
-    "data": []
-  },
-  {
-    "event": "text",
-    "data": [
-      "\n"
-    ]
-  },
-  {
-    "event": "opentagname",
-    "data": [
-      "rss"
-    ]
-  },
-  {
-    "event": "opentag",
-    "data": [
-      "rss",
-      {
-        "version": "2.0"
-      }
-    ]
-  },
-  {
-    "event": "attribute",
-    "data": [
-      "version",
-      "2.0"
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "\n   "
-    ]
-  },
-  {
-    "event": "opentagname",
-    "data": [
-      "channel"
-    ]
-  },
-  {
-    "event": "opentag",
-    "data": [
-      "channel",
-      {}
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "\n      "
-    ]
-  },
-  {
-    "event": "opentagname",
-    "data": [
-      "title"
-    ]
-  },
-  {
-    "event": "opentag",
-    "data": [
-      "title",
-      {}
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "Liftoff News"
-    ]
-  },
-  {
-    "event": "closetag",
-    "data": [
-      "title"
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "\n      "
-    ]
-  },
-  {
-    "event": "opentagname",
-    "data": [
-      "link"
-    ]
-  },
-  {
-    "event": "opentag",
-    "data": [
-      "link",
-      {}
-    ]
-  },
-  {
-    "event": "closetag",
-    "data": [
-      "link"
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "http://liftoff.msfc.nasa.gov/"
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "\n      "
-    ]
-  },
-  {
-    "event": "opentagname",
-    "data": [
-      "description"
-    ]
-  },
-  {
-    "event": "opentag",
-    "data": [
-      "description",
-      {}
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "Liftoff to Space Exploration."
-    ]
-  },
-  {
-    "event": "closetag",
-    "data": [
-      "description"
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "\n      "
-    ]
-  },
-  {
-    "event": "opentagname",
-    "data": [
-      "language"
-    ]
-  },
-  {
-    "event": "opentag",
-    "data": [
-      "language",
-      {}
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "en-us"
-    ]
-  },
-  {
-    "event": "closetag",
-    "data": [
-      "language"
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "\n      "
-    ]
-  },
-  {
-    "event": "opentagname",
-    "data": [
-      "pubDate"
-    ]
-  },
-  {
-    "event": "opentag",
-    "data": [
-      "pubDate",
-      {}
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "Tue, 10 Jun 2003 04:00:00 GMT"
-    ]
-  },
-  {
-    "event": "closetag",
-    "data": [
-      "pubDate"
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "\n\n      "
-    ]
-  },
-  {
-    "event": "opentagname",
-    "data": [
-      "lastBuildDate"
-    ]
-  },
-  {
-    "event": "opentag",
-    "data": [
-      "lastBuildDate",
-      {}
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "Tue, 10 Jun 2003 09:41:01 GMT"
-    ]
-  },
-  {
-    "event": "closetag",
-    "data": [
-      "lastBuildDate"
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "\n      "
-    ]
-  },
-  {
-    "event": "opentagname",
-    "data": [
-      "docs"
-    ]
-  },
-  {
-    "event": "opentag",
-    "data": [
-      "docs",
-      {}
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "http://blogs.law.harvard.edu/tech/rss"
-    ]
-  },
-  {
-    "event": "closetag",
-    "data": [
-      "docs"
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "\n      "
-    ]
-  },
-  {
-    "event": "opentagname",
-    "data": [
-      "generator"
-    ]
-  },
-  {
-    "event": "opentag",
-    "data": [
-      "generator",
-      {}
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "Weblog Editor 2.0"
-    ]
-  },
-  {
-    "event": "closetag",
-    "data": [
-      "generator"
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "\n      "
-    ]
-  },
-  {
-    "event": "opentagname",
-    "data": [
-      "managingEditor"
-    ]
-  },
-  {
-    "event": "opentag",
-    "data": [
-      "managingEditor",
-      {}
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "editor@example.com"
-    ]
-  },
-  {
-    "event": "closetag",
-    "data": [
-      "managingEditor"
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "\n      "
-    ]
-  },
-  {
-    "event": "opentagname",
-    "data": [
-      "webMaster"
-    ]
-  },
-  {
-    "event": "opentag",
-    "data": [
-      "webMaster",
-      {}
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "webmaster@example.com"
-    ]
-  },
-  {
-    "event": "closetag",
-    "data": [
-      "webMaster"
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "\n      "
-    ]
-  },
-  {
-    "event": "opentagname",
-    "data": [
-      "item"
-    ]
-  },
-  {
-    "event": "opentag",
-    "data": [
-      "item",
-      {}
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "\n\n         "
-    ]
-  },
-  {
-    "event": "opentagname",
-    "data": [
-      "title"
-    ]
-  },
-  {
-    "event": "opentag",
-    "data": [
-      "title",
-      {}
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "Star City"
-    ]
-  },
-  {
-    "event": "closetag",
-    "data": [
-      "title"
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "\n         "
-    ]
-  },
-  {
-    "event": "opentagname",
-    "data": [
-      "link"
-    ]
-  },
-  {
-    "event": "opentag",
-    "data": [
-      "link",
-      {}
-    ]
-  },
-  {
-    "event": "closetag",
-    "data": [
-      "link"
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp"
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "\n         "
-    ]
-  },
-  {
-    "event": "opentagname",
-    "data": [
-      "description"
-    ]
-  },
-  {
-    "event": "opentag",
-    "data": [
-      "description",
-      {}
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's &lt;a href=\"http://howe.iki.rssi.ru/GCTC/gctc_e.htm\"&gt;Star City&lt;/a&gt;."
-    ]
-  },
-  {
-    "event": "closetag",
-    "data": [
-      "description"
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "\n         "
-    ]
-  },
-  {
-    "event": "opentagname",
-    "data": [
-      "pubDate"
-    ]
-  },
-  {
-    "event": "opentag",
-    "data": [
-      "pubDate",
-      {}
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "Tue, 03 Jun 2003 09:39:21 GMT"
-    ]
-  },
-  {
-    "event": "closetag",
-    "data": [
-      "pubDate"
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "\n         "
-    ]
-  },
-  {
-    "event": "opentagname",
-    "data": [
-      "guid"
-    ]
-  },
-  {
-    "event": "opentag",
-    "data": [
-      "guid",
-      {}
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "http://liftoff.msfc.nasa.gov/2003/06/03.html#item573"
-    ]
-  },
-  {
-    "event": "closetag",
-    "data": [
-      "guid"
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "\n\n      "
-    ]
-  },
-  {
-    "event": "closetag",
-    "data": [
-      "item"
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "\n      "
-    ]
-  },
-  {
-    "event": "opentagname",
-    "data": [
-      "item"
-    ]
-  },
-  {
-    "event": "opentag",
-    "data": [
-      "item",
-      {}
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "\n         "
-    ]
-  },
-  {
-    "event": "opentagname",
-    "data": [
-      "description"
-    ]
-  },
-  {
-    "event": "opentag",
-    "data": [
-      "description",
-      {}
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "Sky watchers in Europe, Asia, and parts of Alaska and Canada will experience a &lt;a href=\"http://science.nasa.gov/headlines/y2003/30may_solareclipse.htm\"&gt;partial eclipse of the Sun&lt;/a&gt; on Saturday, May 31st."
-    ]
-  },
-  {
-    "event": "closetag",
-    "data": [
-      "description"
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "\n         "
-    ]
-  },
-  {
-    "event": "opentagname",
-    "data": [
-      "pubDate"
-    ]
-  },
-  {
-    "event": "opentag",
-    "data": [
-      "pubDate",
-      {}
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "Fri, 30 May 2003 11:06:42 GMT"
-    ]
-  },
-  {
-    "event": "closetag",
-    "data": [
-      "pubDate"
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "\n         "
-    ]
-  },
-  {
-    "event": "opentagname",
-    "data": [
-      "guid"
-    ]
-  },
-  {
-    "event": "opentag",
-    "data": [
-      "guid",
-      {}
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "http://liftoff.msfc.nasa.gov/2003/05/30.html#item572"
-    ]
-  },
-  {
-    "event": "closetag",
-    "data": [
-      "guid"
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "\n\n      "
-    ]
-  },
-  {
-    "event": "closetag",
-    "data": [
-      "item"
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "\n      "
-    ]
-  },
-  {
-    "event": "opentagname",
-    "data": [
-      "item"
-    ]
-  },
-  {
-    "event": "opentag",
-    "data": [
-      "item",
-      {}
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "\n         "
-    ]
-  },
-  {
-    "event": "opentagname",
-    "data": [
-      "title"
-    ]
-  },
-  {
-    "event": "opentag",
-    "data": [
-      "title",
-      {}
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "The Engine That Does More"
-    ]
-  },
-  {
-    "event": "closetag",
-    "data": [
-      "title"
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "\n         "
-    ]
-  },
-  {
-    "event": "opentagname",
-    "data": [
-      "link"
-    ]
-  },
-  {
-    "event": "opentag",
-    "data": [
-      "link",
-      {}
-    ]
-  },
-  {
-    "event": "closetag",
-    "data": [
-      "link"
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "http://liftoff.msfc.nasa.gov/news/2003/news-VASIMR.asp"
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "\n         "
-    ]
-  },
-  {
-    "event": "opentagname",
-    "data": [
-      "description"
-    ]
-  },
-  {
-    "event": "opentag",
-    "data": [
-      "description",
-      {}
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "Before man travels to Mars, NASA hopes to design new engines that will let us fly through the Solar System more quickly.  The proposed VASIMR engine would do that."
-    ]
-  },
-  {
-    "event": "closetag",
-    "data": [
-      "description"
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "\n         "
-    ]
-  },
-  {
-    "event": "opentagname",
-    "data": [
-      "pubDate"
-    ]
-  },
-  {
-    "event": "opentag",
-    "data": [
-      "pubDate",
-      {}
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "Tue, 27 May 2003 08:37:32 GMT"
-    ]
-  },
-  {
-    "event": "closetag",
-    "data": [
-      "pubDate"
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "\n         "
-    ]
-  },
-  {
-    "event": "opentagname",
-    "data": [
-      "guid"
-    ]
-  },
-  {
-    "event": "opentag",
-    "data": [
-      "guid",
-      {}
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "http://liftoff.msfc.nasa.gov/2003/05/27.html#item571"
-    ]
-  },
-  {
-    "event": "closetag",
-    "data": [
-      "guid"
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "\n\n      "
-    ]
-  },
-  {
-    "event": "closetag",
-    "data": [
-      "item"
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "\n      "
-    ]
-  },
-  {
-    "event": "opentagname",
-    "data": [
-      "item"
-    ]
-  },
-  {
-    "event": "opentag",
-    "data": [
-      "item",
-      {}
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "\n         "
-    ]
-  },
-  {
-    "event": "opentagname",
-    "data": [
-      "title"
-    ]
-  },
-  {
-    "event": "opentag",
-    "data": [
-      "title",
-      {}
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "Astronauts' Dirty Laundry"
-    ]
-  },
-  {
-    "event": "closetag",
-    "data": [
-      "title"
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "\n         "
-    ]
-  },
-  {
-    "event": "opentagname",
-    "data": [
-      "link"
-    ]
-  },
-  {
-    "event": "opentag",
-    "data": [
-      "link",
-      {}
-    ]
-  },
-  {
-    "event": "closetag",
-    "data": [
-      "link"
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "http://liftoff.msfc.nasa.gov/news/2003/news-laundry.asp"
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "\n         "
-    ]
-  },
-  {
-    "event": "opentagname",
-    "data": [
-      "description"
-    ]
-  },
-  {
-    "event": "opentag",
-    "data": [
-      "description",
-      {}
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "Compared to earlier spacecraft, the International Space Station has many luxuries, but laundry facilities are not one of them.  Instead, astronauts have other options."
-    ]
-  },
-  {
-    "event": "closetag",
-    "data": [
-      "description"
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "\n         "
-    ]
-  },
-  {
-    "event": "opentagname",
-    "data": [
-      "pubDate"
-    ]
-  },
-  {
-    "event": "opentag",
-    "data": [
-      "pubDate",
-      {}
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "Tue, 20 May 2003 08:56:02 GMT"
-    ]
-  },
-  {
-    "event": "closetag",
-    "data": [
-      "pubDate"
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "\n         "
-    ]
-  },
-  {
-    "event": "opentagname",
-    "data": [
-      "guid"
-    ]
-  },
-  {
-    "event": "opentag",
-    "data": [
-      "guid",
-      {}
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "http://liftoff.msfc.nasa.gov/2003/05/20.html#item570"
-    ]
-  },
-  {
-    "event": "closetag",
-    "data": [
-      "guid"
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "\n\n      "
-    ]
-  },
-  {
-    "event": "closetag",
-    "data": [
-      "item"
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "\n   "
-    ]
-  },
-  {
-    "event": "closetag",
-    "data": [
-      "channel"
-    ]
-  },
-  {
-    "event": "text",
-    "data": [
-      "\n"
-    ]
-  },
-  {
-    "event": "closetag",
-    "data": [
-      "rss"
-    ]
-  }
-];
\ No newline at end of file

From b0744be07fe1d2e0556adcf60880d2a7f0670175 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 4 Mar 2012 15:52:07 +0100
Subject: [PATCH 201/450] Don't require `fs` inside the feed tests, just
 specify a path

Can't convert them to JSON, because dates aren't parsed
---
 tests/02-feed.js       | 4 +++-
 tests/Feeds/01-rss.js  | 2 +-
 tests/Feeds/02-atom.js | 2 +-
 tests/Feeds/03-rdf.js  | 2 +-
 4 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/tests/02-feed.js b/tests/02-feed.js
index cd5dc56..308079b 100644
--- a/tests/02-feed.js
+++ b/tests/02-feed.js
@@ -2,6 +2,7 @@
 
 var helper = require("./test-helper.js"),
 	FeedHandler = require("../lib/FeedHandler.js"),
+	fs = require("fs"),
 	parserOpts = {
 		xmlMode: true
 	};
@@ -13,5 +14,6 @@ exports.test = function(test, cb){
 		if(err) cb(err, 0); //return the error
 		else cb(null, dom);
 	});
-	helper.writeToParser(handler, parserOpts, test.html);
+	var file = fs.readFileSync(__dirname + "/Documents/" + test.file).toString();
+	helper.writeToParser(handler, parserOpts, file);
 };
\ No newline at end of file
diff --git a/tests/Feeds/01-rss.js b/tests/Feeds/01-rss.js
index b50a468..a3aae47 100644
--- a/tests/Feeds/01-rss.js
+++ b/tests/Feeds/01-rss.js
@@ -1,5 +1,5 @@
 exports.name = "RSS (2.0)";
-exports.html = require("fs").readFileSync(__dirname+"/../Documents/RSS_Example.xml").toString();
+exports.file = "/RSS_Example.xml";
 exports.expected = {
 	type: "rss",
 	id: "",
diff --git a/tests/Feeds/02-atom.js b/tests/Feeds/02-atom.js
index c986d73..92b6de3 100644
--- a/tests/Feeds/02-atom.js
+++ b/tests/Feeds/02-atom.js
@@ -1,5 +1,5 @@
 exports.name = "Atom (1.0)";
-exports.html = require("fs").readFileSync(__dirname+"/../Documents/Atom_Example.xml").toString();
+exports.file = "/Atom_Example.xml";
 exports.expected = {
 	type: "atom",
 	id: "urn:uuid:60a76c80-d399-11d9-b91C-0003939e0af6",
diff --git a/tests/Feeds/03-rdf.js b/tests/Feeds/03-rdf.js
index 2c7383c..94f5e67 100644
--- a/tests/Feeds/03-rdf.js
+++ b/tests/Feeds/03-rdf.js
@@ -1,5 +1,5 @@
 exports.name = "RDF test";
-exports.html = require("fs").readFileSync(__dirname+"/../Documents/RDF_Example.xml").toString();
+exports.file = "/RDF_Example.xml";
 exports.expected = {
   "type": "rdf",
   "id": "",

From 3b51bcf3249b1404749fe4ad8932840806309ea9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 4 Mar 2012 15:57:46 +0100
Subject: [PATCH 202/450] Use the assert library for equality checks, + some
 minor improvements

---
 tests/00-runtests.js | 48 ++++++++++++--------------------------------
 1 file changed, 13 insertions(+), 35 deletions(-)

diff --git a/tests/00-runtests.js b/tests/00-runtests.js
index 54f7fe0..3f76d43 100644
--- a/tests/00-runtests.js
+++ b/tests/00-runtests.js
@@ -1,53 +1,38 @@
-var fs = require("fs");
+var fs = require("fs"),
+	assert = require("assert");
 
 var runCount = 0,
-	testCount = 0,
-	failCount = 0;
-
-function getSortedObject(obj){
-	if(typeof obj !== "object" || Array.isArray(obj)) return obj;
-	return Object.keys(obj).sort().reduce(function(o, name){
-		o[name] = obj[name];
-		return o;
-	}, {});
-};
+	testCount = 0;
 
 function runTests(test){
 	//read files, load them, run them
 	fs.readdirSync(__dirname + test.dir
 	).map(function(file){
 		if(file[0] === ".") return false;
+		if(file.substr(-5) === ".json") return JSON.parse(
+			fs.readFileSync(__dirname + test.dir + file)
+		);
 		return require(__dirname + test.dir + file);
 	}).forEach(function(file){
-		if(file === false) return;
-		var second = false,
-			failed = false;
+		if(!file) return;
+		var second = false;
 		
 		runCount++;
 		
 		console.log("Testing:", file.name);
 		
 		test.test(file, function(err, dom){
-			if(err) console.log("Handler error:", err);
-			
-			var expected = JSON.stringify(getSortedObject(file.expected), null, 2),
-				got = JSON.stringify(getSortedObject(dom), null, 2);
-			if(expected !== got){
-				failed = true;
-				console.log("Expected", expected, "Got", got, second);
-			}
-			
+			assert.ifError(err);
+			assert.deepEqual(file.expected, dom, "didn't get expected output");
+						
 			if(second){
 				runCount--;
 				testCount++;
-				if(failed) failCount++;
-				
-				console.log("["+file.name+"]:", failed ? "failed":"passed"); 
 			}
 			else second = true;
 		});
 	});
-	console.log("->", test.dir.slice(1, -1), "iterated");
+	console.log("->", test.dir.slice(1, -1), "started");
 };
 
 //run all tests
@@ -61,13 +46,6 @@ function runTests(test){
 
 //log the results
 (function check(){
-	if(runCount !== 0){
-		return setTimeout(check, 50);
-	}
+	if(runCount !== 0) return process.nextTick(check);
 	console.log("Total tests:", testCount);
-	console.log("Failed tests:", failCount);
-	
-	if(failCount !== 0){
-		throw Error("Encountered " + failCount + " errors!");
-	}
 })();
\ No newline at end of file

From 6a9bae809e2bf12f0d444d575e4bd8d3968f15dc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 4 Mar 2012 18:37:54 +0100
Subject: [PATCH 203/450] Fixed names of stream tests

---
 tests/Stream/{01-basic.j.json => 01-basic.json} | 0
 tests/Stream/{02-RSS.j.json => 02-RSS.json}     | 0
 2 files changed, 0 insertions(+), 0 deletions(-)
 rename tests/Stream/{01-basic.j.json => 01-basic.json} (100%)
 rename tests/Stream/{02-RSS.j.json => 02-RSS.json} (100%)

diff --git a/tests/Stream/01-basic.j.json b/tests/Stream/01-basic.json
similarity index 100%
rename from tests/Stream/01-basic.j.json
rename to tests/Stream/01-basic.json
diff --git a/tests/Stream/02-RSS.j.json b/tests/Stream/02-RSS.json
similarity index 100%
rename from tests/Stream/02-RSS.j.json
rename to tests/Stream/02-RSS.json

From 15622d226f854814dacd2b94a889f84b8d10b9cd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 4 Mar 2012 18:38:51 +0100
Subject: [PATCH 204/450] 2.2.4

---
 package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package.json b/package.json
index 624215b..423996f 100644
--- a/package.json
+++ b/package.json
@@ -1,7 +1,7 @@
 {
 	"name": "htmlparser2",
 	"description": "Performance-optimized forgiving HTML/XML/RSS parser",
-	"version": "2.2.3",
+	"version": "2.2.4",
 	"author": "Felix Boehm <me@feedic.com>",
 	"keywords": ["html", "parser", "streams", "xml", "dom", "rss", "feed", "atom"],
 	"contributors": ["Chris Winberry <chris@winberry.net>"],

From 85d22274588a85f289da8b8688e96f92399fd072 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Mon, 5 Mar 2012 17:46:37 +0100
Subject: [PATCH 205/450] Also test with node 0.7.x

---
 .travis.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 381c985..2948edb 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,4 +1,5 @@
 language: node_js
 node_js:
   - 0.4
-  - 0.6
\ No newline at end of file
+  - 0.6
+  - 0.7

From 8010c536bf78a3b725ec59d3c21b9563a827cd96 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 25 Mar 2012 21:01:34 +0200
Subject: [PATCH 206/450] Fixed ProxyHandler, use Object.defineProperty instead
 of __define[Getter/Setter]

Forgot to prepend "on" to the events - wouldn't have worked that way
---
 lib/ProxyHandler.js | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/lib/ProxyHandler.js b/lib/ProxyHandler.js
index e67f36d..edfbbf8 100644
--- a/lib/ProxyHandler.js
+++ b/lib/ProxyHandler.js
@@ -5,12 +5,14 @@ var ProxyHandler = function(cbs){
 ProxyHandler.prototype._cbs = {};
 
 Object.keys(require("./").EVENTS).forEach(function(name){
-	ProxyHandler.prototype.__defineGetter__(name, function(){
-		return this._cbs[name];
-	});
-	ProxyHandler.prototype.__defineSetter__(name, function(value){
-		//allow functions to be overwritten
-		Object.defineProperty(this, name, {value: value});
+	name = "on" + name;
+	Object.defineProperty(ProxyHandler.prototype, name, {
+		enumerable:true, configurable:true,
+		get: function(){ return this._cbs[name]; },
+		set: function(value){
+			//allow functions to be overwritten
+			Object.defineProperty(this, name, {value: value});
+		}
 	});
 });
 

From d3abd59aa104fcdba8051f9794c8ab6b3009d5fb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Wed, 18 Apr 2012 20:44:17 +0200
Subject: [PATCH 207/450] Use bitwise operators for flags, fixed a bug

The bug: When a closing script tag appeared inside a style tag, the
flag became negative. Using the bitwise AND, only the specific bit gets
checked.

This change also comes with a (slight) performance benefit:
http://jsperf.com/or-equals-vs-plus-equals
---
 lib/Parser.js | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index 27a8a95..25bec33 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -106,11 +106,11 @@ Parser.prototype._parseTagName = function(data){
 
 //Special tags that are treated differently
 var SpecialTags = {};
-//SpecialTags[ElementType.Tag] = 0;
-SpecialTags[ElementType.Style] = 1; //2^0
-SpecialTags[ElementType.Script] = 2; //2^1
-SpecialTags[ElementType.Comment] = 4; //2^2
-SpecialTags[ElementType.CDATA] = 8; //2^3
+//SpecialTags[ElementType.Tag]   = 0x0;
+SpecialTags[ElementType.Style]   = 0x1; //2^0
+SpecialTags[ElementType.Script]  = 0x2; //2^1
+SpecialTags[ElementType.Comment] = 0x4; //2^2
+SpecialTags[ElementType.CDATA]   = 0x8; //2^3
 
 var TagValues = {
 	style: 1,
@@ -163,9 +163,9 @@ Parser.prototype._parseTags = function(force){
 				elementData = this._parseTagName(elementData.substr(1));
 				if(this._contentFlags !== 0){
 					//if it's a closing tag, remove the flag
-					if(this._contentFlags >= TagValues[elementData]){
+					if(this._contentFlags & TagValues[elementData]){
 						//remove the flag
-						this._contentFlags -= TagValues[elementData];
+						this._contentFlags ^= TagValues[elementData];
 					}
 					else {
 						this._writeSpecial(rawData, lastTagSep);
@@ -177,11 +177,11 @@ Parser.prototype._parseTags = function(force){
 			else if(elementData.charAt(0) === "!"){
 				if(elementData.substr(1, 2) === "--"){
 					//This tag is a comment
-					this._contentFlags += SpecialTags[ElementType.Comment];
+					this._contentFlags |= SpecialTags[ElementType.Comment];
 					this._writeComment(rawData.substr(3));
 				}
 				else if(elementData.substr(1, 7) === "[CDATA["){
-					this._contentFlags += SpecialTags[ElementType.CDATA];
+					this._contentFlags |= SpecialTags[ElementType.CDATA];
 					if(this._cbs.oncdatastart) this._cbs.oncdatastart();
 					this._writeCDATA(elementData.substr(8));
 				}
@@ -225,7 +225,7 @@ Parser.prototype._writeCDATA = function(data){
     	if(data.length !== 2 && this._cbs.ontext){
     		this._cbs.ontext(data.slice(0,-2));
     	}
-    	this._contentFlags -= SpecialTags[ElementType.CDATA];
+    	this._contentFlags ^= SpecialTags[ElementType.CDATA];
     	if(this._cbs.oncdataend) this._cbs.oncdataend();
     }
     else if(this._cbs.ontext) this._cbs.ontext(data + this._tagSep);
@@ -234,7 +234,7 @@ Parser.prototype._writeCDATA = function(data){
 Parser.prototype._writeComment = function(rawData){
 	if(this._tagSep === ">" && rawData.substr(-2) === "--"){ //comment ends
 		//remove the written flag (also removes the comment flag)
-		this._contentFlags -= SpecialTags[ElementType.Comment];
+		this._contentFlags ^= SpecialTags[ElementType.Comment];
 		this._wroteSpecial = false;
 		if(this._cbs.oncomment) this._cbs.oncomment(rawData.slice(0, -2));
 		if(this._cbs.oncommentend) this._cbs.oncommentend();
@@ -318,7 +318,7 @@ Parser.prototype._processOpenTag = function(data){
 		if(this._cbs.onclosetag) this._cbs.onclosetag(name);
 	} else {
 		if(type !== ElementType.Tag){
-			this._contentFlags += SpecialTags[type];
+			this._contentFlags |= SpecialTags[type];
 			this._wroteSpecial = false;	
 		}
 		this._stack.push(name);

From 27739906a4a186a21377879bc6e797812780c564 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Wed, 18 Apr 2012 21:31:17 +0200
Subject: [PATCH 208/450] Removed Object.keys calls and the switch statement,
 fixed a bug in DomUtils
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Even though for…in isn't fast, it's much faster than Object.keys. And a
switch statement breaking after each check is stupid (and slow).
---
 lib/DomUtils.js | 230 ++++++++++++++++++++++--------------------------
 1 file changed, 103 insertions(+), 127 deletions(-)

diff --git a/lib/DomUtils.js b/lib/DomUtils.js
index 02e2de5..64e4791 100644
--- a/lib/DomUtils.js
+++ b/lib/DomUtils.js
@@ -3,161 +3,137 @@ var ElementType = require("./ElementType.js"),
     DomUtils = module.exports;
 
 function filterArray(test, arr, recurse, limit){
-    var result = [], childs;
-
-    for(var i = 0, j = arr.length; i < j; i++){
-        if(test(arr[i])){
-            result.push(arr[i]);
-            if(--limit <= 0) break;
-        }
-
-        childs = arr[i].children;
-        if(recurse && childs){
-            childs = filterArray(test, childs, recurse, limit);
-            arrayPush.apply(result, childs);
-            limit -= childs.length;
-            if(limit <= 0) break;
-        }
-    }
-
-    return result;
+	var result = [], childs;
+
+	for(var i = 0, j = arr.length; i < j; i++){
+		if(test(arr[i])){
+			result.push(arr[i]);
+			if(--limit <= 0) break;
+		}
+
+		childs = arr[i].children;
+		if(recurse && childs){
+			childs = filterArray(test, childs, recurse, limit);
+			arrayPush.apply(result, childs);
+			limit -= childs.length;
+			if(limit <= 0) break;
+		}
+	}
+
+	return result;
 }
 
 function filter(test, element, recurse, limit){
-    if(recurse !== false) recurse = true;
-    if(isNaN(limit)) limit = Infinity;
-    if(!Array.isArray(element)) element = [element];
+	if(recurse !== false) recurse = true;
+	if(isNaN(limit)) limit = Infinity;
+	if(!Array.isArray(element)) element = [element];
 
-    return filterArray(test, element, recurse, limit);
+	return filterArray(test, element, recurse, limit);
 }
 
 DomUtils.testElement = function(options, element){
-    var type = element.type,
-        keys = Object.keys(options),
-        len = keys.length;
-
-    for(var i = 0; i < len; i++){
-        var key = keys[i];
-
-        switch(key){
-            case "tag_name":
-                if(type !== ElementType.Tag && type !== ElementType.Script && type !== ElementType.Style) return false;
-                if(!options.tag_name(element.name)) return false;
-                break;
-            case "tag_type":
-                if(!options.tag_type(type)) return false;
-                break;
-            case "tag_contains":
-                if(type !== ElementType.Text && type !== ElementType.Comment && type !== ElementType.Directive) return false;
-                if(!options.tag_contains(element.data)) return false;
-                break;
-            default:
-                if(!element.attribs || !options[key](element.attribs[key])) return false;
-                break;
-        }
-    }
-
-    return true;
+	var type = element.type;
+	for(var key in options){
+		if(!options.hasOwnProperty(key));
+		else if(key === "tag_name"){
+		    if(type !== ElementType.Tag && type !== ElementType.Script && type !== ElementType.Style) return false;
+		    if(!options.tag_name(element.name)) return false;
+		} else if(key === "tag_type"){
+		    if(!options.tag_type(type)) return false;
+		} else if(key === "tag_contains"){
+		    if(type !== ElementType.Text && type !== ElementType.Comment && type !== ElementType.Directive) return false;
+		    if(!options.tag_contains(element.data)) return false;
+		} else if(!element.attribs || !options[key](element.attribs[key])) return false;
+	}
+
+	return true;
+};
+
+function getEqualityFunc(check){
+	return (function(val){ return val === check; });
 }
 
 DomUtils.getElements = function(options, element, recurse, limit){
-    var keys = Object.keys(options),
-        len = keys.length;
+	for(var key in options){
+		if(options.hasOwnProperty(key) && typeof options[key] !== "function"){
+			options[key] = getEqualityFunc(options[key]);
+		}
+	}
 
-    for(var i = 0; i < len; i++){
-        var key = keys[i];
-
-        if(typeof options[key] !== "function"){
-            var checker = options[key];
-            options[key] = function(val){ return val === checker };
-        }
-    }
-
-    return filter(this.testElement.bind(null, options), element, recurse, limit);
-}
+    return filter(function(elem){ return DomUtils.testElement(options, elem); }, element, recurse, limit);
+};
 
 DomUtils.getElementById = function(id, element, recurse){
-    var result;
+	var result;
 
-    if(typeof id === "function"){
-        result = filter(function(elem){ return elem.attribs && id(elem.attribs) }, element, recurse, 1);
-    }else{
-        result = filter(function(elem){ return elem.attribs && elem.attribs.id === id }, element, recurse, 1);
-    }
+	if(typeof id === "function"){
+		result = filter(function(elem){ return elem.attribs && id(elem.attribs); }, element, recurse, 1);
+	} else {
+		result = filter(function(elem){ return elem.attribs && elem.attribs.id === id; }, element, recurse, 1);
+	}
 
-    return result.length ? result[0] : null;
-}
+	return result.length ? result[0] : null;
+};
 
 DomUtils.getElementsByTagName = function(name, element, recurse, limit){
-    if(typeof name === "function") return filter(function(elem){
-        var type = elem.type;
-        if(type !== ElementType.Tag && type !== ElementType.Script && type !== ElementType.Style)
-            return false;
-        return name(elem.name);
-    }, element, recurse, limit);
-
-    return filter(function(elem){
-        var type = elem.type;
-        if(type !== ElementType.Tag && type !== ElementType.Script && type !== ElementType.Style)
-            return false;
-        return elem.name === name;
-    }, element, recurse, limit);
-}
+	if(typeof name === "function"){ 
+		return filter(function(elem){
+			var type = elem.type;
+			if(type !== ElementType.Tag && type !== ElementType.Script && type !== ElementType.Style)
+				return false;
+			return name(elem.name);
+		}, element, recurse, limit);
+	}
+
+	return filter(function(elem){
+		var type = elem.type;
+		if(type !== ElementType.Tag && type !== ElementType.Script && type !== ElementType.Style)
+			return false;
+		return elem.name === name;
+	}, element, recurse, limit);
+};
 
 DomUtils.getElementsByTagType = function(type, element, recurse, limit){
-    if(typeof type === "function")
-        return filter(function(elem){ return type(elem.type) }, element, recurse, limit);
-    else
-        return filter(function(elem){ return elem.type === type }, element, recurse, limit);
-}
+	if(typeof type === "function"){
+		return filter(function(elem){ return type(elem.type); }, element, recurse, limit);
+	} else {
+		return filter(function(elem){ return elem.type === type; }, element, recurse, limit);
+	}
+};
 
 DomUtils.getInnerHTML = function(elem){
-    if(!elem.children) return "";
+	if(!elem.children) return "";
 
-    var childs = elem.children,
-        childNum = childs.length,
-        ret = "";
+	var childs = elem.children,
+		childNum = childs.length,
+		ret = "";
 
-    for(var i = 0; i < childNum; i++){
-        ret += this.getOuterHTML(childs[i]);
-    }
+	for(var i = 0; i < childNum; i++){
+		ret += this.getOuterHTML(childs[i]);
+	}
 
-    return ret;
-}
+	return ret;
+};
 
 DomUtils.getOuterHTML = function(elem){
-    var type = elem.type,
-        name = elem.name;
-
-    if(type === ElementType.Text) return elem.data;
-    if(type === ElementType.Comment) return "<!--" + elem.data + "-->";
-
-    var attrStr = "";
-    if(elem.attribs){
-        var attrs = Object.keys(elem.attribs),
-            len = attrs.length;
+	var type = elem.type,
+		name = elem.name;
 
-        for(var i = 0; i < len; i++){
-            var attr = attrs[i],
-                val = elem.attribs[attr];
+	if(type === ElementType.Text) return elem.data;
+	if(type === ElementType.Comment) return "<!--" + elem.data + "-->";
 
-            attrStr += " " + attr + "=\"" + val + "\"";
+	var ret = "<" + name;
+	if(elem.hasOwnProperty("attribs")){
+		for(var attr in elem.attribs){
+			if(elem.attribs.hasOwnProperty(attr)){
+				ret += " " + attr + "=\"" + elem.attribs[attr] + "\"";
+			}
+		}
+	}
 
-            /* Is this required? Method forgets quotes
-            if(/^[^\s"\'\`\=\<\>]+$/.test(val))
-                attrStr += val;
-            else if(val.indeOf("\"") !== -1)
-                attrStr += "'" + val + "'";
-            else
-                attrStr += "\"" + val + "\"";
-            */
-        }
-    }
+	ret += ">";
 
-    var ret = "<" + name + attrStr + ">";
+	if(type === ElementType.Directive) return ret;
 
-    if(type === ElementType.Directive) return ret;
-
-    ret += this.getInnerHTML(elem) + "</" + name + ">"
-    return ret;
-}
+	return ret + this.getInnerHTML(elem) + "</" + name + ">";
+};
\ No newline at end of file

From 080d6626f0d3658bf5ece58288630ab1c9d48e1b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Wed, 18 Apr 2012 21:34:09 +0200
Subject: [PATCH 209/450] 2.2.5

---
 package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package.json b/package.json
index 423996f..27de655 100644
--- a/package.json
+++ b/package.json
@@ -1,7 +1,7 @@
 {
 	"name": "htmlparser2",
 	"description": "Performance-optimized forgiving HTML/XML/RSS parser",
-	"version": "2.2.4",
+	"version": "2.2.5",
 	"author": "Felix Boehm <me@feedic.com>",
 	"keywords": ["html", "parser", "streams", "xml", "dom", "rss", "feed", "atom"],
 	"contributors": ["Chris Winberry <chris@winberry.net>"],

From 972a751388aca4a65d39f2a9d09041202d1c14b1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Thu, 19 Apr 2012 16:47:00 +0200
Subject: [PATCH 210/450] Fixed some errors in readme

---
 README.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index cb32994..ae1bf44 100644
--- a/README.md
+++ b/README.md
@@ -40,7 +40,7 @@ That's it?!
 Read more about the parser in the [wiki](https://github.com/FB55/node-htmlparser/wiki/Parser-options).
 
 ##Get a DOM
-The `DomHandler` (known as `DefaultHandler` in the original `htmlparser` module) produces a DOM (document object model) that may be manipulated using the `DomUtils` helper.
+The `DomHandler` (known as `DefaultHandler` in the original `htmlparser` module) produces a DOM (document object model) that can be manipulated using the `DomUtils` helper.
 
 Read more about the DomHandler in the [wiki](https://github.com/FB55/node-htmlparser/wiki/DomHandler).
 
@@ -53,7 +53,7 @@ new htmlparser.FeedHandler(function(<error> error, <object> feed){
 ```
 
 ##Performance
-Using a slightly modified version of [node-expat](https://github.com/astro/node-expat)s `bench.js`, I received the following results (on a MacBook (late 2010):
+Using a slightly modified version of [node-expat](https://github.com/astro/node-expat)s `bench.js`, I received the following results (on a MacBook (late 2010)):
 
 * [htmlparser](https://github.com/tautologistics/node-htmlparser): 51779 el/s
 * [sax.js](https://github.com/isaacs/sax-js): 53169 el/s
@@ -63,9 +63,9 @@ Using a slightly modified version of [node-expat](https://github.com/astro/node-
 The test may be found in `tests/bench.js`.
 
 ##How is this different from [node-htmlparser](https://github.com/tautologistics/node-htmlparser)?
-This is a fork of the project above. The main difference is that this is just intended to be used with node (it runs on other platforms using [browserify](https://github.com/substack/node-browserify)). Besides, the code is much better structured, has less duplications and is remarkably faster than the original. 
+This is a fork of the project above. The main difference is that this is intended to be used only with node (it runs on other platforms using [browserify](https://github.com/substack/node-browserify)). Besides, the code is much better structured, has less duplications and is remarkably faster than the original. 
 
-The parser now provides a callback interface close to [sax.js](https://github.com/isaacs/sax-js) (originally intended for [readabilitySAX](https://github.com/fb55/readabilitysax)). I also fixed a couple of bugs & included some pull requests for the original project (eg. [RDF feed support](https://github.com/tautologistics/node-htmlparser/pull/35)).
+The parser now provides a callback interface close to [sax.js](https://github.com/isaacs/sax-js) (originally targeted at [readabilitySAX](https://github.com/fb55/readabilitysax)). I also fixed a couple of bugs & included some pull requests for the original project (eg. [RDF feed support](https://github.com/tautologistics/node-htmlparser/pull/35)).
 
 The support for location data and verbose output was removed a couple of versions ago. It's still available in the [verbose branch](https://github.com/FB55/node-htmlparser/tree/verbose). 
 

From 6df23d1a7a32290abfe23c192027d62f615f530d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Thu, 26 Apr 2012 10:35:16 +0200
Subject: [PATCH 211/450] Removed ifs from constructors (no more polymorphic
 objects)

Property lookups should now be much faster (resulting in faster parsing)

Also restructured vars in Parser#_parseTags (use this._buffer instead
of cached var)
---
 lib/DomHandler.js | 12 +++++-------
 lib/Parser.js     | 32 +++++++++++++++-----------------
 2 files changed, 20 insertions(+), 24 deletions(-)

diff --git a/lib/DomHandler.js b/lib/DomHandler.js
index 1f6eea3..3eac950 100644
--- a/lib/DomHandler.js
+++ b/lib/DomHandler.js
@@ -1,18 +1,16 @@
 var ElementType = require("./ElementType.js");
 
 function DomHandler(callback, options){
+	var optionsObjectFirst = typeof callback === "object";
+	this._options = optionsObjectFirst ? callback : options || defaultOpts;
+	this._callback = optionsObjectFirst ? null : callback;
 	this.dom = [];
 	this._done = false;
 	this._tagStack = [];
-	if(typeof callback === "object") this._options = callback;
-	else {
-		if(options) this._options = options; //otherwise, the prototype is used
-		if(callback) this._callback = callback;	
-	}
 }
 
 //default options
-DomHandler.prototype._options = {
+var defaultOpts = {
 	ignoreWhitespace: false //Keep whitespace-only text nodes
 };
 
@@ -36,7 +34,7 @@ DomHandler.prototype.onerror = function(error){
 };
 
 DomHandler.prototype.onclosetag = function(name){
-	if(this._tagStack.pop().name !== name) this._handleCallback(Error("tagname didn't match!"));
+	if(this._tagStack.pop().name !== name) this._handleCallback(Error("Tagname didn't match!"));
 };
 
 DomHandler.prototype._addDomElement = function(element){
diff --git a/lib/Parser.js b/lib/Parser.js
index 25bec33..4e91c22 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -1,9 +1,8 @@
 var ElementType = require("./ElementType.js");
 
 function Parser(cbs, options){
-	if(options) this._options = options;
-	if(cbs) this._cbs = cbs;
-
+	this._options = options || defaultOpts;
+	this._cbs = cbs || defaultCbs;
 	this._buffer = "";
 	this._tagSep = "";
 	this._stack = [];
@@ -17,12 +16,12 @@ function Parser(cbs, options){
 var _reAttrib = /\s([^\s\/]+?)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S+))|(?=\s)|\/|$)/g,
     _reTail = /\s|\/|$/;
 
-Parser.prototype._options = {
+var defaultOpts = {
 	xmlMode: false, //Special behavior for script/style tags by default
 	lowerCaseTags: false //call .toLowerCase for each tag name
 };
 
-Parser.prototype._cbs = {
+var defaultCbs = {
 	/*
 		This is just a plain object
 		so that the parser doesn't
@@ -53,7 +52,7 @@ Parser.prototype.parseComplete = function(data){
 //Parses a piece of an HTML document
 Parser.prototype.parseChunk =
 Parser.prototype.write = function(data){
-	if(this._done) this._handleError(Error("Attempted to parse chunk after parsing already done"));
+	if(this._done) this._handleError("Attempted to parse chunk after parsing already done");
 	this._buffer += data; //FIXME: this can be a bottleneck
 	if(this._running) this._parseTags();
 };
@@ -119,11 +118,10 @@ var TagValues = {
 
 //Parses through HTML text and returns an array of found elements
 Parser.prototype._parseTags = function(force){
-	var buffer = this._buffer, current = 0;
-
-	var next, rawData, elementData, lastTagSep;
-	
-	var opening = buffer.indexOf("<"), closing = buffer.indexOf(">");
+	var current = 0,
+	    opening = this._buffer.indexOf("<"),
+	    closing = this._buffer.indexOf(">"),
+	    next, rawData, elementData, lastTagSep;
 
 	//if force is true, parse everything
 	if(force) opening = Infinity;
@@ -135,14 +133,14 @@ Parser.prototype._parseTags = function(force){
 		if((opening !== -1 && opening < closing) || closing === -1){
 			next = opening;
 			this._tagSep = "<";
-			opening = buffer.indexOf("<", next + 1);
+			opening = this._buffer.indexOf("<", next + 1);
 		}
 		else{
 			next = closing;
 			this._tagSep = ">";
-			closing = buffer.indexOf(">", next + 1);
+			closing = this._buffer.indexOf(">", next + 1);
 		}
-		rawData = buffer.substring(current, next); //The next chunk of data to parse
+		rawData = this._buffer.substring(current, next); //The next chunk of data to parse
 		
 		//set elements for next run
 		current = next + 1;
@@ -216,7 +214,7 @@ Parser.prototype._parseTags = function(force){
 		}
 	}
 
-	this._buffer = buffer.substring(current);
+	this._buffer = this._buffer.substr(current);
 };
 
 Parser.prototype._writeCDATA = function(data){
@@ -326,8 +324,8 @@ Parser.prototype._processOpenTag = function(data){
 };
 
 Parser.prototype._handleError = function(error){
-	if(this._cbs.onerror)
-		this._cbs.onerror(error);
+	error = new Error(error);
+	if(this._cbs.onerror) this._cbs.onerror(error);
 	else throw error;
 };
 

From 6fcf53e0e83708c8b0c95de72adb427c0a49d629 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Thu, 26 Apr 2012 10:44:26 +0200
Subject: [PATCH 212/450] 2.5.6

---
 package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package.json b/package.json
index 27de655..d5d525b 100644
--- a/package.json
+++ b/package.json
@@ -1,7 +1,7 @@
 {
 	"name": "htmlparser2",
 	"description": "Performance-optimized forgiving HTML/XML/RSS parser",
-	"version": "2.2.5",
+	"version": "2.2.6",
 	"author": "Felix Boehm <me@feedic.com>",
 	"keywords": ["html", "parser", "streams", "xml", "dom", "rss", "feed", "atom"],
 	"contributors": ["Chris Winberry <chris@winberry.net>"],

From 2a7536f8403ab056dd977129ecf8411d81ffb3a0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Thu, 26 Apr 2012 11:24:34 +0200
Subject: [PATCH 213/450] Fixed reset functions

---
 lib/DomHandler.js | 4 +++-
 lib/Parser.js     | 5 ++---
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/lib/DomHandler.js b/lib/DomHandler.js
index 3eac950..c74e6dc 100644
--- a/lib/DomHandler.js
+++ b/lib/DomHandler.js
@@ -15,7 +15,9 @@ var defaultOpts = {
 };
 
 //Resets the handler back to starting state
-DomHandler.prototype.onreset = DomHandler;
+DomHandler.prototype.onreset = function(){
+	DomHandler.call(this, this._callback, this._options);
+};
 
 //Signals the handler that parsing is done
 DomHandler.prototype.onend = function(){
diff --git a/lib/Parser.js b/lib/Parser.js
index 4e91c22..35a477b 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -45,8 +45,7 @@ var defaultCbs = {
 //Parses a complete HTML and pushes it to the handler
 Parser.prototype.parseComplete = function(data){
 	this.reset();
-	this.write(data);
-	this.end();
+	this.end(data);
 };
 
 //Parses a piece of an HTML document
@@ -92,7 +91,7 @@ Parser.prototype.resume = function(){
 
 //Resets the parser to a blank state, ready to parse a new HTML document
 Parser.prototype.reset = function(){
-	Parser.call(this);
+	Parser.call(this, this._cbs, this._options);
 	if(this._cbs.onreset) this._cbs.onreset();
 };
 

From af565c47ac6f8a6bd530248e52122aafda0a223e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Thu, 26 Apr 2012 11:27:09 +0200
Subject: [PATCH 214/450] 2.2.7

---
 package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package.json b/package.json
index d5d525b..e9b242f 100644
--- a/package.json
+++ b/package.json
@@ -1,7 +1,7 @@
 {
 	"name": "htmlparser2",
 	"description": "Performance-optimized forgiving HTML/XML/RSS parser",
-	"version": "2.2.6",
+	"version": "2.2.7",
 	"author": "Felix Boehm <me@feedic.com>",
 	"keywords": ["html", "parser", "streams", "xml", "dom", "rss", "feed", "atom"],
 	"contributors": ["Chris Winberry <chris@winberry.net>"],

From d7b29aa23f91e26d1bcb6cd7e9a2b43821a7dee8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Tue, 1 May 2012 16:53:59 +0200
Subject: [PATCH 215/450] Breaking change: XML-comments inside special tags
 don't get special treatment.

The original htmlparser module made this change, and it aligns with
browsers (at least Chrome and Safari). CDATA inside such tags still
gets parsed.
---
 lib/Parser.js                               | 12 ++++++------
 tests/DOM/06-comment_in_script.json         |  4 ++--
 tests/DOM/14-comment_in_text_in_script.json | 10 +---------
 3 files changed, 9 insertions(+), 17 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index 35a477b..7407319 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -172,17 +172,17 @@ Parser.prototype._parseTags = function(force){
 				this._processCloseTag(elementData);
 			}
 			else if(elementData.charAt(0) === "!"){
-				if(elementData.substr(1, 2) === "--"){
-					//This tag is a comment
-					this._contentFlags |= SpecialTags[ElementType.Comment];
-					this._writeComment(rawData.substr(3));
-				}
-				else if(elementData.substr(1, 7) === "[CDATA["){
+				if(elementData.substr(1, 7) === "[CDATA["){
 					this._contentFlags |= SpecialTags[ElementType.CDATA];
 					if(this._cbs.oncdatastart) this._cbs.oncdatastart();
 					this._writeCDATA(elementData.substr(8));
 				}
 				else if(this._contentFlags !== 0) this._writeSpecial(rawData, lastTagSep);
+				else if(elementData.substr(1, 2) === "--"){
+					//This tag is a comment
+					this._contentFlags |= SpecialTags[ElementType.Comment];
+					this._writeComment(rawData.substr(3));
+				}
 				//TODO: This isn't a processing instruction, needs a new name
 				else if(this._cbs.onprocessinginstruction){
 					this._cbs.onprocessinginstruction(
diff --git a/tests/DOM/06-comment_in_script.json b/tests/DOM/06-comment_in_script.json
index a4246f4..8ec2c51 100644
--- a/tests/DOM/06-comment_in_script.json
+++ b/tests/DOM/06-comment_in_script.json
@@ -11,8 +11,8 @@
       "name": "script",
       "children": [
         {
-          "data": "var foo = 1;",
-          "type": "comment"
+          "data": "<!--var foo = 1;-->",
+          "type": "text"
         }
       ]
     }
diff --git a/tests/DOM/14-comment_in_text_in_script.json b/tests/DOM/14-comment_in_text_in_script.json
index e573881..2f07bc4 100644
--- a/tests/DOM/14-comment_in_text_in_script.json
+++ b/tests/DOM/14-comment_in_text_in_script.json
@@ -11,15 +11,7 @@
       "name": "script",
       "children": [
         {
-          "data": "this is ",
-          "type": "text"
-        },
-        {
-          "data": " the comment ",
-          "type": "comment"
-        },
-        {
-          "data": " the text",
+          "data": "this is <!-- the comment --> the text",
           "type": "text"
         }
       ]

From 19cfb315a5a315f4ea9f376773e9e1d1fd37b477 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 6 May 2012 17:47:46 +0200
Subject: [PATCH 216/450] DomHandler: Added an option to add a reference to
 each elements parent

---
 lib/DomHandler.js | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/lib/DomHandler.js b/lib/DomHandler.js
index c74e6dc..2eba390 100644
--- a/lib/DomHandler.js
+++ b/lib/DomHandler.js
@@ -1,9 +1,8 @@
 var ElementType = require("./ElementType.js");
 
 function DomHandler(callback, options){
-	var optionsObjectFirst = typeof callback === "object";
-	this._options = optionsObjectFirst ? callback : options || defaultOpts;
-	this._callback = optionsObjectFirst ? null : callback;
+	this._options = typeof callback === "object" ? callback : options || defaultOpts;
+	this._callback = typeof callback === "object" ? null : callback;
 	this.dom = [];
 	this._done = false;
 	this._tagStack = [];
@@ -11,7 +10,8 @@ function DomHandler(callback, options){
 
 //default options
 var defaultOpts = {
-	ignoreWhitespace: false //Keep whitespace-only text nodes
+	ignoreWhitespace: false, //Keep whitespace-only text nodes
+	refParent: false //add a reference to the elements parent node
 };
 
 //Resets the handler back to starting state
@@ -44,6 +44,9 @@ DomHandler.prototype._addDomElement = function(element){
 		lastTag = this._tagStack[this._tagStack.length - 1];
 	
 	if(lastTag){ //There are parent elements
+		if(this._options.refParent){
+			element.parent = lastTag;
+		}
 		if(!lastTag.children){
 			lastTag.children = [element];
 			return;

From bd5a70e5d5337f65a2b0293d6d287071bbde5ba2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 6 May 2012 18:17:12 +0200
Subject: [PATCH 217/450] 2.2.8

---
 package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package.json b/package.json
index e9b242f..4d6f57c 100644
--- a/package.json
+++ b/package.json
@@ -1,7 +1,7 @@
 {
 	"name": "htmlparser2",
 	"description": "Performance-optimized forgiving HTML/XML/RSS parser",
-	"version": "2.2.7",
+	"version": "2.2.8",
 	"author": "Felix Boehm <me@feedic.com>",
 	"keywords": ["html", "parser", "streams", "xml", "dom", "rss", "feed", "atom"],
 	"contributors": ["Chris Winberry <chris@winberry.net>"],

From 2689073f9db67fe0bce24c6c63989778ee933a31 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 2 Jun 2012 20:41:13 +0200
Subject: [PATCH 218/450] added support for the universal selector ("*")

---
 lib/DomUtils.js | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/lib/DomUtils.js b/lib/DomUtils.js
index 64e4791..00fcf38 100644
--- a/lib/DomUtils.js
+++ b/lib/DomUtils.js
@@ -85,6 +85,11 @@ DomUtils.getElementsByTagName = function(name, element, recurse, limit){
 		}, element, recurse, limit);
 	}
 
+	if(name === "*") return filter(function(elem){
+		var type = elem.type;
+		return type === ElementType.Tag || type === ElementType.Script || type === ElementType.Style;
+	});
+
 	return filter(function(elem){
 		var type = elem.type;
 		if(type !== ElementType.Tag && type !== ElementType.Script && type !== ElementType.Style)

From e2ca556a5dd54e7ca9a38f8c41e6e065594aad20 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 2 Jun 2012 20:41:31 +0200
Subject: [PATCH 219/450] 2.2.0

---
 lib/Parser.js | 3 ++-
 package.json  | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index 7407319..08cc5e6 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -279,8 +279,9 @@ Parser.prototype._processCloseTag = function(name){
 			else this._stack.splice(pos);
 	}
 	//many browsers (eg. Safari, Chrome) convert </br> to <br>
-	else if(name === "br" && !this._options.xmlMode)
+	else if(name === "br" && !this._options.xmlMode){
 		this._processOpenTag(name + "/");
+	}
 };
 
 Parser.prototype._parseAttributes = function(data){
diff --git a/package.json b/package.json
index 4d6f57c..51ebe72 100644
--- a/package.json
+++ b/package.json
@@ -1,7 +1,7 @@
 {
 	"name": "htmlparser2",
 	"description": "Performance-optimized forgiving HTML/XML/RSS parser",
-	"version": "2.2.8",
+	"version": "2.2.9",
 	"author": "Felix Boehm <me@feedic.com>",
 	"keywords": ["html", "parser", "streams", "xml", "dom", "rss", "feed", "atom"],
 	"contributors": ["Chris Winberry <chris@winberry.net>"],

From 7750ec175c387812cc060144fca9489947cf8aec Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 2 Jun 2012 20:45:10 +0200
Subject: [PATCH 220/450] removed switch in Stream.js

---
 lib/Stream.js | 34 ++++++++++++++--------------------
 1 file changed, 14 insertions(+), 20 deletions(-)

diff --git a/lib/Stream.js b/lib/Stream.js
index fd3abcc..ee0fdba 100644
--- a/lib/Stream.js
+++ b/lib/Stream.js
@@ -15,26 +15,20 @@ var cbs = function(scope){
 var EVENTS = require("../").EVENTS;
 
 Object.keys(EVENTS).forEach(function(name){
-	switch(EVENTS[name]){
-		case 0:{
-			cbs.prototype["on" + name] = function(){
-				this.scope.emit(name);
-			};
-			break;
-		}
-		case 1:{
-			cbs.prototype["on" + name] = function(a){
-				this.scope.emit(name, a);
-			};
-			break;
-		}
-		case 2:{
-			cbs.prototype["on" + name] = function(a, b){
-				this.scope.emit(name, a, b);
-			};
-			break;
-		}
-		default: throw Error("wrong number of arguments!");
+	if(EVENTS[name] === 0){
+		cbs.prototype["on" + name] = function(){
+			this.scope.emit(name);
+		};
+	} else if(EVENTS[name] === 1){
+		cbs.prototype["on" + name] = function(a){
+			this.scope.emit(name, a);
+		};
+	} else if(EVENTS[name] === 2){
+		cbs.prototype["on" + name] = function(a, b){
+			this.scope.emit(name, a, b);
+		};
+	} else {
+		throw Error("wrong number of arguments!");
 	}
 });
 

From 04476a013eee2303df8a8bfdca18ece79ad9703c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Thu, 21 Jun 2012 16:08:28 +0200
Subject: [PATCH 221/450] fixed whitespace

---
 lib/Parser.js | 25 ++++++++++++-------------
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index 08cc5e6..c087569 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -163,8 +163,7 @@ Parser.prototype._parseTags = function(force){
 					if(this._contentFlags & TagValues[elementData]){
 						//remove the flag
 						this._contentFlags ^= TagValues[elementData];
-					}
-					else {
+					} else {
 						this._writeSpecial(rawData, lastTagSep);
 						continue;
 					}
@@ -186,7 +185,7 @@ Parser.prototype._parseTags = function(force){
 				//TODO: This isn't a processing instruction, needs a new name
 				else if(this._cbs.onprocessinginstruction){
 					this._cbs.onprocessinginstruction(
-						"!" + this._parseTagName(elementData.substr(1)), 
+						"!" + this._parseTagName(elementData.substr(1)),
 						elementData
 					);
 				}
@@ -195,7 +194,7 @@ Parser.prototype._parseTags = function(force){
 			else if(elementData.charAt(0) === "?"){
 				if(this._cbs.onprocessinginstruction){
 					this._cbs.onprocessinginstruction(
-						"?" + this._parseTagName(elementData.substr(1)), 
+						"?" + this._parseTagName(elementData.substr(1)),
 						elementData
 					);
 				}
@@ -219,11 +218,11 @@ Parser.prototype._parseTags = function(force){
 Parser.prototype._writeCDATA = function(data){
 	if(this._tagSep === ">" && data.substr(-2) === "]]"){
 		// CDATA ends
-    	if(data.length !== 2 && this._cbs.ontext){
-    		this._cbs.ontext(data.slice(0,-2));
-    	}
-    	this._contentFlags ^= SpecialTags[ElementType.CDATA];
-    	if(this._cbs.oncdataend) this._cbs.oncdataend();
+		if(data.length !== 2 && this._cbs.ontext){
+			this._cbs.ontext(data.slice(0,-2));
+		}
+		this._contentFlags ^= SpecialTags[ElementType.CDATA];
+		if(this._cbs.oncdataend) this._cbs.oncdataend();
     }
     else if(this._cbs.ontext) this._cbs.ontext(data + this._tagSep);
 };
@@ -242,11 +241,11 @@ Parser.prototype._writeComment = function(rawData){
 Parser.prototype._writeSpecial = function(rawData, lastTagSep){
 	//if the previous element is text, append the last tag sep to element
 	if(this._wroteSpecial){
-	    if(this._cbs.ontext) this._cbs.ontext(lastTagSep + rawData);
+		if(this._cbs.ontext) this._cbs.ontext(lastTagSep + rawData);
 	}
 	else{ //The previous element was not text
-	    this._wroteSpecial = true;
-	    if(rawData !== "" && this._cbs.ontext) this._cbs.ontext(rawData);
+		this._wroteSpecial = true;
+		if(rawData !== "" && this._cbs.ontext) this._cbs.ontext(rawData);
 	}
 };
 
@@ -317,7 +316,7 @@ Parser.prototype._processOpenTag = function(data){
 	} else {
 		if(type !== ElementType.Tag){
 			this._contentFlags |= SpecialTags[type];
-			this._wroteSpecial = false;	
+			this._wroteSpecial = false;
 		}
 		this._stack.push(name);
 	}

From 18d3f379c944b31aeff6cf70149eda6adc6cd240 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Thu, 21 Jun 2012 16:10:33 +0200
Subject: [PATCH 222/450] quick fix for #19

---
 lib/DomHandler.js | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lib/DomHandler.js b/lib/DomHandler.js
index 2eba390..d131d92 100644
--- a/lib/DomHandler.js
+++ b/lib/DomHandler.js
@@ -36,7 +36,8 @@ DomHandler.prototype.onerror = function(error){
 };
 
 DomHandler.prototype.onclosetag = function(name){
-	if(this._tagStack.pop().name !== name) this._handleCallback(Error("Tagname didn't match!"));
+	//if(this._tagStack.pop().name !== name) this._handleCallback(Error("Tagname didn't match!"));
+	this._tagStack.pop();
 };
 
 DomHandler.prototype._addDomElement = function(element){

From 69c9f0f5255565f37ab0c39393f7b67bf77a24a0 Mon Sep 17 00:00:00 2001
From: lahmatiy <rdvornov@gmail.com>
Date: Wed, 4 Jul 2012 13:39:49 +0600
Subject: [PATCH 223/450] Fix getOuterHTML for directives

---
 lib/DomUtils.js | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lib/DomUtils.js b/lib/DomUtils.js
index 00fcf38..6bf3068 100644
--- a/lib/DomUtils.js
+++ b/lib/DomUtils.js
@@ -126,6 +126,7 @@ DomUtils.getOuterHTML = function(elem){
 
 	if(type === ElementType.Text) return elem.data;
 	if(type === ElementType.Comment) return "<!--" + elem.data + "-->";
+	if(type === ElementType.Directive) return "<" + elem.data + ">";
 
 	var ret = "<" + name;
 	if(elem.hasOwnProperty("attribs")){

From 82455a9557d6c7b3daccbb6da08a2bbd4ff13916 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 11 Aug 2012 18:36:22 +0200
Subject: [PATCH 224/450] added lowerCaseAttributeNames option

yep, it's insanely short
---
 lib/Parser.js | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index c087569..acf2b66 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -18,6 +18,7 @@ var _reAttrib = /\s([^\s\/]+?)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S+))|(?=\s)|\/|
 
 var defaultOpts = {
 	xmlMode: false, //Special behavior for script/style tags by default
+	lowerCaseAttributeNames: false, //call .toLowerCase for each attribute name
 	lowerCaseTags: false //call .toLowerCase for each tag name
 };
 
@@ -283,17 +284,17 @@ Parser.prototype._processCloseTag = function(name){
 	}
 };
 
-Parser.prototype._parseAttributes = function(data){
+Parser.prototype._parseAttributes = function(data, lcNames){
 	for(var match; match = _reAttrib.exec(data);){
-		this._cbs.onattribute(match[1], match[2] || match[3] || match[4] || "");
+		this._cbs.onattribute(lcNames ? match[1].toLowerCase() : match[1], match[2] || match[3] || match[4] || "");
 	}
 };
 
 //parses the attribute string
-var parseAttributes = function(data){
+var parseAttributes = function(data, lcNames){
 	var attrs = {};
 	for(var match; match = _reAttrib.exec(data);){
-		attrs[match[1]] = match[2] || match[3] || match[4] || "";
+		attrs[lcNames ? match[1].toLowerCase() : match[1]] = match[2] || match[3] || match[4] || "";
 	}
 	return attrs;
 };
@@ -307,8 +308,14 @@ Parser.prototype._processOpenTag = function(data){
 	else if(name === "style")  type = ElementType.Style;
 
 	if(this._cbs.onopentagname)	this._cbs.onopentagname(name);
-	if(this._cbs.onopentag)		this._cbs.onopentag(name, parseAttributes(data));
-	if(this._cbs.onattribute)	this._parseAttributes(data);
+	if(this._cbs.onopentag){
+		this._cbs.onopentag(name, parseAttributes(
+			data, this._options.lowerCaseAttributeNames
+		));
+	}
+	if(this._cbs.onattribute){
+		this._parseAttributes(data, this._options.lowerCaseAttributeNames);
+	}
 	
 	//If tag self-terminates, add an explicit, separate closing tag
 	if(data.substr(-1) === "/" || (name in emptyTags && !this._options.xmlMode)){

From e0d359e4b4ec8d499250b730766d613cce993a59 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Tue, 14 Aug 2012 14:35:46 +0200
Subject: [PATCH 225/450] 2.3.0

---
 package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package.json b/package.json
index 51ebe72..11d7e8f 100644
--- a/package.json
+++ b/package.json
@@ -1,7 +1,7 @@
 {
 	"name": "htmlparser2",
 	"description": "Performance-optimized forgiving HTML/XML/RSS parser",
-	"version": "2.2.9",
+	"version": "2.3.0",
 	"author": "Felix Boehm <me@feedic.com>",
 	"keywords": ["html", "parser", "streams", "xml", "dom", "rss", "feed", "atom"],
 	"contributors": ["Chris Winberry <chris@winberry.net>"],

From a8c13c8d35b243ebd5ba8fe0cec344c656faaf85 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Tue, 14 Aug 2012 15:36:50 +0200
Subject: [PATCH 226/450] Added a `onopentagend` event

to get a signal when there won't be any more attributes coming
---
 lib/Parser.js | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/lib/Parser.js b/lib/Parser.js
index acf2b66..201cb8a 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -37,6 +37,7 @@ var defaultCbs = {
 		oncommentend,
 		onerror,
 		onopentag,
+		onopentagend,
 		onprocessinginstruction,
 		onreset,
 		ontext
@@ -316,6 +317,7 @@ Parser.prototype._processOpenTag = function(data){
 	if(this._cbs.onattribute){
 		this._parseAttributes(data, this._options.lowerCaseAttributeNames);
 	}
+	if(this._cbs.onopentagend) this._cbs.onopentagend();
 	
 	//If tag self-terminates, add an explicit, separate closing tag
 	if(data.substr(-1) === "/" || (name in emptyTags && !this._options.xmlMode)){

From c1dfddac9aee07e216804aeda9bf899d94b276dc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Tue, 14 Aug 2012 16:48:08 +0200
Subject: [PATCH 227/450] moved DomHandler & DomUtils to their own module

they are now available as `domhandler`
---
 lib/DomHandler.js                           | 122 ----------------
 lib/DomUtils.js                             | 145 --------------------
 lib/FeedHandler.js                          |   5 +-
 lib/index.js                                |   4 +-
 package.json                                |   6 +-
 tests/00-runtests.js                        |   6 +-
 tests/01-dom.js                             |  18 ---
 tests/04-dom_utils.js                       |  15 --
 tests/DOM/01-basic.json                     |  41 ------
 tests/DOM/02-single_tag_1.json              |  22 ---
 tests/DOM/03-single_tag_2.json              |  22 ---
 tests/DOM/04-unescaped_in_script.json       |  29 ----
 tests/DOM/05-tags_in_comment.json           |  20 ---
 tests/DOM/06-comment_in_script.json         |  20 ---
 tests/DOM/07-unescaped_in_style.json        |  23 ----
 tests/DOM/08-extra_spaces_in_tag.json       |  23 ----
 tests/DOM/09-unquoted_attrib.json           |  23 ----
 tests/DOM/10-singular_attribute.json        |  18 ---
 tests/DOM/11-text_outside_tags.json         |  22 ---
 tests/DOM/12-text_only.json                 |  14 --
 tests/DOM/13-comment_in_text.json           |  22 ---
 tests/DOM/14-comment_in_text_in_script.json |  20 ---
 tests/DOM/15-non-verbose.json               |  25 ----
 tests/DOM/16-ignore_whitespace.json         |  42 ------
 tests/DOM/17-xml_namespace.json             |  20 ---
 tests/DOM/18-enforce_empty_tags.json        |  18 ---
 tests/DOM/19-ignore_empty_tags.json         |  22 ---
 tests/DOM/20-template_script_tags.json      |  23 ----
 tests/DOM/21-conditional_comments.json      |  18 ---
 tests/DOM/22-lowercase_tags.json            |  43 ------
 tests/DomUtils/01-by_id.js                  |  54 --------
 tests/DomUtils/02-by_tagname.js             |  22 ---
 tests/DomUtils/03-by_type.js                |  22 ---
 tests/DomUtils/04-outer_html.js             |  10 --
 tests/DomUtils/05-inner_html.js             |  10 --
 35 files changed, 11 insertions(+), 958 deletions(-)
 delete mode 100644 lib/DomHandler.js
 delete mode 100644 lib/DomUtils.js
 delete mode 100644 tests/01-dom.js
 delete mode 100644 tests/04-dom_utils.js
 delete mode 100644 tests/DOM/01-basic.json
 delete mode 100644 tests/DOM/02-single_tag_1.json
 delete mode 100644 tests/DOM/03-single_tag_2.json
 delete mode 100644 tests/DOM/04-unescaped_in_script.json
 delete mode 100644 tests/DOM/05-tags_in_comment.json
 delete mode 100644 tests/DOM/06-comment_in_script.json
 delete mode 100644 tests/DOM/07-unescaped_in_style.json
 delete mode 100644 tests/DOM/08-extra_spaces_in_tag.json
 delete mode 100644 tests/DOM/09-unquoted_attrib.json
 delete mode 100644 tests/DOM/10-singular_attribute.json
 delete mode 100644 tests/DOM/11-text_outside_tags.json
 delete mode 100644 tests/DOM/12-text_only.json
 delete mode 100644 tests/DOM/13-comment_in_text.json
 delete mode 100644 tests/DOM/14-comment_in_text_in_script.json
 delete mode 100644 tests/DOM/15-non-verbose.json
 delete mode 100644 tests/DOM/16-ignore_whitespace.json
 delete mode 100644 tests/DOM/17-xml_namespace.json
 delete mode 100644 tests/DOM/18-enforce_empty_tags.json
 delete mode 100644 tests/DOM/19-ignore_empty_tags.json
 delete mode 100644 tests/DOM/20-template_script_tags.json
 delete mode 100644 tests/DOM/21-conditional_comments.json
 delete mode 100644 tests/DOM/22-lowercase_tags.json
 delete mode 100644 tests/DomUtils/01-by_id.js
 delete mode 100644 tests/DomUtils/02-by_tagname.js
 delete mode 100644 tests/DomUtils/03-by_type.js
 delete mode 100644 tests/DomUtils/04-outer_html.js
 delete mode 100644 tests/DomUtils/05-inner_html.js

diff --git a/lib/DomHandler.js b/lib/DomHandler.js
deleted file mode 100644
index d131d92..0000000
--- a/lib/DomHandler.js
+++ /dev/null
@@ -1,122 +0,0 @@
-var ElementType = require("./ElementType.js");
-
-function DomHandler(callback, options){
-	this._options = typeof callback === "object" ? callback : options || defaultOpts;
-	this._callback = typeof callback === "object" ? null : callback;
-	this.dom = [];
-	this._done = false;
-	this._tagStack = [];
-}
-
-//default options
-var defaultOpts = {
-	ignoreWhitespace: false, //Keep whitespace-only text nodes
-	refParent: false //add a reference to the elements parent node
-};
-
-//Resets the handler back to starting state
-DomHandler.prototype.onreset = function(){
-	DomHandler.call(this, this._callback, this._options);
-};
-
-//Signals the handler that parsing is done
-DomHandler.prototype.onend = function(){
-	if(this._done) return;
-	this._done = true;
-	this._handleCallback(null);
-};
-
-DomHandler.prototype._handleCallback = 
-DomHandler.prototype.onerror = function(error){
-	if(typeof this._callback === "function"){
-		this._callback(error, this.dom);
-	} else {
-		if(error) throw error;
-	}
-};
-
-DomHandler.prototype.onclosetag = function(name){
-	//if(this._tagStack.pop().name !== name) this._handleCallback(Error("Tagname didn't match!"));
-	this._tagStack.pop();
-};
-
-DomHandler.prototype._addDomElement = function(element){
-	var lastChild,
-		lastTag = this._tagStack[this._tagStack.length - 1];
-	
-	if(lastTag){ //There are parent elements
-		if(this._options.refParent){
-			element.parent = lastTag;
-		}
-		if(!lastTag.children){
-			lastTag.children = [element];
-			return;
-		}
-		lastChild = lastTag.children[lastTag.children.length - 1];
-		if(element.type === ElementType.Text && lastChild.type === ElementType.Text){
-			lastChild.data += element.data;
-		} else {
-			lastTag.children.push(element);
-		}
-	}
-	else {
-		this.dom.push(element);
-	}
-};
-
-DomHandler.prototype.onopentagname = function(name){
-	var element = {
-		type: name === "script" ? ElementType.Script : name === "style" ? ElementType.Style : ElementType.Tag,
-		name: name
-	};
-	this._addDomElement(element);
-	this._tagStack.push(element);
-};
-
-DomHandler.prototype.onattribute = function(name, value){
-	var element = this._tagStack[this._tagStack.length-1];
-	if(!("attribs" in element)) element.attribs = {};
-	element.attribs[name] = value;
-};
-
-DomHandler.prototype.ontext = function(data){
-	if(this._options.ignoreWhitespace && data.trim() === "") return;
-	this._addDomElement({
-		data: data,
-		type: ElementType.Text
-	});
-};
-
-DomHandler.prototype.oncomment = function(data){
-	var lastTag = this._tagStack[this._tagStack.length - 1];
-
-	if(lastTag && lastTag.type === ElementType.Comment){
-		lastTag.data += data;
-		return;
-	}
-
-	var element = {
-		data: data,
-		 type: ElementType.Comment
-	};
-
-	if(!lastTag) this.dom.push(element);
-	else if(!lastTag.children) lastTag.children = [element];
-	else lastTag.children.push(element);
-
-	this._tagStack.push(element);
-};
-
-DomHandler.prototype.oncommentend = function(){
-	this._tagStack.pop();
-};
-
-DomHandler.prototype.onprocessinginstruction = function(name, data){
-	this._addDomElement({
-		name: name,
-		data: data,
-		type: ElementType.Directive
-	});
-};
-
-module.exports = DomHandler;
\ No newline at end of file
diff --git a/lib/DomUtils.js b/lib/DomUtils.js
deleted file mode 100644
index 6bf3068..0000000
--- a/lib/DomUtils.js
+++ /dev/null
@@ -1,145 +0,0 @@
-var ElementType = require("./ElementType.js"),
-    arrayPush = Array.prototype.push,
-    DomUtils = module.exports;
-
-function filterArray(test, arr, recurse, limit){
-	var result = [], childs;
-
-	for(var i = 0, j = arr.length; i < j; i++){
-		if(test(arr[i])){
-			result.push(arr[i]);
-			if(--limit <= 0) break;
-		}
-
-		childs = arr[i].children;
-		if(recurse && childs){
-			childs = filterArray(test, childs, recurse, limit);
-			arrayPush.apply(result, childs);
-			limit -= childs.length;
-			if(limit <= 0) break;
-		}
-	}
-
-	return result;
-}
-
-function filter(test, element, recurse, limit){
-	if(recurse !== false) recurse = true;
-	if(isNaN(limit)) limit = Infinity;
-	if(!Array.isArray(element)) element = [element];
-
-	return filterArray(test, element, recurse, limit);
-}
-
-DomUtils.testElement = function(options, element){
-	var type = element.type;
-	for(var key in options){
-		if(!options.hasOwnProperty(key));
-		else if(key === "tag_name"){
-		    if(type !== ElementType.Tag && type !== ElementType.Script && type !== ElementType.Style) return false;
-		    if(!options.tag_name(element.name)) return false;
-		} else if(key === "tag_type"){
-		    if(!options.tag_type(type)) return false;
-		} else if(key === "tag_contains"){
-		    if(type !== ElementType.Text && type !== ElementType.Comment && type !== ElementType.Directive) return false;
-		    if(!options.tag_contains(element.data)) return false;
-		} else if(!element.attribs || !options[key](element.attribs[key])) return false;
-	}
-
-	return true;
-};
-
-function getEqualityFunc(check){
-	return (function(val){ return val === check; });
-}
-
-DomUtils.getElements = function(options, element, recurse, limit){
-	for(var key in options){
-		if(options.hasOwnProperty(key) && typeof options[key] !== "function"){
-			options[key] = getEqualityFunc(options[key]);
-		}
-	}
-
-    return filter(function(elem){ return DomUtils.testElement(options, elem); }, element, recurse, limit);
-};
-
-DomUtils.getElementById = function(id, element, recurse){
-	var result;
-
-	if(typeof id === "function"){
-		result = filter(function(elem){ return elem.attribs && id(elem.attribs); }, element, recurse, 1);
-	} else {
-		result = filter(function(elem){ return elem.attribs && elem.attribs.id === id; }, element, recurse, 1);
-	}
-
-	return result.length ? result[0] : null;
-};
-
-DomUtils.getElementsByTagName = function(name, element, recurse, limit){
-	if(typeof name === "function"){ 
-		return filter(function(elem){
-			var type = elem.type;
-			if(type !== ElementType.Tag && type !== ElementType.Script && type !== ElementType.Style)
-				return false;
-			return name(elem.name);
-		}, element, recurse, limit);
-	}
-
-	if(name === "*") return filter(function(elem){
-		var type = elem.type;
-		return type === ElementType.Tag || type === ElementType.Script || type === ElementType.Style;
-	});
-
-	return filter(function(elem){
-		var type = elem.type;
-		if(type !== ElementType.Tag && type !== ElementType.Script && type !== ElementType.Style)
-			return false;
-		return elem.name === name;
-	}, element, recurse, limit);
-};
-
-DomUtils.getElementsByTagType = function(type, element, recurse, limit){
-	if(typeof type === "function"){
-		return filter(function(elem){ return type(elem.type); }, element, recurse, limit);
-	} else {
-		return filter(function(elem){ return elem.type === type; }, element, recurse, limit);
-	}
-};
-
-DomUtils.getInnerHTML = function(elem){
-	if(!elem.children) return "";
-
-	var childs = elem.children,
-		childNum = childs.length,
-		ret = "";
-
-	for(var i = 0; i < childNum; i++){
-		ret += this.getOuterHTML(childs[i]);
-	}
-
-	return ret;
-};
-
-DomUtils.getOuterHTML = function(elem){
-	var type = elem.type,
-		name = elem.name;
-
-	if(type === ElementType.Text) return elem.data;
-	if(type === ElementType.Comment) return "<!--" + elem.data + "-->";
-	if(type === ElementType.Directive) return "<" + elem.data + ">";
-
-	var ret = "<" + name;
-	if(elem.hasOwnProperty("attribs")){
-		for(var attr in elem.attribs){
-			if(elem.attribs.hasOwnProperty(attr)){
-				ret += " " + attr + "=\"" + elem.attribs[attr] + "\"";
-			}
-		}
-	}
-
-	ret += ">";
-
-	if(type === ElementType.Directive) return ret;
-
-	return ret + this.getInnerHTML(elem) + "</" + name + ">";
-};
\ No newline at end of file
diff --git a/lib/FeedHandler.js b/lib/FeedHandler.js
index ca6c856..9f02025 100644
--- a/lib/FeedHandler.js
+++ b/lib/FeedHandler.js
@@ -1,5 +1,6 @@
-var DomHandler = require("./DomHandler.js"),
-	DomUtils = require("./DomUtils.js");
+var index = require("./index.js"),
+    DomHandler = index.DomHandler,
+	DomUtils = index.DomUtils;
 
 //TODO: make this a streamable handler
 function FeedHandler(callback){
diff --git a/lib/index.js b/lib/index.js
index 621a06b..b5f1731 100644
--- a/lib/index.js
+++ b/lib/index.js
@@ -6,7 +6,7 @@ module.exports = {
 		return this.Parser;
 	},
 	get DomHandler(){
-		defineProp(this, "DomHandler", {value:require("./DomHandler.js")});
+		defineProp(this, "DomHandler", {value:require("domhandler").Handler});
 		return this.DomHandler;
 	},
 	get FeedHandler(){
@@ -30,7 +30,7 @@ module.exports = {
 		return this.ProxyHandler;
 	},
 	get DomUtils(){
-		defineProp(this, "DomUtils", {value:require("./DomUtils.js")});
+		defineProp(this, "DomUtils", {value:require("domhandler").Utils});
 		return this.DomUtils;
 	},
 	// For legacy support
diff --git a/package.json b/package.json
index 11d7e8f..739b64c 100644
--- a/package.json
+++ b/package.json
@@ -14,13 +14,15 @@
 		"url": "http://github.com/fb55/node-htmlparser/issues"
 	},
 	"directories": {
-		"lib": "./lib/"
+		"lib": "lib/"
 	},
 	"main": "./lib/",
 	"scripts": {
 		"test": "node tests/00-runtests.js"
 	},
-	"engines": "node >= 0.3.0",
+	"dependencies": {
+		"domhandler": "1.0.x"
+	},
 	"licenses": [{
 		"type": "MIT",
 		"url": "http://github.com/tautologistics/node-htmlparser/raw/master/LICENSE"
diff --git a/tests/00-runtests.js b/tests/00-runtests.js
index 3f76d43..c742539 100644
--- a/tests/00-runtests.js
+++ b/tests/00-runtests.js
@@ -33,14 +33,12 @@ function runTests(test){
 		});
 	});
 	console.log("->", test.dir.slice(1, -1), "started");
-};
+}
 
 //run all tests
 [
- "./01-dom.js",
  "./02-feed.js",
  "./03-events.js",
- "./04-dom_utils.js",
  "./05-stream.js"
 ].map(require).forEach(runTests);
 
@@ -48,4 +46,4 @@ function runTests(test){
 (function check(){
 	if(runCount !== 0) return process.nextTick(check);
 	console.log("Total tests:", testCount);
-})();
\ No newline at end of file
+}());
\ No newline at end of file
diff --git a/tests/01-dom.js b/tests/01-dom.js
deleted file mode 100644
index f4ce1fb..0000000
--- a/tests/01-dom.js
+++ /dev/null
@@ -1,18 +0,0 @@
-//Runs tests for the DOM handler
-
-var helper = require("./test-helper.js"),
-	DefaultHandler = require("../lib/DomHandler.js");
-
-exports.dir = "/DOM/";
-
-/*
-	function test()
-	runs a test, calls the callback afterwards
-*/
-exports.test = function(test, cb){
-	var handler = new DefaultHandler(function(err, dom){
-		if(err) cb(err, 0); //return the error
-		else cb(null, dom);
-	}, test.options.handler);
-	helper.writeToParser(handler, test.options.parser, test.html);
-}
\ No newline at end of file
diff --git a/tests/04-dom_utils.js b/tests/04-dom_utils.js
deleted file mode 100644
index 0d14c6c..0000000
--- a/tests/04-dom_utils.js
+++ /dev/null
@@ -1,15 +0,0 @@
-//generate a dom
-var handler = new (require("../lib/DomHandler.js"))();
-
-(new (require("../lib/Parser.js"))(handler)).parseComplete(
-	Array(21).join("<?xml><tag1 id='asdf'> <script>text</script> <!-- comment --> <tag2> text </tag1>")
-);
-
-var dom = handler.dom;
-
-exports.dir = "/DomUtils/";
-
-exports.test = function(test, cb){
-	cb(null, test.getElements(dom));
-	cb(null, test.getByFunction(dom));
-};
\ No newline at end of file
diff --git a/tests/DOM/01-basic.json b/tests/DOM/01-basic.json
deleted file mode 100644
index 7453c30..0000000
--- a/tests/DOM/01-basic.json
+++ /dev/null
@@ -1,41 +0,0 @@
-{
-  "name": "Basic test",
-  "options": {
-    "handler": {},
-    "parser": {}
-  },
-  "html": "<!DOCTYPE html><html><title>The Title</title><body>Hello world</body></html>",
-  "expected": [
-    {
-      "name": "!DOCTYPE",
-      "data": "!DOCTYPE html",
-      "type": "directive"
-    },
-    {
-      "type": "tag",
-      "name": "html",
-      "children": [
-        {
-          "type": "tag",
-          "name": "title",
-          "children": [
-            {
-              "data": "The Title",
-              "type": "text"
-            }
-          ]
-        },
-        {
-          "type": "tag",
-          "name": "body",
-          "children": [
-            {
-              "data": "Hello world",
-              "type": "text"
-            }
-          ]
-        }
-      ]
-    }
-  ]
-}
\ No newline at end of file
diff --git a/tests/DOM/02-single_tag_1.json b/tests/DOM/02-single_tag_1.json
deleted file mode 100644
index 4efff6a..0000000
--- a/tests/DOM/02-single_tag_1.json
+++ /dev/null
@@ -1,22 +0,0 @@
-{
-  "name": "Single Tag 1",
-  "options": {
-    "handler": {},
-    "parser": {}
-  },
-  "html": "<br>text</br>",
-  "expected": [
-    {
-      "type": "tag",
-      "name": "br"
-    },
-    {
-      "data": "text",
-      "type": "text"
-    },
-    {
-      "type": "tag",
-      "name": "br"
-    }
-  ]
-}
\ No newline at end of file
diff --git a/tests/DOM/03-single_tag_2.json b/tests/DOM/03-single_tag_2.json
deleted file mode 100644
index e7b23b8..0000000
--- a/tests/DOM/03-single_tag_2.json
+++ /dev/null
@@ -1,22 +0,0 @@
-{
-  "name": "Single Tag 2",
-  "options": {
-    "handler": {},
-    "parser": {}
-  },
-  "html": "<br>text<br>",
-  "expected": [
-    {
-      "type": "tag",
-      "name": "br"
-    },
-    {
-      "data": "text",
-      "type": "text"
-    },
-    {
-      "type": "tag",
-      "name": "br"
-    }
-  ]
-}
\ No newline at end of file
diff --git a/tests/DOM/04-unescaped_in_script.json b/tests/DOM/04-unescaped_in_script.json
deleted file mode 100644
index 029d202..0000000
--- a/tests/DOM/04-unescaped_in_script.json
+++ /dev/null
@@ -1,29 +0,0 @@
-{
-  "name": "Unescaped chars in script",
-  "options": {
-    "handler": {},
-    "parser": {}
-  },
-  "html": "<head><script language=\"Javascript\">var foo = \"<bar>\"; alert(2 > foo); var baz = 10 << 2; var zip = 10 >> 1; var yap = \"<<>>>><<\";</script></head>",
-  "expected": [
-    {
-      "type": "tag",
-      "name": "head",
-      "children": [
-        {
-          "type": "script",
-          "name": "script",
-          "attribs": {
-            "language": "Javascript"
-          },
-          "children": [
-            {
-              "data": "var foo = \"<bar>\"; alert(2 > foo); var baz = 10 << 2; var zip = 10 >> 1; var yap = \"<<>>>><<\";",
-              "type": "text"
-            }
-          ]
-        }
-      ]
-    }
-  ]
-}
\ No newline at end of file
diff --git a/tests/DOM/05-tags_in_comment.json b/tests/DOM/05-tags_in_comment.json
deleted file mode 100644
index 577d23b..0000000
--- a/tests/DOM/05-tags_in_comment.json
+++ /dev/null
@@ -1,20 +0,0 @@
-{
-  "name": "Special char in comment",
-  "options": {
-    "handler": {},
-    "parser": {}
-  },
-  "html": "<head><!-- commented out tags <title>Test</title>--></head>",
-  "expected": [
-    {
-      "type": "tag",
-      "name": "head",
-      "children": [
-        {
-          "data": " commented out tags <title>Test</title>",
-          "type": "comment"
-        }
-      ]
-    }
-  ]
-}
\ No newline at end of file
diff --git a/tests/DOM/06-comment_in_script.json b/tests/DOM/06-comment_in_script.json
deleted file mode 100644
index 8ec2c51..0000000
--- a/tests/DOM/06-comment_in_script.json
+++ /dev/null
@@ -1,20 +0,0 @@
-{
-  "name": "Script source in comment",
-  "options": {
-    "handler": {},
-    "parser": {}
-  },
-  "html": "<script><!--var foo = 1;--></script>",
-  "expected": [
-    {
-      "type": "script",
-      "name": "script",
-      "children": [
-        {
-          "data": "<!--var foo = 1;-->",
-          "type": "text"
-        }
-      ]
-    }
-  ]
-}
\ No newline at end of file
diff --git a/tests/DOM/07-unescaped_in_style.json b/tests/DOM/07-unescaped_in_style.json
deleted file mode 100644
index d6bf9fb..0000000
--- a/tests/DOM/07-unescaped_in_style.json
+++ /dev/null
@@ -1,23 +0,0 @@
-{
-  "name": "Unescaped chars in style",
-  "options": {
-    "handler": {},
-    "parser": {}
-  },
-  "html": "<style type=\"text/css\">\n body > p\n\t{ font-weight: bold; }</style>",
-  "expected": [
-    {
-      "type": "style",
-      "name": "style",
-      "attribs": {
-        "type": "text/css"
-      },
-      "children": [
-        {
-          "data": "\n body > p\n\t{ font-weight: bold; }",
-          "type": "text"
-        }
-      ]
-    }
-  ]
-}
\ No newline at end of file
diff --git a/tests/DOM/08-extra_spaces_in_tag.json b/tests/DOM/08-extra_spaces_in_tag.json
deleted file mode 100644
index 78b30f4..0000000
--- a/tests/DOM/08-extra_spaces_in_tag.json
+++ /dev/null
@@ -1,23 +0,0 @@
-{
-  "name": "Extra spaces in tag",
-  "options": {
-    "handler": {},
-    "parser": {}
-  },
-  "html": "<\n font\t\n size='14' \n>the text<\n /\t\nfont\t \n>",
-  "expected": [
-    {
-      "type": "tag",
-      "name": "font",
-      "attribs": {
-        "size": "14"
-      },
-      "children": [
-        {
-          "data": "the text",
-          "type": "text"
-        }
-      ]
-    }
-  ]
-}
\ No newline at end of file
diff --git a/tests/DOM/09-unquoted_attrib.json b/tests/DOM/09-unquoted_attrib.json
deleted file mode 100644
index ae5f44c..0000000
--- a/tests/DOM/09-unquoted_attrib.json
+++ /dev/null
@@ -1,23 +0,0 @@
-{
-  "name": "Unquoted attributes",
-  "options": {
-    "handler": {},
-    "parser": {}
-  },
-  "html": "<font size= 14>the text</font>",
-  "expected": [
-    {
-      "type": "tag",
-      "name": "font",
-      "attribs": {
-        "size": "14"
-      },
-      "children": [
-        {
-          "data": "the text",
-          "type": "text"
-        }
-      ]
-    }
-  ]
-}
\ No newline at end of file
diff --git a/tests/DOM/10-singular_attribute.json b/tests/DOM/10-singular_attribute.json
deleted file mode 100644
index 6de1ef2..0000000
--- a/tests/DOM/10-singular_attribute.json
+++ /dev/null
@@ -1,18 +0,0 @@
-{
-  "name": "Singular attribute",
-  "options": {
-    "handler": {},
-    "parser": {}
-  },
-  "html": "<option value='foo' selected>",
-  "expected": [
-    {
-      "type": "tag",
-      "name": "option",
-      "attribs": {
-        "value": "foo",
-        "selected": ""
-      }
-    }
-  ]
-}
\ No newline at end of file
diff --git a/tests/DOM/11-text_outside_tags.json b/tests/DOM/11-text_outside_tags.json
deleted file mode 100644
index 234821f..0000000
--- a/tests/DOM/11-text_outside_tags.json
+++ /dev/null
@@ -1,22 +0,0 @@
-{
-  "name": "Text outside tags",
-  "options": {
-    "handler": {},
-    "parser": {}
-  },
-  "html": "Line one\n<br>\nline two",
-  "expected": [
-    {
-      "data": "Line one\n",
-      "type": "text"
-    },
-    {
-      "type": "tag",
-      "name": "br"
-    },
-    {
-      "data": "\nline two",
-      "type": "text"
-    }
-  ]
-}
\ No newline at end of file
diff --git a/tests/DOM/12-text_only.json b/tests/DOM/12-text_only.json
deleted file mode 100644
index f2f8610..0000000
--- a/tests/DOM/12-text_only.json
+++ /dev/null
@@ -1,14 +0,0 @@
-{
-  "name": "Only text",
-  "options": {
-    "handler": {},
-    "parser": {}
-  },
-  "html": "this is the text",
-  "expected": [
-    {
-      "data": "this is the text",
-      "type": "text"
-    }
-  ]
-}
\ No newline at end of file
diff --git a/tests/DOM/13-comment_in_text.json b/tests/DOM/13-comment_in_text.json
deleted file mode 100644
index bc47305..0000000
--- a/tests/DOM/13-comment_in_text.json
+++ /dev/null
@@ -1,22 +0,0 @@
-{
-  "name": "Comment within text",
-  "options": {
-    "handler": {},
-    "parser": {}
-  },
-  "html": "this is <!-- the comment --> the text",
-  "expected": [
-    {
-      "data": "this is ",
-      "type": "text"
-    },
-    {
-      "data": " the comment ",
-      "type": "comment"
-    },
-    {
-      "data": " the text",
-      "type": "text"
-    }
-  ]
-}
\ No newline at end of file
diff --git a/tests/DOM/14-comment_in_text_in_script.json b/tests/DOM/14-comment_in_text_in_script.json
deleted file mode 100644
index 2f07bc4..0000000
--- a/tests/DOM/14-comment_in_text_in_script.json
+++ /dev/null
@@ -1,20 +0,0 @@
-{
-  "name": "Comment within text within script",
-  "options": {
-    "handler": {},
-    "parser": {}
-  },
-  "html": "<script>this is <!-- the comment --> the text</script>",
-  "expected": [
-    {
-      "type": "script",
-      "name": "script",
-      "children": [
-        {
-          "data": "this is <!-- the comment --> the text",
-          "type": "text"
-        }
-      ]
-    }
-  ]
-}
\ No newline at end of file
diff --git a/tests/DOM/15-non-verbose.json b/tests/DOM/15-non-verbose.json
deleted file mode 100644
index e887368..0000000
--- a/tests/DOM/15-non-verbose.json
+++ /dev/null
@@ -1,25 +0,0 @@
-{
-  "name": "Option 'verbose' set to 'false'",
-  "options": {
-    "handler": {
-      "verbose": false
-    },
-    "parser": {}
-  },
-  "html": "<\n font\t\n size='14' \n>the text<\n /\t\nfont\t \n>",
-  "expected": [
-    {
-      "type": "tag",
-      "name": "font",
-      "attribs": {
-        "size": "14"
-      },
-      "children": [
-        {
-          "data": "the text",
-          "type": "text"
-        }
-      ]
-    }
-  ]
-}
\ No newline at end of file
diff --git a/tests/DOM/16-ignore_whitespace.json b/tests/DOM/16-ignore_whitespace.json
deleted file mode 100644
index ade32f3..0000000
--- a/tests/DOM/16-ignore_whitespace.json
+++ /dev/null
@@ -1,42 +0,0 @@
-{
-  "name": "Options 'ignoreWhitespace' set to 'true'",
-  "options": {
-    "handler": {
-      "ignoreWhitespace": true
-    },
-    "parser": {}
-  },
-  "html": "Line one\n<br> \t\n<br>\nline two<font>\n <br> x </font>",
-  "expected": [
-    {
-      "data": "Line one\n",
-      "type": "text"
-    },
-    {
-      "type": "tag",
-      "name": "br"
-    },
-    {
-      "type": "tag",
-      "name": "br"
-    },
-    {
-      "data": "\nline two",
-      "type": "text"
-    },
-    {
-      "type": "tag",
-      "name": "font",
-      "children": [
-        {
-          "type": "tag",
-          "name": "br"
-        },
-        {
-          "data": " x ",
-          "type": "text"
-        }
-      ]
-    }
-  ]
-}
\ No newline at end of file
diff --git a/tests/DOM/17-xml_namespace.json b/tests/DOM/17-xml_namespace.json
deleted file mode 100644
index 4302144..0000000
--- a/tests/DOM/17-xml_namespace.json
+++ /dev/null
@@ -1,20 +0,0 @@
-{
-  "name": "XML Namespace",
-  "options": {
-    "handler": {},
-    "parser": {}
-  },
-  "html": "<ns:tag>text</ns:tag>",
-  "expected": [
-    {
-      "type": "tag",
-      "name": "ns:tag",
-      "children": [
-        {
-          "data": "text",
-          "type": "text"
-        }
-      ]
-    }
-  ]
-}
\ No newline at end of file
diff --git a/tests/DOM/18-enforce_empty_tags.json b/tests/DOM/18-enforce_empty_tags.json
deleted file mode 100644
index ed5c44c..0000000
--- a/tests/DOM/18-enforce_empty_tags.json
+++ /dev/null
@@ -1,18 +0,0 @@
-{
-  "name": "Enforce empty tags",
-  "options": {
-    "handler": {},
-    "parser": {}
-  },
-  "html": "<link>text</link>",
-  "expected": [
-    {
-      "type": "tag",
-      "name": "link"
-    },
-    {
-      "data": "text",
-      "type": "text"
-    }
-  ]
-}
\ No newline at end of file
diff --git a/tests/DOM/19-ignore_empty_tags.json b/tests/DOM/19-ignore_empty_tags.json
deleted file mode 100644
index 97270eb..0000000
--- a/tests/DOM/19-ignore_empty_tags.json
+++ /dev/null
@@ -1,22 +0,0 @@
-{
-  "name": "Ignore empty tags (xml mode)",
-  "options": {
-    "handler": {},
-    "parser": {
-      "xmlMode": true
-    }
-  },
-  "html": "<link>text</link>",
-  "expected": [
-    {
-      "type": "tag",
-      "name": "link",
-      "children": [
-        {
-          "data": "text",
-          "type": "text"
-        }
-      ]
-    }
-  ]
-}
\ No newline at end of file
diff --git a/tests/DOM/20-template_script_tags.json b/tests/DOM/20-template_script_tags.json
deleted file mode 100644
index 90a3709..0000000
--- a/tests/DOM/20-template_script_tags.json
+++ /dev/null
@@ -1,23 +0,0 @@
-{
-  "name": "Template script tags",
-  "options": {
-    "handler": {},
-    "parser": {}
-  },
-  "html": "<script type=\"text/template\"><h1>Heading1</h1></script>",
-  "expected": [
-    {
-      "type": "script",
-      "name": "script",
-      "attribs": {
-        "type": "text/template"
-      },
-      "children": [
-        {
-          "data": "<h1>Heading1</h1>",
-          "type": "text"
-        }
-      ]
-    }
-  ]
-}
\ No newline at end of file
diff --git a/tests/DOM/21-conditional_comments.json b/tests/DOM/21-conditional_comments.json
deleted file mode 100644
index 56cf232..0000000
--- a/tests/DOM/21-conditional_comments.json
+++ /dev/null
@@ -1,18 +0,0 @@
-{
-  "name": "Conditional comments",
-  "options": {
-    "handler": {},
-    "parser": {}
-  },
-  "html": "<!--[if lt IE 7]> <html class='no-js ie6 oldie' lang='en'> <![endif]--><!--[if lt IE 7]> <html class='no-js ie6 oldie' lang='en'> <![endif]-->",
-  "expected": [
-    {
-      "data": "[if lt IE 7]> <html class='no-js ie6 oldie' lang='en'> <![endif]",
-      "type": "comment"
-    },
-    {
-      "data": "[if lt IE 7]> <html class='no-js ie6 oldie' lang='en'> <![endif]",
-      "type": "comment"
-    }
-  ]
-}
\ No newline at end of file
diff --git a/tests/DOM/22-lowercase_tags.json b/tests/DOM/22-lowercase_tags.json
deleted file mode 100644
index b619ffc..0000000
--- a/tests/DOM/22-lowercase_tags.json
+++ /dev/null
@@ -1,43 +0,0 @@
-{
-  "name": "Basic test",
-  "options": {
-    "handler": {},
-    "parser": {
-      "lowerCaseTags": true
-    }
-  },
-  "html": "<!DOCTYPE html><HTML><TITLE>The Title</title><BODY>Hello world</body></html>",
-  "expected": [
-    {
-      "name": "!doctype",
-      "data": "!DOCTYPE html",
-      "type": "directive"
-    },
-    {
-      "type": "tag",
-      "name": "html",
-      "children": [
-        {
-          "type": "tag",
-          "name": "title",
-          "children": [
-            {
-              "data": "The Title",
-              "type": "text"
-            }
-          ]
-        },
-        {
-          "type": "tag",
-          "name": "body",
-          "children": [
-            {
-              "data": "Hello world",
-              "type": "text"
-            }
-          ]
-        }
-      ]
-    }
-  ]
-}
\ No newline at end of file
diff --git a/tests/DomUtils/01-by_id.js b/tests/DomUtils/01-by_id.js
deleted file mode 100644
index 578257f..0000000
--- a/tests/DomUtils/01-by_id.js
+++ /dev/null
@@ -1,54 +0,0 @@
-var DomUtils = require("../../lib/DomUtils.js");
-
-exports.name = "Get element by id";
-exports.getElements = function(dom){
-	return DomUtils.getElements({id:"asdf"}, dom, true, 1)[0];
-};
-exports.getByFunction = function(dom){
-	return DomUtils.getElementById("asdf", dom, true);
-};
-exports.expected = {
-  "type": "tag",
-  "name": "tag1",
-  "attribs": {
-    "id": "asdf"
-  },
-  "children": [
-    {
-      "data": " ",
-      "type": "text"
-    },
-    {
-      "type": "script",
-      "name": "script",
-      "children": [
-        {
-          "data": "text",
-          "type": "text"
-        }
-      ]
-    },
-    {
-      "data": " ",
-      "type": "text"
-    },
-    {
-      "data": " comment ",
-      "type": "comment"
-    },
-    {
-      "data": " ",
-      "type": "text"
-    },
-    {
-      "type": "tag",
-      "name": "tag2",
-      "children": [
-        {
-          "data": " text ",
-          "type": "text"
-        }
-      ]
-    }
-  ]
-};
\ No newline at end of file
diff --git a/tests/DomUtils/02-by_tagname.js b/tests/DomUtils/02-by_tagname.js
deleted file mode 100644
index 280414e..0000000
--- a/tests/DomUtils/02-by_tagname.js
+++ /dev/null
@@ -1,22 +0,0 @@
-var DomUtils = require("../../lib/DomUtils.js");
-
-exports.name = "Get elements by tagName";
-exports.getElements = function(dom){
-	return DomUtils.getElements({tag_name:"tag2"}, dom, true);
-};
-exports.getByFunction = function(dom){
-	return DomUtils.getElementsByTagName("tag2", dom, true);
-};
-exports.expected = [];
-for(var i = 0; i < 20; i++) exports.expected.push(
-  {
-    "type": "tag",
-    "name": "tag2",
-    "children": [
-      {
-        "data": " text ",
-        "type": "text"
-      }
-    ]
-  }
-);
\ No newline at end of file
diff --git a/tests/DomUtils/03-by_type.js b/tests/DomUtils/03-by_type.js
deleted file mode 100644
index 16a3971..0000000
--- a/tests/DomUtils/03-by_type.js
+++ /dev/null
@@ -1,22 +0,0 @@
-var DomUtils = require("../../lib/DomUtils.js");
-
-exports.name = "Get elements by type";
-exports.getElements = function(dom){
-	return DomUtils.getElements({tag_type:"script"}, dom, true);
-};
-exports.getByFunction = function(dom){
-	return DomUtils.getElementsByTagType("script", dom, true);
-};
-exports.expected = [];
-for(var i = 0; i < 20; i++) exports.expected.push(
-  {
-    "type": "script",
-    "name": "script",
-    "children": [
-      {
-        "data": "text",
-        "type": "text"
-      }
-    ]
-  }
-);
\ No newline at end of file
diff --git a/tests/DomUtils/04-outer_html.js b/tests/DomUtils/04-outer_html.js
deleted file mode 100644
index 0ed8374..0000000
--- a/tests/DomUtils/04-outer_html.js
+++ /dev/null
@@ -1,10 +0,0 @@
-var DomUtils = require("../../lib/DomUtils.js");
-
-exports.name = "Get outer HTML";
-exports.getElements = function(dom){
-    return '<tag1 id="asdf"> <script>text</script> <!-- comment --> <tag2> text </tag2></tag1>';
-};
-exports.getByFunction = function(dom){
-    return DomUtils.getOuterHTML(DomUtils.getElementById("asdf", dom, true));
-};
-exports.expected = '<tag1 id="asdf"> <script>text</script> <!-- comment --> <tag2> text </tag2></tag1>';
diff --git a/tests/DomUtils/05-inner_html.js b/tests/DomUtils/05-inner_html.js
deleted file mode 100644
index 72dba0e..0000000
--- a/tests/DomUtils/05-inner_html.js
+++ /dev/null
@@ -1,10 +0,0 @@
-var DomUtils = require("../../lib/DomUtils.js");
-
-exports.name = "Get inner HTML";
-exports.getElements = function(dom){
-    return ' <script>text</script> <!-- comment --> <tag2> text </tag2>';
-};
-exports.getByFunction = function(dom){
-    return DomUtils.getInnerHTML(DomUtils.getElementById("asdf", dom, true));
-};
-exports.expected = ' <script>text</script> <!-- comment --> <tag2> text </tag2>';

From c0b7eda733cec37a3357126b272dd2eb9a04d936 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Tue, 14 Aug 2012 16:59:33 +0200
Subject: [PATCH 228/450] Updated readme

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index ae1bf44..91ea260 100644
--- a/README.md
+++ b/README.md
@@ -42,7 +42,7 @@ Read more about the parser in the [wiki](https://github.com/FB55/node-htmlparser
 ##Get a DOM
 The `DomHandler` (known as `DefaultHandler` in the original `htmlparser` module) produces a DOM (document object model) that can be manipulated using the `DomUtils` helper.
 
-Read more about the DomHandler in the [wiki](https://github.com/FB55/node-htmlparser/wiki/DomHandler).
+The `DomHandler`, while still bundled with this module, was recently moved to it's [own module](https://github.com/FB55/domhandler). Have a look at it for further information.
 
 ##Parsing RSS/RDF/Atom Feeds
 

From a928109ce788ef248b7f7a4fe982b1be45420138 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Tue, 14 Aug 2012 16:59:40 +0200
Subject: [PATCH 229/450] 2.3.1

---
 package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package.json b/package.json
index 739b64c..1dc75ec 100644
--- a/package.json
+++ b/package.json
@@ -1,7 +1,7 @@
 {
 	"name": "htmlparser2",
 	"description": "Performance-optimized forgiving HTML/XML/RSS parser",
-	"version": "2.3.0",
+	"version": "2.3.1",
 	"author": "Felix Boehm <me@feedic.com>",
 	"keywords": ["html", "parser", "streams", "xml", "dom", "rss", "feed", "atom"],
 	"contributors": ["Chris Winberry <chris@winberry.net>"],

From b90c1e6f11eee96cb9db5b29000b5889a89784ea Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Tue, 14 Aug 2012 17:13:01 +0200
Subject: [PATCH 230/450] publish the element types from DomHandler

---
 lib/index.js | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/lib/index.js b/lib/index.js
index b5f1731..ae07dc4 100644
--- a/lib/index.js
+++ b/lib/index.js
@@ -5,8 +5,12 @@ module.exports = {
 		defineProp(this, "Parser", {value:require("./Parser.js")});
 		return this.Parser;
 	},
+	get HandlerModule(){
+		defineProp(this, "HandlerModule", {value:require("domhandler")});
+		return this.DomHandler;
+	},
 	get DomHandler(){
-		defineProp(this, "DomHandler", {value:require("domhandler").Handler});
+		defineProp(this, "DomHandler", {value:this.HandlerModule.Handler});
 		return this.DomHandler;
 	},
 	get FeedHandler(){
@@ -14,7 +18,7 @@ module.exports = {
 		return this.FeedHandler;
 	},
 	get ElementType(){
-		defineProp(this, "ElementType", {value:require("./ElementType.js")});
+		defineProp(this, "ElementType", {value:this.HandlerModule.ElementType});
 		return this.ElementType;
 	},
 	get Stream(){

From b6c4a7395cc96709a0757e9bc28057d927e67bb5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Tue, 14 Aug 2012 17:14:37 +0200
Subject: [PATCH 231/450] use numeric element types

'case numbers are faster to compare

NOT breaking due to last commit
---
 lib/ElementType.js | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/lib/ElementType.js b/lib/ElementType.js
index 618465e..e4d7598 100644
--- a/lib/ElementType.js
+++ b/lib/ElementType.js
@@ -1,10 +1,10 @@
 //Types of elements found in the DOM
 module.exports = {
-	Text: "text", //Text
-	Directive: "directive", //<? ... ?>
-	Comment: "comment", //<!-- ... -->
-	Script: "script", //<script> tags
-	Style: "style", //<style> tags
-	Tag: "tag", //Any tag
-	CDATA: "cdata" //<![CDATA[ ... ]]>
+	Text: 0, //Text
+	Directive: 1, //<? ... ?>
+	Comment: 2, //<!-- ... -->
+	Script: 3, //<script> tags
+	Style: 4, //<style> tags
+	Tag: 5, //Any tag
+	CDATA: 6 //<![CDATA[ ... ]]>
 };
\ No newline at end of file

From 401cc09167dc49ba5c2feaeb44da9698f9342795 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Tue, 14 Aug 2012 17:25:14 +0200
Subject: [PATCH 232/450] don't expose HandlerModule

---
 lib/index.js | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/lib/index.js b/lib/index.js
index ae07dc4..9cfdfe2 100644
--- a/lib/index.js
+++ b/lib/index.js
@@ -5,12 +5,8 @@ module.exports = {
 		defineProp(this, "Parser", {value:require("./Parser.js")});
 		return this.Parser;
 	},
-	get HandlerModule(){
-		defineProp(this, "HandlerModule", {value:require("domhandler")});
-		return this.DomHandler;
-	},
 	get DomHandler(){
-		defineProp(this, "DomHandler", {value:this.HandlerModule.Handler});
+		defineProp(this, "DomHandler", {value:require("domhandler").Handler});
 		return this.DomHandler;
 	},
 	get FeedHandler(){
@@ -18,7 +14,7 @@ module.exports = {
 		return this.FeedHandler;
 	},
 	get ElementType(){
-		defineProp(this, "ElementType", {value:this.HandlerModule.ElementType});
+		defineProp(this, "ElementType", {value:require("domhandler").ElementType});
 		return this.ElementType;
 	},
 	get Stream(){

From f5925c9c351de312d822f6724fdf12bd80b50e8f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20B=C3=B6hm?= <me@feedic.com>
Date: Thu, 23 Aug 2012 21:43:36 +0300
Subject: [PATCH 233/450] fixed travis badge

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 91ea260..2df1a71 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-#htmlparser2 [![Build Status](https://secure.travis-ci.org/FB55/node-htmlparser.png)](http://travis-ci.org/FB55/node-htmlparser)
+#htmlparser2 [![Build Status](https://secure.travis-ci.org/fb55/node-htmlparser.png)](http://travis-ci.org/fb55/node-htmlparser)
 
 A forgiving HTML/XML/RSS parser written in JS for NodeJS. The parser can handle streams (chunked data) and supports custom handlers for writing custom DOMs/output.
 

From 181c31bed04d066f7a85feb13967deb8725fba0e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 10 Nov 2012 14:55:02 +0100
Subject: [PATCH 234/450] stylistic changes

---
 lib/FeedHandler.js | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/lib/FeedHandler.js b/lib/FeedHandler.js
index 9f02025..e49f6ca 100644
--- a/lib/FeedHandler.js
+++ b/lib/FeedHandler.js
@@ -13,17 +13,16 @@ FeedHandler.prototype.init = DomHandler;
 
 function getElements(what, where, one, recurse){
 	if(one) return DomUtils.getElementsByTagName(what, where, recurse, 1)[0];
-	else	return DomUtils.getElementsByTagName(what, where, recurse);
+	return DomUtils.getElementsByTagName(what, where, recurse);
 }
 function fetch(what, where, recurse){
-	var ret = getElements(what, where, true, recurse);
-	if(ret && (ret = ret.children) && ret.length !== 0) return ret[0].data;
-	else return false;
+	var ret = DomUtils.getElementsByTagName(what, where, recurse, 1);
+	return ret.length > 0 && ret[0].children.length > 0 && ret[0].children[0].data;
 }
 
 var isValidFeed = function(value) {
 	return value === "rss" || value === "feed" || value === "rdf:RDF";
-}
+};
 
 FeedHandler.prototype.onend = function() {
 	var feed = {},

From 84012d6a780d685e078812589af72542f6266b1d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 10 Nov 2012 14:57:23 +0100
Subject: [PATCH 235/450] use the new dom modules, 2.5.0

Attention: The DOM changes slightly.
---
 lib/index.js | 6 +++---
 package.json | 6 ++++--
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/lib/index.js b/lib/index.js
index 9cfdfe2..e3e3378 100644
--- a/lib/index.js
+++ b/lib/index.js
@@ -6,7 +6,7 @@ module.exports = {
 		return this.Parser;
 	},
 	get DomHandler(){
-		defineProp(this, "DomHandler", {value:require("domhandler").Handler});
+		defineProp(this, "DomHandler", {value:require("domhandler")});
 		return this.DomHandler;
 	},
 	get FeedHandler(){
@@ -14,7 +14,7 @@ module.exports = {
 		return this.FeedHandler;
 	},
 	get ElementType(){
-		defineProp(this, "ElementType", {value:require("domhandler").ElementType});
+		defineProp(this, "ElementType", {value:require("domelementtype")});
 		return this.ElementType;
 	},
 	get Stream(){
@@ -30,7 +30,7 @@ module.exports = {
 		return this.ProxyHandler;
 	},
 	get DomUtils(){
-		defineProp(this, "DomUtils", {value:require("domhandler").Utils});
+		defineProp(this, "DomUtils", {value:require("domutils")});
 		return this.DomUtils;
 	},
 	// For legacy support
diff --git a/package.json b/package.json
index 1dc75ec..a7ca4a3 100644
--- a/package.json
+++ b/package.json
@@ -1,7 +1,7 @@
 {
 	"name": "htmlparser2",
 	"description": "Performance-optimized forgiving HTML/XML/RSS parser",
-	"version": "2.3.1",
+	"version": "2.5.0",
 	"author": "Felix Boehm <me@feedic.com>",
 	"keywords": ["html", "parser", "streams", "xml", "dom", "rss", "feed", "atom"],
 	"contributors": ["Chris Winberry <chris@winberry.net>"],
@@ -21,7 +21,9 @@
 		"test": "node tests/00-runtests.js"
 	},
 	"dependencies": {
-		"domhandler": "1.0.x"
+		"domhandler": "2.0",
+		"domutils": "1.0",
+		"domelementtype": "1.0"
 	},
 	"licenses": [{
 		"type": "MIT",

From b3bc4132098623b36c2c4a0958953078b45aa612 Mon Sep 17 00:00:00 2001
From: Kris Reeves <github.com@masquerader.com>
Date: Thu, 29 Nov 2012 20:55:36 -0800
Subject: [PATCH 236/450] Made the attribute regular expression more correct
 with regards to unquoted attribute values. Require self-closing tags to be
 void

---
 lib/Parser.js | 54 +++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 46 insertions(+), 8 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index 201cb8a..31d538b 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -13,7 +13,30 @@ function Parser(cbs, options){
 }
 
 //Regular expressions used for cleaning up and parsing (stateless)
-var _reAttrib = /\s([^\s\/]+?)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S+))|(?=\s)|\/|$)/g,
+
+/* http://dev.w3.org/html5/html-author/#attributes
+ * - Whitespace is permitted after the tag name, but it is not permitted before the tag name.
+ * - Attribute names must consist of one or more characters other than the space characters,
+ *   control characters, NULL, one of the characters: double quote ("), single quote ('),
+ *   greater-than sign (>), solidus (/), equals sign (=), nor any characters that are not defined by Unicode.
+ * - An empty attribute is one where the value has been omitted. (<input disabled>...</input>
+ * - An unquoted attribute value must not contain any literal space characters, any of the characters:
+ *   double quote ("), apostrophe ('), equals sign (=), less-than sign (<), greater-than sign (>),
+ *   or grave accent (`), and the value must not be the empty string.
+ * - There may be space characters between the attribute name and the equals sign (=),
+ *   and between that and the attribute value.
+ * - Double-quoted attributes must not contain any double-quote characters or ambiguous ampersands.
+ * - Single-quoted attributes must not contain any single-quote characters or ambiguous ampersands.
+ */
+// element name:	(<[^<& ]+)
+// attribute name:	( [^"'=>\/]+)
+// attribute value:	(\s*=\s*(?:
+//						"([^"]*)"|
+//						'([^']*)'|
+//						[^\s"'=<>`]+)
+// tag end: (?=\s|\/|$)
+
+var _reAttrib = /\s+([^"'=>\/\s]+)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'=<>`]+))|(?=[\s>])|\/|$)/g,
     _reTail = /\s|\/|$/;
 
 var defaultOpts = {
@@ -286,9 +309,19 @@ Parser.prototype._processCloseTag = function(name){
 };
 
 Parser.prototype._parseAttributes = function(data, lcNames){
-	for(var match; match = _reAttrib.exec(data);){
-		this._cbs.onattribute(lcNames ? match[1].toLowerCase() : match[1], match[2] || match[3] || match[4] || "");
+	var attribs = false;
+	// if the callback exists, call it for each attribute
+	if (this._cbs.onattribute) {
+		for(var match; match = _reAttrib.exec(data);){
+			attribs = true;
+			this._cbs.onattribute(lcNames ? match[1].toLowerCase() : match[1], match[2] || match[3] || match[4] || "");
+		}
+	}
+	// if not, check to see if there are any attributes
+	else if (_reAttrib.exec(data)) {
+		attribs = true;
 	}
+	return attribs;
 };
 
 //parses the attribute string
@@ -301,7 +334,7 @@ var parseAttributes = function(data, lcNames){
 };
 
 Parser.prototype._processOpenTag = function(data){
-	var name = this._parseTagName(data),
+	var name = this._parseTagName(data), attributes,
 	    type = ElementType.Tag;
 	
 	if(this._options.xmlMode){ /*do nothing*/ }
@@ -314,13 +347,18 @@ Parser.prototype._processOpenTag = function(data){
 			data, this._options.lowerCaseAttributeNames
 		));
 	}
-	if(this._cbs.onattribute){
-		this._parseAttributes(data, this._options.lowerCaseAttributeNames);
-	}
+	// we need to know if the tag is void or not. the callback check was moved to Parser.prototype._parseAttributes
+	attributes = this._parseAttributes(data, this._options.lowerCaseAttributeNames);
 	if(this._cbs.onopentagend) this._cbs.onopentagend();
 	
 	//If tag self-terminates, add an explicit, separate closing tag
-	if(data.substr(-1) === "/" || (name in emptyTags && !this._options.xmlMode)){
+	/* http://dev.w3.org/html5/html-author/#tags
+	 * In XHTML, self-closing tags are valid but attribute values must be quoted.
+	 * In HTML, self-closing tags must be either void elements or foreign elements.
+	 * Invalid HTML self-closing tag syntax is ignored (treated as an opening tag).
+	 * Foreign elements use XML rules
+	 */
+	if(!attributes && data.substr(-1) === "/" || (name in emptyTags && !this._options.xmlMode)){
 		if(this._cbs.onclosetag) this._cbs.onclosetag(name);
 	} else {
 		if(type !== ElementType.Tag){

From 0f71a495779e66501623d10d4ab5d53e3378be7d Mon Sep 17 00:00:00 2001
From: Kris Reeves <github.com@masquerader.com>
Date: Thu, 29 Nov 2012 22:32:24 -0800
Subject: [PATCH 237/450] I didn't understand how RegExps worked in this way,
 and was desynching the attributes count. Here's a different way to accomplish
 the same thing.

---
 lib/Parser.js | 499 ++++++++++++++++++++++++--------------------------
 1 file changed, 243 insertions(+), 256 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index 31d538b..e25faee 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -1,15 +1,15 @@
 var ElementType = require("./ElementType.js");
 
 function Parser(cbs, options){
-	this._options = options || defaultOpts;
-	this._cbs = cbs || defaultCbs;
-	this._buffer = "";
-	this._tagSep = "";
-	this._stack = [];
-	this._wroteSpecial = false;
-	this._contentFlags = 0;
-	this._done = false;
-	this._running = true; //false if paused
+        this._options = options || defaultOpts;
+        this._cbs = cbs || defaultCbs;
+        this._buffer = "";
+        this._tagSep = "";
+        this._stack = [];
+        this._wroteSpecial = false;
+        this._contentFlags = 0;
+        this._done = false;
+        this._running = true; //false if paused
 }
 
 //Regular expressions used for cleaning up and parsing (stateless)
@@ -36,95 +36,95 @@ function Parser(cbs, options){
 //						[^\s"'=<>`]+)
 // tag end: (?=\s|\/|$)
 
-var _reAttrib = /\s+([^"'=>\/\s]+)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'=<>`]+))|(?=[\s>])|\/|$)/g,
+var _reAttrib = /\s+([^"'=>\/\s]+)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'=<>`]+))|(?=\s)|\/|$)/g,
     _reTail = /\s|\/|$/;
 
 var defaultOpts = {
-	xmlMode: false, //Special behavior for script/style tags by default
-	lowerCaseAttributeNames: false, //call .toLowerCase for each attribute name
-	lowerCaseTags: false //call .toLowerCase for each tag name
+        xmlMode: false, //Special behavior for script/style tags by default
+        lowerCaseAttributeNames: false, //call .toLowerCase for each attribute name
+        lowerCaseTags: false //call .toLowerCase for each tag name
 };
 
 var defaultCbs = {
-	/*
-		This is just a plain object
-		so that the parser doesn't
-		throw if no arguments were
-		provided.
-	*/
-	/*
-		oncdataend,
-		oncdatastart,
-		onclosetag,
-		oncomment,
-		oncommentend,
-		onerror,
-		onopentag,
-		onopentagend,
-		onprocessinginstruction,
-		onreset,
-		ontext
-	*/
+        /*
+                This is just a plain object
+                so that the parser doesn't
+                throw if no arguments were
+                provided.
+        */
+        /*
+                oncdataend,
+                oncdatastart,
+                onclosetag,
+                oncomment,
+                oncommentend,
+                onerror,
+                onopentag,
+                onopentagend,
+                onprocessinginstruction,
+                onreset,
+                ontext
+        */
 };
 
 //Parses a complete HTML and pushes it to the handler
 Parser.prototype.parseComplete = function(data){
-	this.reset();
-	this.end(data);
+        this.reset();
+        this.end(data);
 };
 
 //Parses a piece of an HTML document
 Parser.prototype.parseChunk =
 Parser.prototype.write = function(data){
-	if(this._done) this._handleError("Attempted to parse chunk after parsing already done");
-	this._buffer += data; //FIXME: this can be a bottleneck
-	if(this._running) this._parseTags();
+        if(this._done) this._handleError("Attempted to parse chunk after parsing already done");
+        this._buffer += data; //FIXME: this can be a bottleneck
+        if(this._running) this._parseTags();
 };
 
 //Tells the parser that the HTML being parsed is complete
 Parser.prototype.done =
 Parser.prototype.end = function(chunk){
-	if(this._done) return;
+        if(this._done) return;
 
-	if(chunk) this.write(chunk);
-	this._done = true;
-	
-	if(this._running) this._finishParsing();
+        if(chunk) this.write(chunk);
+        this._done = true;
+        
+        if(this._running) this._finishParsing();
 };
 
 Parser.prototype._finishParsing = function(){
-	//Parse the buffer to its end
-	if(this._buffer) this._parseTags(true);
-	
-	if(this._cbs.onclosetag){
-		while(this._stack.length) this._cbs.onclosetag(this._stack.pop());
-	}
-	
-	if(this._cbs.onend) this._cbs.onend();
+        //Parse the buffer to its end
+        if(this._buffer) this._parseTags(true);
+        
+        if(this._cbs.onclosetag){
+                while(this._stack.length) this._cbs.onclosetag(this._stack.pop());
+        }
+        
+        if(this._cbs.onend) this._cbs.onend();
 };
 
 Parser.prototype.pause = function(){
-	if(!this._done) this._running = false;
+        if(!this._done) this._running = false;
 };
 
 Parser.prototype.resume = function(){
-	if(this._running) return;
-	this._running = true;
-	this._parseTags();
-	if(this._done) this._finishParsing();
+        if(this._running) return;
+        this._running = true;
+        this._parseTags();
+        if(this._done) this._finishParsing();
 };
 
 //Resets the parser to a blank state, ready to parse a new HTML document
 Parser.prototype.reset = function(){
-	Parser.call(this, this._cbs, this._options);
-	if(this._cbs.onreset) this._cbs.onreset();
+        Parser.call(this, this._cbs, this._options);
+        if(this._cbs.onreset) this._cbs.onreset();
 };
 
 //Extracts the base tag name from the data value of an element
 Parser.prototype._parseTagName = function(data){
-	var match = data.substr(0, data.search(_reTail));
-	if(!this._options.lowerCaseTags) return match;
-	return match.toLowerCase();
+        var match = data.substr(0, data.search(_reTail));
+        if(!this._options.lowerCaseTags) return match;
+        return match.toLowerCase();
 };
 
 //Special tags that are treated differently
@@ -136,192 +136,182 @@ SpecialTags[ElementType.Comment] = 0x4; //2^2
 SpecialTags[ElementType.CDATA]   = 0x8; //2^3
 
 var TagValues = {
-	style: 1,
-	script: 2
+        style: 1,
+        script: 2
 };
 
 //Parses through HTML text and returns an array of found elements
 Parser.prototype._parseTags = function(force){
-	var current = 0,
-	    opening = this._buffer.indexOf("<"),
-	    closing = this._buffer.indexOf(">"),
-	    next, rawData, elementData, lastTagSep;
-
-	//if force is true, parse everything
-	if(force) opening = Infinity;
-
-	//opening !== closing is just false if both are -1
-	while(opening !== closing && this._running){
-		lastTagSep = this._tagSep;
-		
-		if((opening !== -1 && opening < closing) || closing === -1){
-			next = opening;
-			this._tagSep = "<";
-			opening = this._buffer.indexOf("<", next + 1);
-		}
-		else{
-			next = closing;
-			this._tagSep = ">";
-			closing = this._buffer.indexOf(">", next + 1);
-		}
-		rawData = this._buffer.substring(current, next); //The next chunk of data to parse
-		
-		//set elements for next run
-		current = next + 1;
-		
-		if(this._contentFlags >= SpecialTags[ElementType.CDATA]){
-			// We're inside a CDATA section
-			this._writeCDATA(rawData);
-
-		}
-		else if(this._contentFlags >= SpecialTags[ElementType.Comment]){
-			//We're in a comment tag
-			this._writeComment(rawData);
-		}
-		else if(lastTagSep === "<"){
-			elementData = rawData.trimLeft();
-			if(elementData.charAt(0) === "/"){
-				//elementData = elementData.substr(1).trim();
-				elementData = this._parseTagName(elementData.substr(1));
-				if(this._contentFlags !== 0){
-					//if it's a closing tag, remove the flag
-					if(this._contentFlags & TagValues[elementData]){
-						//remove the flag
-						this._contentFlags ^= TagValues[elementData];
-					} else {
-						this._writeSpecial(rawData, lastTagSep);
-						continue;
-					}
-				}
-				this._processCloseTag(elementData);
-			}
-			else if(elementData.charAt(0) === "!"){
-				if(elementData.substr(1, 7) === "[CDATA["){
-					this._contentFlags |= SpecialTags[ElementType.CDATA];
-					if(this._cbs.oncdatastart) this._cbs.oncdatastart();
-					this._writeCDATA(elementData.substr(8));
-				}
-				else if(this._contentFlags !== 0) this._writeSpecial(rawData, lastTagSep);
-				else if(elementData.substr(1, 2) === "--"){
-					//This tag is a comment
-					this._contentFlags |= SpecialTags[ElementType.Comment];
-					this._writeComment(rawData.substr(3));
-				}
-				//TODO: This isn't a processing instruction, needs a new name
-				else if(this._cbs.onprocessinginstruction){
-					this._cbs.onprocessinginstruction(
-						"!" + this._parseTagName(elementData.substr(1)),
-						elementData
-					);
-				}
-			}
-			else if(this._contentFlags !== 0) this._writeSpecial(rawData, lastTagSep);
-			else if(elementData.charAt(0) === "?"){
-				if(this._cbs.onprocessinginstruction){
-					this._cbs.onprocessinginstruction(
-						"?" + this._parseTagName(elementData.substr(1)),
-						elementData
-					);
-				}
-			}
-			else this._processOpenTag(elementData);
-		}
-		else{
-			if(this._contentFlags !== 0){
-				this._writeSpecial(rawData, ">");
-			}
-			else if(rawData !== "" && this._cbs.ontext){
-				if(this._tagSep === ">") rawData += ">"; //it's the second > in a row
-				this._cbs.ontext(rawData);
-			}
-		}
-	}
-
-	this._buffer = this._buffer.substr(current);
+        var current = 0,
+            opening = this._buffer.indexOf("<"),
+            closing = this._buffer.indexOf(">"),
+            next, rawData, elementData, lastTagSep;
+
+        //if force is true, parse everything
+        if(force) opening = Infinity;
+
+        //opening !== closing is just false if both are -1
+        while(opening !== closing && this._running){
+                lastTagSep = this._tagSep;
+                
+                if((opening !== -1 && opening < closing) || closing === -1){
+                        next = opening;
+                        this._tagSep = "<";
+                        opening = this._buffer.indexOf("<", next + 1);
+                }
+                else{
+                        next = closing;
+                        this._tagSep = ">";
+                        closing = this._buffer.indexOf(">", next + 1);
+                }
+                rawData = this._buffer.substring(current, next); //The next chunk of data to parse
+                
+                //set elements for next run
+                current = next + 1;
+                
+                if(this._contentFlags >= SpecialTags[ElementType.CDATA]){
+                        // We're inside a CDATA section
+                        this._writeCDATA(rawData);
+
+                }
+                else if(this._contentFlags >= SpecialTags[ElementType.Comment]){
+                        //We're in a comment tag
+                        this._writeComment(rawData);
+                }
+                else if(lastTagSep === "<"){
+                        elementData = rawData.trimLeft();
+                        if(elementData.charAt(0) === "/"){
+                                //elementData = elementData.substr(1).trim();
+                                elementData = this._parseTagName(elementData.substr(1));
+                                if(this._contentFlags !== 0){
+                                        //if it's a closing tag, remove the flag
+                                        if(this._contentFlags & TagValues[elementData]){
+                                                //remove the flag
+                                                this._contentFlags ^= TagValues[elementData];
+                                        } else {
+                                                this._writeSpecial(rawData, lastTagSep);
+                                                continue;
+                                        }
+                                }
+                                this._processCloseTag(elementData);
+                        }
+                        else if(elementData.charAt(0) === "!"){
+                                if(elementData.substr(1, 7) === "[CDATA["){
+                                        this._contentFlags |= SpecialTags[ElementType.CDATA];
+                                        if(this._cbs.oncdatastart) this._cbs.oncdatastart();
+                                        this._writeCDATA(elementData.substr(8));
+                                }
+                                else if(this._contentFlags !== 0) this._writeSpecial(rawData, lastTagSep);
+                                else if(elementData.substr(1, 2) === "--"){
+                                        //This tag is a comment
+                                        this._contentFlags |= SpecialTags[ElementType.Comment];
+                                        this._writeComment(rawData.substr(3));
+                                }
+                                //TODO: This isn't a processing instruction, needs a new name
+                                else if(this._cbs.onprocessinginstruction){
+                                        this._cbs.onprocessinginstruction(
+                                                "!" + this._parseTagName(elementData.substr(1)),
+                                                elementData
+                                        );
+                                }
+                        }
+                        else if(this._contentFlags !== 0) this._writeSpecial(rawData, lastTagSep);
+                        else if(elementData.charAt(0) === "?"){
+                                if(this._cbs.onprocessinginstruction){
+                                        this._cbs.onprocessinginstruction(
+                                                "?" + this._parseTagName(elementData.substr(1)),
+                                                elementData
+                                        );
+                                }
+                        }
+                        else this._processOpenTag(elementData);
+                }
+                else{
+                        if(this._contentFlags !== 0){
+                                this._writeSpecial(rawData, ">");
+                        }
+                        else if(rawData !== "" && this._cbs.ontext){
+                                if(this._tagSep === ">") rawData += ">"; //it's the second > in a row
+                                this._cbs.ontext(rawData);
+                        }
+                }
+        }
+
+        this._buffer = this._buffer.substr(current);
 };
 
 Parser.prototype._writeCDATA = function(data){
-	if(this._tagSep === ">" && data.substr(-2) === "]]"){
-		// CDATA ends
-		if(data.length !== 2 && this._cbs.ontext){
-			this._cbs.ontext(data.slice(0,-2));
-		}
-		this._contentFlags ^= SpecialTags[ElementType.CDATA];
-		if(this._cbs.oncdataend) this._cbs.oncdataend();
+        if(this._tagSep === ">" && data.substr(-2) === "]]"){
+                // CDATA ends
+                if(data.length !== 2 && this._cbs.ontext){
+                        this._cbs.ontext(data.slice(0,-2));
+                }
+                this._contentFlags ^= SpecialTags[ElementType.CDATA];
+                if(this._cbs.oncdataend) this._cbs.oncdataend();
     }
     else if(this._cbs.ontext) this._cbs.ontext(data + this._tagSep);
 };
 
 Parser.prototype._writeComment = function(rawData){
-	if(this._tagSep === ">" && rawData.substr(-2) === "--"){ //comment ends
-		//remove the written flag (also removes the comment flag)
-		this._contentFlags ^= SpecialTags[ElementType.Comment];
-		this._wroteSpecial = false;
-		if(this._cbs.oncomment) this._cbs.oncomment(rawData.slice(0, -2));
-		if(this._cbs.oncommentend) this._cbs.oncommentend();
-	}
-	else if(this._cbs.oncomment) this._cbs.oncomment(rawData + this._tagSep);
+        if(this._tagSep === ">" && rawData.substr(-2) === "--"){ //comment ends
+                //remove the written flag (also removes the comment flag)
+                this._contentFlags ^= SpecialTags[ElementType.Comment];
+                this._wroteSpecial = false;
+                if(this._cbs.oncomment) this._cbs.oncomment(rawData.slice(0, -2));
+                if(this._cbs.oncommentend) this._cbs.oncommentend();
+        }
+        else if(this._cbs.oncomment) this._cbs.oncomment(rawData + this._tagSep);
 };
 
 Parser.prototype._writeSpecial = function(rawData, lastTagSep){
-	//if the previous element is text, append the last tag sep to element
-	if(this._wroteSpecial){
-		if(this._cbs.ontext) this._cbs.ontext(lastTagSep + rawData);
-	}
-	else{ //The previous element was not text
-		this._wroteSpecial = true;
-		if(rawData !== "" && this._cbs.ontext) this._cbs.ontext(rawData);
-	}
+        //if the previous element is text, append the last tag sep to element
+        if(this._wroteSpecial){
+                if(this._cbs.ontext) this._cbs.ontext(lastTagSep + rawData);
+        }
+        else{ //The previous element was not text
+                this._wroteSpecial = true;
+                if(rawData !== "" && this._cbs.ontext) this._cbs.ontext(rawData);
+        }
 };
 
 var emptyTags = {
-	__proto__: null,
-	area: true,
-	base: true,
-	basefont: true,
-	br: true,
-	col: true,
-	frame: true,
-	hr: true,
-	img: true,
-	input: true,
-	isindex: true,
-	link: true,
-	meta: true,
-	param: true,
-	embed: true
+        __proto__: null,
+        area: true,
+        base: true,
+        basefont: true,
+        br: true,
+        col: true,
+        frame: true,
+        hr: true,
+        img: true,
+        input: true,
+        isindex: true,
+        link: true,
+        meta: true,
+        param: true,
+        embed: true
 };
 
 Parser.prototype._processCloseTag = function(name){
-	if(this._stack && (!(name in emptyTags) || this._options.xmlMode)){
-		var pos = this._stack.lastIndexOf(name);
-		if(pos !== -1)
-			if(this._cbs.onclosetag){
-				pos = this._stack.length - pos;
-				while(pos--) this._cbs.onclosetag(this._stack.pop());
-			}
-			else this._stack.splice(pos);
-	}
-	//many browsers (eg. Safari, Chrome) convert </br> to <br>
-	else if(name === "br" && !this._options.xmlMode){
-		this._processOpenTag(name + "/");
-	}
+        if(this._stack && (!(name in emptyTags) || this._options.xmlMode)){
+                var pos = this._stack.lastIndexOf(name);
+                if(pos !== -1)
+                        if(this._cbs.onclosetag){
+                                pos = this._stack.length - pos;
+                                while(pos--) this._cbs.onclosetag(this._stack.pop());
+                        }
+                        else this._stack.splice(pos);
+        }
+        //many browsers (eg. Safari, Chrome) convert </br> to <br>
+        else if(name === "br" && !this._options.xmlMode){
+                this._processOpenTag(name + "/");
+        }
 };
 
 Parser.prototype._parseAttributes = function(data, lcNames){
-	var attribs = false;
-	// if the callback exists, call it for each attribute
-	if (this._cbs.onattribute) {
-		for(var match; match = _reAttrib.exec(data);){
-			attribs = true;
-			this._cbs.onattribute(lcNames ? match[1].toLowerCase() : match[1], match[2] || match[3] || match[4] || "");
-		}
-	}
-	// if not, check to see if there are any attributes
-	else if (_reAttrib.exec(data)) {
-		attribs = true;
+	for(var match; match = _reAttrib.exec(data);){
+		this._cbs.onattribute(lcNames ? match[1].toLowerCase() : match[1], match[2] || match[3] || match[4] || "");
 	}
-	return attribs;
 };
 
 //parses the attribute string
@@ -334,45 +324,42 @@ var parseAttributes = function(data, lcNames){
 };
 
 Parser.prototype._processOpenTag = function(data){
-	var name = this._parseTagName(data), attributes,
-	    type = ElementType.Tag;
-	
-	if(this._options.xmlMode){ /*do nothing*/ }
-	else if(name === "script") type = ElementType.Script;
-	else if(name === "style")  type = ElementType.Style;
-
-	if(this._cbs.onopentagname)	this._cbs.onopentagname(name);
-	if(this._cbs.onopentag){
-		this._cbs.onopentag(name, parseAttributes(
-			data, this._options.lowerCaseAttributeNames
-		));
-	}
-	// we need to know if the tag is void or not. the callback check was moved to Parser.prototype._parseAttributes
-	attributes = this._parseAttributes(data, this._options.lowerCaseAttributeNames);
-	if(this._cbs.onopentagend) this._cbs.onopentagend();
-	
-	//If tag self-terminates, add an explicit, separate closing tag
-	/* http://dev.w3.org/html5/html-author/#tags
-	 * In XHTML, self-closing tags are valid but attribute values must be quoted.
-	 * In HTML, self-closing tags must be either void elements or foreign elements.
-	 * Invalid HTML self-closing tag syntax is ignored (treated as an opening tag).
-	 * Foreign elements use XML rules
-	 */
-	if(!attributes && data.substr(-1) === "/" || (name in emptyTags && !this._options.xmlMode)){
-		if(this._cbs.onclosetag) this._cbs.onclosetag(name);
-	} else {
-		if(type !== ElementType.Tag){
-			this._contentFlags |= SpecialTags[type];
-			this._wroteSpecial = false;
-		}
-		this._stack.push(name);
+        var name = this._parseTagName(data),
+                attributes = parseAttributes(data, this._options.lowerCaseAttributeNames),
+            type = ElementType.Tag;
+        
+        if(this._options.xmlMode){ /*do nothing*/ }
+        else if(name === "script") type = ElementType.Script;
+        else if(name === "style")  type = ElementType.Style;
+
+        if(this._cbs.onopentagname)	this._cbs.onopentagname(name);
+        if(this._cbs.onopentag) this._cbs.onopentag(name, attributes);
+	if(this._cbs.onattribute){
+		this._parseAttributes(data, this._options.lowerCaseAttributeNames);
 	}
+        
+        //If tag self-terminates, add an explicit, separate closing tag
+        /* http://dev.w3.org/html5/html-author/#tags
+         * In XHTML, self-closing tags are valid but attribute values must be quoted.
+         * In HTML, self-closing tags must be either void elements or foreign elements.
+         * Invalid HTML self-closing tag syntax is ignored (treated as an opening tag).
+         * Foreign elements use XML rules
+         */
+        if(!Object.keys(attributes).length && data.substr(-1) === "/" || (name in emptyTags && !this._options.xmlMode)){
+                if(this._cbs.onclosetag) this._cbs.onclosetag(name);
+        } else {
+                if(type !== ElementType.Tag){
+                        this._contentFlags |= SpecialTags[type];
+                        this._wroteSpecial = false;
+                }
+                this._stack.push(name);
+        }
 };
 
 Parser.prototype._handleError = function(error){
-	error = new Error(error);
-	if(this._cbs.onerror) this._cbs.onerror(error);
-	else throw error;
+        error = new Error(error);
+        if(this._cbs.onerror) this._cbs.onerror(error);
+        else throw error;
 };
 
 module.exports = Parser;
\ No newline at end of file

From f7b6d5468e2e0961765ab3f9be74114492d5e316 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20B=C3=B6hm?= <me@feedic.com>
Date: Tue, 4 Dec 2012 15:19:13 +0100
Subject: [PATCH 238/450] Revert "stylistic changes"

This reverts commit 181c31bed04d066f7a85feb13967deb8725fba0e.
---
 lib/FeedHandler.js | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/lib/FeedHandler.js b/lib/FeedHandler.js
index e49f6ca..9f02025 100644
--- a/lib/FeedHandler.js
+++ b/lib/FeedHandler.js
@@ -13,16 +13,17 @@ FeedHandler.prototype.init = DomHandler;
 
 function getElements(what, where, one, recurse){
 	if(one) return DomUtils.getElementsByTagName(what, where, recurse, 1)[0];
-	return DomUtils.getElementsByTagName(what, where, recurse);
+	else	return DomUtils.getElementsByTagName(what, where, recurse);
 }
 function fetch(what, where, recurse){
-	var ret = DomUtils.getElementsByTagName(what, where, recurse, 1);
-	return ret.length > 0 && ret[0].children.length > 0 && ret[0].children[0].data;
+	var ret = getElements(what, where, true, recurse);
+	if(ret && (ret = ret.children) && ret.length !== 0) return ret[0].data;
+	else return false;
 }
 
 var isValidFeed = function(value) {
 	return value === "rss" || value === "feed" || value === "rdf:RDF";
-};
+}
 
 FeedHandler.prototype.onend = function() {
 	var feed = {},

From c75da20d59c5a1f5236a8f015d50a4645e5fa9c8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20B=C3=B6hm?= <me@feedic.com>
Date: Tue, 4 Dec 2012 15:27:24 +0100
Subject: [PATCH 239/450] Revert "Revert "stylistic changes""

This reverts commit f7b6d5468e2e0961765ab3f9be74114492d5e316.
---
 lib/FeedHandler.js | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/lib/FeedHandler.js b/lib/FeedHandler.js
index 9f02025..e49f6ca 100644
--- a/lib/FeedHandler.js
+++ b/lib/FeedHandler.js
@@ -13,17 +13,16 @@ FeedHandler.prototype.init = DomHandler;
 
 function getElements(what, where, one, recurse){
 	if(one) return DomUtils.getElementsByTagName(what, where, recurse, 1)[0];
-	else	return DomUtils.getElementsByTagName(what, where, recurse);
+	return DomUtils.getElementsByTagName(what, where, recurse);
 }
 function fetch(what, where, recurse){
-	var ret = getElements(what, where, true, recurse);
-	if(ret && (ret = ret.children) && ret.length !== 0) return ret[0].data;
-	else return false;
+	var ret = DomUtils.getElementsByTagName(what, where, recurse, 1);
+	return ret.length > 0 && ret[0].children.length > 0 && ret[0].children[0].data;
 }
 
 var isValidFeed = function(value) {
 	return value === "rss" || value === "feed" || value === "rdf:RDF";
-}
+};
 
 FeedHandler.prototype.onend = function() {
 	var feed = {},

From 6730fde4bdfc637a628158221435a82a25a8310b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20B=C3=B6hm?= <me@feedic.com>
Date: Tue, 4 Dec 2012 16:26:47 +0100
Subject: [PATCH 240/450] added missing comma in benchmark script

---
 tests/99-benchmark.js | 50 +++++++++++++++++++++----------------------
 1 file changed, 25 insertions(+), 25 deletions(-)

diff --git a/tests/99-benchmark.js b/tests/99-benchmark.js
index ed70b02..e44d76a 100644
--- a/tests/99-benchmark.js
+++ b/tests/99-benchmark.js
@@ -1,26 +1,26 @@
-var multiply = function(text){
-		return Array(5e3+1).join(text);
-	},
-	tests = {
-		self_closing: multiply("<br/>"),
-		tag: multiply("<tag foo=bar foobar> Text </tag>"),
-		comment: multiply("<!-- this is <<a> comment -->"),
-		directive: multiply("<?foo bar?>"),
-		special: multiply("<script> THIS IS <SPECIAL> </script>"),
-		xml: multiply("<!directive><tag attr='value'> text <!--Comment<>--></tag>")
-	}
-	empty = function(){},
-	cbs = {};
-
-require("./test-helper.js").EVENTS.forEach(function(name){
-    cbs["on" + name] = empty;
-});
-
-var parser = new (require("../lib/Parser.js"))(cbs),
-	ben = require("ben");
-
-Object.keys(tests).forEach(function(name){
-	console.log("Test", name, "took", ben(150, function(){
-		parser.parseComplete(tests[name]);
-	}));
+var multiply = function(text){
+		return Array(5e3+1).join(text);
+	},
+	tests = {
+		self_closing: multiply("<br/>"),
+		tag: multiply("<tag foo=bar foobar> Text </tag>"),
+		comment: multiply("<!-- this is <<a> comment -->"),
+		directive: multiply("<?foo bar?>"),
+		special: multiply("<script> THIS IS <SPECIAL> </script>"),
+		xml: multiply("<!directive><tag attr='value'> text <!--Comment<>--></tag>")
+	},
+	empty = function(){},
+	cbs = {};
+
+require("./test-helper.js").EVENTS.forEach(function(name){
+    cbs["on" + name] = empty;
+});
+
+var parser = new (require("../lib/Parser.js"))(cbs),
+	ben = require("ben");
+
+Object.keys(tests).forEach(function(name){
+	console.log("Test", name, "took", ben(150, function(){
+		parser.parseComplete(tests[name]);
+	}));
 });
\ No newline at end of file

From 840291e4764e376dac32fb6b4cf2ee01a38a1228 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Wed, 9 Jan 2013 15:37:30 +0100
Subject: [PATCH 241/450] domelementtype must be version 1.x (not 1.0)

---
 package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package.json b/package.json
index a7ca4a3..498bb29 100644
--- a/package.json
+++ b/package.json
@@ -23,7 +23,7 @@
 	"dependencies": {
 		"domhandler": "2.0",
 		"domutils": "1.0",
-		"domelementtype": "1.0"
+		"domelementtype": "1"
 	},
 	"licenses": [{
 		"type": "MIT",

From 46cd5468894cb03baa1b74e977457c9a81cb828f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Wed, 9 Jan 2013 15:52:57 +0100
Subject: [PATCH 242/450] 2.5.1

---
 package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package.json b/package.json
index 498bb29..46e855a 100644
--- a/package.json
+++ b/package.json
@@ -1,7 +1,7 @@
 {
 	"name": "htmlparser2",
 	"description": "Performance-optimized forgiving HTML/XML/RSS parser",
-	"version": "2.5.0",
+	"version": "2.5.1",
 	"author": "Felix Boehm <me@feedic.com>",
 	"keywords": ["html", "parser", "streams", "xml", "dom", "rss", "feed", "atom"],
 	"contributors": ["Chris Winberry <chris@winberry.net>"],

From a83c708bc53436ceacb371e38252cd7361ea0042 Mon Sep 17 00:00:00 2001
From: Kris Reeves <krisreeves@searchfanatics.com>
Date: Tue, 5 Feb 2013 02:59:25 +0000
Subject: [PATCH 243/450] Better handling of implied close tags. A list is
 given of tags whose close is implied by other tags being opened, and these
 are closed when those tags are opened. This helps correctly parse things like
 lists and tables with unterminated LI or TD tags.

---
 lib/Parser.js | 38 ++++++++++++++++++++++++++++++++------
 1 file changed, 32 insertions(+), 6 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index e25faee..5ac5c91 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -67,6 +67,24 @@ var defaultCbs = {
         */
 };
 
+var formTags = {'input':1, 'option':1, 'optgroup':1, 'select':1, 'button':1, 'datalist':1, 'textarea':1};
+var openImpliesClose = {
+        'tr'      : {'tr':1, 'th':1, 'td':1},
+        'th'      : {'th':1},
+        'td'      : {'thead':1, 'td':1},
+        'body'    : {'head':1, 'link':1, 'script':1},
+        'li'      : {'li':1},
+        'p'       : {'p':1},
+        'select'  : formTags,
+        'input'   : formTags,
+        'output'  : formTags,
+        'button'  : formTags,
+        'datalist': formTags,
+        'textarea': formTags,
+        'option'  : {'option':1},
+        'optgroup': {'optgroup':1}
+};
+
 //Parses a complete HTML and pushes it to the handler
 Parser.prototype.parseComplete = function(data){
         this.reset();
@@ -326,17 +344,25 @@ var parseAttributes = function(data, lcNames){
 Parser.prototype._processOpenTag = function(data){
         var name = this._parseTagName(data),
                 attributes = parseAttributes(data, this._options.lowerCaseAttributeNames),
-            type = ElementType.Tag;
+                type = ElementType.Tag;
         
         if(this._options.xmlMode){ /*do nothing*/ }
         else if(name === "script") type = ElementType.Script;
         else if(name === "style")  type = ElementType.Style;
-
-        if(this._cbs.onopentagname)	this._cbs.onopentagname(name);
+        
+        if (name in openImpliesClose) {
+                var el;
+                while ((el = this._stack[this._stack.length]) in openImpliesClose[name]) {
+                        this._stack.pop();
+                        this._processCloseTag(el);
+                }
+        }
+        
+        if(this._cbs.onopentagname) this._cbs.onopentagname(name);
         if(this._cbs.onopentag) this._cbs.onopentag(name, attributes);
-	if(this._cbs.onattribute){
-		this._parseAttributes(data, this._options.lowerCaseAttributeNames);
-	}
+        if(this._cbs.onattribute){
+                this._parseAttributes(data, this._options.lowerCaseAttributeNames);
+        }
         
         //If tag self-terminates, add an explicit, separate closing tag
         /* http://dev.w3.org/html5/html-author/#tags

From a1777a92c0ba1dc84ae7d52b0eb3df817bdd7fca Mon Sep 17 00:00:00 2001
From: Kris Reeves <krisreeves@searchfanatics.com>
Date: Tue, 5 Feb 2013 03:05:14 +0000
Subject: [PATCH 244/450] spaces -> tabs, thought the merge would update my
 local files to the correct spacing (and tried to match that)

---
 lib/Parser.js | 526 +++++++++++++++++++++++++-------------------------
 1 file changed, 263 insertions(+), 263 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index 5ac5c91..370ca71 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -1,15 +1,15 @@
 var ElementType = require("./ElementType.js");
 
 function Parser(cbs, options){
-        this._options = options || defaultOpts;
-        this._cbs = cbs || defaultCbs;
-        this._buffer = "";
-        this._tagSep = "";
-        this._stack = [];
-        this._wroteSpecial = false;
-        this._contentFlags = 0;
-        this._done = false;
-        this._running = true; //false if paused
+	this._options = options || defaultOpts;
+	this._cbs = cbs || defaultCbs;
+	this._buffer = "";
+	this._tagSep = "";
+	this._stack = [];
+	this._wroteSpecial = false;
+	this._contentFlags = 0;
+	this._done = false;
+	this._running = true; //false if paused
 }
 
 //Regular expressions used for cleaning up and parsing (stateless)
@@ -40,109 +40,109 @@ var _reAttrib = /\s+([^"'=>\/\s]+)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'=<>`]+
     _reTail = /\s|\/|$/;
 
 var defaultOpts = {
-        xmlMode: false, //Special behavior for script/style tags by default
-        lowerCaseAttributeNames: false, //call .toLowerCase for each attribute name
-        lowerCaseTags: false //call .toLowerCase for each tag name
+	xmlMode: false, //Special behavior for script/style tags by default
+	lowerCaseAttributeNames: false, //call .toLowerCase for each attribute name
+	lowerCaseTags: false //call .toLowerCase for each tag name
 };
 
 var defaultCbs = {
-        /*
-                This is just a plain object
-                so that the parser doesn't
-                throw if no arguments were
-                provided.
-        */
-        /*
-                oncdataend,
-                oncdatastart,
-                onclosetag,
-                oncomment,
-                oncommentend,
-                onerror,
-                onopentag,
-                onopentagend,
-                onprocessinginstruction,
-                onreset,
-                ontext
-        */
+	/*
+		This is just a plain object
+		so that the parser doesn't
+		throw if no arguments were
+		provided.
+	*/
+	/*
+		oncdataend,
+		oncdatastart,
+		onclosetag,
+		oncomment,
+		oncommentend,
+		onerror,
+		onopentag,
+		onopentagend,
+		onprocessinginstruction,
+		onreset,
+		ontext
+	*/
 };
 
 var formTags = {'input':1, 'option':1, 'optgroup':1, 'select':1, 'button':1, 'datalist':1, 'textarea':1};
 var openImpliesClose = {
-        'tr'      : {'tr':1, 'th':1, 'td':1},
-        'th'      : {'th':1},
-        'td'      : {'thead':1, 'td':1},
-        'body'    : {'head':1, 'link':1, 'script':1},
-        'li'      : {'li':1},
-        'p'       : {'p':1},
-        'select'  : formTags,
-        'input'   : formTags,
-        'output'  : formTags,
-        'button'  : formTags,
-        'datalist': formTags,
-        'textarea': formTags,
-        'option'  : {'option':1},
-        'optgroup': {'optgroup':1}
+	'tr'      : {'tr':1, 'th':1, 'td':1},
+	'th'      : {'th':1},
+	'td'      : {'thead':1, 'td':1},
+	'body'    : {'head':1, 'link':1, 'script':1},
+	'li'      : {'li':1},
+	'p'       : {'p':1},
+	'select'  : formTags,
+	'input'   : formTags,
+	'output'  : formTags,
+	'button'  : formTags,
+	'datalist': formTags,
+	'textarea': formTags,
+	'option'  : {'option':1},
+	'optgroup': {'optgroup':1}
 };
 
 //Parses a complete HTML and pushes it to the handler
 Parser.prototype.parseComplete = function(data){
-        this.reset();
-        this.end(data);
+	this.reset();
+	this.end(data);
 };
 
 //Parses a piece of an HTML document
 Parser.prototype.parseChunk =
 Parser.prototype.write = function(data){
-        if(this._done) this._handleError("Attempted to parse chunk after parsing already done");
-        this._buffer += data; //FIXME: this can be a bottleneck
-        if(this._running) this._parseTags();
+	if(this._done) this._handleError("Attempted to parse chunk after parsing already done");
+	this._buffer += data; //FIXME: this can be a bottleneck
+	if(this._running) this._parseTags();
 };
 
 //Tells the parser that the HTML being parsed is complete
 Parser.prototype.done =
 Parser.prototype.end = function(chunk){
-        if(this._done) return;
+	if(this._done) return;
 
-        if(chunk) this.write(chunk);
-        this._done = true;
-        
-        if(this._running) this._finishParsing();
+	if(chunk) this.write(chunk);
+	this._done = true;
+	
+	if(this._running) this._finishParsing();
 };
 
 Parser.prototype._finishParsing = function(){
-        //Parse the buffer to its end
-        if(this._buffer) this._parseTags(true);
-        
-        if(this._cbs.onclosetag){
-                while(this._stack.length) this._cbs.onclosetag(this._stack.pop());
-        }
-        
-        if(this._cbs.onend) this._cbs.onend();
+	//Parse the buffer to its end
+	if(this._buffer) this._parseTags(true);
+	
+	if(this._cbs.onclosetag){
+		while(this._stack.length) this._cbs.onclosetag(this._stack.pop());
+	}
+	
+	if(this._cbs.onend) this._cbs.onend();
 };
 
 Parser.prototype.pause = function(){
-        if(!this._done) this._running = false;
+	if(!this._done) this._running = false;
 };
 
 Parser.prototype.resume = function(){
-        if(this._running) return;
-        this._running = true;
-        this._parseTags();
-        if(this._done) this._finishParsing();
+	if(this._running) return;
+	this._running = true;
+	this._parseTags();
+	if(this._done) this._finishParsing();
 };
 
 //Resets the parser to a blank state, ready to parse a new HTML document
 Parser.prototype.reset = function(){
-        Parser.call(this, this._cbs, this._options);
-        if(this._cbs.onreset) this._cbs.onreset();
+	Parser.call(this, this._cbs, this._options);
+	if(this._cbs.onreset) this._cbs.onreset();
 };
 
 //Extracts the base tag name from the data value of an element
 Parser.prototype._parseTagName = function(data){
-        var match = data.substr(0, data.search(_reTail));
-        if(!this._options.lowerCaseTags) return match;
-        return match.toLowerCase();
+	var match = data.substr(0, data.search(_reTail));
+	if(!this._options.lowerCaseTags) return match;
+	return match.toLowerCase();
 };
 
 //Special tags that are treated differently
@@ -154,176 +154,176 @@ SpecialTags[ElementType.Comment] = 0x4; //2^2
 SpecialTags[ElementType.CDATA]   = 0x8; //2^3
 
 var TagValues = {
-        style: 1,
-        script: 2
+	style: 1,
+	script: 2
 };
 
 //Parses through HTML text and returns an array of found elements
 Parser.prototype._parseTags = function(force){
-        var current = 0,
-            opening = this._buffer.indexOf("<"),
-            closing = this._buffer.indexOf(">"),
-            next, rawData, elementData, lastTagSep;
-
-        //if force is true, parse everything
-        if(force) opening = Infinity;
-
-        //opening !== closing is just false if both are -1
-        while(opening !== closing && this._running){
-                lastTagSep = this._tagSep;
-                
-                if((opening !== -1 && opening < closing) || closing === -1){
-                        next = opening;
-                        this._tagSep = "<";
-                        opening = this._buffer.indexOf("<", next + 1);
-                }
-                else{
-                        next = closing;
-                        this._tagSep = ">";
-                        closing = this._buffer.indexOf(">", next + 1);
-                }
-                rawData = this._buffer.substring(current, next); //The next chunk of data to parse
-                
-                //set elements for next run
-                current = next + 1;
-                
-                if(this._contentFlags >= SpecialTags[ElementType.CDATA]){
-                        // We're inside a CDATA section
-                        this._writeCDATA(rawData);
-
-                }
-                else if(this._contentFlags >= SpecialTags[ElementType.Comment]){
-                        //We're in a comment tag
-                        this._writeComment(rawData);
-                }
-                else if(lastTagSep === "<"){
-                        elementData = rawData.trimLeft();
-                        if(elementData.charAt(0) === "/"){
-                                //elementData = elementData.substr(1).trim();
-                                elementData = this._parseTagName(elementData.substr(1));
-                                if(this._contentFlags !== 0){
-                                        //if it's a closing tag, remove the flag
-                                        if(this._contentFlags & TagValues[elementData]){
-                                                //remove the flag
-                                                this._contentFlags ^= TagValues[elementData];
-                                        } else {
-                                                this._writeSpecial(rawData, lastTagSep);
-                                                continue;
-                                        }
-                                }
-                                this._processCloseTag(elementData);
-                        }
-                        else if(elementData.charAt(0) === "!"){
-                                if(elementData.substr(1, 7) === "[CDATA["){
-                                        this._contentFlags |= SpecialTags[ElementType.CDATA];
-                                        if(this._cbs.oncdatastart) this._cbs.oncdatastart();
-                                        this._writeCDATA(elementData.substr(8));
-                                }
-                                else if(this._contentFlags !== 0) this._writeSpecial(rawData, lastTagSep);
-                                else if(elementData.substr(1, 2) === "--"){
-                                        //This tag is a comment
-                                        this._contentFlags |= SpecialTags[ElementType.Comment];
-                                        this._writeComment(rawData.substr(3));
-                                }
-                                //TODO: This isn't a processing instruction, needs a new name
-                                else if(this._cbs.onprocessinginstruction){
-                                        this._cbs.onprocessinginstruction(
-                                                "!" + this._parseTagName(elementData.substr(1)),
-                                                elementData
-                                        );
-                                }
-                        }
-                        else if(this._contentFlags !== 0) this._writeSpecial(rawData, lastTagSep);
-                        else if(elementData.charAt(0) === "?"){
-                                if(this._cbs.onprocessinginstruction){
-                                        this._cbs.onprocessinginstruction(
-                                                "?" + this._parseTagName(elementData.substr(1)),
-                                                elementData
-                                        );
-                                }
-                        }
-                        else this._processOpenTag(elementData);
-                }
-                else{
-                        if(this._contentFlags !== 0){
-                                this._writeSpecial(rawData, ">");
-                        }
-                        else if(rawData !== "" && this._cbs.ontext){
-                                if(this._tagSep === ">") rawData += ">"; //it's the second > in a row
-                                this._cbs.ontext(rawData);
-                        }
-                }
-        }
-
-        this._buffer = this._buffer.substr(current);
+	var current = 0,
+	    opening = this._buffer.indexOf("<"),
+	    closing = this._buffer.indexOf(">"),
+	    next, rawData, elementData, lastTagSep;
+
+	//if force is true, parse everything
+	if(force) opening = Infinity;
+
+	//opening !== closing is just false if both are -1
+	while(opening !== closing && this._running){
+		lastTagSep = this._tagSep;
+		
+		if((opening !== -1 && opening < closing) || closing === -1){
+			next = opening;
+			this._tagSep = "<";
+			opening = this._buffer.indexOf("<", next + 1);
+		}
+		else{
+			next = closing;
+			this._tagSep = ">";
+			closing = this._buffer.indexOf(">", next + 1);
+		}
+		rawData = this._buffer.substring(current, next); //The next chunk of data to parse
+		
+		//set elements for next run
+		current = next + 1;
+		
+		if(this._contentFlags >= SpecialTags[ElementType.CDATA]){
+			// We're inside a CDATA section
+			this._writeCDATA(rawData);
+
+		}
+		else if(this._contentFlags >= SpecialTags[ElementType.Comment]){
+			//We're in a comment tag
+			this._writeComment(rawData);
+		}
+		else if(lastTagSep === "<"){
+			elementData = rawData.trimLeft();
+			if(elementData.charAt(0) === "/"){
+				//elementData = elementData.substr(1).trim();
+				elementData = this._parseTagName(elementData.substr(1));
+				if(this._contentFlags !== 0){
+					//if it's a closing tag, remove the flag
+					if(this._contentFlags & TagValues[elementData]){
+						//remove the flag
+						this._contentFlags ^= TagValues[elementData];
+					} else {
+						this._writeSpecial(rawData, lastTagSep);
+						continue;
+					}
+				}
+				this._processCloseTag(elementData);
+			}
+			else if(elementData.charAt(0) === "!"){
+				if(elementData.substr(1, 7) === "[CDATA["){
+					this._contentFlags |= SpecialTags[ElementType.CDATA];
+					if(this._cbs.oncdatastart) this._cbs.oncdatastart();
+					this._writeCDATA(elementData.substr(8));
+				}
+				else if(this._contentFlags !== 0) this._writeSpecial(rawData, lastTagSep);
+				else if(elementData.substr(1, 2) === "--"){
+					//This tag is a comment
+					this._contentFlags |= SpecialTags[ElementType.Comment];
+					this._writeComment(rawData.substr(3));
+				}
+				//TODO: This isn't a processing instruction, needs a new name
+				else if(this._cbs.onprocessinginstruction){
+					this._cbs.onprocessinginstruction(
+						"!" + this._parseTagName(elementData.substr(1)),
+						elementData
+					);
+				}
+			}
+			else if(this._contentFlags !== 0) this._writeSpecial(rawData, lastTagSep);
+			else if(elementData.charAt(0) === "?"){
+				if(this._cbs.onprocessinginstruction){
+					this._cbs.onprocessinginstruction(
+						"?" + this._parseTagName(elementData.substr(1)),
+						elementData
+					);
+				}
+			}
+			else this._processOpenTag(elementData);
+		}
+		else{
+			if(this._contentFlags !== 0){
+				this._writeSpecial(rawData, ">");
+			}
+			else if(rawData !== "" && this._cbs.ontext){
+				if(this._tagSep === ">") rawData += ">"; //it's the second > in a row
+				this._cbs.ontext(rawData);
+			}
+		}
+	}
+
+	this._buffer = this._buffer.substr(current);
 };
 
 Parser.prototype._writeCDATA = function(data){
-        if(this._tagSep === ">" && data.substr(-2) === "]]"){
-                // CDATA ends
-                if(data.length !== 2 && this._cbs.ontext){
-                        this._cbs.ontext(data.slice(0,-2));
-                }
-                this._contentFlags ^= SpecialTags[ElementType.CDATA];
-                if(this._cbs.oncdataend) this._cbs.oncdataend();
+	if(this._tagSep === ">" && data.substr(-2) === "]]"){
+		// CDATA ends
+		if(data.length !== 2 && this._cbs.ontext){
+			this._cbs.ontext(data.slice(0,-2));
+		}
+		this._contentFlags ^= SpecialTags[ElementType.CDATA];
+		if(this._cbs.oncdataend) this._cbs.oncdataend();
     }
     else if(this._cbs.ontext) this._cbs.ontext(data + this._tagSep);
 };
 
 Parser.prototype._writeComment = function(rawData){
-        if(this._tagSep === ">" && rawData.substr(-2) === "--"){ //comment ends
-                //remove the written flag (also removes the comment flag)
-                this._contentFlags ^= SpecialTags[ElementType.Comment];
-                this._wroteSpecial = false;
-                if(this._cbs.oncomment) this._cbs.oncomment(rawData.slice(0, -2));
-                if(this._cbs.oncommentend) this._cbs.oncommentend();
-        }
-        else if(this._cbs.oncomment) this._cbs.oncomment(rawData + this._tagSep);
+	if(this._tagSep === ">" && rawData.substr(-2) === "--"){ //comment ends
+		//remove the written flag (also removes the comment flag)
+		this._contentFlags ^= SpecialTags[ElementType.Comment];
+		this._wroteSpecial = false;
+		if(this._cbs.oncomment) this._cbs.oncomment(rawData.slice(0, -2));
+		if(this._cbs.oncommentend) this._cbs.oncommentend();
+	}
+	else if(this._cbs.oncomment) this._cbs.oncomment(rawData + this._tagSep);
 };
 
 Parser.prototype._writeSpecial = function(rawData, lastTagSep){
-        //if the previous element is text, append the last tag sep to element
-        if(this._wroteSpecial){
-                if(this._cbs.ontext) this._cbs.ontext(lastTagSep + rawData);
-        }
-        else{ //The previous element was not text
-                this._wroteSpecial = true;
-                if(rawData !== "" && this._cbs.ontext) this._cbs.ontext(rawData);
-        }
+	//if the previous element is text, append the last tag sep to element
+	if(this._wroteSpecial){
+		if(this._cbs.ontext) this._cbs.ontext(lastTagSep + rawData);
+	}
+	else{ //The previous element was not text
+		this._wroteSpecial = true;
+		if(rawData !== "" && this._cbs.ontext) this._cbs.ontext(rawData);
+	}
 };
 
 var emptyTags = {
-        __proto__: null,
-        area: true,
-        base: true,
-        basefont: true,
-        br: true,
-        col: true,
-        frame: true,
-        hr: true,
-        img: true,
-        input: true,
-        isindex: true,
-        link: true,
-        meta: true,
-        param: true,
-        embed: true
+	__proto__: null,
+	area: true,
+	base: true,
+	basefont: true,
+	br: true,
+	col: true,
+	frame: true,
+	hr: true,
+	img: true,
+	input: true,
+	isindex: true,
+	link: true,
+	meta: true,
+	param: true,
+	embed: true
 };
 
 Parser.prototype._processCloseTag = function(name){
-        if(this._stack && (!(name in emptyTags) || this._options.xmlMode)){
-                var pos = this._stack.lastIndexOf(name);
-                if(pos !== -1)
-                        if(this._cbs.onclosetag){
-                                pos = this._stack.length - pos;
-                                while(pos--) this._cbs.onclosetag(this._stack.pop());
-                        }
-                        else this._stack.splice(pos);
-        }
-        //many browsers (eg. Safari, Chrome) convert </br> to <br>
-        else if(name === "br" && !this._options.xmlMode){
-                this._processOpenTag(name + "/");
-        }
+	if(this._stack && (!(name in emptyTags) || this._options.xmlMode)){
+		var pos = this._stack.lastIndexOf(name);
+		if(pos !== -1)
+			if(this._cbs.onclosetag){
+				pos = this._stack.length - pos;
+				while(pos--) this._cbs.onclosetag(this._stack.pop());
+			}
+			else this._stack.splice(pos);
+	}
+	//many browsers (eg. Safari, Chrome) convert </br> to <br>
+	else if(name === "br" && !this._options.xmlMode){
+		this._processOpenTag(name + "/");
+	}
 };
 
 Parser.prototype._parseAttributes = function(data, lcNames){
@@ -342,50 +342,50 @@ var parseAttributes = function(data, lcNames){
 };
 
 Parser.prototype._processOpenTag = function(data){
-        var name = this._parseTagName(data),
-                attributes = parseAttributes(data, this._options.lowerCaseAttributeNames),
-                type = ElementType.Tag;
-        
-        if(this._options.xmlMode){ /*do nothing*/ }
-        else if(name === "script") type = ElementType.Script;
-        else if(name === "style")  type = ElementType.Style;
-        
-        if (name in openImpliesClose) {
-                var el;
-                while ((el = this._stack[this._stack.length]) in openImpliesClose[name]) {
-                        this._stack.pop();
-                        this._processCloseTag(el);
-                }
-        }
-        
-        if(this._cbs.onopentagname) this._cbs.onopentagname(name);
-        if(this._cbs.onopentag) this._cbs.onopentag(name, attributes);
-        if(this._cbs.onattribute){
-                this._parseAttributes(data, this._options.lowerCaseAttributeNames);
-        }
-        
-        //If tag self-terminates, add an explicit, separate closing tag
-        /* http://dev.w3.org/html5/html-author/#tags
-         * In XHTML, self-closing tags are valid but attribute values must be quoted.
-         * In HTML, self-closing tags must be either void elements or foreign elements.
-         * Invalid HTML self-closing tag syntax is ignored (treated as an opening tag).
-         * Foreign elements use XML rules
-         */
-        if(!Object.keys(attributes).length && data.substr(-1) === "/" || (name in emptyTags && !this._options.xmlMode)){
-                if(this._cbs.onclosetag) this._cbs.onclosetag(name);
-        } else {
-                if(type !== ElementType.Tag){
-                        this._contentFlags |= SpecialTags[type];
-                        this._wroteSpecial = false;
-                }
-                this._stack.push(name);
-        }
+	var name = this._parseTagName(data),
+		attributes = parseAttributes(data, this._options.lowerCaseAttributeNames),
+		type = ElementType.Tag;
+	
+	if(this._options.xmlMode){ /*do nothing*/ }
+	else if(name === "script") type = ElementType.Script;
+	else if(name === "style")  type = ElementType.Style;
+	
+	if (name in openImpliesClose) {
+		var el;
+		while ((el = this._stack[this._stack.length]) in openImpliesClose[name]) {
+			this._stack.pop();
+			this._processCloseTag(el);
+		}
+	}
+	
+	if(this._cbs.onopentagname) this._cbs.onopentagname(name);
+	if(this._cbs.onopentag) this._cbs.onopentag(name, attributes);
+	if(this._cbs.onattribute){
+		this._parseAttributes(data, this._options.lowerCaseAttributeNames);
+	}
+	
+	//If tag self-terminates, add an explicit, separate closing tag
+	/* http://dev.w3.org/html5/html-author/#tags
+	 * In XHTML, self-closing tags are valid but attribute values must be quoted.
+	 * In HTML, self-closing tags must be either void elements or foreign elements.
+	 * Invalid HTML self-closing tag syntax is ignored (treated as an opening tag).
+	 * Foreign elements use XML rules
+	 */
+	if(!Object.keys(attributes).length && data.substr(-1) === "/" || (name in emptyTags && !this._options.xmlMode)){
+		if(this._cbs.onclosetag) this._cbs.onclosetag(name);
+	} else {
+		if(type !== ElementType.Tag){
+			this._contentFlags |= SpecialTags[type];
+			this._wroteSpecial = false;
+		}
+		this._stack.push(name);
+	}
 };
 
 Parser.prototype._handleError = function(error){
-        error = new Error(error);
-        if(this._cbs.onerror) this._cbs.onerror(error);
-        else throw error;
+	error = new Error(error);
+	if(this._cbs.onerror) this._cbs.onerror(error);
+	else throw error;
 };
 
 module.exports = Parser;
\ No newline at end of file

From a126b18cf8c3596e5c271eea3d4f0aa295c09bd6 Mon Sep 17 00:00:00 2001
From: Kris Reeves <krisreeves@searchfanatics.com>
Date: Mon, 11 Feb 2013 13:06:29 -0500
Subject: [PATCH 245/450] Derp.

---
 lib/Parser.js | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index 370ca71..3ed0d71 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -349,15 +349,12 @@ Parser.prototype._processOpenTag = function(data){
 	if(this._options.xmlMode){ /*do nothing*/ }
 	else if(name === "script") type = ElementType.Script;
 	else if(name === "style")  type = ElementType.Style;
-	
 	if (name in openImpliesClose) {
 		var el;
-		while ((el = this._stack[this._stack.length]) in openImpliesClose[name]) {
-			this._stack.pop();
+		while ((el = this._stack[this._stack.length-1]) in openImpliesClose[name]) {
 			this._processCloseTag(el);
 		}
 	}
-	
 	if(this._cbs.onopentagname) this._cbs.onopentagname(name);
 	if(this._cbs.onopentag) this._cbs.onopentag(name, attributes);
 	if(this._cbs.onattribute){

From 5a72c283ec048106bfeb969f862e08b558dbfdfb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20B=C3=B6hm?= <me@feedic.com>
Date: Tue, 4 Dec 2012 16:26:47 +0100
Subject: [PATCH 246/450] added missing comma in benchmark script

---
 tests/99-benchmark.js | 50 +++++++++++++++++++++----------------------
 1 file changed, 25 insertions(+), 25 deletions(-)

diff --git a/tests/99-benchmark.js b/tests/99-benchmark.js
index ed70b02..e44d76a 100644
--- a/tests/99-benchmark.js
+++ b/tests/99-benchmark.js
@@ -1,26 +1,26 @@
-var multiply = function(text){
-		return Array(5e3+1).join(text);
-	},
-	tests = {
-		self_closing: multiply("<br/>"),
-		tag: multiply("<tag foo=bar foobar> Text </tag>"),
-		comment: multiply("<!-- this is <<a> comment -->"),
-		directive: multiply("<?foo bar?>"),
-		special: multiply("<script> THIS IS <SPECIAL> </script>"),
-		xml: multiply("<!directive><tag attr='value'> text <!--Comment<>--></tag>")
-	}
-	empty = function(){},
-	cbs = {};
-
-require("./test-helper.js").EVENTS.forEach(function(name){
-    cbs["on" + name] = empty;
-});
-
-var parser = new (require("../lib/Parser.js"))(cbs),
-	ben = require("ben");
-
-Object.keys(tests).forEach(function(name){
-	console.log("Test", name, "took", ben(150, function(){
-		parser.parseComplete(tests[name]);
-	}));
+var multiply = function(text){
+		return Array(5e3+1).join(text);
+	},
+	tests = {
+		self_closing: multiply("<br/>"),
+		tag: multiply("<tag foo=bar foobar> Text </tag>"),
+		comment: multiply("<!-- this is <<a> comment -->"),
+		directive: multiply("<?foo bar?>"),
+		special: multiply("<script> THIS IS <SPECIAL> </script>"),
+		xml: multiply("<!directive><tag attr='value'> text <!--Comment<>--></tag>")
+	},
+	empty = function(){},
+	cbs = {};
+
+require("./test-helper.js").EVENTS.forEach(function(name){
+    cbs["on" + name] = empty;
+});
+
+var parser = new (require("../lib/Parser.js"))(cbs),
+	ben = require("ben");
+
+Object.keys(tests).forEach(function(name){
+	console.log("Test", name, "took", ben(150, function(){
+		parser.parseComplete(tests[name]);
+	}));
 });
\ No newline at end of file

From eca12d8f5562e3c5253da3d48daf0a1335dae109 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Wed, 9 Jan 2013 15:37:30 +0100
Subject: [PATCH 247/450] domelementtype must be version 1.x (not 1.0)

---
 package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package.json b/package.json
index a7ca4a3..498bb29 100644
--- a/package.json
+++ b/package.json
@@ -23,7 +23,7 @@
 	"dependencies": {
 		"domhandler": "2.0",
 		"domutils": "1.0",
-		"domelementtype": "1.0"
+		"domelementtype": "1"
 	},
 	"licenses": [{
 		"type": "MIT",

From 7f0389f609439e16f6deffa0e4a4514de94cbbc9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Wed, 9 Jan 2013 15:52:57 +0100
Subject: [PATCH 248/450] 2.5.1

---
 package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package.json b/package.json
index 498bb29..46e855a 100644
--- a/package.json
+++ b/package.json
@@ -1,7 +1,7 @@
 {
 	"name": "htmlparser2",
 	"description": "Performance-optimized forgiving HTML/XML/RSS parser",
-	"version": "2.5.0",
+	"version": "2.5.1",
 	"author": "Felix Boehm <me@feedic.com>",
 	"keywords": ["html", "parser", "streams", "xml", "dom", "rss", "feed", "atom"],
 	"contributors": ["Chris Winberry <chris@winberry.net>"],

From 8df87abaa8f7030f689218ce83353d984d8a648f Mon Sep 17 00:00:00 2001
From: Mike Pennisi <mike@mikepennisi.com>
Date: Thu, 14 Feb 2013 16:19:52 -0500
Subject: [PATCH 249/450] Recognize closing CDATA tags as end of "special"

This allows for correct parsing of text that directly follows CDATA tags
---
 lib/Parser.js                      |  1 +
 tests/Events/05-cdata-special.json | 97 ++++++++++++++++++++++++++++++
 2 files changed, 98 insertions(+)
 create mode 100644 tests/Events/05-cdata-special.json

diff --git a/lib/Parser.js b/lib/Parser.js
index 201cb8a..841f433 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -225,6 +225,7 @@ Parser.prototype._writeCDATA = function(data){
 		}
 		this._contentFlags ^= SpecialTags[ElementType.CDATA];
 		if(this._cbs.oncdataend) this._cbs.oncdataend();
+		this._wroteSpecial = false;
     }
     else if(this._cbs.ontext) this._cbs.ontext(data + this._tagSep);
 };
diff --git a/tests/Events/05-cdata-special.json b/tests/Events/05-cdata-special.json
new file mode 100644
index 0000000..4b271ef
--- /dev/null
+++ b/tests/Events/05-cdata-special.json
@@ -0,0 +1,97 @@
+{
+  "name": "CDATA",
+  "options": {
+    "handler": {},
+    "parser": {}
+  },
+  "html": "<script>/*<![CDATA[*/ asdf ><asdf></adsf><> fo/*]]>*/</script>",
+  "expected": [
+    {
+      "event": "opentagname",
+      "data": [
+        "script"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "script",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "/*"
+      ]
+    },
+    {
+      "event": "cdatastart",
+      "data": []
+    },
+    {
+      "event": "text",
+      "data": [
+        "*/ asdf >"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "<"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "asdf>"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "<"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "/adsf>"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "<"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        ">"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        " fo/*"
+      ]
+    },
+    {
+      "event": "cdataend",
+      "data": []
+    },
+    {
+      "event": "text",
+      "data": [
+        "*/"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "script"
+      ]
+    }
+  ]
+}

From d21706b161173ba10b71a18f432eefab92fddee8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 15 Feb 2013 08:58:30 +0100
Subject: [PATCH 250/450] test on node 0.6, 0.8 & 0.9

---
 .travis.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 2948edb..84fd7ca 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,5 +1,5 @@
 language: node_js
 node_js:
-  - 0.4
   - 0.6
-  - 0.7
+  - 0.8
+  - 0.9

From 4dc73a55679c8a9c7dc8b6ba54a53252ffbf2295 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 15 Feb 2013 08:59:16 +0100
Subject: [PATCH 251/450] FeedHandler should return an error when nothing's
 found

---
 lib/FeedHandler.js | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/lib/FeedHandler.js b/lib/FeedHandler.js
index e49f6ca..cb86cdb 100644
--- a/lib/FeedHandler.js
+++ b/lib/FeedHandler.js
@@ -79,7 +79,9 @@ FeedHandler.prototype.onend = function() {
 		}
 	}
 	this.dom = feed;
-	DomHandler.prototype._handleCallback.call(this);
+	DomHandler.prototype._handleCallback.call(
+		this, feedRoot ? null : Error("couldn't find root of feed")
+	);
 };
 
 module.exports = FeedHandler;
\ No newline at end of file

From e976099a7e71a19f12777f7fb58ed8e194421eec Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 15 Feb 2013 10:15:57 +0100
Subject: [PATCH 252/450] added missing semicolon in test-helper.js

---
 tests/test-helper.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test-helper.js b/tests/test-helper.js
index 31c83fe..04f3b2b 100644
--- a/tests/test-helper.js
+++ b/tests/test-helper.js
@@ -11,7 +11,7 @@ exports.writeToParser = function(handler, options, data){
 	parser.done();
 	//then parse everything
 	parser.parseComplete(data);
-}
+};
 
 var EVENTS = Object.keys(htmlparser.EVENTS);
 

From 36650b8779687c6ba77b78e1695b0f731b852606 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 15 Feb 2013 10:35:01 +0100
Subject: [PATCH 253/450] improved how tests are run

---
 tests/00-runtests.js | 44 ++++++++++++++++++++++----------------------
 tests/02-feed.js     |  2 +-
 tests/03-events.js   |  2 +-
 tests/05-stream.js   |  2 +-
 4 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/tests/00-runtests.js b/tests/00-runtests.js
index c742539..6c6c9b1 100644
--- a/tests/00-runtests.js
+++ b/tests/00-runtests.js
@@ -1,26 +1,33 @@
 var fs = require("fs"),
-	assert = require("assert");
+    path = require("path"),
+    assert = require("assert");
 
 var runCount = 0,
 	testCount = 0;
 
-function runTests(test){
+[
+ "./02-feed.js",
+ "./03-events.js",
+ "./05-stream.js"
+]
+.map(require)
+.forEach(function (test){
+	var dir = path.resolve(__dirname, test.dir);
+
 	//read files, load them, run them
-	fs.readdirSync(__dirname + test.dir
-	).map(function(file){
-		if(file[0] === ".") return false;
-		if(file.substr(-5) === ".json") return JSON.parse(
-			fs.readFileSync(__dirname + test.dir + file)
-		);
-		return require(__dirname + test.dir + file);
-	}).forEach(function(file){
-		if(!file) return;
-		var second = false;
-		
+	var f = fs
+	.readdirSync(dir)
+	.filter(RegExp.prototype.test, /^[^\._]/) //ignore all files with a leading dot or underscore
+	.map(function(name){
+		return path.resolve(dir, name);
+	})
+	.map(require)
+	.forEach(function(file){
 		runCount++;
 		
 		console.log("Testing:", file.name);
 		
+		var second = false; //every test runs twice
 		test.test(file, function(err, dom){
 			assert.ifError(err);
 			assert.deepEqual(file.expected, dom, "didn't get expected output");
@@ -32,15 +39,8 @@ function runTests(test){
 			else second = true;
 		});
 	});
-	console.log("->", test.dir.slice(1, -1), "started");
-}
-
-//run all tests
-[
- "./02-feed.js",
- "./03-events.js",
- "./05-stream.js"
-].map(require).forEach(runTests);
+	console.log("->", test.dir, "started");
+});
 
 //log the results
 (function check(){
diff --git a/tests/02-feed.js b/tests/02-feed.js
index 308079b..230f925 100644
--- a/tests/02-feed.js
+++ b/tests/02-feed.js
@@ -7,7 +7,7 @@ var helper = require("./test-helper.js"),
 		xmlMode: true
 	};
 
-exports.dir = "/Feeds/";
+exports.dir = "Feeds";
 
 exports.test = function(test, cb){
 	var handler = new FeedHandler(function(err, dom){
diff --git a/tests/03-events.js b/tests/03-events.js
index 3552a7e..01ffe4e 100644
--- a/tests/03-events.js
+++ b/tests/03-events.js
@@ -1,7 +1,7 @@
 var helper = require("./test-helper.js"),
 	sliceArr = Array.prototype.slice;
 
-exports.dir = "/Events/";
+exports.dir = "Events";
 
 exports.test = function(test, cb){
 	var tokens = [], cbs;
diff --git a/tests/05-stream.js b/tests/05-stream.js
index 92f45bb..d3e376c 100644
--- a/tests/05-stream.js
+++ b/tests/05-stream.js
@@ -3,7 +3,7 @@ var helper = require("./test-helper.js"),
 	sliceArr = Array.prototype.slice,
 	fs = require("fs");
 
-exports.dir = "/Stream/";
+exports.dir = "Stream";
 
 exports.test = function(test, cb){
 	var tokens = [],

From 610da2c8515b24ba24742ff4200cf7cd5657c67b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 15 Feb 2013 10:35:45 +0100
Subject: [PATCH 254/450] don't run 03-rdf.js test

it currently fails, requires investigation
---
 tests/Feeds/{03-rdf.js => _03-rdf.js} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename tests/Feeds/{03-rdf.js => _03-rdf.js} (100%)

diff --git a/tests/Feeds/03-rdf.js b/tests/Feeds/_03-rdf.js
similarity index 100%
rename from tests/Feeds/03-rdf.js
rename to tests/Feeds/_03-rdf.js

From 07466909a9c86cddfa6c01eb34ba63394334afbe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 15 Feb 2013 10:37:13 +0100
Subject: [PATCH 255/450] renamed tests

---
 tests/00-runtests.js                 | 6 +++---
 tests/{03-events.js => 01-events.js} | 0
 tests/{05-stream.js => 02-stream.js} | 0
 tests/{02-feed.js => 03-feed.js}     | 0
 4 files changed, 3 insertions(+), 3 deletions(-)
 rename tests/{03-events.js => 01-events.js} (100%)
 rename tests/{05-stream.js => 02-stream.js} (100%)
 rename tests/{02-feed.js => 03-feed.js} (100%)

diff --git a/tests/00-runtests.js b/tests/00-runtests.js
index 6c6c9b1..834d913 100644
--- a/tests/00-runtests.js
+++ b/tests/00-runtests.js
@@ -6,9 +6,9 @@ var runCount = 0,
 	testCount = 0;
 
 [
- "./02-feed.js",
- "./03-events.js",
- "./05-stream.js"
+ "./01-events.js",
+ "./02-stream.js",
+ "./03-feed.js"
 ]
 .map(require)
 .forEach(function (test){
diff --git a/tests/03-events.js b/tests/01-events.js
similarity index 100%
rename from tests/03-events.js
rename to tests/01-events.js
diff --git a/tests/05-stream.js b/tests/02-stream.js
similarity index 100%
rename from tests/05-stream.js
rename to tests/02-stream.js
diff --git a/tests/02-feed.js b/tests/03-feed.js
similarity index 100%
rename from tests/02-feed.js
rename to tests/03-feed.js

From d1d9caed4dbd93b167d540f0cb7cc2de135b3315 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 15 Feb 2013 10:48:47 +0100
Subject: [PATCH 256/450] added semicolons & use EE#on in 02-stream.js

---
 tests/02-stream.js | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/02-stream.js b/tests/02-stream.js
index d3e376c..44fdad7 100644
--- a/tests/02-stream.js
+++ b/tests/02-stream.js
@@ -26,7 +26,7 @@ exports.test = function(test, cb){
 					event: name,
 					data: sliceArr.apply(arguments)
 				});
-			}
+			};
 		}});
 	}
 	else {
@@ -41,12 +41,12 @@ exports.test = function(test, cb){
 			}
 		};
 		helper.EVENTS.forEach(function(name){
-			stream._events[name] = function(){
+			stream.on(name, function(){
 				tokens.push({
 					event: name,
 					data: sliceArr.apply(arguments)
 				});
-			}
+			});
 		});
 	}
 	fs.createReadStream(__dirname + test.file).pipe(stream);

From 7c77a1f213c5d003b4871d882815d04f897b5e55 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 15 Feb 2013 10:49:15 +0100
Subject: [PATCH 257/450] changed how the end of all tests is shown

---
 tests/00-runtests.js | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/tests/00-runtests.js b/tests/00-runtests.js
index 834d913..f40ee10 100644
--- a/tests/00-runtests.js
+++ b/tests/00-runtests.js
@@ -3,7 +3,8 @@ var fs = require("fs"),
     assert = require("assert");
 
 var runCount = 0,
-	testCount = 0;
+	testCount = 0,
+	done = false;
 
 [
  "./01-events.js",
@@ -12,6 +13,8 @@ var runCount = 0,
 ]
 .map(require)
 .forEach(function (test){
+	console.log("\nStarting", test.dir, "\n----");
+
 	var dir = path.resolve(__dirname, test.dir);
 
 	//read files, load them, run them
@@ -33,17 +36,14 @@ var runCount = 0,
 			assert.deepEqual(file.expected, dom, "didn't get expected output");
 						
 			if(second){
-				runCount--;
 				testCount++;
+				if(!--runCount && done){
+					console.log("Total tests:", testCount);
+				}
 			}
 			else second = true;
 		});
 	});
-	console.log("->", test.dir, "started");
 });
 
-//log the results
-(function check(){
-	if(runCount !== 0) return process.nextTick(check);
-	console.log("Total tests:", testCount);
-}());
\ No newline at end of file
+var done = true; //started all tests
\ No newline at end of file

From 0494e9086ad9687410e9596aa690a417ea1ee011 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 15 Feb 2013 11:08:20 +0100
Subject: [PATCH 258/450] allow `>` at the beginning of a document

fixes #25

also allows `>`s to be at the beginning of text or after a `>`.
---
 lib/Parser.js                   |  6 +++---
 tests/Events/06-leading-lt.json | 22 ++++++++++++++++++++++
 2 files changed, 25 insertions(+), 3 deletions(-)
 create mode 100644 tests/Events/06-leading-lt.json

diff --git a/lib/Parser.js b/lib/Parser.js
index 841f433..b2be18a 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -4,7 +4,7 @@ function Parser(cbs, options){
 	this._options = options || defaultOpts;
 	this._cbs = cbs || defaultCbs;
 	this._buffer = "";
-	this._tagSep = "";
+	this._tagSep = ">";
 	this._stack = [];
 	this._wroteSpecial = false;
 	this._contentFlags = 0;
@@ -207,9 +207,9 @@ Parser.prototype._parseTags = function(force){
 			if(this._contentFlags !== 0){
 				this._writeSpecial(rawData, ">");
 			}
-			else if(rawData !== "" && this._cbs.ontext){
+			else if(this._cbs.ontext){
 				if(this._tagSep === ">") rawData += ">"; //it's the second > in a row
-				this._cbs.ontext(rawData);
+				if(rawData !== "") this._cbs.ontext(rawData);
 			}
 		}
 	}
diff --git a/tests/Events/06-leading-lt.json b/tests/Events/06-leading-lt.json
new file mode 100644
index 0000000..7c97777
--- /dev/null
+++ b/tests/Events/06-leading-lt.json
@@ -0,0 +1,22 @@
+{
+  "name": "leading lt",
+  "options": {
+    "handler": {},
+    "parser": {}
+  },
+  "html": ">a>",
+  "expected": [
+    {
+      "event": "text",
+      "data": [
+        ">"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "a>"
+      ]
+    }
+  ]
+}
\ No newline at end of file

From f707bd7ba01e8e93ef2e19cebc39a5529fb3ba3b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 15 Feb 2013 13:52:17 +0100
Subject: [PATCH 259/450] 2.5.2

---
 package.json | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/package.json b/package.json
index 46e855a..dece555 100644
--- a/package.json
+++ b/package.json
@@ -1,7 +1,7 @@
 {
 	"name": "htmlparser2",
 	"description": "Performance-optimized forgiving HTML/XML/RSS parser",
-	"version": "2.5.1",
+	"version": "2.5.2",
 	"author": "Felix Boehm <me@feedic.com>",
 	"keywords": ["html", "parser", "streams", "xml", "dom", "rss", "feed", "atom"],
 	"contributors": ["Chris Winberry <chris@winberry.net>"],
@@ -29,4 +29,4 @@
 		"type": "MIT",
 		"url": "http://github.com/tautologistics/node-htmlparser/raw/master/LICENSE"
 	}]
-}
\ No newline at end of file
+}

From 05a99ef1d76e95f102a912843f2c2c5e21e6b54d Mon Sep 17 00:00:00 2001
From: Kris Reeves <krisreeves@searchfanatics.com>
Date: Fri, 15 Feb 2013 19:49:02 -0500
Subject: [PATCH 260/450] Tests for changes.

---
 tests/Events/07-self-closing.json        | 67 ++++++++++++++++++++++++
 tests/Events/08-implicit-close-tags.json | 59 +++++++++++++++++++++
 2 files changed, 126 insertions(+)
 create mode 100644 tests/Events/07-self-closing.json
 create mode 100644 tests/Events/08-implicit-close-tags.json

diff --git a/tests/Events/07-self-closing.json b/tests/Events/07-self-closing.json
new file mode 100644
index 0000000..ae5c22e
--- /dev/null
+++ b/tests/Events/07-self-closing.json
@@ -0,0 +1,67 @@
+{
+    "name": "Self-closing tags",
+    "options": {
+        "handler": {
+
+            },
+        "parser": {
+
+            }
+    },
+    "html": "<a href=http://test.com/>Foo</a><hr />",
+    "expected": [
+		{
+			"event": "opentagname",
+			"data": [
+				"a"
+			]
+		},
+		{
+			"event": "opentag",
+			"data": [
+				"a",
+				{
+					"href": "http://test.com/"
+				}
+			]
+		},
+		{
+			"event": "attribute",
+			"data": [
+				"href",
+				"http://test.com/"
+			]
+		},
+		{
+			"event": "text",
+			"data": [
+				"Foo"
+			]
+		},
+		{
+			"event": "closetag",
+			"data": [
+				"a"
+			]
+		},
+		{
+			"event": "opentagname",
+			"data": [
+				"hr"
+			]
+		},
+		{
+			"event": "opentag",
+			"data": [
+				"hr",
+				{}
+			]
+		},
+		{
+			"event": "closetag",
+			"data": [
+				"hr"
+			]
+		}
+	]
+}
\ No newline at end of file
diff --git a/tests/Events/08-implicit-close-tags.json b/tests/Events/08-implicit-close-tags.json
new file mode 100644
index 0000000..0086f35
--- /dev/null
+++ b/tests/Events/08-implicit-close-tags.json
@@ -0,0 +1,59 @@
+{
+  "name": "Implicit close tags",
+  "options": {},
+  "html": "<ol><li class=test><div><table style=width:100%><tr><td colspan=2><h3>Heading</h3><tr><td><div>Div</div><td><div>Div2</div></table></div><li><div><h3>Heading 2</h3></div></li></ol>",
+  "expected": [
+    { "event": "opentagname", "data": [ "ol" ] },
+    { "event": "opentag", "data": [ "ol", {} ] },
+    { "event": "opentagname", "data": [ "li" ] },
+    { "event": "opentag", "data": [ "li", { "class": "test" } ] },
+    { "event": "attribute", "data": [ "class", "test" ] },
+    { "event": "opentagname", "data": [ "div" ] },
+    { "event": "opentag", "data": [ "div", {} ] },
+    { "event": "opentagname", "data": [ "table" ] },
+    { "event": "opentag", "data": [ "table", { "style": "width:100%" } ] },
+    { "event": "attribute", "data": [ "style", "width:100%" ] },
+    { "event": "opentagname", "data": [ "tr" ] },
+    { "event": "opentag", "data": [ "tr", {} ] },
+    { "event": "opentagname", "data": [ "td" ] },
+    { "event": "opentag", "data": [ "td", { "colspan": "2" } ] },
+    { "event": "attribute", "data": [ "colspan", "2" ] },
+    { "event": "opentagname", "data": [ "h3" ] },
+    { "event": "opentag", "data": [ "h3", {} ] },
+    { "event": "text", "data": [ "Heading" ] },
+    { "event": "closetag", "data": [ "h3" ] },
+    { "event": "closetag", "data": [ "td" ] },
+    { "event": "closetag", "data": [ "tr" ] },
+    { "event": "opentagname", "data": [ "tr" ] },
+    { "event": "opentag", "data": [ "tr", {} ] },
+    { "event": "opentagname", "data": [ "td" ] },
+    { "event": "opentag", "data": [ "td", {} ] },
+    { "event": "opentagname", "data": [ "div" ] },
+    { "event": "opentag", "data": [ "div", {} ] },
+    { "event": "text", "data": [ "Div" ] },
+    { "event": "closetag", "data": [ "div" ] },
+    { "event": "closetag", "data": [ "td" ] },
+    { "event": "opentagname", "data": [ "td" ] },
+    { "event": "opentag", "data": [ "td", {} ] },
+    { "event": "opentagname", "data": [ "div" ] },
+    { "event": "opentag", "data": [ "div", {} ] },
+    { "event": "text", "data": [ "Div2" ] },
+    { "event": "closetag", "data": [ "div" ] },
+    { "event": "closetag", "data": [ "td" ] },
+    { "event": "closetag", "data": [ "tr" ] },
+    { "event": "closetag", "data": [ "table" ] },
+    { "event": "closetag", "data": [ "div" ] },
+    { "event": "closetag", "data": [ "li" ] },
+    { "event": "opentagname", "data": [ "li" ] },
+    { "event": "opentag", "data": [ "li", {} ] },
+    { "event": "opentagname", "data": [ "div" ] },
+    { "event": "opentag", "data": [ "div", {} ] },
+    { "event": "opentagname", "data": [ "h3" ] },
+    { "event": "opentag", "data": [ "h3", {} ] },
+    { "event": "text", "data": [ "Heading 2" ] },
+    { "event": "closetag", "data": [ "h3" ] },
+    { "event": "closetag", "data": [ "div" ] },
+    { "event": "closetag", "data": [ "li" ] },
+    { "event": "closetag", "data": [ "ol" ] }
+  ]
+}
\ No newline at end of file

From fe6b8d6318c7e2f5eb6d72a37f52940f10695eaa Mon Sep 17 00:00:00 2001
From: Kris Reeves <krisreeves@searchfanatics.com>
Date: Sat, 16 Feb 2013 16:09:18 -0500
Subject: [PATCH 261/450] Fixes discussed in
 https://github.com/fb55/node-htmlparser/pull/28

---
 lib/Parser.js | 44 ++++++++++++++++++++++++++------------------
 1 file changed, 26 insertions(+), 18 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index 0cfcdf1..cacc1bb 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -67,22 +67,30 @@ var defaultCbs = {
 	*/
 };
 
-var formTags = {'input':1, 'option':1, 'optgroup':1, 'select':1, 'button':1, 'datalist':1, 'textarea':1};
+var formTags = {
+	input: true,
+	option: true,
+	optgroup: true,
+	select: true,
+	button: true,
+	datalist: true,
+	textarea: true
+};
 var openImpliesClose = {
-	'tr'      : {'tr':1, 'th':1, 'td':1},
-	'th'      : {'th':1},
-	'td'      : {'thead':1, 'td':1},
-	'body'    : {'head':1, 'link':1, 'script':1},
-	'li'      : {'li':1},
-	'p'       : {'p':1},
-	'select'  : formTags,
-	'input'   : formTags,
-	'output'  : formTags,
-	'button'  : formTags,
-	'datalist': formTags,
-	'textarea': formTags,
-	'option'  : {'option':1},
-	'optgroup': {'optgroup':1}
+	tr      : { tr:true, th:true, td:true },
+	th      : { th:true },
+	td      : { thead:true, td:true },
+	body    : { head:true, link:true, script:true },
+	li      : { li:true },
+	p       : { p:true },
+	select  : formTags,
+	input   : formTags,
+	output  : formTags,
+	button  : formTags,
+	datalist: formTags,
+	textarea: formTags,
+	option  : { option:true },
+	optgroup: { optgroup:true }
 };
 
 //Parses a complete HTML and pushes it to the handler
@@ -350,7 +358,7 @@ Parser.prototype._processOpenTag = function(data){
 	if(this._options.xmlMode){ /*do nothing*/ }
 	else if(name === "script") type = ElementType.Script;
 	else if(name === "style")  type = ElementType.Style;
-	if (name in openImpliesClose) {
+	if (!this._options.xmlMode && name in openImpliesClose) {
 		var el;
 		while ((el = this._stack[this._stack.length-1]) in openImpliesClose[name]) {
 			this._processCloseTag(el);
@@ -369,7 +377,7 @@ Parser.prototype._processOpenTag = function(data){
 	 * Invalid HTML self-closing tag syntax is ignored (treated as an opening tag).
 	 * Foreign elements use XML rules
 	 */
-	if(!Object.keys(attributes).length && data.substr(-1) === "/" || (name in emptyTags && !this._options.xmlMode)){
+	if((!this._options.xmlMode && name in emptyTags) || (data.substr(-1) === "/" && data.replace(_reAttrib, "").substr(-1) === "/")){
 		if(this._cbs.onclosetag) this._cbs.onclosetag(name);
 	} else {
 		if(type !== ElementType.Tag){
@@ -386,4 +394,4 @@ Parser.prototype._handleError = function(error){
 	else throw error;
 };
 
-module.exports = Parser;
\ No newline at end of file
+module.exports = Parser;

From f1627673a1a0daf475413f97c2875bed3ff65e80 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20B=C3=B6hm?= <me@feedic.com>
Date: Sat, 16 Feb 2013 23:55:51 +0100
Subject: [PATCH 262/450] Update README.md

The example checked the `language` attribute. Changed it to `type`.
---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 2df1a71..9add2fb 100644
--- a/README.md
+++ b/README.md
@@ -11,7 +11,7 @@ A forgiving HTML/XML/RSS parser written in JS for NodeJS. The parser can handle
 var htmlparser = require("htmlparser2");
 var parser = new htmlparser.Parser({
 	onopentag: function(name, attribs){
-		if(name === "script" && attribs["language"] === "javascript"){
+		if(name === "script" && attribs.type === "text/javascript"){
 			console.log("JS! Hooray!");
 		}
 	},
@@ -69,4 +69,4 @@ The parser now provides a callback interface close to [sax.js](https://github.co
 
 The support for location data and verbose output was removed a couple of versions ago. It's still available in the [verbose branch](https://github.com/FB55/node-htmlparser/tree/verbose). 
 
-The `DefaultHandler` and the `RssHandler` were renamed to clarify their purpose (to `DomHandler` and `FeedHandler`). The old names are still available when requiring `htmlparser2`, so your code should work as expected.
\ No newline at end of file
+The `DefaultHandler` and the `RssHandler` were renamed to clarify their purpose (to `DomHandler` and `FeedHandler`). The old names are still available when requiring `htmlparser2`, so your code should work as expected.

From c0bd69c48f432db0279d871e731ac3f0445c6f25 Mon Sep 17 00:00:00 2001
From: Mike Pennisi <mike@mikepennisi.com>
Date: Wed, 20 Feb 2013 20:01:48 -0500
Subject: [PATCH 263/450] Do not parse CDATA-like text inside special tags

Special nodes (e.g. script tags, style tags, comment nodes, etc.) can
contain only text nodes.
---
 lib/Parser.js                      |  2 +-
 tests/Events/05-cdata-special.json | 28 ++++++++++------------------
 2 files changed, 11 insertions(+), 19 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index cacc1bb..030d221 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -221,6 +221,7 @@ Parser.prototype._parseTags = function(force){
 				}
 				this._processCloseTag(elementData);
 			}
+			else if(this._contentFlags !== 0) this._writeSpecial(rawData, lastTagSep);
 			else if(elementData.charAt(0) === "!"){
 				if(elementData.substr(1, 7) === "[CDATA["){
 					this._contentFlags |= SpecialTags[ElementType.CDATA];
@@ -241,7 +242,6 @@ Parser.prototype._parseTags = function(force){
 					);
 				}
 			}
-			else if(this._contentFlags !== 0) this._writeSpecial(rawData, lastTagSep);
 			else if(elementData.charAt(0) === "?"){
 				if(this._cbs.onprocessinginstruction){
 					this._cbs.onprocessinginstruction(
diff --git a/tests/Events/05-cdata-special.json b/tests/Events/05-cdata-special.json
index 4b271ef..d8b1bc9 100644
--- a/tests/Events/05-cdata-special.json
+++ b/tests/Events/05-cdata-special.json
@@ -1,5 +1,5 @@
 {
-  "name": "CDATA",
+  "name": "CDATA (inside special)",
   "options": {
     "handler": {},
     "parser": {}
@@ -25,66 +25,58 @@
         "/*"
       ]
     },
-    {
-      "event": "cdatastart",
-      "data": []
-    },
     {
       "event": "text",
       "data": [
-        "*/ asdf >"
+        "<![CDATA[*/ asdf "
       ]
     },
     {
       "event": "text",
       "data": [
-        "<"
+        ">"
       ]
     },
     {
       "event": "text",
       "data": [
-        "asdf>"
+        "<asdf"
       ]
     },
     {
       "event": "text",
       "data": [
-        "<"
+        ">"
       ]
     },
     {
       "event": "text",
       "data": [
-        "/adsf>"
+        "</adsf"
       ]
     },
     {
       "event": "text",
       "data": [
-        "<"
+        ">"
       ]
     },
     {
       "event": "text",
       "data": [
-        ">"
+        "<"
       ]
     },
     {
       "event": "text",
       "data": [
-        " fo/*"
+        "> fo/*]]"
       ]
     },
-    {
-      "event": "cdataend",
-      "data": []
-    },
     {
       "event": "text",
       "data": [
-        "*/"
+        ">*/"
       ]
     },
     {

From 875600190022d845bfd72af8b648bd5768665ac6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 17 Mar 2013 23:14:35 +0100
Subject: [PATCH 264/450] 2.6.0

---
 package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package.json b/package.json
index dece555..12a04ed 100644
--- a/package.json
+++ b/package.json
@@ -1,7 +1,7 @@
 {
 	"name": "htmlparser2",
 	"description": "Performance-optimized forgiving HTML/XML/RSS parser",
-	"version": "2.5.2",
+	"version": "2.6.0",
 	"author": "Felix Boehm <me@feedic.com>",
 	"keywords": ["html", "parser", "streams", "xml", "dom", "rss", "feed", "atom"],
 	"contributors": ["Chris Winberry <chris@winberry.net>"],

From 5e6fcb3bdc8806c6a36743c14681862da212a952 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Thu, 21 Mar 2013 21:26:19 +0100
Subject: [PATCH 265/450] landed first version of FSM based tokenizer

fsm style taken from creationix/jsonparse

support for special tags (<script> & <style>) is missing
---
 lib/Tokenizer.js | 349 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 349 insertions(+)
 create mode 100644 lib/Tokenizer.js

diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
new file mode 100644
index 0000000..bb0cd4f
--- /dev/null
+++ b/lib/Tokenizer.js
@@ -0,0 +1,349 @@
+module.exports = Parser;
+
+var WritableStream = require("stream").Writable,
+
+    i = 0,
+
+    TEXT = i++,
+    TAG_START = i++, //after <
+    IN_TAG_NAME = i++,
+    CLOSING_TAG_START = i++,
+    IN_CLOSING_TAG_NAME = i++,
+    AFTER_CLOSING_TAG_NAME = i++,
+
+    //attributes
+    BEFORE_ATTRIBUTE_NAME = i++,
+    IN_ATTRIBUTE_NAME = i++,
+    AFTER_ATTRIBUTE_NAME = i++,
+    BEFORE_ATTRIBUTE_VALUE = i++,
+    IN_ATTRIBUTE_VALUE_DOUBLE_QUOTES = i++, // "
+    IN_ATTRIBUTE_VALUE_SINGLE_QUOTES = i++, // '
+    IN_ATTRIBUTE_VALUE_NO_QUOTES = i++,
+
+    //declarations
+    DECLARATION_START = i++, // !
+    IN_DECLARATION = i++,
+
+    //processing instructions
+    IN_PROCESSING_INSTRUCTION = i++, // ?
+
+    //comments
+    BEFORE_COMMENT = i++,
+    IN_COMMENT = i++,
+    COMMENT_END_1 = i++,
+    COMMENT_END_2 = i++,
+
+    //cdata
+    CDATA_1 = i++, // [
+    CDATA_2 = i++, // C
+    CDATA_3 = i++, // D
+    CDATA_4 = i++, // A
+    CDATA_5 = i++, // T
+    CDATA_6 = i++, // A
+    IN_CDATA = i++,// [
+    CDATA_END_1 = i++, // ]
+    CDATA_END_2 = i++; // ]
+
+    //TODO add logic to handle special tags
+
+function code(c){
+	return c.charCodeAt(0);
+}
+
+function whitespace(c){
+	return c === code(" ") || c === code("\t") || c === code("\r") || c === code("\n");
+}
+
+function Parser(options){
+	this._state = TEXT;
+	this._buffer = null;
+	this._sectionStart = 0;
+	this._index = 0;
+	this._options = options;
+
+	WritableStream.call(this, options);
+}
+
+require("util").inherits(Parser, WritableStream);
+
+Parser.prototype._write = function(chunk, encoding, cb){
+	if(this._buffer === null) this._buffer = chunk;
+	else this._buffer = Buffer.concat([this._buffer, chunk]);
+
+	while(this._index < this._buffer.length){
+		var c = this._buffer[this._index];
+		if(this._state === TEXT){
+			if(c === code("<")){
+				this._emitIfToken("text");
+				this._state = TAG_START;
+			}
+		} else if(this._state === TAG_START){
+			if(c === code("!")){
+				this._state = DECLARATION_START;
+				this._sectionStart = this._index + 1;
+			} else if(c === code("?")){
+				this._state = IN_PROCESSING_INSTRUCTION;
+				this._sectionStart = this._index + 1;
+			} else if(c === code("/")){
+				this._state = CLOSING_TAG_START;
+			} else if(!whitespace(c)){
+				this._state = IN_TAG_NAME;
+				this._sectionStart = this._index;
+			}
+			//TODO handle ">"
+		} else if(this._state === IN_TAG_NAME){
+			if(c === code("/")){
+				this._emitToken("opentagname");
+				this.emit("selfclosingtag");
+				this._state = AFTER_CLOSING_TAG_NAME;
+			} else if(c === code(">")){
+				this._emitToken("opentagname");
+				this._state = TEXT;
+				this._sectionStart = this._index + 1;
+			} else if(whitespace(c)){
+				this._emitToken("opentagname");
+				this._state = BEFORE_ATTRIBUTE_NAME;
+			}
+		} else if(this._state === CLOSING_TAG_START){
+			if(!whitespace(c)){
+				this._state = IN_CLOSING_TAG_NAME;
+				this._sectionStart = this._index;
+			}
+			// TODO handle ">"
+		} else if(this._state === IN_CLOSING_TAG_NAME){
+			if(c === code(">")){
+				this._emitToken("closetag");
+				this._state = TEXT;
+				this._sectionStart = this._index + 1;
+			} else if(whitespace(c)){
+				this._emitToken("closetag");
+				this._state = AFTER_CLOSING_TAG_NAME;
+			}
+		} else if(this._state === AFTER_CLOSING_TAG_NAME){
+			//skip everything until ">"
+			if(c === code(">")){
+				this._state = TEXT;
+				this._sectionStart = this._index + 1;
+			}
+		}
+
+		/*
+		*	attributes
+		*/
+		else if(this._state === BEFORE_ATTRIBUTE_NAME){
+			if(c === code("/")){
+				this.emit("selfclosingtag");
+				this._state = AFTER_CLOSING_TAG_NAME;
+			} else if(c === code(">")){
+				this._state = TEXT;
+				this._sectionStart = this._index + 1;
+			} else if(!whitespace(c)){
+				this._state = IN_ATTRIBUTE_NAME;
+				this._sectionStart = this._index;
+			}
+		} else if(this._state === IN_ATTRIBUTE_NAME){
+			if(c === code("=")){
+				this._emitIfToken("attribname");
+				this._state = BEFORE_ATTRIBUTE_VALUE;
+			} else if(c === code("/")){
+				this._emitIfToken("attribname");
+				this.emit("selfclosingtag");
+				this._state = AFTER_CLOSING_TAG_NAME;
+			} else if(c === code(">")){
+				this._emitIfToken("attribname");
+				this._state = TEXT;
+				this._sectionStart = this._index + 1;
+			} else if(whitespace(c)){
+				this._emitIfToken("attribname");
+				this._state = AFTER_ATTRIBUTE_NAME;
+			}
+		} else if(this._state === AFTER_ATTRIBUTE_NAME){
+			if(c === code("=")){
+				this._state = BEFORE_ATTRIBUTE_VALUE;
+			} else if(c === code("/")){
+				this.emit("selfclosingtag");
+				this._state = AFTER_CLOSING_TAG_NAME;
+			} else if(c === code(">")){
+				this._state = TEXT;
+				this._sectionStart = this._index + 1;
+			} else if(!whitespace(c)){
+				this._state = IN_ATTRIBUTE_NAME;
+				this._sectionStart = this._index;
+			}
+		} else if(this._state === BEFORE_ATTRIBUTE_VALUE){
+			if(c === code("\"")){
+				this._state = IN_ATTRIBUTE_VALUE_DOUBLE_QUOTES;
+				this._sectionStart = this._index + 1;
+			} else if(c === code("'")){
+				this._state = IN_ATTRIBUTE_VALUE_SINGLE_QUOTES;
+				this._sectionStart = this._index + 1;
+			} else if(!whitespace(c)){
+				this._state = IN_ATTRIBUTE_VALUE_NO_QUOTES;
+				this._sectionStart = this._index;
+			}
+		} else if(this._state === IN_ATTRIBUTE_VALUE_DOUBLE_QUOTES){
+			if(c === code("\"")){
+				this._emitToken("attribvalue");
+				this._state = BEFORE_ATTRIBUTE_NAME;
+			}
+		} else if(this._state === IN_ATTRIBUTE_VALUE_SINGLE_QUOTES){
+			if(c === code("'")){
+				this._emitToken("attribvalue");
+				this._state = BEFORE_ATTRIBUTE_NAME;	
+			}
+		} else if(this._state === IN_ATTRIBUTE_VALUE_NO_QUOTES){
+			if(c === code("/")){
+				this._emitToken("attribvalue");
+				this.emit("selfclosingtag");
+				this._state = AFTER_CLOSING_TAG_NAME;
+			} else if(c === code(">")){
+				this._emitToken("attribvalue");
+				this._state = TEXT;
+				this._sectionStart = this._index + 1;
+			} else if(whitespace(c)){
+				this._emitToken("attribvalue");
+				this._state = BEFORE_ATTRIBUTE_NAME;
+			}
+		}
+
+		/*
+		*	declarations
+		*/
+		else if(this._state === DECLARATION_START){
+			if(c === code("[")) this._state = CDATA_1;
+			else if(c === code("-")) this._state = BEFORE_COMMENT;
+			else this._state = IN_DECLARATION;
+		} else if(this._state === IN_DECLARATION){
+			if(c === code(">")){
+				this._emitToken("declaration");
+				this._state = TEXT;
+				this._sectionStart = this._index + 1;
+			}
+		}
+
+		/*
+		*	processing instructions
+		*/
+		else if(this._state === IN_PROCESSING_INSTRUCTION){
+			if(c === code(">")){
+				this._emitToken("processinginstruction");
+				this._state = TEXT;
+				this._sectionStart = this._index + 1;
+			}
+		}
+
+		/*
+		*	comments
+		*/
+		else if(this._state === BEFORE_COMMENT){
+			if(c === code("-")){
+				this._state = IN_COMMENT;
+				this._sectionStart = this._index + 1;
+			} else {
+				this._state = IN_DECLARATION;
+			}
+		} else if(this._state === IN_COMMENT){
+			if(c === code("-")) this._state = COMMENT_END_1;
+		} else if(this._state === COMMENT_END_1){
+			if(c === code("-")) this._state = COMMENT_END_2;
+			else this._state = IN_COMMENT;
+		} else if(this._state === COMMENT_END_2){
+			if(c === code(">")){
+				//remove 2 trailing chars
+				this.emit("comment", this._buffer.toString("utf8", this._sectionStart, this._index - 2));
+				this._state = TEXT;
+				this._sectionStart = this._index + 1;
+			} else {
+				this._state = IN_COMMENT;
+			}
+		}
+
+		/*
+		*	cdata
+		*/
+		else if(this._state === CDATA_1){
+			if(c === code("C")) this._state = CDATA_2;
+			else this._state = IN_DECLARATION;
+		} else if(this._state === CDATA_2){
+			if(c === code("D")) this._state = CDATA_3;
+			else this._state = IN_DECLARATION;
+		} else if(this._state === CDATA_3){
+			if(c === code("A")) this._state = CDATA_4;
+			else this._state = IN_DECLARATION;
+		} else if(this._state === CDATA_4){
+			if(c === code("T")) this._state = CDATA_5;
+			else this._state = IN_DECLARATION;
+		} else if(this._state === CDATA_5){
+			if(c === code("A")) this._state = CDATA_6;
+			else this._state = IN_DECLARATION;
+		} else if(this._state === CDATA_6){
+			if(c === code("[")){
+				this._state = IN_CDATA;
+				this._sectionStart = this._index + 1;
+			} else {
+				this._state = IN_DECLARATION;
+			}
+		} else if(this._state === IN_CDATA){
+			if(c === code("]")) this._state = CDATA_END_1;
+		} else if(this._state === CDATA_END_1){
+			if(c === code("]")) this._state = CDATA_END_2;
+			else this._state = IN_CDATA;
+		} else if(this._state === CDATA_END_2){
+			if(c === code(">")){
+				//remove 2 trailing chars
+				this.emit("cdata", this._buffer.toString("utf8", this._sectionStart, this._index - 2));
+				this._state = TEXT;
+				this._sectionStart = this._index + 1;
+			} else {
+				this._state = IN_CDATA;
+			}
+		} else {
+			throw Error("unknown state " + this._state);
+		}
+
+		this._index++;
+	}
+
+	//cleanup
+	if(this._sectionStart === -1){
+		this._buffer = null;
+	} else {
+		this._sectionStart = 0;
+
+		if(this._sectionStart === this._index - 1){
+			this._buffer = null;
+		} else {
+			this._buffer = this._buffer.slice(this._sectionStart);
+		}
+	}
+
+	cb();
+};
+
+Parser.prototype._emitToken = function(name){
+	this.emit(name, this._buffer.toString("utf8", this._sectionStart, this._index));
+	this._sectionStart = -1;
+};
+
+Parser.prototype._emitIfToken = function(name){
+	if(this._index > this._sectionStart){
+		this.emit(name, this._buffer.toString("utf8", this._sectionStart, this._index));
+	}
+	this._sectionStart = -1;
+};
+
+/*
+//overwritten for better debuggability
+Parser.prototype.emit = function(){
+	process.stdout.write("[" + this._state + "]\t");
+	console.log.apply(null, [].map.call(arguments, Function.prototype.call, String.prototype.trim));
+	WritableStream.prototype.emit.apply(this, arguments);
+};
+
+Parser.prototype.end = function(){
+	WritableStream.prototype.end.apply(this, arguments);
+
+	if(this._state === TEXT) return;
+	console.log("the game must go on!", this._state);
+};
+*/
\ No newline at end of file

From 7be1360b817666b9943eddeea207bcc19b05fe87 Mon Sep 17 00:00:00 2001
From: eonlepapillon <eonlepappillon@gmail.com>
Date: Wed, 27 Mar 2013 16:53:25 +0100
Subject: [PATCH 266/450] Add a new test for issue #36

Only finds first attribute when there is no whitespace between
attributes.

- Added a html example
- Added a test
---
 tests/Documents/Attributes.html | 16 ++++++++
 tests/Events/09-attributes.json | 68 +++++++++++++++++++++++++++++++++
 2 files changed, 84 insertions(+)
 create mode 100644 tests/Documents/Attributes.html
 create mode 100644 tests/Events/09-attributes.json

diff --git a/tests/Documents/Attributes.html b/tests/Documents/Attributes.html
new file mode 100644
index 0000000..f3bfa09
--- /dev/null
+++ b/tests/Documents/Attributes.html
@@ -0,0 +1,16 @@
+<!doctype html>
+<html>
+<head>
+	<title>Attributes test</title>
+</head>
+<body>
+	<!-- Normal attributes -->
+	<button id="test0" class="value0" title="value1">class="value0" title="value1"</button>
+
+	<!-- Attributes with no quotes or value -->
+	<button id="test1" class=value2 disabled>class=value2 disabled</button>
+
+	<!-- Attributes with no space between them. No valid, but accepted by the browser -->
+	<button id="test2" class="value4"title="value5">class="value4"title="value5"</button>
+</body>
+</html>
\ No newline at end of file
diff --git a/tests/Events/09-attributes.json b/tests/Events/09-attributes.json
new file mode 100644
index 0000000..8bd024b
--- /dev/null
+++ b/tests/Events/09-attributes.json
@@ -0,0 +1,68 @@
+{
+  "name": "attributes (no white space, no value, no quotes)",
+  "options": {
+    "handler": {},
+    "parser": {}
+  },
+  "html": "<button class=\"test0\"title=\"test1\" disabled value=test2>adsf</button>",
+  "expected": [
+    {
+      "event": "opentagname",
+      "data": [
+        "button"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "button",
+        {
+          "class": "test0",
+          "title": "test1",
+          "disabled": "",
+          "value": "test2"
+        }
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "class",
+        "test0"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "title",
+        "test1"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "disabled",
+        ""
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "value",
+        "test2"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "adsf"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "button"
+      ]
+    }
+  ]
+}
\ No newline at end of file

From d90e7a3ee43c8e16c7510ce7b1a6c108c8081302 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 30 Mar 2013 11:13:47 +0100
Subject: [PATCH 267/450] added logic for special tags

---
 lib/Tokenizer.js | 217 ++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 204 insertions(+), 13 deletions(-)

diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
index bb0cd4f..b3ff573 100644
--- a/lib/Tokenizer.js
+++ b/lib/Tokenizer.js
@@ -42,9 +42,31 @@ var WritableStream = require("stream").Writable,
     CDATA_6 = i++, // A
     IN_CDATA = i++,// [
     CDATA_END_1 = i++, // ]
-    CDATA_END_2 = i++; // ]
+    CDATA_END_2 = i++, // ]
 
-    //TODO add logic to handle special tags
+    //special tags
+    SPECIAL_START = i++, //S
+    SPECIAL_END = i++,   //S
+
+    SCRIPT_1 = i++, //C
+    SCRIPT_2 = i++, //R
+    SCRIPT_3 = i++, //I
+    SCRIPT_4 = i++, //P
+    SCRIPT_5 = i++, //T
+    SCRIPT_END_1 = i++, //C
+    SCRIPT_END_2 = i++, //R
+    SCRIPT_END_3 = i++, //I
+    SCRIPT_END_4 = i++, //P
+    SCRIPT_END_5 = i++, //T
+
+    STYLE_1 = i++, //T
+    STYLE_2 = i++, //Y
+    STYLE_3 = i++, //L
+    STYLE_4 = i++, //E
+    STYLE_END_1 = i++, //T
+    STYLE_END_2 = i++, //Y
+    STYLE_END_3 = i++, //L
+    STYLE_END_4 = i++; //E
 
 function code(c){
 	return c.charCodeAt(0);
@@ -60,6 +82,7 @@ function Parser(options){
 	this._sectionStart = 0;
 	this._index = 0;
 	this._options = options;
+	this._special = 0; // 1 for script, 2 for style
 
 	WritableStream.call(this, options);
 }
@@ -76,21 +99,38 @@ Parser.prototype._write = function(chunk, encoding, cb){
 			if(c === code("<")){
 				this._emitIfToken("text");
 				this._state = TAG_START;
+				if(this._special > 0){
+					//save the position
+					this._sectionStart = this._index;
+				}
 			}
 		} else if(this._state === TAG_START){
-			if(c === code("!")){
-				this._state = DECLARATION_START;
-				this._sectionStart = this._index + 1;
-			} else if(c === code("?")){
-				this._state = IN_PROCESSING_INSTRUCTION;
-				this._sectionStart = this._index + 1;
-			} else if(c === code("/")){
+			if(c === code("/")){
 				this._state = CLOSING_TAG_START;
-			} else if(!whitespace(c)){
+			} else if(this._special === 0) {
+				if(whitespace(c));
+				else if(c === code("!")){
+					this._state = DECLARATION_START;
+					this._sectionStart = this._index + 1;
+				} else if(c === code("?")){
+					this._state = IN_PROCESSING_INSTRUCTION;
+					this._sectionStart = this._index + 1;
+				} else if(
+					(!this._options || !this._options.xmlMode) &&
+					(c === code("s") || c === code("S"))
+				){
+					this._state = SPECIAL_START;
+					this._sectionStart = this._index;
+				} else {
+					this._state = IN_TAG_NAME;
+					this._sectionStart = this._index;
+				}
+			} else {
 				this._state = IN_TAG_NAME;
 				this._sectionStart = this._index;
 			}
 			//TODO handle ">"
+			//TODO remove redundant else
 		} else if(this._state === IN_TAG_NAME){
 			if(c === code("/")){
 				this._emitToken("opentagname");
@@ -105,7 +145,12 @@ Parser.prototype._write = function(chunk, encoding, cb){
 				this._state = BEFORE_ATTRIBUTE_NAME;
 			}
 		} else if(this._state === CLOSING_TAG_START){
-			if(!whitespace(c)){
+			if(whitespace(c));
+			else if(this._special > 0){
+				if(c === code("s") || c === code("S")){
+					this._state = SPECIAL_END;
+				}
+			} else {
 				this._state = IN_CLOSING_TAG_NAME;
 				this._sectionStart = this._index;
 			}
@@ -297,8 +342,154 @@ Parser.prototype._write = function(chunk, encoding, cb){
 			} else {
 				this._state = IN_CDATA;
 			}
-		} else {
-			throw Error("unknown state " + this._state);
+		}
+
+		/*
+		* special tags
+		*/
+		else if(this._state === SPECIAL_START){
+			if(c === code("c") || c === code("C")){
+				this._state = SCRIPT_1;
+			} else if(c === code("t") || c === code("T")){
+				this._state = STYLE_1;
+			} else {
+				this._state = IN_TAG_NAME;
+				this._index--; //consume the token again
+			}
+		} else if(this._state === SPECIAL_END){
+			if(this._special === 1 && (c === code("c") || c === code("C"))){
+				this._state = SCRIPT_END_1;
+			} else if(this._special === 2 && (c === code("t") || c === code("T"))){
+				this._state = STYLE_END_1;
+			} 
+			else this._state = TEXT;
+		}
+
+		/*
+		* script
+		*/
+		else if(this._state === SCRIPT_1){
+			if(c === code("r") || c === code("R")){
+				this._state = SCRIPT_2;
+			} else {
+				this._state = IN_TAG_NAME;
+				this._index--; //consume the token again
+			}
+		} else if(this._state === SCRIPT_2){
+			if(c === code("i") || c === code("I")){
+				this._state = SCRIPT_3;
+			} else {
+				this._state = IN_TAG_NAME;
+				this._index--; //consume the token again
+			}
+		} else if(this._state === SCRIPT_3){
+			if(c === code("p") || c === code("P")){
+				this._state = SCRIPT_4;
+			} else {
+				this._state = IN_TAG_NAME;
+				this._index--; //consume the token again
+			}
+		} else if(this._state === SCRIPT_4){
+			if(c === code("t") || c === code("T")){
+				this._state = SCRIPT_5;
+			} else {
+				this._state = IN_TAG_NAME;
+				this._index--; //consume the token again
+			}
+		} else if(this._state === SCRIPT_5){
+			if(c === code("/") || c === code(">") || whitespace(c)){
+				this._special = 1;
+			}
+			this._state = IN_TAG_NAME;
+			this._index--; //consume the token again
+		}
+
+		else if(this._state === SCRIPT_END_1){
+			if(c === code("r") || c === code("R")){
+				this._state = SCRIPT_END_2;
+			} 
+			else this._state = TEXT;
+		} else if(this._state === SCRIPT_END_2){
+			if(c === code("i") || c === code("I")){
+				this._state = SCRIPT_END_3;
+			} 
+			else this._state = TEXT;
+		} else if(this._state === SCRIPT_END_3){
+			if(c === code("p") || c === code("P")){
+				this._state = SCRIPT_END_4;
+			} 
+			else this._state = TEXT;
+		} else if(this._state === SCRIPT_END_4){
+			if(c === code("t") || c === code("T")){
+				this._state = SCRIPT_END_5;
+			} 
+			else this._state = TEXT;
+		} else if(this._state === SCRIPT_END_5){
+			if(c === code(">") || whitespace(c)){
+				this._state = IN_CLOSING_TAG_NAME;
+				this._index--; //reconsume the token
+			} 
+			else this._state = TEXT;
+		}
+
+		/*
+		* style
+		*/
+		else if(this._state === STYLE_1){
+			if(c === code("y") || c === code("Y")){
+				this._state = STYLE_2;
+			} else {
+				this._state = IN_TAG_NAME;
+				this._index--; //consume the token again
+			}
+		} else if(this._state === STYLE_2){
+			if(c === code("l") || c === code("L")){
+				this._state = STYLE_3;
+			} else {
+				this._state = IN_TAG_NAME;
+				this._index--; //consume the token again
+			}
+		} else if(this._state === STYLE_3){
+			if(c === code("e") || c === code("E")){
+				this._state = STYLE_4;
+			} else {
+				this._state = IN_TAG_NAME;
+				this._index--; //consume the token again
+			}
+		} else if(this._state === STYLE_4){
+			if(c === code("/") || c === code(">") || whitespace(c)){
+				this._special = 2;
+			}
+			this._state = IN_TAG_NAME;
+			this._index--; //consume the token again
+		}
+
+		else if(this._state === STYLE_END_1){
+			if(c === code("y") || c === code("Y")){
+				this._state = STYLE_END_2;
+			} 
+			else this._state = TEXT;
+		} else if(this._state === STYLE_END_2){
+			if(c === code("l") || c === code("L")){
+				this._state = STYLE_END_3;
+			} 
+			else this._state = TEXT;
+		} else if(this._state === STYLE_END_3){
+			if(c === code("e") || c === code("E")){
+				this._state = STYLE_END_4;
+			} 
+			else this._state = TEXT;
+		} else if(this._state === STYLE_END_4){
+			if(c === code(">") || whitespace(c)){
+				this._state = IN_CLOSING_TAG_NAME;
+				this._index--; //reconsume the token
+			} 
+			else this._state = TEXT;
+		}
+
+
+		else {
+			this.emit("error", Error("unknown state"), this._state);
 		}
 
 		this._index++;

From aa19a0b33cbeb31efc201a695871a65de3138c40 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 30 Mar 2013 11:16:54 +0100
Subject: [PATCH 268/450] [tokenizer] don't fail on `< >` and `< / >`

they are now emitted as text
---
 lib/Tokenizer.js | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
index b3ff573..6784363 100644
--- a/lib/Tokenizer.js
+++ b/lib/Tokenizer.js
@@ -99,13 +99,12 @@ Parser.prototype._write = function(chunk, encoding, cb){
 			if(c === code("<")){
 				this._emitIfToken("text");
 				this._state = TAG_START;
-				if(this._special > 0){
-					//save the position
-					this._sectionStart = this._index;
-				}
+				this._sectionStart = this._index;
 			}
 		} else if(this._state === TAG_START){
-			if(c === code("/")){
+			if(c === code(">")){
+				this._state = TEXT;
+			} else if(c === code("/")){
 				this._state = CLOSING_TAG_START;
 			} else if(this._special === 0) {
 				if(whitespace(c));
@@ -129,7 +128,6 @@ Parser.prototype._write = function(chunk, encoding, cb){
 				this._state = IN_TAG_NAME;
 				this._sectionStart = this._index;
 			}
-			//TODO handle ">"
 			//TODO remove redundant else
 		} else if(this._state === IN_TAG_NAME){
 			if(c === code("/")){
@@ -146,7 +144,9 @@ Parser.prototype._write = function(chunk, encoding, cb){
 			}
 		} else if(this._state === CLOSING_TAG_START){
 			if(whitespace(c));
-			else if(this._special > 0){
+			else if(c === code(">")){
+				this._state = TEXT;
+			} else if(this._special > 0){
 				if(c === code("s") || c === code("S")){
 					this._state = SPECIAL_END;
 				}
@@ -154,7 +154,6 @@ Parser.prototype._write = function(chunk, encoding, cb){
 				this._state = IN_CLOSING_TAG_NAME;
 				this._sectionStart = this._index;
 			}
-			// TODO handle ">"
 		} else if(this._state === IN_CLOSING_TAG_NAME){
 			if(c === code(">")){
 				this._emitToken("closetag");

From 1bc6568a21ae360e06dde1d46853f93a55d3ebe8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 30 Mar 2013 11:34:25 +0100
Subject: [PATCH 269/450] [tokenizer] fixed ordering in cleanup

---
 lib/Tokenizer.js | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
index 6784363..094dca9 100644
--- a/lib/Tokenizer.js
+++ b/lib/Tokenizer.js
@@ -498,13 +498,13 @@ Parser.prototype._write = function(chunk, encoding, cb){
 	if(this._sectionStart === -1){
 		this._buffer = null;
 	} else {
-		this._sectionStart = 0;
-
 		if(this._sectionStart === this._index - 1){
 			this._buffer = null;
 		} else {
 			this._buffer = this._buffer.slice(this._sectionStart);
 		}
+
+		this._sectionStart = 0;
 	}
 
 	cb();

From 400bf432bc7607d5674c7e53149885863db2aa7b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 30 Mar 2013 11:54:08 +0100
Subject: [PATCH 270/450] [tokenizer] overwrite WritableStream#end, emit
 everything that's left

---
 lib/Tokenizer.js | 24 +++++++++++++++++-------
 1 file changed, 17 insertions(+), 7 deletions(-)

diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
index 094dca9..e2925fd 100644
--- a/lib/Tokenizer.js
+++ b/lib/Tokenizer.js
@@ -510,6 +510,23 @@ Parser.prototype._write = function(chunk, encoding, cb){
 	cb();
 };
 
+Parser.prototype.end = function(){
+	WritableStream.prototype.end.apply(this, arguments);
+
+	if(this._buffer === null || this._sectionStart === -1 || this._sectionStart === this._index - 1){
+		return;
+	}
+
+	//do something with the remaining data
+	if(this._state === IN_CDATA || this._state === CDATA_END_1 || this._state === CDATA_END_2){
+		this._emitIfToken("cdata");
+	} else if(this._state === IN_COMMENT || this._state === COMMENT_END_1 || this._state === COMMENT_END_2){
+		this._emitIfToken("comment");
+	} else {
+		this._emitIfToken("text");
+	}
+};
+
 Parser.prototype._emitToken = function(name){
 	this.emit(name, this._buffer.toString("utf8", this._sectionStart, this._index));
 	this._sectionStart = -1;
@@ -529,11 +546,4 @@ Parser.prototype.emit = function(){
 	console.log.apply(null, [].map.call(arguments, Function.prototype.call, String.prototype.trim));
 	WritableStream.prototype.emit.apply(this, arguments);
 };
-
-Parser.prototype.end = function(){
-	WritableStream.prototype.end.apply(this, arguments);
-
-	if(this._state === TEXT) return;
-	console.log("the game must go on!", this._state);
-};
 */
\ No newline at end of file

From 550b42e3805032c9a742dff10bb7a8bc776cd910 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 30 Mar 2013 11:54:36 +0100
Subject: [PATCH 271/450] [tokenizer] take care of this._index in cleanup, emit
 all text

---
 lib/Tokenizer.js | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
index e2925fd..93996c5 100644
--- a/lib/Tokenizer.js
+++ b/lib/Tokenizer.js
@@ -497,14 +497,25 @@ Parser.prototype._write = function(chunk, encoding, cb){
 	//cleanup
 	if(this._sectionStart === -1){
 		this._buffer = null;
+		this._index = 0;
 	} else {
-		if(this._sectionStart === this._index - 1){
+		if(this._state === TEXT){
+			this._emitIfToken("text");
 			this._buffer = null;
+			this._index = 0;
 		} else {
-			this._buffer = this._buffer.slice(this._sectionStart);
-		}
+			if(this._sectionStart === this._index - 1){
+				//the section just started
+				this._buffer = null;
+				this._index = 0;
+			} else {
+				//remove everything unnecessary
+				this._buffer = this._buffer.slice(this._sectionStart);
+				this._index -= this._sectionStart;
+			}
 
-		this._sectionStart = 0;
+			this._sectionStart = 0;
+		}
 	}
 
 	cb();

From dabe1653fdac98de43cd7d82a735726c6e9bec72 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 30 Mar 2013 11:56:51 +0100
Subject: [PATCH 272/450] [tokenizer] set _sectionStart to 0 when text was
 emitted

---
 lib/Tokenizer.js | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
index 93996c5..cd0d27f 100644
--- a/lib/Tokenizer.js
+++ b/lib/Tokenizer.js
@@ -503,19 +503,17 @@ Parser.prototype._write = function(chunk, encoding, cb){
 			this._emitIfToken("text");
 			this._buffer = null;
 			this._index = 0;
+		} else if(this._sectionStart === this._index - 1){
+			//the section just started
+			this._buffer = null;
+			this._index = 0;
 		} else {
-			if(this._sectionStart === this._index - 1){
-				//the section just started
-				this._buffer = null;
-				this._index = 0;
-			} else {
-				//remove everything unnecessary
-				this._buffer = this._buffer.slice(this._sectionStart);
-				this._index -= this._sectionStart;
-			}
-
-			this._sectionStart = 0;
+			//remove everything unnecessary
+			this._buffer = this._buffer.slice(this._sectionStart);
+			this._index -= this._sectionStart;
 		}
+
+		this._sectionStart = 0;
 	}
 
 	cb();

From b9d568abd30e75517081df32be04e2fe413f9241 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 30 Mar 2013 11:59:40 +0100
Subject: [PATCH 273/450] [tokenizer] call WritableStream#end after emitting
 the remaining data

---
 lib/Tokenizer.js | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
index cd0d27f..2ae7e70 100644
--- a/lib/Tokenizer.js
+++ b/lib/Tokenizer.js
@@ -519,8 +519,8 @@ Parser.prototype._write = function(chunk, encoding, cb){
 	cb();
 };
 
-Parser.prototype.end = function(){
-	WritableStream.prototype.end.apply(this, arguments);
+Parser.prototype.end = function(chunk){
+	if(chunk) this._write(chunk, null, function(){});
 
 	if(this._buffer === null || this._sectionStart === -1 || this._sectionStart === this._index - 1){
 		return;
@@ -534,6 +534,8 @@ Parser.prototype.end = function(){
 	} else {
 		this._emitIfToken("text");
 	}
+
+	WritableStream.prototype.end.apply(this, arguments);
 };
 
 Parser.prototype._emitToken = function(name){

From 1144e42ecc0c6fda035fcd414c5064e44282b6d1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 30 Mar 2013 12:45:49 +0100
Subject: [PATCH 274/450] [tokenizer] call .write instead of ._write

---
 lib/Tokenizer.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
index 2ae7e70..a4a6e89 100644
--- a/lib/Tokenizer.js
+++ b/lib/Tokenizer.js
@@ -520,7 +520,7 @@ Parser.prototype._write = function(chunk, encoding, cb){
 };
 
 Parser.prototype.end = function(chunk){
-	if(chunk) this._write(chunk, null, function(){});
+	if(chunk) this.write(chunk);
 
 	if(this._buffer === null || this._sectionStart === -1 || this._sectionStart === this._index - 1){
 		return;

From c3d4025f139a23bd6598ecc018649f14f031c8d9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 30 Mar 2013 13:11:55 +0100
Subject: [PATCH 275/450] [parser] use the tokenizer

---
 lib/Parser.js | 425 +++++++++++++-------------------------------------
 1 file changed, 111 insertions(+), 314 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index 030d221..21dd763 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -1,43 +1,5 @@
-var ElementType = require("./ElementType.js");
-
-function Parser(cbs, options){
-	this._options = options || defaultOpts;
-	this._cbs = cbs || defaultCbs;
-	this._buffer = "";
-	this._tagSep = ">";
-	this._stack = [];
-	this._wroteSpecial = false;
-	this._contentFlags = 0;
-	this._done = false;
-	this._running = true; //false if paused
-}
-
-//Regular expressions used for cleaning up and parsing (stateless)
-
-/* http://dev.w3.org/html5/html-author/#attributes
- * - Whitespace is permitted after the tag name, but it is not permitted before the tag name.
- * - Attribute names must consist of one or more characters other than the space characters,
- *   control characters, NULL, one of the characters: double quote ("), single quote ('),
- *   greater-than sign (>), solidus (/), equals sign (=), nor any characters that are not defined by Unicode.
- * - An empty attribute is one where the value has been omitted. (<input disabled>...</input>
- * - An unquoted attribute value must not contain any literal space characters, any of the characters:
- *   double quote ("), apostrophe ('), equals sign (=), less-than sign (<), greater-than sign (>),
- *   or grave accent (`), and the value must not be the empty string.
- * - There may be space characters between the attribute name and the equals sign (=),
- *   and between that and the attribute value.
- * - Double-quoted attributes must not contain any double-quote characters or ambiguous ampersands.
- * - Single-quoted attributes must not contain any single-quote characters or ambiguous ampersands.
- */
-// element name:	(<[^<& ]+)
-// attribute name:	( [^"'=>\/]+)
-// attribute value:	(\s*=\s*(?:
-//						"([^"]*)"|
-//						'([^']*)'|
-//						[^\s"'=<>`]+)
-// tag end: (?=\s|\/|$)
-
-var _reAttrib = /\s+([^"'=>\/\s]+)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'=<>`]+))|(?=\s)|\/|$)/g,
-    _reTail = /\s|\/|$/;
+var Tokenizer = require("./Tokenizer.js"),
+    WritableStream = require("stream").Writable;
 
 var defaultOpts = {
 	xmlMode: false, //Special behavior for script/style tags by default
@@ -60,7 +22,6 @@ var defaultCbs = {
 		oncommentend,
 		onerror,
 		onopentag,
-		onopentagend,
 		onprocessinginstruction,
 		onreset,
 		ontext
@@ -93,214 +54,6 @@ var openImpliesClose = {
 	optgroup: { optgroup:true }
 };
 
-//Parses a complete HTML and pushes it to the handler
-Parser.prototype.parseComplete = function(data){
-	this.reset();
-	this.end(data);
-};
-
-//Parses a piece of an HTML document
-Parser.prototype.parseChunk =
-Parser.prototype.write = function(data){
-	if(this._done) this._handleError("Attempted to parse chunk after parsing already done");
-	this._buffer += data; //FIXME: this can be a bottleneck
-	if(this._running) this._parseTags();
-};
-
-//Tells the parser that the HTML being parsed is complete
-Parser.prototype.done =
-Parser.prototype.end = function(chunk){
-	if(this._done) return;
-
-	if(chunk) this.write(chunk);
-	this._done = true;
-	
-	if(this._running) this._finishParsing();
-};
-
-Parser.prototype._finishParsing = function(){
-	//Parse the buffer to its end
-	if(this._buffer) this._parseTags(true);
-	
-	if(this._cbs.onclosetag){
-		while(this._stack.length) this._cbs.onclosetag(this._stack.pop());
-	}
-	
-	if(this._cbs.onend) this._cbs.onend();
-};
-
-Parser.prototype.pause = function(){
-	if(!this._done) this._running = false;
-};
-
-Parser.prototype.resume = function(){
-	if(this._running) return;
-	this._running = true;
-	this._parseTags();
-	if(this._done) this._finishParsing();
-};
-
-//Resets the parser to a blank state, ready to parse a new HTML document
-Parser.prototype.reset = function(){
-	Parser.call(this, this._cbs, this._options);
-	if(this._cbs.onreset) this._cbs.onreset();
-};
-
-//Extracts the base tag name from the data value of an element
-Parser.prototype._parseTagName = function(data){
-	var match = data.substr(0, data.search(_reTail));
-	if(!this._options.lowerCaseTags) return match;
-	return match.toLowerCase();
-};
-
-//Special tags that are treated differently
-var SpecialTags = {};
-//SpecialTags[ElementType.Tag]   = 0x0;
-SpecialTags[ElementType.Style]   = 0x1; //2^0
-SpecialTags[ElementType.Script]  = 0x2; //2^1
-SpecialTags[ElementType.Comment] = 0x4; //2^2
-SpecialTags[ElementType.CDATA]   = 0x8; //2^3
-
-var TagValues = {
-	style: 1,
-	script: 2
-};
-
-//Parses through HTML text and returns an array of found elements
-Parser.prototype._parseTags = function(force){
-	var current = 0,
-	    opening = this._buffer.indexOf("<"),
-	    closing = this._buffer.indexOf(">"),
-	    next, rawData, elementData, lastTagSep;
-
-	//if force is true, parse everything
-	if(force) opening = Infinity;
-
-	//opening !== closing is just false if both are -1
-	while(opening !== closing && this._running){
-		lastTagSep = this._tagSep;
-		
-		if((opening !== -1 && opening < closing) || closing === -1){
-			next = opening;
-			this._tagSep = "<";
-			opening = this._buffer.indexOf("<", next + 1);
-		}
-		else{
-			next = closing;
-			this._tagSep = ">";
-			closing = this._buffer.indexOf(">", next + 1);
-		}
-		rawData = this._buffer.substring(current, next); //The next chunk of data to parse
-		
-		//set elements for next run
-		current = next + 1;
-		
-		if(this._contentFlags >= SpecialTags[ElementType.CDATA]){
-			// We're inside a CDATA section
-			this._writeCDATA(rawData);
-
-		}
-		else if(this._contentFlags >= SpecialTags[ElementType.Comment]){
-			//We're in a comment tag
-			this._writeComment(rawData);
-		}
-		else if(lastTagSep === "<"){
-			elementData = rawData.trimLeft();
-			if(elementData.charAt(0) === "/"){
-				//elementData = elementData.substr(1).trim();
-				elementData = this._parseTagName(elementData.substr(1));
-				if(this._contentFlags !== 0){
-					//if it's a closing tag, remove the flag
-					if(this._contentFlags & TagValues[elementData]){
-						//remove the flag
-						this._contentFlags ^= TagValues[elementData];
-					} else {
-						this._writeSpecial(rawData, lastTagSep);
-						continue;
-					}
-				}
-				this._processCloseTag(elementData);
-			}
-			else if(this._contentFlags !== 0) this._writeSpecial(rawData, lastTagSep);
-			else if(elementData.charAt(0) === "!"){
-				if(elementData.substr(1, 7) === "[CDATA["){
-					this._contentFlags |= SpecialTags[ElementType.CDATA];
-					if(this._cbs.oncdatastart) this._cbs.oncdatastart();
-					this._writeCDATA(elementData.substr(8));
-				}
-				else if(this._contentFlags !== 0) this._writeSpecial(rawData, lastTagSep);
-				else if(elementData.substr(1, 2) === "--"){
-					//This tag is a comment
-					this._contentFlags |= SpecialTags[ElementType.Comment];
-					this._writeComment(rawData.substr(3));
-				}
-				//TODO: This isn't a processing instruction, needs a new name
-				else if(this._cbs.onprocessinginstruction){
-					this._cbs.onprocessinginstruction(
-						"!" + this._parseTagName(elementData.substr(1)),
-						elementData
-					);
-				}
-			}
-			else if(elementData.charAt(0) === "?"){
-				if(this._cbs.onprocessinginstruction){
-					this._cbs.onprocessinginstruction(
-						"?" + this._parseTagName(elementData.substr(1)),
-						elementData
-					);
-				}
-			}
-			else this._processOpenTag(elementData);
-		}
-		else{
-			if(this._contentFlags !== 0){
-				this._writeSpecial(rawData, ">");
-			}
-			else if(this._cbs.ontext){
-				if(this._tagSep === ">") rawData += ">"; //it's the second > in a row
-				if(rawData !== "") this._cbs.ontext(rawData);
-			}
-		}
-	}
-
-	this._buffer = this._buffer.substr(current);
-};
-
-Parser.prototype._writeCDATA = function(data){
-	if(this._tagSep === ">" && data.substr(-2) === "]]"){
-		// CDATA ends
-		if(data.length !== 2 && this._cbs.ontext){
-			this._cbs.ontext(data.slice(0,-2));
-		}
-		this._contentFlags ^= SpecialTags[ElementType.CDATA];
-		if(this._cbs.oncdataend) this._cbs.oncdataend();
-		this._wroteSpecial = false;
-    }
-    else if(this._cbs.ontext) this._cbs.ontext(data + this._tagSep);
-};
-
-Parser.prototype._writeComment = function(rawData){
-	if(this._tagSep === ">" && rawData.substr(-2) === "--"){ //comment ends
-		//remove the written flag (also removes the comment flag)
-		this._contentFlags ^= SpecialTags[ElementType.Comment];
-		this._wroteSpecial = false;
-		if(this._cbs.oncomment) this._cbs.oncomment(rawData.slice(0, -2));
-		if(this._cbs.oncommentend) this._cbs.oncommentend();
-	}
-	else if(this._cbs.oncomment) this._cbs.oncomment(rawData + this._tagSep);
-};
-
-Parser.prototype._writeSpecial = function(rawData, lastTagSep){
-	//if the previous element is text, append the last tag sep to element
-	if(this._wroteSpecial){
-		if(this._cbs.ontext) this._cbs.ontext(lastTagSep + rawData);
-	}
-	else{ //The previous element was not text
-		this._wroteSpecial = true;
-		if(rawData !== "" && this._cbs.ontext) this._cbs.ontext(rawData);
-	}
-};
-
 var emptyTags = {
 	__proto__: null,
 	area: true,
@@ -319,79 +72,123 @@ var emptyTags = {
 	embed: true
 };
 
-Parser.prototype._processCloseTag = function(name){
-	if(this._stack && (!(name in emptyTags) || this._options.xmlMode)){
-		var pos = this._stack.lastIndexOf(name);
-		if(pos !== -1)
-			if(this._cbs.onclosetag){
-				pos = this._stack.length - pos;
-				while(pos--) this._cbs.onclosetag(this._stack.pop());
-			}
-			else this._stack.splice(pos);
-	}
-	//many browsers (eg. Safari, Chrome) convert </br> to <br>
-	else if(name === "br" && !this._options.xmlMode){
-		this._processOpenTag(name + "/");
+function Parser(cbs, options){
+	if(!options) options = defaultOpts;
+	if(!cbs) cbs = defaultCbs;
+	this._options = options;
+	this._cbs = cbs;
+	this._tokenizer = new Tokenizer(options);
+
+	WritableStream.call(this, options);
+
+	var that = this,
+	    tagname = "",
+	    attribname = "",
+	    attribs = null,
+	    stack = [];
+
+	function closeTag(name){
+		if(options.lowerCaseTags) name = name.toLowerCase();
+		if(stack && (!(name in emptyTags) || options.xmlMode)){
+			var pos = stack.lastIndexOf(name);
+			if(pos !== -1)
+				if(cbs.onclosetag){
+					pos = stack.length - pos;
+					while(pos--) cbs.onclosetag(stack.pop());
+				}
+				else stack.splice(pos);
+		}
 	}
-};
 
-Parser.prototype._parseAttributes = function(data, lcNames){
-	for(var match; match = _reAttrib.exec(data);){
-		this._cbs.onattribute(lcNames ? match[1].toLowerCase() : match[1], match[2] || match[3] || match[4] || "");
+	function attribValue(value){
+		if(cbs.onattribute) cbs.onattribute(attribname, value);
+		if(attribs) attribs[attribname] = value;
+		attribname = "";
 	}
-};
 
-//parses the attribute string
-var parseAttributes = function(data, lcNames){
-	var attrs = {};
-	for(var match; match = _reAttrib.exec(data);){
-		attrs[lcNames ? match[1].toLowerCase() : match[1]] = match[2] || match[3] || match[4] || "";
-	}
-	return attrs;
+	this._tokenizer
+		.on("text", function(data){
+			if(tagname !== ""){
+				if(attribname !== "") attribValue("");
+				if(attribs){
+					if(cbs.onopentag) cbs.onopentag(tagname, attribs);
+					attribs = null;
+				}
+				attribname = "";
+			}
+			if(cbs.ontext) cbs.ontext(data);
+		})
+		.on("opentagname", function(name){
+			if(options.lowerCaseTags) name = name.toLowerCase();
+			tagname = name;
+
+			if (!options.xmlMode && name in openImpliesClose) {
+				for(
+					var el;
+					(el = stack[stack.length-1]) in openImpliesClose[name];
+					closeTag(el)
+				);
+			}
+			if(cbs.onopentagname) cbs.onopentagname(name);
+			if(cbs.onopentag) attribs = {};
+		})
+		.on("closetag", closeTag)
+		.on("selfclosingtag", function(){
+			closeTag(tagname);
+		})
+		.on("attribname", function(name){
+			if(attribname !== "") attribValue("");
+			if(options.lowerCaseAttributeNames) name = name.toLowerCase;
+			attribname = name;
+		})
+		.on("attribvalue", attribValue)
+		.on("declaration", function(value){
+			if(cbs.onprocessinginstruction){
+				cbs.onprocessinginstruction("!" + value.split(/\s|\//, 1)[0], "!" + value);
+			}
+		})
+		.on("processinginstruction", function(value){
+			if(cbs.onprocessinginstruction){
+				cbs.onprocessinginstruction("?" + value.split(/\s|\//, 1)[0], "?" + value);
+			}
+		})
+		.on("comment", function(value){
+			if(cbs.oncomment) cbs.oncomment(value);
+			if(cbs.oncommentend) cbs.oncommentend();
+		})
+		.on("cdata", function(value){
+			if(cbs.oncdatastart) cbs.oncdatastart();
+			if(cbs.ontext) cbs.ontext(value);
+			if(cbs.oncdataend) cbs.oncdataend();
+		})
+		.on("error", function(err){
+			if(cbs.onerror) cbs.onerror(err);
+			else that.emit("error", err);
+		})
+		;
+}
+
+require("util").inherits(Parser, WritableStream);
+
+//Resets the parser to a blank state, ready to parse a new HTML document
+Parser.prototype.reset = function(){
+	this._tokenizer.removeAllListeners();
+	Parser.call(this, this._cbs, this._options);
+	if(this._cbs.onreset) this._cbs.onreset();
 };
 
-Parser.prototype._processOpenTag = function(data){
-	var name = this._parseTagName(data),
-		attributes = parseAttributes(data, this._options.lowerCaseAttributeNames),
-		type = ElementType.Tag;
-	
-	if(this._options.xmlMode){ /*do nothing*/ }
-	else if(name === "script") type = ElementType.Script;
-	else if(name === "style")  type = ElementType.Style;
-	if (!this._options.xmlMode && name in openImpliesClose) {
-		var el;
-		while ((el = this._stack[this._stack.length-1]) in openImpliesClose[name]) {
-			this._processCloseTag(el);
-		}
-	}
-	if(this._cbs.onopentagname) this._cbs.onopentagname(name);
-	if(this._cbs.onopentag) this._cbs.onopentag(name, attributes);
-	if(this._cbs.onattribute){
-		this._parseAttributes(data, this._options.lowerCaseAttributeNames);
-	}
-	
-	//If tag self-terminates, add an explicit, separate closing tag
-	/* http://dev.w3.org/html5/html-author/#tags
-	 * In XHTML, self-closing tags are valid but attribute values must be quoted.
-	 * In HTML, self-closing tags must be either void elements or foreign elements.
-	 * Invalid HTML self-closing tag syntax is ignored (treated as an opening tag).
-	 * Foreign elements use XML rules
-	 */
-	if((!this._options.xmlMode && name in emptyTags) || (data.substr(-1) === "/" && data.replace(_reAttrib, "").substr(-1) === "/")){
-		if(this._cbs.onclosetag) this._cbs.onclosetag(name);
-	} else {
-		if(type !== ElementType.Tag){
-			this._contentFlags |= SpecialTags[type];
-			this._wroteSpecial = false;
-		}
-		this._stack.push(name);
-	}
+//Parses a complete HTML and pushes it to the handler
+Parser.prototype.parseComplete = function(data){
+	this.reset();
+	this.end(data);
 };
 
-Parser.prototype._handleError = function(error){
-	error = new Error(error);
-	if(this._cbs.onerror) this._cbs.onerror(error);
-	else throw error;
+Parser.prototype._write = function(chunk, encoding, cb){
+	this._tokenizer.write(chunk, cb);
 };
 
+//alias for backwards compat
+Parser.prototype.parseChunk = Parser.prototype.write;
+Parser.prototype.done = Parser.prototype.end;
+
 module.exports = Parser;

From 627a38b1a5517a67d2ef62ad4484c72ad22a497c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 30 Mar 2013 13:12:43 +0100
Subject: [PATCH 276/450] removed WritableStream.js and ElementType.js

both aren't needed anymore
---
 lib/ElementType.js    | 10 ----------
 lib/Stream.js         |  6 +++---
 lib/WritableStream.js | 19 -------------------
 lib/index.js          |  4 ++--
 4 files changed, 5 insertions(+), 34 deletions(-)
 delete mode 100644 lib/ElementType.js
 delete mode 100644 lib/WritableStream.js

diff --git a/lib/ElementType.js b/lib/ElementType.js
deleted file mode 100644
index e4d7598..0000000
--- a/lib/ElementType.js
+++ /dev/null
@@ -1,10 +0,0 @@
-//Types of elements found in the DOM
-module.exports = {
-	Text: 0, //Text
-	Directive: 1, //<? ... ?>
-	Comment: 2, //<!-- ... -->
-	Script: 3, //<script> tags
-	Style: 4, //<style> tags
-	Tag: 5, //Any tag
-	CDATA: 6 //<![CDATA[ ... ]]>
-};
\ No newline at end of file
diff --git a/lib/Stream.js b/lib/Stream.js
index ee0fdba..fe9e970 100644
--- a/lib/Stream.js
+++ b/lib/Stream.js
@@ -1,10 +1,10 @@
-var WritableStream = require("./WritableStream.js");
+var Parser = require("./Parser.js");
 
 var Stream = function(options){
-	WritableStream.call(this, new cbs(this), options);
+	Parser.call(this, new cbs(this), options);
 };
 
-require("util").inherits(Stream, WritableStream);
+require("util").inherits(Stream, Parser);
 
 Stream.prototype.readable = true;
 
diff --git a/lib/WritableStream.js b/lib/WritableStream.js
deleted file mode 100644
index b9e5c3e..0000000
--- a/lib/WritableStream.js
+++ /dev/null
@@ -1,19 +0,0 @@
-var Parser = require("./Parser.js");
-
-var WritableStream = function(cbs, options){
-	Parser.call(this, cbs, options);
-};
-
-require("util").inherits(WritableStream, require("stream").Stream);
-
-//util.inherits would overwrite the prototype when called twice,
-//so we need a different approach
-Object.getOwnPropertyNames(Parser.prototype).forEach(function(name){
-	WritableStream.prototype[name] = Parser.prototype[name];
-});
-
-WritableStream.prototype.writable = true;
-
-// TODO improve support for Parser#pause and Parser#continue
-
-module.exports = WritableStream;
\ No newline at end of file
diff --git a/lib/index.js b/lib/index.js
index e3e3378..953c213 100644
--- a/lib/index.js
+++ b/lib/index.js
@@ -22,7 +22,7 @@ module.exports = {
 		return this.Stream;
 	},
 	get WritableStream(){
-		defineProp(this, "WritableStream", {value:require("./WritableStream.js")});
+		defineProp(this, "WritableStream", {value:this.Parser});
 		return this.WritableStream;
 	},
 	get ProxyHandler(){
@@ -57,4 +57,4 @@ module.exports = {
 		error: 1,
 		end: 0
 	}
-}
\ No newline at end of file
+};
\ No newline at end of file

From 358944e1d8934f0f08b8814437bc7682484c406a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 30 Mar 2013 13:41:15 +0100
Subject: [PATCH 277/450] [parser] made Parser#reset work again

absolutely aweful.
---
 lib/Parser.js | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index 21dd763..0d8acd4 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -173,8 +173,12 @@ require("util").inherits(Parser, WritableStream);
 //Resets the parser to a blank state, ready to parse a new HTML document
 Parser.prototype.reset = function(){
 	this._tokenizer.removeAllListeners();
-	Parser.call(this, this._cbs, this._options);
 	if(this._cbs.onreset) this._cbs.onreset();
+	//TODO there needs to be a better way
+	var p = new Parser(this._cbs, this._options);
+	for(var k in p){
+		if(typeof p[k] === "function") this[k] = p[k].bind(p);
+	}
 };
 
 //Parses a complete HTML and pushes it to the handler

From 5c155cac13ffebcc5e7803da36ad350554493b82 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 30 Mar 2013 13:48:50 +0100
Subject: [PATCH 278/450] fall back to the readable-stream module

---
 lib/Parser.js    | 2 +-
 lib/Tokenizer.js | 2 +-
 package.json     | 3 ++-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index 0d8acd4..d4518e7 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -1,5 +1,5 @@
 var Tokenizer = require("./Tokenizer.js"),
-    WritableStream = require("stream").Writable;
+    WritableStream = require("stream").Writable || require("readable-stream").Writable;
 
 var defaultOpts = {
 	xmlMode: false, //Special behavior for script/style tags by default
diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
index a4a6e89..7581e8f 100644
--- a/lib/Tokenizer.js
+++ b/lib/Tokenizer.js
@@ -1,6 +1,6 @@
 module.exports = Parser;
 
-var WritableStream = require("stream").Writable,
+var WritableStream = require("stream").Writable || require("readable-stream").Writable,
 
     i = 0,
 
diff --git a/package.json b/package.json
index 12a04ed..e5fa84b 100644
--- a/package.json
+++ b/package.json
@@ -23,7 +23,8 @@
 	"dependencies": {
 		"domhandler": "2.0",
 		"domutils": "1.0",
-		"domelementtype": "1"
+		"domelementtype": "1",
+		"readable-stream": "1.0"
 	},
 	"licenses": [{
 		"type": "MIT",

From 5a285479026099a278518530758ec90dd21b1814 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 30 Mar 2013 13:49:28 +0100
Subject: [PATCH 279/450] [travis] removed 0.6 & 0.9, added 0.10 and 0.11

---
 .travis.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 84fd7ca..d63ba09 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,5 +1,5 @@
 language: node_js
 node_js:
-  - 0.6
   - 0.8
-  - 0.9
+  - 0.10
+  - 0.11

From c44537555ff561d10290fe3126a15ea84819a9d7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 30 Mar 2013 14:59:27 +0100
Subject: [PATCH 280/450] minor changes

---
 lib/Parser.js      |  2 +-
 lib/Stream.js      |  6 +++---
 tests/02-stream.js | 14 ++++++++------
 3 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index d4518e7..77607a0 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -181,7 +181,7 @@ Parser.prototype.reset = function(){
 	}
 };
 
-//Parses a complete HTML and pushes it to the handler
+//Parses a complete HTML document and pushes it to the handler
 Parser.prototype.parseComplete = function(data){
 	this.reset();
 	this.end(data);
diff --git a/lib/Stream.js b/lib/Stream.js
index fe9e970..c567dbb 100644
--- a/lib/Stream.js
+++ b/lib/Stream.js
@@ -1,3 +1,5 @@
+module.exports = Stream;
+
 var Parser = require("./Parser.js");
 
 var Stream = function(options){
@@ -30,6 +32,4 @@ Object.keys(EVENTS).forEach(function(name){
 	} else {
 		throw Error("wrong number of arguments!");
 	}
-});
-
-module.exports = Stream;
\ No newline at end of file
+});
\ No newline at end of file
diff --git a/tests/02-stream.js b/tests/02-stream.js
index 44fdad7..9e3f67d 100644
--- a/tests/02-stream.js
+++ b/tests/02-stream.js
@@ -21,12 +21,14 @@ exports.test = function(test, cb){
 					}
 				};
 			}
-			if(helper.EVENTS.indexOf(name) !== -1) return function(){
-				tokens.push({
-					event: name,
-					data: sliceArr.apply(arguments)
-				});
-			};
+			if(helper.EVENTS.indexOf(name) !== -1){
+				return function(){
+					tokens.push({
+						event: name,
+						data: sliceArr.call(arguments, 0)
+					});
+				};
+			}
 		}});
 	}
 	else {

From 1ab593a33a954e2778c010ff23a910067c712a99 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 30 Mar 2013 15:00:18 +0100
Subject: [PATCH 281/450] [index.js] removed redundant code

---
 lib/index.js | 36 +++++++++++++++---------------------
 1 file changed, 15 insertions(+), 21 deletions(-)

diff --git a/lib/index.js b/lib/index.js
index 953c213..5feeec1 100644
--- a/lib/index.js
+++ b/lib/index.js
@@ -1,46 +1,40 @@
-var defineProp = Object.defineProperty;
+function defineProp(name, value){
+	delete module.exports[name];
+	module.exports[name] = value;
+	return value;
+}
 
 module.exports = {
 	get Parser(){
-		defineProp(this, "Parser", {value:require("./Parser.js")});
-		return this.Parser;
+		return defineProp("Parser", require("./Parser.js"));
 	},
 	get DomHandler(){
-		defineProp(this, "DomHandler", {value:require("domhandler")});
-		return this.DomHandler;
+		return defineProp("DomHandler", require("domhandler"));
 	},
 	get FeedHandler(){
-		defineProp(this, "FeedHandler", {value:require("./FeedHandler.js")});
-		return this.FeedHandler;
+		return defineProp("FeedHandler", require("./FeedHandler.js"));
 	},
 	get ElementType(){
-		defineProp(this, "ElementType", {value:require("domelementtype")});
-		return this.ElementType;
+		return defineProp("ElementType", require("domelementtype"));
 	},
 	get Stream(){
-		defineProp(this, "Stream", {value:require("./Stream.js")});
-		return this.Stream;
+		return defineProp("Stream", require("./Stream.js"));
 	},
 	get WritableStream(){
-		defineProp(this, "WritableStream", {value:this.Parser});
-		return this.WritableStream;
+		return defineProp("WritableStream", this.Parser);
 	},
 	get ProxyHandler(){
-		defineProp(this, "ProxyHandler", {value:require("./ProxyHandler.js")});
-		return this.ProxyHandler;
+		return defineProp("ProxyHandler", require("./ProxyHandler.js"));
 	},
 	get DomUtils(){
-		defineProp(this, "DomUtils", {value:require("domutils")});
-		return this.DomUtils;
+		return defineProp("DomUtils", require("domutils"));
 	},
 	// For legacy support
 	get DefaultHandler(){
-		defineProp(this, "DefaultHandler", {value: this.DomHandler});
-		return this.DefaultHandler;
+		return defineProp("DefaultHandler", this.DomHandler);
 	},
 	get RssHandler(){
-		defineProp(this, "RssHandler", {value: this.FeedHandler});
-		return this.FeedHandler;
+		return defineProp("RssHandler", this.FeedHandler);
 	},
 	// List of all events that the parser emits
 	EVENTS: { /* Format: eventname: number of arguments */

From f78d1ed9921a49e0324e749c5daa8495cc391bc5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 30 Mar 2013 15:21:21 +0100
Subject: [PATCH 282/450] [stream] use a named function

fixes export
---
 lib/Stream.js | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/Stream.js b/lib/Stream.js
index c567dbb..b9bad6e 100644
--- a/lib/Stream.js
+++ b/lib/Stream.js
@@ -2,9 +2,9 @@ module.exports = Stream;
 
 var Parser = require("./Parser.js");
 
-var Stream = function(options){
+function Stream(options){
 	Parser.call(this, new cbs(this), options);
-};
+}
 
 require("util").inherits(Stream, Parser);
 

From 1b6a264b77bd9a22cdb24fb220d2d81d3c556106 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 30 Mar 2013 15:24:56 +0100
Subject: [PATCH 283/450] 3.0.0

also updated domutils version & specified main-field
---
 package.json | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/package.json b/package.json
index e5fa84b..8520c8e 100644
--- a/package.json
+++ b/package.json
@@ -1,7 +1,7 @@
 {
 	"name": "htmlparser2",
 	"description": "Performance-optimized forgiving HTML/XML/RSS parser",
-	"version": "2.6.0",
+	"version": "3.0.0",
 	"author": "Felix Boehm <me@feedic.com>",
 	"keywords": ["html", "parser", "streams", "xml", "dom", "rss", "feed", "atom"],
 	"contributors": ["Chris Winberry <chris@winberry.net>"],
@@ -16,13 +16,13 @@
 	"directories": {
 		"lib": "lib/"
 	},
-	"main": "./lib/",
+	"main": "lib/index.js",
 	"scripts": {
 		"test": "node tests/00-runtests.js"
 	},
 	"dependencies": {
 		"domhandler": "2.0",
-		"domutils": "1.0",
+		"domutils": "1.1",
 		"domelementtype": "1",
 		"readable-stream": "1.0"
 	},

From b48adc2f92a07c58729a66143d74efc69f4ac212 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 30 Mar 2013 16:02:16 +0100
Subject: [PATCH 284/450] [tokenizer] always call WritableStream#end

---
 lib/Tokenizer.js | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
index 7581e8f..1a035f8 100644
--- a/lib/Tokenizer.js
+++ b/lib/Tokenizer.js
@@ -522,12 +522,9 @@ Parser.prototype._write = function(chunk, encoding, cb){
 Parser.prototype.end = function(chunk){
 	if(chunk) this.write(chunk);
 
-	if(this._buffer === null || this._sectionStart === -1 || this._sectionStart === this._index - 1){
-		return;
-	}
-
-	//do something with the remaining data
-	if(this._state === IN_CDATA || this._state === CDATA_END_1 || this._state === CDATA_END_2){
+	//if there is remaining data, emit it in a reasonable way
+	if(this._buffer === null || this._sectionStart === -1 || this._sectionStart === this._index - 1);
+	else if(this._state === IN_CDATA || this._state === CDATA_END_1 || this._state === CDATA_END_2){
 		this._emitIfToken("cdata");
 	} else if(this._state === IN_COMMENT || this._state === COMMENT_END_1 || this._state === COMMENT_END_2){
 		this._emitIfToken("comment");

From 17b7ebe2ca49eeba04db34ca47cc796e65de5da9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 30 Mar 2013 16:04:54 +0100
Subject: [PATCH 285/450] [parser] call Tokenizer#end, clear the stack

---
 lib/Parser.js | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/lib/Parser.js b/lib/Parser.js
index 77607a0..7ca334d 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -165,7 +165,21 @@ function Parser(cbs, options){
 			if(cbs.onerror) cbs.onerror(err);
 			else that.emit("error", err);
 		})
+		.once("finish", function(){
+			if(cbs.onclosetag){
+				for(
+					var i = stack.length;
+					i > 0;
+					cbs.onclosetag(stack[--i])
+				);
+			}
+			if(cbs.onend) cbs.onend();
+		})
 		;
+
+	this.once("finish", function(){
+		that._tokenizer.end();
+	});
 }
 
 require("util").inherits(Parser, WritableStream);

From 654c4d48b1170fd799df37d4b0e3e287f342b121 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 30 Mar 2013 16:05:24 +0100
Subject: [PATCH 286/450] [index.js] added `createDomStream()` convenience
 method

---
 lib/index.js | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/lib/index.js b/lib/index.js
index 5feeec1..356075a 100644
--- a/lib/index.js
+++ b/lib/index.js
@@ -36,6 +36,10 @@ module.exports = {
 	get RssHandler(){
 		return defineProp("RssHandler", this.FeedHandler);
 	},
+	createDomStream: function(cb, options, elementCb){
+		var handler = new module.exports.DomHandler(cb, options, elementCb);
+		return new module.exports.Parser(handler, options);
+	},
 	// List of all events that the parser emits
 	EVENTS: { /* Format: eventname: number of arguments */
 		attribute: 2,

From 628b99eb160aff0a77b952666d0fdc5ca41df0a3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 30 Mar 2013 16:13:11 +0100
Subject: [PATCH 287/450] [tokenizer] added `opentagend` event

---
 lib/Tokenizer.js | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
index 1a035f8..1970b07 100644
--- a/lib/Tokenizer.js
+++ b/lib/Tokenizer.js
@@ -136,6 +136,7 @@ Parser.prototype._write = function(chunk, encoding, cb){
 				this._state = AFTER_CLOSING_TAG_NAME;
 			} else if(c === code(">")){
 				this._emitToken("opentagname");
+				this.emit("opentagend");
 				this._state = TEXT;
 				this._sectionStart = this._index + 1;
 			} else if(whitespace(c)){
@@ -180,6 +181,7 @@ Parser.prototype._write = function(chunk, encoding, cb){
 				this._state = AFTER_CLOSING_TAG_NAME;
 			} else if(c === code(">")){
 				this._state = TEXT;
+				this.emit("opentagend");
 				this._sectionStart = this._index + 1;
 			} else if(!whitespace(c)){
 				this._state = IN_ATTRIBUTE_NAME;
@@ -196,6 +198,7 @@ Parser.prototype._write = function(chunk, encoding, cb){
 			} else if(c === code(">")){
 				this._emitIfToken("attribname");
 				this._state = TEXT;
+				this.emit("opentagend");
 				this._sectionStart = this._index + 1;
 			} else if(whitespace(c)){
 				this._emitIfToken("attribname");
@@ -209,6 +212,7 @@ Parser.prototype._write = function(chunk, encoding, cb){
 				this._state = AFTER_CLOSING_TAG_NAME;
 			} else if(c === code(">")){
 				this._state = TEXT;
+				this.emit("opentagend");
 				this._sectionStart = this._index + 1;
 			} else if(!whitespace(c)){
 				this._state = IN_ATTRIBUTE_NAME;
@@ -243,6 +247,7 @@ Parser.prototype._write = function(chunk, encoding, cb){
 			} else if(c === code(">")){
 				this._emitToken("attribvalue");
 				this._state = TEXT;
+				this.emit("opentagend");
 				this._sectionStart = this._index + 1;
 			} else if(whitespace(c)){
 				this._emitToken("attribvalue");

From f70f545bac1e5d88ce9acd8ecb31af32b41884a5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 30 Mar 2013 16:13:50 +0100
Subject: [PATCH 288/450] [parser] use `opentagend` event

---
 lib/Parser.js | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index 7ca334d..bd2f45c 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -108,14 +108,6 @@ function Parser(cbs, options){
 
 	this._tokenizer
 		.on("text", function(data){
-			if(tagname !== ""){
-				if(attribname !== "") attribValue("");
-				if(attribs){
-					if(cbs.onopentag) cbs.onopentag(tagname, attribs);
-					attribs = null;
-				}
-				attribname = "";
-			}
 			if(cbs.ontext) cbs.ontext(data);
 		})
 		.on("opentagname", function(name){
@@ -132,6 +124,16 @@ function Parser(cbs, options){
 			if(cbs.onopentagname) cbs.onopentagname(name);
 			if(cbs.onopentag) attribs = {};
 		})
+		.on("opentagend", function(){
+			if(tagname !== ""){
+				if(attribname !== "") attribValue("");
+				if(attribs){
+					if(cbs.onopentag) cbs.onopentag(tagname, attribs);
+					attribs = null;
+				}
+				tagname = "";
+			}
+		})
 		.on("closetag", closeTag)
 		.on("selfclosingtag", function(){
 			closeTag(tagname);

From b7cc1aa571e5eacefdfa79f8e3cb09c1a68924f0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 30 Mar 2013 16:14:18 +0100
Subject: [PATCH 289/450] 3.0.1

---
 package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package.json b/package.json
index 8520c8e..4e2e766 100644
--- a/package.json
+++ b/package.json
@@ -1,7 +1,7 @@
 {
 	"name": "htmlparser2",
 	"description": "Performance-optimized forgiving HTML/XML/RSS parser",
-	"version": "3.0.0",
+	"version": "3.0.1",
 	"author": "Felix Boehm <me@feedic.com>",
 	"keywords": ["html", "parser", "streams", "xml", "dom", "rss", "feed", "atom"],
 	"contributors": ["Chris Winberry <chris@winberry.net>"],

From acc0d0507514d9759bf3134dc48c45d4a2376657 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 30 Mar 2013 17:15:01 +0100
Subject: [PATCH 290/450] [tokenizer] emit opentagend on selfclosing tags,
 fixed handling of < in special tags

---
 lib/Tokenizer.js | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
index 1970b07..406baa6 100644
--- a/lib/Tokenizer.js
+++ b/lib/Tokenizer.js
@@ -102,11 +102,11 @@ Parser.prototype._write = function(chunk, encoding, cb){
 				this._sectionStart = this._index;
 			}
 		} else if(this._state === TAG_START){
-			if(c === code(">")){
-				this._state = TEXT;
-			} else if(c === code("/")){
+			if(c === code("/")){
 				this._state = CLOSING_TAG_START;
-			} else if(this._special === 0) {
+			} else if(c === code(">") || this._special > 0) {
+				this._state = TEXT;
+			} else {
 				if(whitespace(c));
 				else if(c === code("!")){
 					this._state = DECLARATION_START;
@@ -124,14 +124,11 @@ Parser.prototype._write = function(chunk, encoding, cb){
 					this._state = IN_TAG_NAME;
 					this._sectionStart = this._index;
 				}
-			} else {
-				this._state = IN_TAG_NAME;
-				this._sectionStart = this._index;
 			}
-			//TODO remove redundant else
 		} else if(this._state === IN_TAG_NAME){
 			if(c === code("/")){
 				this._emitToken("opentagname");
+				this.emit("opentagend");
 				this.emit("selfclosingtag");
 				this._state = AFTER_CLOSING_TAG_NAME;
 			} else if(c === code(">")){
@@ -177,6 +174,7 @@ Parser.prototype._write = function(chunk, encoding, cb){
 		*/
 		else if(this._state === BEFORE_ATTRIBUTE_NAME){
 			if(c === code("/")){
+				this.emit("opentagend");
 				this.emit("selfclosingtag");
 				this._state = AFTER_CLOSING_TAG_NAME;
 			} else if(c === code(">")){
@@ -193,6 +191,7 @@ Parser.prototype._write = function(chunk, encoding, cb){
 				this._state = BEFORE_ATTRIBUTE_VALUE;
 			} else if(c === code("/")){
 				this._emitIfToken("attribname");
+				this.emit("opentagend");
 				this.emit("selfclosingtag");
 				this._state = AFTER_CLOSING_TAG_NAME;
 			} else if(c === code(">")){
@@ -208,6 +207,7 @@ Parser.prototype._write = function(chunk, encoding, cb){
 			if(c === code("=")){
 				this._state = BEFORE_ATTRIBUTE_VALUE;
 			} else if(c === code("/")){
+				this.emit("opentagend");
 				this.emit("selfclosingtag");
 				this._state = AFTER_CLOSING_TAG_NAME;
 			} else if(c === code(">")){
@@ -242,6 +242,7 @@ Parser.prototype._write = function(chunk, encoding, cb){
 		} else if(this._state === IN_ATTRIBUTE_VALUE_NO_QUOTES){
 			if(c === code("/")){
 				this._emitToken("attribvalue");
+				this.emit("opentagend");
 				this.emit("selfclosingtag");
 				this._state = AFTER_CLOSING_TAG_NAME;
 			} else if(c === code(">")){

From 94e794f7b6fbcf9468ac3b7233ef56d02e7d4936 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 30 Mar 2013 17:34:52 +0100
Subject: [PATCH 291/450] [index.js] added tokenizer

---
 lib/index.js | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/lib/index.js b/lib/index.js
index 356075a..8fff14d 100644
--- a/lib/index.js
+++ b/lib/index.js
@@ -14,6 +14,9 @@ module.exports = {
 	get FeedHandler(){
 		return defineProp("FeedHandler", require("./FeedHandler.js"));
 	},
+	get Tokenizer(){
+		return defineProp("Tokenizer", require("./Tokenizer.js"));
+	},
 	get ElementType(){
 		return defineProp("ElementType", require("domelementtype"));
 	},

From 979359308888fd65a78f9b1c95dab374004957b5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 30 Mar 2013 17:35:56 +0100
Subject: [PATCH 292/450] [tests] text events now contain more data

---
 tests/Events/04-cdata.json      | 44 +--------------------------------
 tests/Events/06-leading-lt.json |  8 +-----
 2 files changed, 2 insertions(+), 50 deletions(-)

diff --git a/tests/Events/04-cdata.json b/tests/Events/04-cdata.json
index 1ea9f47..cf350ff 100644
--- a/tests/Events/04-cdata.json
+++ b/tests/Events/04-cdata.json
@@ -26,49 +26,7 @@
     {
       "event": "text",
       "data": [
-        " asdf >"
-      ]
-    },
-    {
-      "event": "text",
-      "data": [
-        "<"
-      ]
-    },
-    {
-      "event": "text",
-      "data": [
-        "asdf>"
-      ]
-    },
-    {
-      "event": "text",
-      "data": [
-        "<"
-      ]
-    },
-    {
-      "event": "text",
-      "data": [
-        "/adsf>"
-      ]
-    },
-    {
-      "event": "text",
-      "data": [
-        "<"
-      ]
-    },
-    {
-      "event": "text",
-      "data": [
-        ">"
-      ]
-    },
-    {
-      "event": "text",
-      "data": [
-        " fo"
+        " asdf ><asdf></adsf><> fo"
       ]
     },
     {
diff --git a/tests/Events/06-leading-lt.json b/tests/Events/06-leading-lt.json
index 7c97777..fcec852 100644
--- a/tests/Events/06-leading-lt.json
+++ b/tests/Events/06-leading-lt.json
@@ -9,13 +9,7 @@
     {
       "event": "text",
       "data": [
-        ">"
-      ]
-    },
-    {
-      "event": "text",
-      "data": [
-        "a>"
+        ">a>"
       ]
     }
   ]

From ab8b65317ce048e104065fe78a3a58574960a8a4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 31 Mar 2013 18:20:02 +0200
Subject: [PATCH 293/450] [tokenizer] don't inherit from stream.Writable, fixed
 several bugs

---
 lib/Tokenizer.js | 96 ++++++++++++++++++++++++------------------------
 1 file changed, 49 insertions(+), 47 deletions(-)

diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
index 406baa6..d599c78 100644
--- a/lib/Tokenizer.js
+++ b/lib/Tokenizer.js
@@ -1,8 +1,6 @@
 module.exports = Parser;
 
-var WritableStream = require("stream").Writable || require("readable-stream").Writable,
-
-    i = 0,
+var i = 0,
 
     TEXT = i++,
     TAG_START = i++, //after <
@@ -76,24 +74,31 @@ function whitespace(c){
 	return c === code(" ") || c === code("\t") || c === code("\r") || c === code("\n");
 }
 
-function Parser(options){
+function Parser(options, cbs){
 	this._state = TEXT;
 	this._buffer = null;
 	this._sectionStart = 0;
 	this._index = 0;
 	this._options = options;
 	this._special = 0; // 1 for script, 2 for style
-
-	WritableStream.call(this, options);
+	this._cbs = cbs;
+	this._running = true;
 }
 
-require("util").inherits(Parser, WritableStream);
+Parser.prototype.write = function(chunk){
+	if(typeof chunk === "string") chunk = new Buffer(chunk);
+	else if(!Buffer.isBuffer(chunk)) throw new Error("chunks must be buffers or string!");
 
-Parser.prototype._write = function(chunk, encoding, cb){
 	if(this._buffer === null) this._buffer = chunk;
 	else this._buffer = Buffer.concat([this._buffer, chunk]);
 
-	while(this._index < this._buffer.length){
+	this._processData();
+};
+
+//TODO make events conditional
+
+Parser.prototype._processData = function(){
+	while(this._index < this._buffer.length && this._running){
 		var c = this._buffer[this._index];
 		if(this._state === TEXT){
 			if(c === code("<")){
@@ -128,12 +133,12 @@ Parser.prototype._write = function(chunk, encoding, cb){
 		} else if(this._state === IN_TAG_NAME){
 			if(c === code("/")){
 				this._emitToken("opentagname");
-				this.emit("opentagend");
-				this.emit("selfclosingtag");
+				this._cbs.onopentagend();
+				this._cbs.onselfclosingtag();
 				this._state = AFTER_CLOSING_TAG_NAME;
 			} else if(c === code(">")){
 				this._emitToken("opentagname");
-				this.emit("opentagend");
+				this._cbs.onopentagend();
 				this._state = TEXT;
 				this._sectionStart = this._index + 1;
 			} else if(whitespace(c)){
@@ -174,12 +179,12 @@ Parser.prototype._write = function(chunk, encoding, cb){
 		*/
 		else if(this._state === BEFORE_ATTRIBUTE_NAME){
 			if(c === code("/")){
-				this.emit("opentagend");
-				this.emit("selfclosingtag");
+				this._cbs.onopentagend();
+				this._cbs.onselfclosingtag();
 				this._state = AFTER_CLOSING_TAG_NAME;
 			} else if(c === code(">")){
 				this._state = TEXT;
-				this.emit("opentagend");
+				this._cbs.onopentagend();
 				this._sectionStart = this._index + 1;
 			} else if(!whitespace(c)){
 				this._state = IN_ATTRIBUTE_NAME;
@@ -191,13 +196,13 @@ Parser.prototype._write = function(chunk, encoding, cb){
 				this._state = BEFORE_ATTRIBUTE_VALUE;
 			} else if(c === code("/")){
 				this._emitIfToken("attribname");
-				this.emit("opentagend");
-				this.emit("selfclosingtag");
+				this._cbs.onopentagend();
+				this._cbs.onselfclosingtag();
 				this._state = AFTER_CLOSING_TAG_NAME;
 			} else if(c === code(">")){
 				this._emitIfToken("attribname");
 				this._state = TEXT;
-				this.emit("opentagend");
+				this._cbs.onopentagend();
 				this._sectionStart = this._index + 1;
 			} else if(whitespace(c)){
 				this._emitIfToken("attribname");
@@ -207,12 +212,12 @@ Parser.prototype._write = function(chunk, encoding, cb){
 			if(c === code("=")){
 				this._state = BEFORE_ATTRIBUTE_VALUE;
 			} else if(c === code("/")){
-				this.emit("opentagend");
-				this.emit("selfclosingtag");
+				this._cbs.onopentagend();
+				this._cbs.onselfclosingtag();
 				this._state = AFTER_CLOSING_TAG_NAME;
 			} else if(c === code(">")){
 				this._state = TEXT;
-				this.emit("opentagend");
+				this._cbs.onopentagend();
 				this._sectionStart = this._index + 1;
 			} else if(!whitespace(c)){
 				this._state = IN_ATTRIBUTE_NAME;
@@ -240,15 +245,10 @@ Parser.prototype._write = function(chunk, encoding, cb){
 				this._state = BEFORE_ATTRIBUTE_NAME;	
 			}
 		} else if(this._state === IN_ATTRIBUTE_VALUE_NO_QUOTES){
-			if(c === code("/")){
-				this._emitToken("attribvalue");
-				this.emit("opentagend");
-				this.emit("selfclosingtag");
-				this._state = AFTER_CLOSING_TAG_NAME;
-			} else if(c === code(">")){
+			if(c === code(">")){
 				this._emitToken("attribvalue");
 				this._state = TEXT;
-				this.emit("opentagend");
+				this._cbs.onopentagend();
 				this._sectionStart = this._index + 1;
 			} else if(whitespace(c)){
 				this._emitToken("attribvalue");
@@ -300,7 +300,7 @@ Parser.prototype._write = function(chunk, encoding, cb){
 		} else if(this._state === COMMENT_END_2){
 			if(c === code(">")){
 				//remove 2 trailing chars
-				this.emit("comment", this._buffer.toString("utf8", this._sectionStart, this._index - 2));
+				this._cbs.oncomment(this._buffer.toString("utf8", this._sectionStart, this._index - 2));
 				this._state = TEXT;
 				this._sectionStart = this._index + 1;
 			} else {
@@ -341,7 +341,7 @@ Parser.prototype._write = function(chunk, encoding, cb){
 		} else if(this._state === CDATA_END_2){
 			if(c === code(">")){
 				//remove 2 trailing chars
-				this.emit("cdata", this._buffer.toString("utf8", this._sectionStart, this._index - 2));
+				this._cbs.oncdata(this._buffer.toString("utf8", this._sectionStart, this._index - 2));
 				this._state = TEXT;
 				this._sectionStart = this._index + 1;
 			} else {
@@ -432,6 +432,7 @@ Parser.prototype._write = function(chunk, encoding, cb){
 		} else if(this._state === SCRIPT_END_5){
 			if(c === code(">") || whitespace(c)){
 				this._state = IN_CLOSING_TAG_NAME;
+				this._sectionStart = this._index - 6;
 				this._index--; //reconsume the token
 			} 
 			else this._state = TEXT;
@@ -487,6 +488,7 @@ Parser.prototype._write = function(chunk, encoding, cb){
 		} else if(this._state === STYLE_END_4){
 			if(c === code(">") || whitespace(c)){
 				this._state = IN_CLOSING_TAG_NAME;
+				this._sectionStart = this._index - 5;
 				this._index--; //reconsume the token
 			} 
 			else this._state = TEXT;
@@ -494,7 +496,7 @@ Parser.prototype._write = function(chunk, encoding, cb){
 
 
 		else {
-			this.emit("error", Error("unknown state"), this._state);
+			this._cbs.onerror(Error("unknown state"), this._state);
 		}
 
 		this._index++;
@@ -509,11 +511,11 @@ Parser.prototype._write = function(chunk, encoding, cb){
 			this._emitIfToken("text");
 			this._buffer = null;
 			this._index = 0;
-		} else if(this._sectionStart === this._index - 1){
+		} else if(this._sectionStart === this._index){
 			//the section just started
 			this._buffer = null;
 			this._index = 0;
-		} else {
+		} else if(this._sectionStart > 0){
 			//remove everything unnecessary
 			this._buffer = this._buffer.slice(this._sectionStart);
 			this._index -= this._sectionStart;
@@ -521,8 +523,13 @@ Parser.prototype._write = function(chunk, encoding, cb){
 
 		this._sectionStart = 0;
 	}
+};
 
-	cb();
+Parser.prototype.pause = function(){
+	this._running = false;
+};
+Parser.prototype.resume = function(){
+	this._running = true;
 };
 
 Parser.prototype.end = function(chunk){
@@ -534,30 +541,25 @@ Parser.prototype.end = function(chunk){
 		this._emitIfToken("cdata");
 	} else if(this._state === IN_COMMENT || this._state === COMMENT_END_1 || this._state === COMMENT_END_2){
 		this._emitIfToken("comment");
+	} else if(this._state === IN_TAG_NAME){
+		this._emitIfToken("opentagname");
+	} else if(this._state === IN_CLOSING_TAG_NAME){
+		this._emitIfToken("closetag");
 	} else {
 		this._emitIfToken("text");
 	}
 
-	WritableStream.prototype.end.apply(this, arguments);
+	this._cbs.onend();
 };
 
 Parser.prototype._emitToken = function(name){
-	this.emit(name, this._buffer.toString("utf8", this._sectionStart, this._index));
+	this._cbs["on" + name](this._buffer.toString("utf8", this._sectionStart, this._index));
 	this._sectionStart = -1;
 };
 
 Parser.prototype._emitIfToken = function(name){
 	if(this._index > this._sectionStart){
-		this.emit(name, this._buffer.toString("utf8", this._sectionStart, this._index));
+		this._cbs["on" + name](this._buffer.toString("utf8", this._sectionStart, this._index));
 	}
 	this._sectionStart = -1;
-};
-
-/*
-//overwritten for better debuggability
-Parser.prototype.emit = function(){
-	process.stdout.write("[" + this._state + "]\t");
-	console.log.apply(null, [].map.call(arguments, Function.prototype.call, String.prototype.trim));
-	WritableStream.prototype.emit.apply(this, arguments);
-};
-*/
\ No newline at end of file
+};
\ No newline at end of file

From 09b88334fe2b84561c4484739318980bc1a556ed Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 31 Mar 2013 18:20:35 +0200
Subject: [PATCH 294/450] [tests/events] concat text events

---
 tests/01-events.js | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/tests/01-events.js b/tests/01-events.js
index 01ffe4e..73204be 100644
--- a/tests/01-events.js
+++ b/tests/01-events.js
@@ -10,7 +10,7 @@ exports.test = function(test, cb){
 			if(name === "onend"){
 				return function(){
 					cb(null, tokens.splice(0));
-				}
+				};
 			}
 			if(name === "onreset") return function(){};
 			return function(){
@@ -18,7 +18,7 @@ exports.test = function(test, cb){
 					event: name.substr(2),
 					data: sliceArr.apply(arguments)
 				});
-			}
+			};
 		}});
 	}
 	else{
@@ -30,11 +30,15 @@ exports.test = function(test, cb){
 		};
 		helper.EVENTS.forEach(function(name){
 			cbs["on" + name] = function(){
+				if(name === "text" && tokens.length && tokens[tokens.length-1].event === "text"){
+					tokens[tokens.length-1].data[0] += arguments[0];
+					return;
+				}
 				tokens.push({
 					event: name,
 					data: sliceArr.apply(arguments)
 				});
-			}
+			};
 		});
 	}
 	helper.writeToParser(cbs, test.options.parser, test.html);

From 00d63cfd802d92dd755221bec1b23821829f9f41 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 31 Mar 2013 18:21:47 +0200
Subject: [PATCH 295/450] [tests/events] fixed order of attribute/opentag
 events, merged text events

---
 tests/Events/01-simple.json              | 14 +++---
 tests/Events/02-template.json            | 32 +++-----------
 tests/Events/03-lowercase_tags.json      | 14 +++---
 tests/Events/05-cdata-special.json       | 56 +-----------------------
 tests/Events/07-self-closing.json        | 14 +++---
 tests/Events/08-implicit-close-tags.json |  6 +--
 tests/Events/09-attributes.json          | 24 +++++-----
 7 files changed, 44 insertions(+), 116 deletions(-)

diff --git a/tests/Events/01-simple.json b/tests/Events/01-simple.json
index ff469e7..ab3076a 100644
--- a/tests/Events/01-simple.json
+++ b/tests/Events/01-simple.json
@@ -12,6 +12,13 @@
         "h1"
       ]
     },
+    {
+      "event": "attribute",
+      "data": [
+        "class",
+        "test"
+      ]
+    },
     {
       "event": "opentag",
       "data": [
@@ -21,13 +28,6 @@
         }
       ]
     },
-    {
-      "event": "attribute",
-      "data": [
-        "class",
-        "test"
-      ]
-    },
     {
       "event": "text",
       "data": [
diff --git a/tests/Events/02-template.json b/tests/Events/02-template.json
index 37348da..d3183d4 100644
--- a/tests/Events/02-template.json
+++ b/tests/Events/02-template.json
@@ -4,7 +4,7 @@
     "handler": {},
     "parser": {}
   },
-  "html": "<script type=\"text/template\"><h1>Heading1</h1></script>",
+  "html": "<script type=\"text/template\"></script>",
   "expected": [
     {
       "event": "opentagname",
@@ -12,15 +12,6 @@
         "script"
       ]
     },
-    {
-      "event": "opentag",
-      "data": [
-        "script",
-        {
-          "type": "text/template"
-        }
-      ]
-    },
     {
       "event": "attribute",
       "data": [
@@ -29,27 +20,18 @@
       ]
     },
     {
-      "event": "text",
-      "data": [
-        "<h1"
-      ]
-    },
-    {
-      "event": "text",
-      "data": [
-        ">Heading1"
-      ]
-    },
-    {
-      "event": "text",
+      "event": "opentag",
       "data": [
-        "</h1"
+        "script",
+        {
+          "type": "text/template"
+        }
       ]
     },
     {
       "event": "text",
       "data": [
-        ">"
+        "<h1>Heading1</h1>"
       ]
     },
     {
diff --git a/tests/Events/03-lowercase_tags.json b/tests/Events/03-lowercase_tags.json
index 56763ad..9b58c59 100644
--- a/tests/Events/03-lowercase_tags.json
+++ b/tests/Events/03-lowercase_tags.json
@@ -14,6 +14,13 @@
         "h1"
       ]
     },
+    {
+      "event": "attribute",
+      "data": [
+        "class",
+        "test"
+      ]
+    },
     {
       "event": "opentag",
       "data": [
@@ -23,13 +30,6 @@
         }
       ]
     },
-    {
-      "event": "attribute",
-      "data": [
-        "class",
-        "test"
-      ]
-    },
     {
       "event": "text",
       "data": [
diff --git a/tests/Events/05-cdata-special.json b/tests/Events/05-cdata-special.json
index d8b1bc9..686cb1a 100644
--- a/tests/Events/05-cdata-special.json
+++ b/tests/Events/05-cdata-special.json
@@ -22,61 +22,7 @@
     {
       "event": "text",
       "data": [
-        "/*"
-      ]
-    },
-    {
-      "event": "text",
-      "data": [
-        "<![CDATA[*/ asdf "
-      ]
-    },
-    {
-      "event": "text",
-      "data": [
-        ">"
-      ]
-    },
-    {
-      "event": "text",
-      "data": [
-        "<asdf"
-      ]
-    },
-    {
-      "event": "text",
-      "data": [
-        ">"
-      ]
-    },
-    {
-      "event": "text",
-      "data": [
-        "</adsf"
-      ]
-    },
-    {
-      "event": "text",
-      "data": [
-        ">"
-      ]
-    },
-    {
-      "event": "text",
-      "data": [
-        "<"
-      ]
-    },
-    {
-      "event": "text",
-      "data": [
-        "> fo/*]]"
-      ]
-    },
-    {
-      "event": "text",
-      "data": [
-        ">*/"
+        "/*<![CDATA[*/ asdf ><asdf></adsf><> fo/*]]>*/"
       ]
     },
     {
diff --git a/tests/Events/07-self-closing.json b/tests/Events/07-self-closing.json
index ae5c22e..f8903aa 100644
--- a/tests/Events/07-self-closing.json
+++ b/tests/Events/07-self-closing.json
@@ -16,6 +16,13 @@
 				"a"
 			]
 		},
+		{
+			"event": "attribute",
+			"data": [
+				"href",
+				"http://test.com/"
+			]
+		},
 		{
 			"event": "opentag",
 			"data": [
@@ -25,13 +32,6 @@
 				}
 			]
 		},
-		{
-			"event": "attribute",
-			"data": [
-				"href",
-				"http://test.com/"
-			]
-		},
 		{
 			"event": "text",
 			"data": [
diff --git a/tests/Events/08-implicit-close-tags.json b/tests/Events/08-implicit-close-tags.json
index 0086f35..3441f20 100644
--- a/tests/Events/08-implicit-close-tags.json
+++ b/tests/Events/08-implicit-close-tags.json
@@ -6,18 +6,18 @@
     { "event": "opentagname", "data": [ "ol" ] },
     { "event": "opentag", "data": [ "ol", {} ] },
     { "event": "opentagname", "data": [ "li" ] },
-    { "event": "opentag", "data": [ "li", { "class": "test" } ] },
     { "event": "attribute", "data": [ "class", "test" ] },
+    { "event": "opentag", "data": [ "li", { "class": "test" } ] },
     { "event": "opentagname", "data": [ "div" ] },
     { "event": "opentag", "data": [ "div", {} ] },
     { "event": "opentagname", "data": [ "table" ] },
-    { "event": "opentag", "data": [ "table", { "style": "width:100%" } ] },
     { "event": "attribute", "data": [ "style", "width:100%" ] },
+    { "event": "opentag", "data": [ "table", { "style": "width:100%" } ] },
     { "event": "opentagname", "data": [ "tr" ] },
     { "event": "opentag", "data": [ "tr", {} ] },
     { "event": "opentagname", "data": [ "td" ] },
-    { "event": "opentag", "data": [ "td", { "colspan": "2" } ] },
     { "event": "attribute", "data": [ "colspan", "2" ] },
+    { "event": "opentag", "data": [ "td", { "colspan": "2" } ] },
     { "event": "opentagname", "data": [ "h3" ] },
     { "event": "opentag", "data": [ "h3", {} ] },
     { "event": "text", "data": [ "Heading" ] },
diff --git a/tests/Events/09-attributes.json b/tests/Events/09-attributes.json
index 8bd024b..afa6e4a 100644
--- a/tests/Events/09-attributes.json
+++ b/tests/Events/09-attributes.json
@@ -12,18 +12,6 @@
         "button"
       ]
     },
-    {
-      "event": "opentag",
-      "data": [
-        "button",
-        {
-          "class": "test0",
-          "title": "test1",
-          "disabled": "",
-          "value": "test2"
-        }
-      ]
-    },
     {
       "event": "attribute",
       "data": [
@@ -52,6 +40,18 @@
         "test2"
       ]
     },
+    {
+      "event": "opentag",
+      "data": [
+        "button",
+        {
+          "class": "test0",
+          "title": "test1",
+          "disabled": "",
+          "value": "test2"
+        }
+      ]
+    },
     {
       "event": "text",
       "data": [

From 643a7f01e58983520260932ee8fb97d4c7fc4442 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 31 Mar 2013 21:21:26 +0200
Subject: [PATCH 296/450] [tokenizer] use strings instead of buffers

has a huge impact on speed
---
 lib/Tokenizer.js | 172 ++++++++++++++++++++++-------------------------
 1 file changed, 82 insertions(+), 90 deletions(-)

diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
index d599c78..753a2da 100644
--- a/lib/Tokenizer.js
+++ b/lib/Tokenizer.js
@@ -66,17 +66,14 @@ var i = 0,
     STYLE_END_3 = i++, //L
     STYLE_END_4 = i++; //E
 
-function code(c){
-	return c.charCodeAt(0);
-}
 
 function whitespace(c){
-	return c === code(" ") || c === code("\t") || c === code("\r") || c === code("\n");
+	return c === " " || c === "\t" || c === "\r" || c === "\n";
 }
 
 function Parser(options, cbs){
 	this._state = TEXT;
-	this._buffer = null;
+	this._buffer = "";
 	this._sectionStart = 0;
 	this._index = 0;
 	this._options = options;
@@ -85,43 +82,34 @@ function Parser(options, cbs){
 	this._running = true;
 }
 
-Parser.prototype.write = function(chunk){
-	if(typeof chunk === "string") chunk = new Buffer(chunk);
-	else if(!Buffer.isBuffer(chunk)) throw new Error("chunks must be buffers or string!");
-
-	if(this._buffer === null) this._buffer = chunk;
-	else this._buffer = Buffer.concat([this._buffer, chunk]);
-
-	this._processData();
-};
-
 //TODO make events conditional
+Parser.prototype.write = function(chunk){
+	this._buffer += chunk;
 
-Parser.prototype._processData = function(){
 	while(this._index < this._buffer.length && this._running){
-		var c = this._buffer[this._index];
+		var c = this._buffer.charAt(this._index);
 		if(this._state === TEXT){
-			if(c === code("<")){
+			if(c === "<"){
 				this._emitIfToken("text");
 				this._state = TAG_START;
 				this._sectionStart = this._index;
 			}
 		} else if(this._state === TAG_START){
-			if(c === code("/")){
+			if(c === "/"){
 				this._state = CLOSING_TAG_START;
-			} else if(c === code(">") || this._special > 0) {
+			} else if(c === ">" || this._special > 0) {
 				this._state = TEXT;
 			} else {
 				if(whitespace(c));
-				else if(c === code("!")){
+				else if(c === "!"){
 					this._state = DECLARATION_START;
 					this._sectionStart = this._index + 1;
-				} else if(c === code("?")){
+				} else if(c === "?"){
 					this._state = IN_PROCESSING_INSTRUCTION;
 					this._sectionStart = this._index + 1;
 				} else if(
 					(!this._options || !this._options.xmlMode) &&
-					(c === code("s") || c === code("S"))
+					(c === "s" || c === "S")
 				){
 					this._state = SPECIAL_START;
 					this._sectionStart = this._index;
@@ -131,12 +119,12 @@ Parser.prototype._processData = function(){
 				}
 			}
 		} else if(this._state === IN_TAG_NAME){
-			if(c === code("/")){
+			if(c === "/"){
 				this._emitToken("opentagname");
 				this._cbs.onopentagend();
 				this._cbs.onselfclosingtag();
 				this._state = AFTER_CLOSING_TAG_NAME;
-			} else if(c === code(">")){
+			} else if(c === ">"){
 				this._emitToken("opentagname");
 				this._cbs.onopentagend();
 				this._state = TEXT;
@@ -147,10 +135,10 @@ Parser.prototype._processData = function(){
 			}
 		} else if(this._state === CLOSING_TAG_START){
 			if(whitespace(c));
-			else if(c === code(">")){
+			else if(c === ">"){
 				this._state = TEXT;
 			} else if(this._special > 0){
-				if(c === code("s") || c === code("S")){
+				if(c === "s" || c === "S"){
 					this._state = SPECIAL_END;
 				}
 			} else {
@@ -158,7 +146,7 @@ Parser.prototype._processData = function(){
 				this._sectionStart = this._index;
 			}
 		} else if(this._state === IN_CLOSING_TAG_NAME){
-			if(c === code(">")){
+			if(c === ">"){
 				this._emitToken("closetag");
 				this._state = TEXT;
 				this._sectionStart = this._index + 1;
@@ -168,7 +156,7 @@ Parser.prototype._processData = function(){
 			}
 		} else if(this._state === AFTER_CLOSING_TAG_NAME){
 			//skip everything until ">"
-			if(c === code(">")){
+			if(c === ">"){
 				this._state = TEXT;
 				this._sectionStart = this._index + 1;
 			}
@@ -178,11 +166,11 @@ Parser.prototype._processData = function(){
 		*	attributes
 		*/
 		else if(this._state === BEFORE_ATTRIBUTE_NAME){
-			if(c === code("/")){
+			if(c === "/"){
 				this._cbs.onopentagend();
 				this._cbs.onselfclosingtag();
 				this._state = AFTER_CLOSING_TAG_NAME;
-			} else if(c === code(">")){
+			} else if(c === ">"){
 				this._state = TEXT;
 				this._cbs.onopentagend();
 				this._sectionStart = this._index + 1;
@@ -191,15 +179,15 @@ Parser.prototype._processData = function(){
 				this._sectionStart = this._index;
 			}
 		} else if(this._state === IN_ATTRIBUTE_NAME){
-			if(c === code("=")){
+			if(c === "="){
 				this._emitIfToken("attribname");
 				this._state = BEFORE_ATTRIBUTE_VALUE;
-			} else if(c === code("/")){
+			} else if(c === "/"){
 				this._emitIfToken("attribname");
 				this._cbs.onopentagend();
 				this._cbs.onselfclosingtag();
 				this._state = AFTER_CLOSING_TAG_NAME;
-			} else if(c === code(">")){
+			} else if(c === ">"){
 				this._emitIfToken("attribname");
 				this._state = TEXT;
 				this._cbs.onopentagend();
@@ -209,13 +197,13 @@ Parser.prototype._processData = function(){
 				this._state = AFTER_ATTRIBUTE_NAME;
 			}
 		} else if(this._state === AFTER_ATTRIBUTE_NAME){
-			if(c === code("=")){
+			if(c === "="){
 				this._state = BEFORE_ATTRIBUTE_VALUE;
-			} else if(c === code("/")){
+			} else if(c === "/"){
 				this._cbs.onopentagend();
 				this._cbs.onselfclosingtag();
 				this._state = AFTER_CLOSING_TAG_NAME;
-			} else if(c === code(">")){
+			} else if(c === ">"){
 				this._state = TEXT;
 				this._cbs.onopentagend();
 				this._sectionStart = this._index + 1;
@@ -224,10 +212,10 @@ Parser.prototype._processData = function(){
 				this._sectionStart = this._index;
 			}
 		} else if(this._state === BEFORE_ATTRIBUTE_VALUE){
-			if(c === code("\"")){
+			if(c === "\""){
 				this._state = IN_ATTRIBUTE_VALUE_DOUBLE_QUOTES;
 				this._sectionStart = this._index + 1;
-			} else if(c === code("'")){
+			} else if(c === "'"){
 				this._state = IN_ATTRIBUTE_VALUE_SINGLE_QUOTES;
 				this._sectionStart = this._index + 1;
 			} else if(!whitespace(c)){
@@ -235,17 +223,17 @@ Parser.prototype._processData = function(){
 				this._sectionStart = this._index;
 			}
 		} else if(this._state === IN_ATTRIBUTE_VALUE_DOUBLE_QUOTES){
-			if(c === code("\"")){
+			if(c === "\""){
 				this._emitToken("attribvalue");
 				this._state = BEFORE_ATTRIBUTE_NAME;
 			}
 		} else if(this._state === IN_ATTRIBUTE_VALUE_SINGLE_QUOTES){
-			if(c === code("'")){
+			if(c === "'"){
 				this._emitToken("attribvalue");
 				this._state = BEFORE_ATTRIBUTE_NAME;	
 			}
 		} else if(this._state === IN_ATTRIBUTE_VALUE_NO_QUOTES){
-			if(c === code(">")){
+			if(c === ">"){
 				this._emitToken("attribvalue");
 				this._state = TEXT;
 				this._cbs.onopentagend();
@@ -260,11 +248,11 @@ Parser.prototype._processData = function(){
 		*	declarations
 		*/
 		else if(this._state === DECLARATION_START){
-			if(c === code("[")) this._state = CDATA_1;
-			else if(c === code("-")) this._state = BEFORE_COMMENT;
+			if(c === "[") this._state = CDATA_1;
+			else if(c === "-") this._state = BEFORE_COMMENT;
 			else this._state = IN_DECLARATION;
 		} else if(this._state === IN_DECLARATION){
-			if(c === code(">")){
+			if(c === ">"){
 				this._emitToken("declaration");
 				this._state = TEXT;
 				this._sectionStart = this._index + 1;
@@ -275,7 +263,7 @@ Parser.prototype._processData = function(){
 		*	processing instructions
 		*/
 		else if(this._state === IN_PROCESSING_INSTRUCTION){
-			if(c === code(">")){
+			if(c === ">"){
 				this._emitToken("processinginstruction");
 				this._state = TEXT;
 				this._sectionStart = this._index + 1;
@@ -286,21 +274,21 @@ Parser.prototype._processData = function(){
 		*	comments
 		*/
 		else if(this._state === BEFORE_COMMENT){
-			if(c === code("-")){
+			if(c === "-"){
 				this._state = IN_COMMENT;
 				this._sectionStart = this._index + 1;
 			} else {
 				this._state = IN_DECLARATION;
 			}
 		} else if(this._state === IN_COMMENT){
-			if(c === code("-")) this._state = COMMENT_END_1;
+			if(c === "-") this._state = COMMENT_END_1;
 		} else if(this._state === COMMENT_END_1){
-			if(c === code("-")) this._state = COMMENT_END_2;
+			if(c === "-") this._state = COMMENT_END_2;
 			else this._state = IN_COMMENT;
 		} else if(this._state === COMMENT_END_2){
-			if(c === code(">")){
+			if(c === ">"){
 				//remove 2 trailing chars
-				this._cbs.oncomment(this._buffer.toString("utf8", this._sectionStart, this._index - 2));
+				this._cbs.oncomment(this._buffer.substring(this._sectionStart, this._index - 2));
 				this._state = TEXT;
 				this._sectionStart = this._index + 1;
 			} else {
@@ -312,36 +300,36 @@ Parser.prototype._processData = function(){
 		*	cdata
 		*/
 		else if(this._state === CDATA_1){
-			if(c === code("C")) this._state = CDATA_2;
+			if(c === "C") this._state = CDATA_2;
 			else this._state = IN_DECLARATION;
 		} else if(this._state === CDATA_2){
-			if(c === code("D")) this._state = CDATA_3;
+			if(c === "D") this._state = CDATA_3;
 			else this._state = IN_DECLARATION;
 		} else if(this._state === CDATA_3){
-			if(c === code("A")) this._state = CDATA_4;
+			if(c === "A") this._state = CDATA_4;
 			else this._state = IN_DECLARATION;
 		} else if(this._state === CDATA_4){
-			if(c === code("T")) this._state = CDATA_5;
+			if(c === "T") this._state = CDATA_5;
 			else this._state = IN_DECLARATION;
 		} else if(this._state === CDATA_5){
-			if(c === code("A")) this._state = CDATA_6;
+			if(c === "A") this._state = CDATA_6;
 			else this._state = IN_DECLARATION;
 		} else if(this._state === CDATA_6){
-			if(c === code("[")){
+			if(c === "["){
 				this._state = IN_CDATA;
 				this._sectionStart = this._index + 1;
 			} else {
 				this._state = IN_DECLARATION;
 			}
 		} else if(this._state === IN_CDATA){
-			if(c === code("]")) this._state = CDATA_END_1;
+			if(c === "]") this._state = CDATA_END_1;
 		} else if(this._state === CDATA_END_1){
-			if(c === code("]")) this._state = CDATA_END_2;
+			if(c === "]") this._state = CDATA_END_2;
 			else this._state = IN_CDATA;
 		} else if(this._state === CDATA_END_2){
-			if(c === code(">")){
+			if(c === ">"){
 				//remove 2 trailing chars
-				this._cbs.oncdata(this._buffer.toString("utf8", this._sectionStart, this._index - 2));
+				this._cbs.oncdata(this._buffer.substring(this._sectionStart, this._index - 2));
 				this._state = TEXT;
 				this._sectionStart = this._index + 1;
 			} else {
@@ -353,18 +341,18 @@ Parser.prototype._processData = function(){
 		* special tags
 		*/
 		else if(this._state === SPECIAL_START){
-			if(c === code("c") || c === code("C")){
+			if(c === "c" || c === "C"){
 				this._state = SCRIPT_1;
-			} else if(c === code("t") || c === code("T")){
+			} else if(c === "t" || c === "T"){
 				this._state = STYLE_1;
 			} else {
 				this._state = IN_TAG_NAME;
 				this._index--; //consume the token again
 			}
 		} else if(this._state === SPECIAL_END){
-			if(this._special === 1 && (c === code("c") || c === code("C"))){
+			if(this._special === 1 && (c === "c" || c === "C")){
 				this._state = SCRIPT_END_1;
-			} else if(this._special === 2 && (c === code("t") || c === code("T"))){
+			} else if(this._special === 2 && (c === "t" || c === "T")){
 				this._state = STYLE_END_1;
 			} 
 			else this._state = TEXT;
@@ -374,35 +362,35 @@ Parser.prototype._processData = function(){
 		* script
 		*/
 		else if(this._state === SCRIPT_1){
-			if(c === code("r") || c === code("R")){
+			if(c === "r" || c === "R"){
 				this._state = SCRIPT_2;
 			} else {
 				this._state = IN_TAG_NAME;
 				this._index--; //consume the token again
 			}
 		} else if(this._state === SCRIPT_2){
-			if(c === code("i") || c === code("I")){
+			if(c === "i" || c === "I"){
 				this._state = SCRIPT_3;
 			} else {
 				this._state = IN_TAG_NAME;
 				this._index--; //consume the token again
 			}
 		} else if(this._state === SCRIPT_3){
-			if(c === code("p") || c === code("P")){
+			if(c === "p" || c === "P"){
 				this._state = SCRIPT_4;
 			} else {
 				this._state = IN_TAG_NAME;
 				this._index--; //consume the token again
 			}
 		} else if(this._state === SCRIPT_4){
-			if(c === code("t") || c === code("T")){
+			if(c === "t" || c === "T"){
 				this._state = SCRIPT_5;
 			} else {
 				this._state = IN_TAG_NAME;
 				this._index--; //consume the token again
 			}
 		} else if(this._state === SCRIPT_5){
-			if(c === code("/") || c === code(">") || whitespace(c)){
+			if(c === "/" || c === ">" || whitespace(c)){
 				this._special = 1;
 			}
 			this._state = IN_TAG_NAME;
@@ -410,27 +398,27 @@ Parser.prototype._processData = function(){
 		}
 
 		else if(this._state === SCRIPT_END_1){
-			if(c === code("r") || c === code("R")){
+			if(c === "r" || c === "R"){
 				this._state = SCRIPT_END_2;
 			} 
 			else this._state = TEXT;
 		} else if(this._state === SCRIPT_END_2){
-			if(c === code("i") || c === code("I")){
+			if(c === "i" || c === "I"){
 				this._state = SCRIPT_END_3;
 			} 
 			else this._state = TEXT;
 		} else if(this._state === SCRIPT_END_3){
-			if(c === code("p") || c === code("P")){
+			if(c === "p" || c === "P"){
 				this._state = SCRIPT_END_4;
 			} 
 			else this._state = TEXT;
 		} else if(this._state === SCRIPT_END_4){
-			if(c === code("t") || c === code("T")){
+			if(c === "t" || c === "T"){
 				this._state = SCRIPT_END_5;
 			} 
 			else this._state = TEXT;
 		} else if(this._state === SCRIPT_END_5){
-			if(c === code(">") || whitespace(c)){
+			if(c === ">" || whitespace(c)){
 				this._state = IN_CLOSING_TAG_NAME;
 				this._sectionStart = this._index - 6;
 				this._index--; //reconsume the token
@@ -442,28 +430,28 @@ Parser.prototype._processData = function(){
 		* style
 		*/
 		else if(this._state === STYLE_1){
-			if(c === code("y") || c === code("Y")){
+			if(c === "y" || c === "Y"){
 				this._state = STYLE_2;
 			} else {
 				this._state = IN_TAG_NAME;
 				this._index--; //consume the token again
 			}
 		} else if(this._state === STYLE_2){
-			if(c === code("l") || c === code("L")){
+			if(c === "l" || c === "L"){
 				this._state = STYLE_3;
 			} else {
 				this._state = IN_TAG_NAME;
 				this._index--; //consume the token again
 			}
 		} else if(this._state === STYLE_3){
-			if(c === code("e") || c === code("E")){
+			if(c === "e" || c === "E"){
 				this._state = STYLE_4;
 			} else {
 				this._state = IN_TAG_NAME;
 				this._index--; //consume the token again
 			}
 		} else if(this._state === STYLE_4){
-			if(c === code("/") || c === code(">") || whitespace(c)){
+			if(c === "/" || c === ">" || whitespace(c)){
 				this._special = 2;
 			}
 			this._state = IN_TAG_NAME;
@@ -471,22 +459,22 @@ Parser.prototype._processData = function(){
 		}
 
 		else if(this._state === STYLE_END_1){
-			if(c === code("y") || c === code("Y")){
+			if(c === "y" || c === "Y"){
 				this._state = STYLE_END_2;
 			} 
 			else this._state = TEXT;
 		} else if(this._state === STYLE_END_2){
-			if(c === code("l") || c === code("L")){
+			if(c === "l" || c === "L"){
 				this._state = STYLE_END_3;
 			} 
 			else this._state = TEXT;
 		} else if(this._state === STYLE_END_3){
-			if(c === code("e") || c === code("E")){
+			if(c === "e" || c === "E"){
 				this._state = STYLE_END_4;
 			} 
 			else this._state = TEXT;
 		} else if(this._state === STYLE_END_4){
-			if(c === code(">") || whitespace(c)){
+			if(c === ">" || whitespace(c)){
 				this._state = IN_CLOSING_TAG_NAME;
 				this._sectionStart = this._index - 5;
 				this._index--; //reconsume the token
@@ -504,20 +492,20 @@ Parser.prototype._processData = function(){
 
 	//cleanup
 	if(this._sectionStart === -1){
-		this._buffer = null;
+		this._buffer = "";
 		this._index = 0;
 	} else {
 		if(this._state === TEXT){
 			this._emitIfToken("text");
-			this._buffer = null;
+			this._buffer = "";
 			this._index = 0;
 		} else if(this._sectionStart === this._index){
 			//the section just started
-			this._buffer = null;
+			this._buffer = "";
 			this._index = 0;
 		} else if(this._sectionStart > 0){
 			//remove everything unnecessary
-			this._buffer = this._buffer.slice(this._sectionStart);
+			this._buffer = this._buffer.substr(this._sectionStart);
 			this._index -= this._sectionStart;
 		}
 
@@ -536,7 +524,7 @@ Parser.prototype.end = function(chunk){
 	if(chunk) this.write(chunk);
 
 	//if there is remaining data, emit it in a reasonable way
-	if(this._buffer === null || this._sectionStart === -1 || this._sectionStart === this._index - 1);
+	if(this._buffer === "" || this._sectionStart === -1 || this._sectionStart === this._index);
 	else if(this._state === IN_CDATA || this._state === CDATA_END_1 || this._state === CDATA_END_2){
 		this._emitIfToken("cdata");
 	} else if(this._state === IN_COMMENT || this._state === COMMENT_END_1 || this._state === COMMENT_END_2){
@@ -552,14 +540,18 @@ Parser.prototype.end = function(chunk){
 	this._cbs.onend();
 };
 
+Parser.prototype.reset = function(){
+	Parser.call(this, this._options, this._cbs);
+};
+
 Parser.prototype._emitToken = function(name){
-	this._cbs["on" + name](this._buffer.toString("utf8", this._sectionStart, this._index));
+	this._cbs["on" + name](this._buffer.substring(this._sectionStart, this._index));
 	this._sectionStart = -1;
 };
 
 Parser.prototype._emitIfToken = function(name){
 	if(this._index > this._sectionStart){
-		this._cbs["on" + name](this._buffer.toString("utf8", this._sectionStart, this._index));
+		this._cbs["on" + name](this._buffer.substring(this._sectionStart, this._index));
 	}
 	this._sectionStart = -1;
 };
\ No newline at end of file

From b837b959b7d21bfc408cf3d712e876bfe56fbdec Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 31 Mar 2013 21:23:10 +0200
Subject: [PATCH 297/450] [parser] don't implement stream.Writable, use new
 tokenizer interface

---
 lib/Parser.js | 230 ++++++++++++++++++++++++++------------------------
 1 file changed, 119 insertions(+), 111 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index bd2f45c..851bf69 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -1,5 +1,4 @@
-var Tokenizer = require("./Tokenizer.js"),
-    WritableStream = require("stream").Writable || require("readable-stream").Writable;
+var Tokenizer = require("./Tokenizer.js");
 
 var defaultOpts = {
 	xmlMode: false, //Special behavior for script/style tags by default
@@ -77,124 +76,129 @@ function Parser(cbs, options){
 	if(!cbs) cbs = defaultCbs;
 	this._options = options;
 	this._cbs = cbs;
-	this._tokenizer = new Tokenizer(options);
-
-	WritableStream.call(this, options);
-
-	var that = this,
-	    tagname = "",
-	    attribname = "",
-	    attribs = null,
-	    stack = [];
-
-	function closeTag(name){
-		if(options.lowerCaseTags) name = name.toLowerCase();
-		if(stack && (!(name in emptyTags) || options.xmlMode)){
-			var pos = stack.lastIndexOf(name);
-			if(pos !== -1)
-				if(cbs.onclosetag){
-					pos = stack.length - pos;
-					while(pos--) cbs.onclosetag(stack.pop());
-				}
-				else stack.splice(pos);
-		}
+
+	this._tagname = "";
+	this._attribname = "";
+	this._attribs = null;
+	this._stack = [];
+
+	this._tokenizer = new Tokenizer(options, this);
+}
+
+require("util").inherits(Parser, require("events").EventEmitter);
+
+//Tokenizer event handlers
+Parser.prototype.ontext = function(data){
+	if(this._cbs.ontext) this._cbs.ontext(data);
+};
+
+Parser.prototype.onopentagname = function(name){
+	if(this._options.lowerCaseTags) name = name.toLowerCase();
+	this._tagname = name;
+	this._stack.push(name);
+
+	if (!this._options.xmlMode && name in openImpliesClose) {
+		for(
+			var el;
+			(el = this._stack[this._stack.length-1]) in openImpliesClose[name];
+			this.onclosetag(el)
+		);
 	}
+	if(this._cbs.onopentagname) this._cbs.onopentagname(name);
+	if(this._cbs.onopentag) this._attribs = {};
+};
 
-	function attribValue(value){
-		if(cbs.onattribute) cbs.onattribute(attribname, value);
-		if(attribs) attribs[attribname] = value;
-		attribname = "";
+Parser.prototype.onopentagend = function(){
+	if(this._tagname !== ""){
+		if(this._attribname !== "") this.onattribvalue("");
+		if(this._attribs){
+			if(this._cbs.onopentag) this._cbs.onopentag(this._tagname, this._attribs);
+			this._attribs = null;
+		}
+		this._tagname = "";
 	}
+};
 
-	this._tokenizer
-		.on("text", function(data){
-			if(cbs.ontext) cbs.ontext(data);
-		})
-		.on("opentagname", function(name){
-			if(options.lowerCaseTags) name = name.toLowerCase();
-			tagname = name;
-
-			if (!options.xmlMode && name in openImpliesClose) {
-				for(
-					var el;
-					(el = stack[stack.length-1]) in openImpliesClose[name];
-					closeTag(el)
-				);
+Parser.prototype.onclosetag = function closeTag(name){
+	if(this._options.lowerCaseTags) name = name.toLowerCase();
+	if(this._stack.length && (!(name in emptyTags) || this._options.xmlMode)){
+		var pos = this._stack.lastIndexOf(name);
+		if(pos !== -1){
+			if(this._cbs.onclosetag){
+				pos = this._stack.length - pos;
+				while(pos--) this._cbs.onclosetag(this._stack.pop());
 			}
-			if(cbs.onopentagname) cbs.onopentagname(name);
-			if(cbs.onopentag) attribs = {};
-		})
-		.on("opentagend", function(){
-			if(tagname !== ""){
-				if(attribname !== "") attribValue("");
-				if(attribs){
-					if(cbs.onopentag) cbs.onopentag(tagname, attribs);
-					attribs = null;
-				}
-				tagname = "";
-			}
-		})
-		.on("closetag", closeTag)
-		.on("selfclosingtag", function(){
-			closeTag(tagname);
-		})
-		.on("attribname", function(name){
-			if(attribname !== "") attribValue("");
-			if(options.lowerCaseAttributeNames) name = name.toLowerCase;
-			attribname = name;
-		})
-		.on("attribvalue", attribValue)
-		.on("declaration", function(value){
-			if(cbs.onprocessinginstruction){
-				cbs.onprocessinginstruction("!" + value.split(/\s|\//, 1)[0], "!" + value);
-			}
-		})
-		.on("processinginstruction", function(value){
-			if(cbs.onprocessinginstruction){
-				cbs.onprocessinginstruction("?" + value.split(/\s|\//, 1)[0], "?" + value);
-			}
-		})
-		.on("comment", function(value){
-			if(cbs.oncomment) cbs.oncomment(value);
-			if(cbs.oncommentend) cbs.oncommentend();
-		})
-		.on("cdata", function(value){
-			if(cbs.oncdatastart) cbs.oncdatastart();
-			if(cbs.ontext) cbs.ontext(value);
-			if(cbs.oncdataend) cbs.oncdataend();
-		})
-		.on("error", function(err){
-			if(cbs.onerror) cbs.onerror(err);
-			else that.emit("error", err);
-		})
-		.once("finish", function(){
-			if(cbs.onclosetag){
-				for(
-					var i = stack.length;
-					i > 0;
-					cbs.onclosetag(stack[--i])
-				);
-			}
-			if(cbs.onend) cbs.onend();
-		})
-		;
+			else this._stack.splice(pos);
+		}
+	}
+};
 
-	this.once("finish", function(){
-		that._tokenizer.end();
-	});
-}
+Parser.prototype.onselfclosingtag = function(){
+	if(this._cbs.onclosetag && this._stack[this._stack.length-1] === this._tagname){
+		this._cbs.onclosetag(this._stack.pop());
+	}
+};
+
+Parser.prototype.onattribname = function(name){
+	if(this._attribname !== "") this.onattribvalue("");
+	if(this._options.lowerCaseAttributeNames) name = name.toLowerCase();
+	this._attribname = name;
+};
+
+Parser.prototype.onattribvalue = function attribValue(value){
+	if(this._cbs.onattribute) this._cbs.onattribute(this._attribname, value);
+	if(this._attribs) this._attribs[this._attribname] = value;
+	this._attribname = "";
+};
+
+Parser.prototype.ondeclaration = function(value){
+	if(this._cbs.onprocessinginstruction){
+		this._cbs.onprocessinginstruction("!" + value.split(/\s|\//, 1)[0], "!" + value);
+	}
+};
+
+Parser.prototype.onprocessinginstruction = function(value){
+	if(this._cbs.onprocessinginstruction){
+		this._cbs.onprocessinginstruction("?" + value.split(/\s|\//, 1)[0], "?" + value);
+	}
+};
+
+Parser.prototype.oncomment = function(value){
+	if(this._cbs.oncomment) this._cbs.oncomment(value);
+	if(this._cbs.oncommentend) this._cbs.oncommentend();
+};
+
+Parser.prototype.oncdata = function(value){
+	if(this._cbs.oncdatastart) this._cbs.oncdatastart();
+	if(this._cbs.ontext) this._cbs.ontext(value);
+	if(this._cbs.oncdataend) this._cbs.oncdataend();
+};
+
+Parser.prototype.onerror = function(err){
+	if(this._cbs.onerror) this._cbs.onerror(err);
+};
+
+Parser.prototype.onend = function(){
+	if(this._cbs.onclosetag){
+		for(
+			var i = this._stack.length;
+			i > 0;
+			this._cbs.onclosetag(this._stack[--i])
+		);
+	}
+	if(this._cbs.onend) this._cbs.onend();
+};
 
-require("util").inherits(Parser, WritableStream);
 
 //Resets the parser to a blank state, ready to parse a new HTML document
 Parser.prototype.reset = function(){
-	this._tokenizer.removeAllListeners();
 	if(this._cbs.onreset) this._cbs.onreset();
-	//TODO there needs to be a better way
-	var p = new Parser(this._cbs, this._options);
-	for(var k in p){
-		if(typeof p[k] === "function") this[k] = p[k].bind(p);
-	}
+	this._tokenizer.reset();
+
+	this._tagname = "";
+	this._attribname = "";
+	this._attribs = null;
+	this._stack = [];
 };
 
 //Parses a complete HTML document and pushes it to the handler
@@ -203,8 +207,12 @@ Parser.prototype.parseComplete = function(data){
 	this.end(data);
 };
 
-Parser.prototype._write = function(chunk, encoding, cb){
-	this._tokenizer.write(chunk, cb);
+Parser.prototype.write = function(chunk){
+	this._tokenizer.write(chunk);
+};
+
+Parser.prototype.end = function(chunk){
+	this._tokenizer.end(chunk);
 };
 
 //alias for backwards compat

From db95f006f5ac1b8c3bd2594e926a81f31d618c7f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 31 Mar 2013 22:13:12 +0200
Subject: [PATCH 298/450] [tests/stream] fixed order of events

---
 tests/Stream/02-RSS.json | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/tests/Stream/02-RSS.json b/tests/Stream/02-RSS.json
index aa04734..55660a2 100644
--- a/tests/Stream/02-RSS.json
+++ b/tests/Stream/02-RSS.json
@@ -38,6 +38,13 @@
         "rss"
       ]
     },
+    {
+      "event": "attribute",
+      "data": [
+        "version",
+        "2.0"
+      ]
+    },
     {
       "event": "opentag",
       "data": [
@@ -47,13 +54,6 @@
         }
       ]
     },
-    {
-      "event": "attribute",
-      "data": [
-        "version",
-        "2.0"
-      ]
-    },
     {
       "event": "text",
       "data": [

From e4982e19d3d47bb27064924c74167c57c04509f4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 31 Mar 2013 22:13:23 +0200
Subject: [PATCH 299/450] [tokenizer] simplified logic

---
 lib/Tokenizer.js | 35 ++++++++++++-----------------------
 1 file changed, 12 insertions(+), 23 deletions(-)

diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
index 753a2da..9d19e65 100644
--- a/lib/Tokenizer.js
+++ b/lib/Tokenizer.js
@@ -166,14 +166,14 @@ Parser.prototype.write = function(chunk){
 		*	attributes
 		*/
 		else if(this._state === BEFORE_ATTRIBUTE_NAME){
-			if(c === "/"){
-				this._cbs.onopentagend();
-				this._cbs.onselfclosingtag();
-				this._state = AFTER_CLOSING_TAG_NAME;
-			} else if(c === ">"){
+			if(c === ">"){
 				this._state = TEXT;
 				this._cbs.onopentagend();
 				this._sectionStart = this._index + 1;
+			} else if(c === "/"){
+				this._cbs.onopentagend();
+				this._cbs.onselfclosingtag();
+				this._state = AFTER_CLOSING_TAG_NAME;
 			} else if(!whitespace(c)){
 				this._state = IN_ATTRIBUTE_NAME;
 				this._sectionStart = this._index;
@@ -182,31 +182,20 @@ Parser.prototype.write = function(chunk){
 			if(c === "="){
 				this._emitIfToken("attribname");
 				this._state = BEFORE_ATTRIBUTE_VALUE;
-			} else if(c === "/"){
-				this._emitIfToken("attribname");
-				this._cbs.onopentagend();
-				this._cbs.onselfclosingtag();
-				this._state = AFTER_CLOSING_TAG_NAME;
-			} else if(c === ">"){
-				this._emitIfToken("attribname");
-				this._state = TEXT;
-				this._cbs.onopentagend();
-				this._sectionStart = this._index + 1;
 			} else if(whitespace(c)){
 				this._emitIfToken("attribname");
 				this._state = AFTER_ATTRIBUTE_NAME;
+			} else if(c === "/" || c === ">"){
+				this._emitIfToken("attribname");
+				this._state = BEFORE_ATTRIBUTE_NAME;
+				this._index--;
 			}
 		} else if(this._state === AFTER_ATTRIBUTE_NAME){
 			if(c === "="){
 				this._state = BEFORE_ATTRIBUTE_VALUE;
-			} else if(c === "/"){
-				this._cbs.onopentagend();
-				this._cbs.onselfclosingtag();
-				this._state = AFTER_CLOSING_TAG_NAME;
-			} else if(c === ">"){
-				this._state = TEXT;
-				this._cbs.onopentagend();
-				this._sectionStart = this._index + 1;
+			} else if(c === "/" || c === ">"){
+				this._state = BEFORE_ATTRIBUTE_NAME;
+				this._index--;
 			} else if(!whitespace(c)){
 				this._state = IN_ATTRIBUTE_NAME;
 				this._sectionStart = this._index;

From 1905dd320c8e159ae2921b7d24dec203eee33651 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 31 Mar 2013 22:14:14 +0200
Subject: [PATCH 300/450] [parser] fixed handling of implied closing and empty
 tags

---
 lib/Parser.js | 29 ++++++++++++++++++++---------
 1 file changed, 20 insertions(+), 9 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index 851bf69..e8a3d47 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -81,6 +81,7 @@ function Parser(cbs, options){
 	this._attribname = "";
 	this._attribs = null;
 	this._stack = [];
+	this._done = false;
 
 	this._tokenizer = new Tokenizer(options, this);
 }
@@ -94,8 +95,8 @@ Parser.prototype.ontext = function(data){
 
 Parser.prototype.onopentagname = function(name){
 	if(this._options.lowerCaseTags) name = name.toLowerCase();
+
 	this._tagname = name;
-	this._stack.push(name);
 
 	if (!this._options.xmlMode && name in openImpliesClose) {
 		for(
@@ -104,22 +105,28 @@ Parser.prototype.onopentagname = function(name){
 			this.onclosetag(el)
 		);
 	}
+
+	if(this._options.xmlMode || !(name in emptyTags)){
+		this._stack.push(name);
+	}
+
 	if(this._cbs.onopentagname) this._cbs.onopentagname(name);
 	if(this._cbs.onopentag) this._attribs = {};
 };
 
 Parser.prototype.onopentagend = function(){
-	if(this._tagname !== ""){
-		if(this._attribname !== "") this.onattribvalue("");
-		if(this._attribs){
-			if(this._cbs.onopentag) this._cbs.onopentag(this._tagname, this._attribs);
-			this._attribs = null;
-		}
-		this._tagname = "";
+	if(this._attribname !== "") this.onattribvalue("");
+	if(this._attribs){
+		if(this._cbs.onopentag) this._cbs.onopentag(this._tagname, this._attribs);
+		this._attribs = null;
 	}
+	if(!this._options.xmlMode && this._cbs.onclosetag && this._tagname in emptyTags){
+		this._cbs.onclosetag(this._tagname);
+	}
+	this._tagname = "";
 };
 
-Parser.prototype.onclosetag = function closeTag(name){
+Parser.prototype.onclosetag = function(name){
 	if(this._options.lowerCaseTags) name = name.toLowerCase();
 	if(this._stack.length && (!(name in emptyTags) || this._options.xmlMode)){
 		var pos = this._stack.lastIndexOf(name);
@@ -199,6 +206,7 @@ Parser.prototype.reset = function(){
 	this._attribname = "";
 	this._attribs = null;
 	this._stack = [];
+	this._done = false;
 };
 
 //Parses a complete HTML document and pushes it to the handler
@@ -208,11 +216,14 @@ Parser.prototype.parseComplete = function(data){
 };
 
 Parser.prototype.write = function(chunk){
+	if(this._done) this.onerror(Error(".write() after done!"));
 	this._tokenizer.write(chunk);
 };
 
 Parser.prototype.end = function(chunk){
+	if(this._done) this.onerror(Error(".end() after done!"));
 	this._tokenizer.end(chunk);
+	this._done = true;
 };
 
 //alias for backwards compat

From 70c6865b7527e622a7c0a91fe4b70bea1b1860af Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 31 Mar 2013 22:15:36 +0200
Subject: [PATCH 301/450] [tests/events] accidentally removed part of the
 document

---
 tests/Events/02-template.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/Events/02-template.json b/tests/Events/02-template.json
index d3183d4..eda7311 100644
--- a/tests/Events/02-template.json
+++ b/tests/Events/02-template.json
@@ -4,7 +4,7 @@
     "handler": {},
     "parser": {}
   },
-  "html": "<script type=\"text/template\"></script>",
+  "html": "<script type=\"text/template\"><h1>Heading1</h1></script>",
   "expected": [
     {
       "event": "opentagname",

From 4a7eb12b5d0b0da28a26256417f2e8e619b0c708 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 31 Mar 2013 22:31:32 +0200
Subject: [PATCH 302/450] added a WritableStream interface again

this time, it's implementing stream.Writable
---
 lib/Stream.js         | 14 +++++++-------
 lib/WritableStream.js | 21 +++++++++++++++++++++
 lib/index.js          |  2 +-
 3 files changed, 29 insertions(+), 8 deletions(-)
 create mode 100644 lib/WritableStream.js

diff --git a/lib/Stream.js b/lib/Stream.js
index b9bad6e..29a819b 100644
--- a/lib/Stream.js
+++ b/lib/Stream.js
@@ -1,32 +1,32 @@
 module.exports = Stream;
 
-var Parser = require("./Parser.js");
+var Parser = require("./WritableStream.js");
 
 function Stream(options){
-	Parser.call(this, new cbs(this), options);
+	Parser.call(this, new Cbs(this), options);
 }
 
 require("util").inherits(Stream, Parser);
 
 Stream.prototype.readable = true;
 
-var cbs = function(scope){
+function Cbs(scope){
 	this.scope = scope;
-};
+}
 
 var EVENTS = require("../").EVENTS;
 
 Object.keys(EVENTS).forEach(function(name){
 	if(EVENTS[name] === 0){
-		cbs.prototype["on" + name] = function(){
+		Cbs.prototype["on" + name] = function(){
 			this.scope.emit(name);
 		};
 	} else if(EVENTS[name] === 1){
-		cbs.prototype["on" + name] = function(a){
+		Cbs.prototype["on" + name] = function(a){
 			this.scope.emit(name, a);
 		};
 	} else if(EVENTS[name] === 2){
-		cbs.prototype["on" + name] = function(a, b){
+		Cbs.prototype["on" + name] = function(a, b){
 			this.scope.emit(name, a, b);
 		};
 	} else {
diff --git a/lib/WritableStream.js b/lib/WritableStream.js
new file mode 100644
index 0000000..e65b073
--- /dev/null
+++ b/lib/WritableStream.js
@@ -0,0 +1,21 @@
+module.exports = Stream;
+
+var Parser = require("./Parser.js"),
+    WritableStream = require("stream").Writable || require("readable-stream").Writable;
+
+function Stream(cbs, options){
+	var parser = this._parser = new Parser(cbs, options);
+
+	WritableStream.call(this, {decodeStrings: false});
+
+	this.once("finish", function(){
+		parser.end();
+	});
+}
+
+require("util").inherits(Stream, WritableStream);
+
+WritableStream.prototype._write = function(chunk, encoding, cb){
+	this._parser.write(chunk);
+	cb();
+};
\ No newline at end of file
diff --git a/lib/index.js b/lib/index.js
index 8fff14d..0d261cd 100644
--- a/lib/index.js
+++ b/lib/index.js
@@ -24,7 +24,7 @@ module.exports = {
 		return defineProp("Stream", require("./Stream.js"));
 	},
 	get WritableStream(){
-		return defineProp("WritableStream", this.Parser);
+		return defineProp("WritableStream", require("./WritableStream.js"));
 	},
 	get ProxyHandler(){
 		return defineProp("ProxyHandler", require("./ProxyHandler.js"));

From a23d7a672d3148e264027309d32cd37f9afaa4e3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 31 Mar 2013 22:34:07 +0200
Subject: [PATCH 303/450] 3.0.0 (finally!)

the 3.x releases before were crappy, and I will deny to have published
them
---
 package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package.json b/package.json
index 4e2e766..8520c8e 100644
--- a/package.json
+++ b/package.json
@@ -1,7 +1,7 @@
 {
 	"name": "htmlparser2",
 	"description": "Performance-optimized forgiving HTML/XML/RSS parser",
-	"version": "3.0.1",
+	"version": "3.0.0",
 	"author": "Felix Boehm <me@feedic.com>",
 	"keywords": ["html", "parser", "streams", "xml", "dom", "rss", "feed", "atom"],
 	"contributors": ["Chris Winberry <chris@winberry.net>"],

From 1db8148511d0aad2a8046cab11b6018ed7da7127 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 31 Mar 2013 22:35:30 +0200
Subject: [PATCH 304/450] [tokenizer] changed internal name to `Tokenizer`

---
 lib/Tokenizer.js | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
index 9d19e65..4caedb7 100644
--- a/lib/Tokenizer.js
+++ b/lib/Tokenizer.js
@@ -1,4 +1,4 @@
-module.exports = Parser;
+module.exports = Tokenizer;
 
 var i = 0,
 
@@ -71,7 +71,7 @@ function whitespace(c){
 	return c === " " || c === "\t" || c === "\r" || c === "\n";
 }
 
-function Parser(options, cbs){
+function Tokenizer(options, cbs){
 	this._state = TEXT;
 	this._buffer = "";
 	this._sectionStart = 0;
@@ -83,7 +83,7 @@ function Parser(options, cbs){
 }
 
 //TODO make events conditional
-Parser.prototype.write = function(chunk){
+Tokenizer.prototype.write = function(chunk){
 	this._buffer += chunk;
 
 	while(this._index < this._buffer.length && this._running){
@@ -502,14 +502,14 @@ Parser.prototype.write = function(chunk){
 	}
 };
 
-Parser.prototype.pause = function(){
+Tokenizer.prototype.pause = function(){
 	this._running = false;
 };
-Parser.prototype.resume = function(){
+Tokenizer.prototype.resume = function(){
 	this._running = true;
 };
 
-Parser.prototype.end = function(chunk){
+Tokenizer.prototype.end = function(chunk){
 	if(chunk) this.write(chunk);
 
 	//if there is remaining data, emit it in a reasonable way
@@ -529,16 +529,16 @@ Parser.prototype.end = function(chunk){
 	this._cbs.onend();
 };
 
-Parser.prototype.reset = function(){
-	Parser.call(this, this._options, this._cbs);
+Tokenizer.prototype.reset = function(){
+	Tokenizer.call(this, this._options, this._cbs);
 };
 
-Parser.prototype._emitToken = function(name){
+Tokenizer.prototype._emitToken = function(name){
 	this._cbs["on" + name](this._buffer.substring(this._sectionStart, this._index));
 	this._sectionStart = -1;
 };
 
-Parser.prototype._emitIfToken = function(name){
+Tokenizer.prototype._emitIfToken = function(name){
 	if(this._index > this._sectionStart){
 		this._cbs["on" + name](this._buffer.substring(this._sectionStart, this._index));
 	}

From b7f6df56eec76a83e3599b15c0a23b6702c4e3df Mon Sep 17 00:00:00 2001
From: burl <bnyswonger@marchex.com>
Date: Wed, 3 Apr 2013 11:02:33 -0700
Subject: [PATCH 305/450] [tokenizer] fix for script tags causing following
 nodes to be interpreted as TEXT * this._special reverted to 0 after
 "closetag" event [02-template.json] added <p>...</p> around script tag to
 ensure that closing </p> is seen as a tag rather than text node

---
 lib/Tokenizer.js              |  2 ++
 tests/Events/02-template.json | 21 ++++++++++++++++++++-
 2 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
index 4caedb7..aaeba83 100644
--- a/lib/Tokenizer.js
+++ b/lib/Tokenizer.js
@@ -150,9 +150,11 @@ Tokenizer.prototype.write = function(chunk){
 				this._emitToken("closetag");
 				this._state = TEXT;
 				this._sectionStart = this._index + 1;
+				this._special = 0;
 			} else if(whitespace(c)){
 				this._emitToken("closetag");
 				this._state = AFTER_CLOSING_TAG_NAME;
+				this._special = 0;
 			}
 		} else if(this._state === AFTER_CLOSING_TAG_NAME){
 			//skip everything until ">"
diff --git a/tests/Events/02-template.json b/tests/Events/02-template.json
index eda7311..df344b6 100644
--- a/tests/Events/02-template.json
+++ b/tests/Events/02-template.json
@@ -4,8 +4,21 @@
     "handler": {},
     "parser": {}
   },
-  "html": "<script type=\"text/template\"><h1>Heading1</h1></script>",
+  "html": "<p><script type=\"text/template\"><h1>Heading1</h1></script></p>",
   "expected": [
+    {
+      "event": "opentagname",
+      "data": [
+        "p"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "p",
+        {}
+      ]
+    },
     {
       "event": "opentagname",
       "data": [
@@ -39,6 +52,12 @@
       "data": [
         "script"
       ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "p"
+      ]
     }
   ]
 }
\ No newline at end of file

From 9898b9a8c7917d31684e47e40363775c1a0c2691 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Wed, 3 Apr 2013 20:15:46 +0200
Subject: [PATCH 306/450] [proxyhandler] don't use getters/setters

---
 lib/ProxyHandler.js | 40 ++++++++++++++++++++++++----------------
 1 file changed, 24 insertions(+), 16 deletions(-)

diff --git a/lib/ProxyHandler.js b/lib/ProxyHandler.js
index edfbbf8..1f9f38a 100644
--- a/lib/ProxyHandler.js
+++ b/lib/ProxyHandler.js
@@ -1,19 +1,27 @@
+module.exports = ProxyHandler;
+
 var ProxyHandler = function(cbs){
-	if(cbs) this._cbs = cbs;
+	this._cbs = cbs || {};
 };
 
-ProxyHandler.prototype._cbs = {};
-
-Object.keys(require("./").EVENTS).forEach(function(name){
-	name = "on" + name;
-	Object.defineProperty(ProxyHandler.prototype, name, {
-		enumerable:true, configurable:true,
-		get: function(){ return this._cbs[name]; },
-		set: function(value){
-			//allow functions to be overwritten
-			Object.defineProperty(this, name, {value: value});
-		}
-	});
-});
-
-module.exports = ProxyHandler;
\ No newline at end of file
+var EVENTS = require("./").EVENTS;
+Object.keys(EVENTS).forEach(function(name){
+	if(EVENTS[name] === 0){
+		name = "on" + name;
+		ProxyHandler.prototype[name] = function(){
+			if(this._cbs[name]) this._cbs[name]();
+		};
+	} else if(EVENTS[name] === 1){
+		name = "on" + name;
+		ProxyHandler.prototype[name] = function(a){
+			if(this._cbs[name]) this._cbs[name](a);
+		};
+	} else if(EVENTS[name] === 2){
+		name = "on" + name;
+		ProxyHandler.prototype[name] = function(a, b){
+			if(this._cbs[name]) this._cbs[name](a, b);
+		};
+	} else {
+		throw Error("wrong number of arguments");
+	}
+});
\ No newline at end of file

From 84815a308261548aa379628cff7a6eb51ee5130c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Wed, 3 Apr 2013 20:17:24 +0200
Subject: [PATCH 307/450] added CollectingHandler

collects all events and passes them through to another handler

can simulate a reset for the underlying handler using the `restart()`
method
---
 lib/CollectingHandler.js | 55 ++++++++++++++++++++++++++++++++++++++++
 lib/index.js             |  3 +++
 2 files changed, 58 insertions(+)
 create mode 100644 lib/CollectingHandler.js

diff --git a/lib/CollectingHandler.js b/lib/CollectingHandler.js
new file mode 100644
index 0000000..8d63305
--- /dev/null
+++ b/lib/CollectingHandler.js
@@ -0,0 +1,55 @@
+module.exports = CollectingHandler;
+
+function CollectingHandler(cbs){
+	this._cbs = cbs || {};
+	this.events = [];
+}
+
+var EVENTS = require("./").EVENTS;
+Object.keys(EVENTS).forEach(function(name){
+	if(EVENTS[name] === 0){
+		name = "on" + name;
+		CollectingHandler.prototype[name] = function(){
+			this.events.push([name]);
+			if(this._cbs[name]) this._cbs[name]();
+		};
+	} else if(EVENTS[name] === 1){
+		name = "on" + name;
+		CollectingHandler.prototype[name] = function(a){
+			this.events.push([name, a]);
+			if(this._cbs[name]) this._cbs[name](a);
+		};
+	} else if(EVENTS[name] === 2){
+		name = "on" + name;
+		CollectingHandler.prototype[name] = function(a, b){
+			this.events.push([name, a, b]);
+			if(this._cbs[name]) this._cbs[name](a, b);
+		};
+	} else {
+		throw Error("wrong number of arguments");
+	}
+});
+
+CollectingHandler.prototype.onreset = function(){
+	this.events = [];
+	if(this._cbs.onreset) this._cbs.onreset();
+};
+
+CollectingHandler.prototype.restart = function(){
+	if(this._cbs.onreset) this._cbs.onreset();
+
+	for(var i = 0, len = this.events.length; i < len; i++){
+		if(this._cbs[this.events[i][0]]){
+
+			var num = this.events[i].length;
+
+			if(num === 1){
+				this._cbs[this.events[i][0]]();
+			} else if(num === 2){
+				this._cbs[this.events[i][0]](this.events[i][1]);
+			} else {
+				this._cbs[this.events[i][0]](this.events[i][1], this.events[i][2]);
+			}
+		}
+	}
+};
\ No newline at end of file
diff --git a/lib/index.js b/lib/index.js
index 0d261cd..d991259 100644
--- a/lib/index.js
+++ b/lib/index.js
@@ -32,6 +32,9 @@ module.exports = {
 	get DomUtils(){
 		return defineProp("DomUtils", require("domutils"));
 	},
+	get CollectingHandler(){
+		return defineProp("CollectingHandler", require("./CollectingHandler.js"));
+	},
 	// For legacy support
 	get DefaultHandler(){
 		return defineProp("DefaultHandler", this.DomHandler);

From 01d8adf9029b1dd4455088964743908639fdf90d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Wed, 3 Apr 2013 20:17:49 +0200
Subject: [PATCH 308/450] [tests] use the new CollectingHandler

---
 tests/01-events.js   | 46 +++++--------------------------------
 tests/02-stream.js   | 54 ++++++++------------------------------------
 tests/test-helper.js | 32 +++++++++++++++++++-------
 3 files changed, 39 insertions(+), 93 deletions(-)

diff --git a/tests/01-events.js b/tests/01-events.js
index 73204be..16dbfc7 100644
--- a/tests/01-events.js
+++ b/tests/01-events.js
@@ -1,45 +1,11 @@
-var helper = require("./test-helper.js"),
-	sliceArr = Array.prototype.slice;
+var helper = require("./test-helper.js");
 
 exports.dir = "Events";
 
 exports.test = function(test, cb){
-	var tokens = [], cbs;
-	if(typeof Proxy !== "undefined"){
-		cbs = Proxy.create({ get: function(a, name){
-			if(name === "onend"){
-				return function(){
-					cb(null, tokens.splice(0));
-				};
-			}
-			if(name === "onreset") return function(){};
-			return function(){
-				tokens.push({
-					event: name.substr(2),
-					data: sliceArr.apply(arguments)
-				});
-			};
-		}});
-	}
-	else{
-		cbs = {
-			onerror: cb,
-			onend: function(){
-				cb(null, tokens.splice(0));
-			}
-		};
-		helper.EVENTS.forEach(function(name){
-			cbs["on" + name] = function(){
-				if(name === "text" && tokens.length && tokens[tokens.length-1].event === "text"){
-					tokens[tokens.length-1].data[0] += arguments[0];
-					return;
-				}
-				tokens.push({
-					event: name,
-					data: sliceArr.apply(arguments)
-				});
-			};
-		});
-	}
-	helper.writeToParser(cbs, test.options.parser, test.html);
+	helper.writeToParser(
+		helper.getEventCollector(cb),
+		test.options.parser,
+		test.html
+	);
 };
\ No newline at end of file
diff --git a/tests/02-stream.js b/tests/02-stream.js
index 9e3f67d..72e48fd 100644
--- a/tests/02-stream.js
+++ b/tests/02-stream.js
@@ -1,55 +1,19 @@
 var helper = require("./test-helper.js"),
-	Stream = require("..").Stream,
-	sliceArr = Array.prototype.slice,
+	Stream = require("..").WritableStream,
 	fs = require("fs");
 
 exports.dir = "Stream";
 
 exports.test = function(test, cb){
-	var tokens = [],
-		stream = new Stream(test.options),
-		second = false;
-	
-	if(typeof Proxy !== "undefined"){
-		stream._events = Proxy.create({ get: function(a, name){
-			if(name === "end"){
-				return function(){
-					cb(null, tokens.splice(0));
-					if(!second){
-						second = true;
-						stream.parseComplete(fs.readFileSync(__dirname + test.file).toString());
-					}
-				};
-			}
-			if(helper.EVENTS.indexOf(name) !== -1){
-				return function(){
-					tokens.push({
-						event: name,
-						data: sliceArr.call(arguments, 0)
-					});
-				};
+	var stream = new Stream(test.options),
+	    second = false,
+	    handler = helper.getEventCollector(function(err, events){
+			cb(err, events);
+			if(!second){
+				second = true;
+				stream.parseComplete(fs.readFileSync(__dirname + test.file));
 			}
-		}});
-	}
-	else {
-		stream._events = {
-			error: cb,
-			end: function(){
-				cb(null, tokens.splice(0));
-				if(!second){
-					second = true;
-					stream.parseComplete(fs.readFileSync(__dirname + test.file).toString());
-				}
-			}
-		};
-		helper.EVENTS.forEach(function(name){
-			stream.on(name, function(){
-				tokens.push({
-					event: name,
-					data: sliceArr.apply(arguments)
-				});
-			});
 		});
-	}
+	
 	fs.createReadStream(__dirname + test.file).pipe(stream);
 };
\ No newline at end of file
diff --git a/tests/test-helper.js b/tests/test-helper.js
index 04f3b2b..ccf9804 100644
--- a/tests/test-helper.js
+++ b/tests/test-helper.js
@@ -1,22 +1,38 @@
 var htmlparser = require(".."),
 	Parser = htmlparser.Parser,
+	CollectingHandler = htmlparser.CollectingHandler,
 	chunkSize = 5;
 
 exports.writeToParser = function(handler, options, data){
 	var parser = new Parser(handler, options);
 	//first, try to run the test via chunks
-	for(var i = 0; i < data.length; i+=chunkSize){
-		parser.write(data.substring(i, i + chunkSize));
+	for(var i = 0; i < data.length; i += chunkSize){
+		parser.write(data.substr(i, chunkSize));
 	}
-	parser.done();
+	parser.end();
 	//then parse everything
 	parser.parseComplete(data);
 };
 
-var EVENTS = Object.keys(htmlparser.EVENTS);
+//returns a tree structure
+exports.getEventCollector = function(cb){
+	var handler = new CollectingHandler({onerror: cb, onend: function(){
+		cb(null, handler.events
+			.reduce(function(events, arr){
+				if(arr[0] === "onerror" || arr[0] === "onend");
+				else if(arr[0] === "ontext" && events.length && events[events.length-1].event === "text"){
+					events[events.length-1].data[0] += arr[1];
+				} else {
+					events.push({
+						event: arr[0].slice(2),
+						data: arr.slice(1)
+					});
+				}
 
-//remove onend and onerror from events
-EVENTS.splice(EVENTS.indexOf("end"), 1);
-EVENTS.splice(EVENTS.indexOf("error"), 1);
+				return events;
+			}, [])
+		);
+	}});
 
-exports.EVENTS = EVENTS;
\ No newline at end of file
+	return handler;
+};
\ No newline at end of file

From f2542db30b593353f2883164310106c540615f2d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Thu, 4 Apr 2013 13:04:08 +0200
Subject: [PATCH 309/450] [tests] removed unused `f` var

---
 tests/00-runtests.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/00-runtests.js b/tests/00-runtests.js
index f40ee10..c8c51f7 100644
--- a/tests/00-runtests.js
+++ b/tests/00-runtests.js
@@ -18,7 +18,7 @@ var runCount = 0,
 	var dir = path.resolve(__dirname, test.dir);
 
 	//read files, load them, run them
-	var f = fs
+	fs
 	.readdirSync(dir)
 	.filter(RegExp.prototype.test, /^[^\._]/) //ignore all files with a leading dot or underscore
 	.map(function(name){

From fcb35f0aa51a08318928143c2b8168d091082a97 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Thu, 4 Apr 2013 13:05:36 +0200
Subject: [PATCH 310/450] 3.0.1

---
 package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package.json b/package.json
index 8520c8e..4e2e766 100644
--- a/package.json
+++ b/package.json
@@ -1,7 +1,7 @@
 {
 	"name": "htmlparser2",
 	"description": "Performance-optimized forgiving HTML/XML/RSS parser",
-	"version": "3.0.0",
+	"version": "3.0.1",
 	"author": "Felix Boehm <me@feedic.com>",
 	"keywords": ["html", "parser", "streams", "xml", "dom", "rss", "feed", "atom"],
 	"contributors": ["Chris Winberry <chris@winberry.net>"],

From 779e608e0a3bf8791e478a508a1bbc3233b63308 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Thu, 4 Apr 2013 13:33:01 +0200
Subject: [PATCH 311/450] 3.0.2

---
 package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package.json b/package.json
index 4e2e766..fb9d84f 100644
--- a/package.json
+++ b/package.json
@@ -1,7 +1,7 @@
 {
 	"name": "htmlparser2",
 	"description": "Performance-optimized forgiving HTML/XML/RSS parser",
-	"version": "3.0.1",
+	"version": "3.0.2",
 	"author": "Felix Boehm <me@feedic.com>",
 	"keywords": ["html", "parser", "streams", "xml", "dom", "rss", "feed", "atom"],
 	"contributors": ["Chris Winberry <chris@winberry.net>"],

From c848d69306be64666b17cf7745cc6aaa0b7680b6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Thu, 4 Apr 2013 17:32:29 +0200
Subject: [PATCH 312/450] [bench] use setImmediate instead of process.nextTick

---
 tests/bench.js | 58 +++++++++++++++++++++++++-------------------------
 1 file changed, 29 insertions(+), 29 deletions(-)

diff --git a/tests/bench.js b/tests/bench.js
index 1e7b599..a5b99cf 100644
--- a/tests/bench.js
+++ b/tests/bench.js
@@ -2,10 +2,10 @@
 var node_xml = require("node-xml");
 
 function NodeXmlParser() {
-    var parser = new node_xml.SaxParser(function(cb) { });
-    this.parse = function(s) {
+	var parser = new node_xml.SaxParser(function(cb) { });
+	this.parse = function(s) {
 	parser.parseString(s);
-    };
+	};
 }
 
 var p = new NodeXmlParser();
@@ -13,10 +13,10 @@ var p = new NodeXmlParser();
 var libxml = require("libxmljs");
 
 function LibXmlJsParser() {
-    var parser = new libxml.SaxPushParser(function(cb) { });
-    this.parse = function(s) {
+	var parser = new libxml.SaxPushParser(function(cb) { });
+	this.parse = function(s) {
 	parser.push(s, false);
-    };
+	};
 }
 
 var p = new LibXmlJsParser();
@@ -24,7 +24,7 @@ var p = new LibXmlJsParser();
 var sax = require('sax');
 
 function SaxParser() {
-    var parser = sax.parser();
+	var parser = sax.parser();
 	this.parse = function(s) {
 	parser.write(s);
 	}
@@ -35,10 +35,10 @@ var p = new SaxParser();
 var expat = require('node-expat');
 
 function ExpatParser() {
-    var parser = new expat.Parser();
-    this.parse = function(s) {
+	var parser = new expat.Parser();
+	this.parse = function(s) {
 	parser.parse(s, false);
-    };
+	};
 }
 
 var p = new ExpatParser();
@@ -46,31 +46,31 @@ var p = new ExpatParser();
 var htmlparser = require('htmlparser');
 
 function HtmlParser() {
-    var handler = new htmlparser.DefaultHandler();
-    var parser = new htmlparser.Parser(handler);
-    this.parse = function(s) {
-    parser.parseComplete(s);
-    };
+	var handler = new htmlparser.DefaultHandler();
+	var parser = new htmlparser.Parser(handler);
+	this.parse = function(s) {
+	parser.parseComplete(s);
+	};
 }
 
 var p = new HtmlParser();
 */
-var htmlparser2 = require('htmlparser2/lib/Parser.js');
+var htmlparser2 = require('../lib/Parser.js');
 
 // provide callbacks
 // otherwise, parsing would be optimized
 var emptyCBs = {
-    onopentagname: function(){},
-    onattribute: function(){},
-    ontext: function(){},
-    onclosetag: function(){}
+	onopentagname: function(){},
+	onattribute: function(){},
+	ontext: function(){},
+	onclosetag: function(){}
 };
 
 function HtmlParser2() {
-    var parser = new htmlparser2(emptyCBs);
-    this.parse = function(s) {
-    parser.write(s);
-    };
+	var parser = new htmlparser2(emptyCBs);
+	this.parse = function(s) {
+		parser.write(s);
+	};
 }
 
 var p = new HtmlParser2();
@@ -79,16 +79,16 @@ var p = new HtmlParser2();
 p.parse("<r>");
 var nEl = 0;
 (function d() {
-    p.parse("<foo bar='baz'>quux</foo>");
-    nEl++;
-    process.nextTick(d);
+	p.parse("<foo bar='baz'>quux</foo>");
+	nEl++;
+	setImmediate(d);
 })();
 
 var its =[];
 setInterval(function() {
-    console.log(nEl + " el/s");
+	console.log(nEl + " el/s");
 	its.push(nEl);
-    nEl = 0;
+	nEl = 0;
 }, 1e3);
 
 process.on('SIGINT', function () {

From 138462032e732b1c7c71c5573374e38946eaf663 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 5 Apr 2013 11:02:57 +0200
Subject: [PATCH 313/450] [bench] try to test all available modules

---
 tests/bench.js | 209 ++++++++++++++++++++++++++-----------------------
 1 file changed, 109 insertions(+), 100 deletions(-)

diff --git a/tests/bench.js b/tests/bench.js
index a5b99cf..91bca68 100644
--- a/tests/bench.js
+++ b/tests/bench.js
@@ -1,100 +1,109 @@
-/*
-var node_xml = require("node-xml");
-
-function NodeXmlParser() {
-	var parser = new node_xml.SaxParser(function(cb) { });
-	this.parse = function(s) {
-	parser.parseString(s);
-	};
-}
-
-var p = new NodeXmlParser();
-*//*
-var libxml = require("libxmljs");
-
-function LibXmlJsParser() {
-	var parser = new libxml.SaxPushParser(function(cb) { });
-	this.parse = function(s) {
-	parser.push(s, false);
-	};
-}
-
-var p = new LibXmlJsParser();
-*//*
-var sax = require('sax');
-
-function SaxParser() {
-	var parser = sax.parser();
-	this.parse = function(s) {
-	parser.write(s);
-	}
-}
-
-var p = new SaxParser();
-*//*
-var expat = require('node-expat');
-
-function ExpatParser() {
-	var parser = new expat.Parser();
-	this.parse = function(s) {
-	parser.parse(s, false);
-	};
-}
-
-var p = new ExpatParser();
-*//*
-var htmlparser = require('htmlparser');
-
-function HtmlParser() {
-	var handler = new htmlparser.DefaultHandler();
-	var parser = new htmlparser.Parser(handler);
-	this.parse = function(s) {
-	parser.parseComplete(s);
-	};
-}
-
-var p = new HtmlParser();
-*/
-var htmlparser2 = require('../lib/Parser.js');
-
-// provide callbacks
-// otherwise, parsing would be optimized
-var emptyCBs = {
-	onopentagname: function(){},
-	onattribute: function(){},
-	ontext: function(){},
-	onclosetag: function(){}
-};
-
-function HtmlParser2() {
-	var parser = new htmlparser2(emptyCBs);
-	this.parse = function(s) {
-		parser.write(s);
-	};
-}
-
-var p = new HtmlParser2();
-
-
-p.parse("<r>");
-var nEl = 0;
-(function d() {
-	p.parse("<foo bar='baz'>quux</foo>");
-	nEl++;
-	setImmediate(d);
-})();
-
-var its =[];
-setInterval(function() {
-	console.log(nEl + " el/s");
-	its.push(nEl);
-	nEl = 0;
-}, 1e3);
-
-process.on('SIGINT', function () {
-	var average = its.reduce(function(average, v){
-		return average+v;
-	}) / its.length;
-	console.log("Average:", average, "el/s");
-	process.exit(0);
-});
\ No newline at end of file
+//dependencies have to be installed manually
+
+var ben = require("ben");
+
+var parsers = [];
+
+(function(){
+	try{
+		var node_xml = require("node-xml");
+
+		function NodeXmlParser() {
+			var parser = new node_xml.SaxParser(function(cb) { });
+			this.parse = function(s) {
+				parser.parseString(s);
+			};
+		}
+		parsers.push([NodeXmlParser, "node-xml"]);
+	} catch(e){}
+}());
+
+(function(){
+	try{
+		var libxml = require("libxmljs");
+
+		function LibXmlJsParser() {
+			var parser = new libxml.SaxPushParser(function(cb) { });
+			this.parse = function(s) {
+				parser.push(s, false);
+			};
+		}
+
+		parsers.push([LibXmlJsParser, "libxmljs"]);
+	} catch(e){}
+}());
+
+(function(){
+	try{
+		var sax = require('sax');
+
+		function SaxParser() {
+			var parser = sax.parser();
+			this.parse = function(s) {
+				parser.write(s);
+			};
+		}
+
+		parsers.push([SaxParser, "sax"]);
+	} catch(e){}
+}());
+
+(function(){
+	try{
+		var expat = require('node-expat');
+
+		function ExpatParser() {
+			var parser = new expat.Parser();
+			this.parse = function(s) {
+				parser.parse(s, false);
+			};
+		}
+
+		parsers.push([ExpatParser, "node-expat"]);
+	} catch(e){}
+}());
+
+(function(){
+	try{
+		var htmlparser = require('htmlparser');
+
+		function HtmlParser() {
+			var handler = new htmlparser.DefaultHandler();
+			var parser = new htmlparser.Parser(handler);
+			this.parse = function(s) {
+				parser.parseComplete(s);
+			};
+		}
+
+		parsers.push([HtmlParser, "htmlparser"]);
+	} catch(e){}
+}());
+
+(function(){
+	try{
+		var htmlparser2 = require('../lib/Parser.js');
+
+		function HtmlParser2() {
+			var parser = new htmlparser2();
+			this.parse = function(s) {
+				parser.write(s);
+			};
+		}
+
+		parsers.push([HtmlParser2, "htmlparser2"]);
+	} catch(e){}
+}());
+
+parsers.forEach(function(arr){
+	var p = new arr[0]();
+	var name = arr[1];
+
+	process.stdout.write(name + ":" + Array(14-name.length).join(" "));
+
+	p.parse("<r>");
+	var num = ben(1e6, function(){
+		p.parse("<foo bar='baz'>quux</foo>");
+	});
+
+	console.log((num * 1e3).toFixed(2), "ms/el");
+});

From 9f465cadd35e0de141aa93b99195145616d01782 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 5 Apr 2013 21:01:33 +0200
Subject: [PATCH 314/450] [bench] removed unused functions, improved output

---
 tests/bench.js | 182 ++++++++++++++++++++++++-------------------------
 1 file changed, 90 insertions(+), 92 deletions(-)

diff --git a/tests/bench.js b/tests/bench.js
index 91bca68..462b3b3 100644
--- a/tests/bench.js
+++ b/tests/bench.js
@@ -4,97 +4,86 @@ var ben = require("ben");
 
 var parsers = [];
 
-(function(){
-	try{
-		var node_xml = require("node-xml");
-
-		function NodeXmlParser() {
-			var parser = new node_xml.SaxParser(function(cb) { });
-			this.parse = function(s) {
-				parser.parseString(s);
-			};
-		}
-		parsers.push([NodeXmlParser, "node-xml"]);
-	} catch(e){}
-}());
-
-(function(){
-	try{
-		var libxml = require("libxmljs");
-
-		function LibXmlJsParser() {
-			var parser = new libxml.SaxPushParser(function(cb) { });
-			this.parse = function(s) {
-				parser.push(s, false);
-			};
-		}
-
-		parsers.push([LibXmlJsParser, "libxmljs"]);
-	} catch(e){}
-}());
-
-(function(){
-	try{
-		var sax = require('sax');
-
-		function SaxParser() {
-			var parser = sax.parser();
-			this.parse = function(s) {
-				parser.write(s);
-			};
-		}
-
-		parsers.push([SaxParser, "sax"]);
-	} catch(e){}
-}());
-
-(function(){
-	try{
-		var expat = require('node-expat');
-
-		function ExpatParser() {
-			var parser = new expat.Parser();
-			this.parse = function(s) {
-				parser.parse(s, false);
-			};
-		}
-
-		parsers.push([ExpatParser, "node-expat"]);
-	} catch(e){}
-}());
-
-(function(){
-	try{
-		var htmlparser = require('htmlparser');
-
-		function HtmlParser() {
-			var handler = new htmlparser.DefaultHandler();
-			var parser = new htmlparser.Parser(handler);
-			this.parse = function(s) {
-				parser.parseComplete(s);
-			};
-		}
-
-		parsers.push([HtmlParser, "htmlparser"]);
-	} catch(e){}
-}());
-
-(function(){
-	try{
-		var htmlparser2 = require('../lib/Parser.js');
-
-		function HtmlParser2() {
-			var parser = new htmlparser2();
-			this.parse = function(s) {
-				parser.write(s);
-			};
-		}
-
-		parsers.push([HtmlParser2, "htmlparser2"]);
-	} catch(e){}
-}());
-
-parsers.forEach(function(arr){
+
+try{
+	var node_xml = require("node-xml");
+
+	function NodeXmlParser() {
+		var parser = new node_xml.SaxParser(function(cb) { });
+		this.parse = function(s) {
+			parser.parseString(s);
+		};
+	}
+	parsers.push([NodeXmlParser, "node-xml"]);
+} catch(e){}
+
+try{
+	var libxml = require("libxmljs");
+
+	function LibXmlJsParser() {
+		var parser = new libxml.SaxPushParser(function(cb) { });
+		this.parse = function(s) {
+			parser.push(s, false);
+		};
+	}
+
+	parsers.push([LibXmlJsParser, "libxmljs"]);
+} catch(e){}
+
+try{
+	var sax = require('sax');
+
+	function SaxParser() {
+		var parser = sax.parser();
+		this.parse = function(s) {
+			parser.write(s);
+		};
+	}
+
+	parsers.push([SaxParser, "sax"]);
+} catch(e){}
+
+try{
+	var expat = require('node-expat');
+
+	function ExpatParser() {
+		var parser = new expat.Parser();
+		this.parse = function(s) {
+			parser.parse(s, false);
+		};
+	}
+
+	parsers.push([ExpatParser, "node-expat"]);
+} catch(e){}
+
+try{
+	var htmlparser = require('htmlparser');
+
+	function HtmlParser() {
+		var handler = new htmlparser.DefaultHandler();
+		var parser = new htmlparser.Parser(handler);
+		this.parse = function(s) {
+			parser.parseComplete(s);
+		};
+	}
+
+	parsers.push([HtmlParser, "htmlparser"]);
+} catch(e){}
+
+try{
+	var htmlparser2 = require('../lib/Parser.js');
+
+	function HtmlParser2() {
+		var parser = new htmlparser2();
+		this.parse = function(s) {
+			parser.write(s);
+		};
+	}
+
+	parsers.push([HtmlParser2, "htmlparser2"]);
+} catch(e){}
+
+var results = parsers.map(function(arr){
 	var p = new arr[0]();
 	var name = arr[1];
 
@@ -105,5 +94,14 @@ parsers.forEach(function(arr){
 		p.parse("<foo bar='baz'>quux</foo>");
 	});
 
-	console.log((num * 1e3).toFixed(2), "ms/el");
+	console.log((num > 0.01 ? "" : "0") + (num * 1e3).toFixed(2), "ms/el");
+
+	return [name, num];
 });
+
+console.log(
+	"\nWinner:",
+	results.sort(function(a, b){
+		return a[1] - b[1];
+	})[0][0]
+);
\ No newline at end of file

From 2f38140e8758c02b3ac6519f9d5a6238a4ed6bc1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 5 Apr 2013 21:02:20 +0200
Subject: [PATCH 315/450] [readme] updated benchmarks

also use the more readable unit ms/el
---
 README.md | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/README.md b/README.md
index 9add2fb..11dba9c 100644
--- a/README.md
+++ b/README.md
@@ -24,7 +24,7 @@ var parser = new htmlparser.Parser({
 		}
 	}
 });
-parser.write("Xyz <script language= javascript>var foo = '<<bar>>';< /  script>");
+parser.write("Xyz <script type="text/javascript">var foo = '<<bar>>';< /  script>");
 parser.done();
 ```
 
@@ -53,14 +53,20 @@ new htmlparser.FeedHandler(function(<error> error, <object> feed){
 ```
 
 ##Performance
-Using a slightly modified version of [node-expat](https://github.com/astro/node-expat)s `bench.js`, I received the following results (on a MacBook (late 2010)):
+Using a modified version of [node-expat](https://github.com/astro/node-expat)s `bench.js`, I received the following results (on OSX):
 
-* [htmlparser](https://github.com/tautologistics/node-htmlparser): 51779 el/s
-* [sax.js](https://github.com/isaacs/sax-js): 53169 el/s
-* [node-expat](https://github.com/astro/node-expat): 103388 el/s
-* [htmlparser2](https://github.com/fb55/node-htmlparser): 118614 el/s
+```
+node-xml:     28.03 ms/el
+libxmljs:     11.11 ms/el
+sax:          26.92 ms/el
+node-expat:   07.32 ms/el
+htmlparser:   16.40 ms/el
+htmlparser2:  06.32 ms/el
+
+Winner: htmlparser2
+```
 
-The test may be found in `tests/bench.js`.
+The test can be found in [`tests/bench.js`](tests/bench.js).
 
 ##How is this different from [node-htmlparser](https://github.com/tautologistics/node-htmlparser)?
 This is a fork of the project above. The main difference is that this is intended to be used only with node (it runs on other platforms using [browserify](https://github.com/substack/node-browserify)). Besides, the code is much better structured, has less duplications and is remarkably faster than the original. 

From bc0086219cddae44febd30570a3f66472d247168 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 5 Apr 2013 21:05:51 +0200
Subject: [PATCH 316/450] [doc] call `end`, use single quotes

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 11dba9c..f61aed2 100644
--- a/README.md
+++ b/README.md
@@ -24,8 +24,8 @@ var parser = new htmlparser.Parser({
 		}
 	}
 });
-parser.write("Xyz <script type="text/javascript">var foo = '<<bar>>';< /  script>");
-parser.done();
+parser.write("Xyz <script type='text/javascript'>var foo = '<<bar>>';< /  script>");
+parser.end();
 ```
 
 Output (simplified):

From 6935c0df81034b0e3ad6d5892f05b9ac17162e9b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 5 Apr 2013 21:11:36 +0200
Subject: [PATCH 317/450] [doc] updated section about node-htmlparser

---
 README.md | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index f61aed2..a0ce491 100644
--- a/README.md
+++ b/README.md
@@ -69,10 +69,8 @@ Winner: htmlparser2
 The test can be found in [`tests/bench.js`](tests/bench.js).
 
 ##How is this different from [node-htmlparser](https://github.com/tautologistics/node-htmlparser)?
-This is a fork of the project above. The main difference is that this is intended to be used only with node (it runs on other platforms using [browserify](https://github.com/substack/node-browserify)). Besides, the code is much better structured, has less duplications and is remarkably faster than the original. 
+This is a fork of the `htmlparser` module. The main difference is that this is intended to be used only with node (it runs on other platforms using [browserify](https://github.com/substack/node-browserify)). `htmlparser2` was rewritten multiple times and, while it maintains an API that's compatible with `htmlparser` in most cases, the projects don't share any code anymore.
 
-The parser now provides a callback interface close to [sax.js](https://github.com/isaacs/sax-js) (originally targeted at [readabilitySAX](https://github.com/fb55/readabilitysax)). I also fixed a couple of bugs & included some pull requests for the original project (eg. [RDF feed support](https://github.com/tautologistics/node-htmlparser/pull/35)).
-
-The support for location data and verbose output was removed a couple of versions ago. It's still available in the [verbose branch](https://github.com/FB55/node-htmlparser/tree/verbose). 
+The parser now provides a callback interface close to [sax.js](https://github.com/isaacs/sax-js) (originally targeted at [readabilitySAX](https://github.com/fb55/readabilitysax)). As a result, old handlers won't work anymore.
 
 The `DefaultHandler` and the `RssHandler` were renamed to clarify their purpose (to `DomHandler` and `FeedHandler`). The old names are still available when requiring `htmlparser2`, so your code should work as expected.

From 8a91aac7566e05689ffcaae9cd31e32b0ed3b94e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Tue, 9 Apr 2013 10:38:03 +0200
Subject: [PATCH 318/450] renamed repository, 3.0.3

---
 README.md    | 8 ++++----
 package.json | 8 ++++----
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/README.md b/README.md
index a0ce491..2c8002b 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-#htmlparser2 [![Build Status](https://secure.travis-ci.org/fb55/node-htmlparser.png)](http://travis-ci.org/fb55/node-htmlparser)
+#htmlparser2 [![Build Status](https://secure.travis-ci.org/fb55/htmlparser2.png)](http://travis-ci.org/fb55/htmlparser2)
 
 A forgiving HTML/XML/RSS parser written in JS for NodeJS. The parser can handle streams (chunked data) and supports custom handlers for writing custom DOMs/output.
 
@@ -37,12 +37,12 @@ JS! Hooray!
 That's it?!
 ```
 
-Read more about the parser in the [wiki](https://github.com/FB55/node-htmlparser/wiki/Parser-options).
+Read more about the parser in the [wiki](https://github.com/fb55/htmlparser2/wiki/Parser-options).
 
 ##Get a DOM
 The `DomHandler` (known as `DefaultHandler` in the original `htmlparser` module) produces a DOM (document object model) that can be manipulated using the `DomUtils` helper.
 
-The `DomHandler`, while still bundled with this module, was recently moved to it's [own module](https://github.com/FB55/domhandler). Have a look at it for further information.
+The `DomHandler`, while still bundled with this module, was recently moved to it's [own module](https://github.com/fb55/domhandler). Have a look at it for further information.
 
 ##Parsing RSS/RDF/Atom Feeds
 
@@ -53,7 +53,7 @@ new htmlparser.FeedHandler(function(<error> error, <object> feed){
 ```
 
 ##Performance
-Using a modified version of [node-expat](https://github.com/astro/node-expat)s `bench.js`, I received the following results (on OSX):
+Using a modified version of [node-expat](https://github.com/astro/node-expat)'s `bench.js`, I received the following results (on OSX):
 
 ```
 node-xml:     28.03 ms/el
diff --git a/package.json b/package.json
index fb9d84f..3ac697b 100644
--- a/package.json
+++ b/package.json
@@ -1,17 +1,17 @@
 {
 	"name": "htmlparser2",
 	"description": "Performance-optimized forgiving HTML/XML/RSS parser",
-	"version": "3.0.2",
+	"version": "3.0.3",
 	"author": "Felix Boehm <me@feedic.com>",
 	"keywords": ["html", "parser", "streams", "xml", "dom", "rss", "feed", "atom"],
 	"contributors": ["Chris Winberry <chris@winberry.net>"],
 	"repository": {
 		"type": "git",
-		"url": "git://github.com/fb55/node-htmlparser.git"
+		"url": "git://github.com/fb55/htmlparser2.git"
 	},
 	"bugs": {
 		"mail": "me@feedic.com",
-		"url": "http://github.com/fb55/node-htmlparser/issues"
+		"url": "http://github.com/fb55/htmlparser2/issues"
 	},
 	"directories": {
 		"lib": "lib/"
@@ -28,6 +28,6 @@
 	},
 	"licenses": [{
 		"type": "MIT",
-		"url": "http://github.com/tautologistics/node-htmlparser/raw/master/LICENSE"
+		"url": "http://github.com/fb55/htmlparser2/raw/master/LICENSE"
 	}]
 }

From e7ad785502b1ef3bb79cb39f9b563b1081eea677 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20B=C3=B6hm?= <me@feedic.com>
Date: Wed, 10 Apr 2013 11:10:36 +0300
Subject: [PATCH 319/450] use DomUtils.getText in fetch, split getElements

---
 lib/FeedHandler.js | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/lib/FeedHandler.js b/lib/FeedHandler.js
index cb86cdb..1f9ed60 100644
--- a/lib/FeedHandler.js
+++ b/lib/FeedHandler.js
@@ -11,13 +11,16 @@ require("util").inherits(FeedHandler, DomHandler);
 
 FeedHandler.prototype.init = DomHandler;
 
-function getElements(what, where, one, recurse){
-	if(one) return DomUtils.getElementsByTagName(what, where, recurse, 1)[0];
-	return DomUtils.getElementsByTagName(what, where, recurse);
+function getElements(what, where){
+	return DomUtils.getElementsByTagName(what, where, false);
+}
+function getOneElement(what, where){
+	return DomUtils.getElementsByTagName(what, where, false, 1)[0];
 }
 function fetch(what, where, recurse){
-	var ret = DomUtils.getElementsByTagName(what, where, recurse, 1);
-	return ret.length > 0 && ret[0].children.length > 0 && ret[0].children[0].data;
+	return DomUtils.getText(
+		DomUtils.getElementsByTagName(what, where, !!recurse, 1)
+	);
 }
 
 var isValidFeed = function(value) {
@@ -26,7 +29,7 @@ var isValidFeed = function(value) {
 
 FeedHandler.prototype.onend = function() {
 	var feed = {},
-		feedRoot = getElements(isValidFeed, this.dom, true),
+		feedRoot = getOneElement(isValidFeed, this.dom),
 		tmp, childs;
 
 	if (feedRoot) {
@@ -36,7 +39,7 @@ FeedHandler.prototype.onend = function() {
 			feed.type = "atom";
 			if(tmp = fetch("id", childs)) feed.id = tmp;
 			if(tmp = fetch("title", childs)) feed.title = tmp;
-			if((tmp = getElements("link", childs, true)) && (tmp = tmp.attribs) && (tmp = tmp.href)) feed.link = tmp;
+			if((tmp = getOneElement("link", childs)) && (tmp = tmp.attribs) && (tmp = tmp.href)) feed.link = tmp;
 			if(tmp = fetch("subtitle", childs)) feed.description = tmp;
 			if(tmp = fetch("updated", childs)) feed.updated = new Date(tmp);
 			if(tmp = fetch("email", childs, true)) feed.author = tmp;
@@ -48,13 +51,13 @@ FeedHandler.prototype.onend = function() {
 
 				if(tmp = fetch("id", item)) entry.id = tmp;
 				if(tmp = fetch("title", item)) entry.title = tmp;
-				if((tmp = getElements("link", item, true)) && (tmp = tmp.attribs) && (tmp = tmp.href)) entry.link = tmp;
+				if((tmp = getOneElement("link", item)) && (tmp = tmp.attribs) && (tmp = tmp.href)) entry.link = tmp;
 				if(tmp = fetch("summary", item)) entry.description = tmp;
 				if(tmp = fetch("updated", item)) entry.pubDate = new Date(tmp);
 				return entry;
 			});
 		} else{
-			childs = getElements("channel", feedRoot.children, true).children;
+			childs = getOneElement("channel", feedRoot.children).children;
 
 			feed.type = feedRoot.name.substr(0, 3);
 			feed.id = "";
@@ -84,4 +87,4 @@ FeedHandler.prototype.onend = function() {
 	);
 };
 
-module.exports = FeedHandler;
\ No newline at end of file
+module.exports = FeedHandler;

From 6b995abf297eeb46a6c3f68da0a7b04bda906774 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Mon, 15 Apr 2013 13:27:17 +0200
Subject: [PATCH 320/450] [tokenizer] name states consistently

---
 lib/Tokenizer.js | 206 +++++++++++++++++++++++------------------------
 1 file changed, 103 insertions(+), 103 deletions(-)

diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
index aaeba83..35a17a9 100644
--- a/lib/Tokenizer.js
+++ b/lib/Tokenizer.js
@@ -3,9 +3,9 @@ module.exports = Tokenizer;
 var i = 0,
 
     TEXT = i++,
-    TAG_START = i++, //after <
+    BEFORE_TAG_NAME = i++, //after <
     IN_TAG_NAME = i++,
-    CLOSING_TAG_START = i++,
+    BEFORE_CLOSING_TAG_NAME = i++,
     IN_CLOSING_TAG_NAME = i++,
     AFTER_CLOSING_TAG_NAME = i++,
 
@@ -19,7 +19,7 @@ var i = 0,
     IN_ATTRIBUTE_VALUE_NO_QUOTES = i++,
 
     //declarations
-    DECLARATION_START = i++, // !
+    BEFORE_DECLARATION = i++, // !
     IN_DECLARATION = i++,
 
     //processing instructions
@@ -28,43 +28,43 @@ var i = 0,
     //comments
     BEFORE_COMMENT = i++,
     IN_COMMENT = i++,
-    COMMENT_END_1 = i++,
-    COMMENT_END_2 = i++,
+    AFTER_COMMENT_1 = i++,
+    AFTER_COMMENT_2 = i++,
 
     //cdata
-    CDATA_1 = i++, // [
-    CDATA_2 = i++, // C
-    CDATA_3 = i++, // D
-    CDATA_4 = i++, // A
-    CDATA_5 = i++, // T
-    CDATA_6 = i++, // A
+    BEFORE_CDATA_1 = i++, // [
+    BEFORE_CDATA_2 = i++, // C
+    BEFORE_CDATA_3 = i++, // D
+    BEFORE_CDATA_4 = i++, // A
+    BEFORE_CDATA_5 = i++, // T
+    BEFORE_CDATA_6 = i++, // A
     IN_CDATA = i++,// [
-    CDATA_END_1 = i++, // ]
-    CDATA_END_2 = i++, // ]
+    AFTER_CDATA_1 = i++, // ]
+    AFTER_CDATA_2 = i++, // ]
 
     //special tags
-    SPECIAL_START = i++, //S
-    SPECIAL_END = i++,   //S
-
-    SCRIPT_1 = i++, //C
-    SCRIPT_2 = i++, //R
-    SCRIPT_3 = i++, //I
-    SCRIPT_4 = i++, //P
-    SCRIPT_5 = i++, //T
-    SCRIPT_END_1 = i++, //C
-    SCRIPT_END_2 = i++, //R
-    SCRIPT_END_3 = i++, //I
-    SCRIPT_END_4 = i++, //P
-    SCRIPT_END_5 = i++, //T
-
-    STYLE_1 = i++, //T
-    STYLE_2 = i++, //Y
-    STYLE_3 = i++, //L
-    STYLE_4 = i++, //E
-    STYLE_END_1 = i++, //T
-    STYLE_END_2 = i++, //Y
-    STYLE_END_3 = i++, //L
-    STYLE_END_4 = i++; //E
+    BEFORE_SPECIAL = i++, //S
+    BEFORE_SPECIAL_END = i++,   //S
+
+    BEFORE_SCRIPT_1 = i++, //C
+    BEFORE_SCRIPT_2 = i++, //R
+    BEFORE_SCRIPT_3 = i++, //I
+    BEFORE_SCRIPT_4 = i++, //P
+    BEFORE_SCRIPT_5 = i++, //T
+    AFTER_SCRIPT_1 = i++, //C
+    AFTER_SCRIPT_2 = i++, //R
+    AFTER_SCRIPT_3 = i++, //I
+    AFTER_SCRIPT_4 = i++, //P
+    AFTER_SCRIPT_5 = i++, //T
+
+    BEFORE_STYLE_1 = i++, //T
+    BEFORE_STYLE_2 = i++, //Y
+    BEFORE_STYLE_3 = i++, //L
+    BEFORE_STYLE_4 = i++, //E
+    AFTER_STYLE_1 = i++, //T
+    AFTER_STYLE_2 = i++, //Y
+    AFTER_STYLE_3 = i++, //L
+    AFTER_STYLE_4 = i++; //E
 
 
 function whitespace(c){
@@ -91,18 +91,18 @@ Tokenizer.prototype.write = function(chunk){
 		if(this._state === TEXT){
 			if(c === "<"){
 				this._emitIfToken("text");
-				this._state = TAG_START;
+				this._state = BEFORE_TAG_NAME;
 				this._sectionStart = this._index;
 			}
-		} else if(this._state === TAG_START){
+		} else if(this._state === BEFORE_TAG_NAME){
 			if(c === "/"){
-				this._state = CLOSING_TAG_START;
+				this._state = BEFORE_CLOSING_TAG_NAME;
 			} else if(c === ">" || this._special > 0) {
 				this._state = TEXT;
 			} else {
 				if(whitespace(c));
 				else if(c === "!"){
-					this._state = DECLARATION_START;
+					this._state = BEFORE_DECLARATION;
 					this._sectionStart = this._index + 1;
 				} else if(c === "?"){
 					this._state = IN_PROCESSING_INSTRUCTION;
@@ -111,7 +111,7 @@ Tokenizer.prototype.write = function(chunk){
 					(!this._options || !this._options.xmlMode) &&
 					(c === "s" || c === "S")
 				){
-					this._state = SPECIAL_START;
+					this._state = BEFORE_SPECIAL;
 					this._sectionStart = this._index;
 				} else {
 					this._state = IN_TAG_NAME;
@@ -133,13 +133,13 @@ Tokenizer.prototype.write = function(chunk){
 				this._emitToken("opentagname");
 				this._state = BEFORE_ATTRIBUTE_NAME;
 			}
-		} else if(this._state === CLOSING_TAG_START){
+		} else if(this._state === BEFORE_CLOSING_TAG_NAME){
 			if(whitespace(c));
 			else if(c === ">"){
 				this._state = TEXT;
 			} else if(this._special > 0){
 				if(c === "s" || c === "S"){
-					this._state = SPECIAL_END;
+					this._state = BEFORE_SPECIAL_END;
 				}
 			} else {
 				this._state = IN_CLOSING_TAG_NAME;
@@ -238,8 +238,8 @@ Tokenizer.prototype.write = function(chunk){
 		/*
 		*	declarations
 		*/
-		else if(this._state === DECLARATION_START){
-			if(c === "[") this._state = CDATA_1;
+		else if(this._state === BEFORE_DECLARATION){
+			if(c === "[") this._state = BEFORE_CDATA_1;
 			else if(c === "-") this._state = BEFORE_COMMENT;
 			else this._state = IN_DECLARATION;
 		} else if(this._state === IN_DECLARATION){
@@ -272,11 +272,11 @@ Tokenizer.prototype.write = function(chunk){
 				this._state = IN_DECLARATION;
 			}
 		} else if(this._state === IN_COMMENT){
-			if(c === "-") this._state = COMMENT_END_1;
-		} else if(this._state === COMMENT_END_1){
-			if(c === "-") this._state = COMMENT_END_2;
+			if(c === "-") this._state = AFTER_COMMENT_1;
+		} else if(this._state === AFTER_COMMENT_1){
+			if(c === "-") this._state = AFTER_COMMENT_2;
 			else this._state = IN_COMMENT;
-		} else if(this._state === COMMENT_END_2){
+		} else if(this._state === AFTER_COMMENT_2){
 			if(c === ">"){
 				//remove 2 trailing chars
 				this._cbs.oncomment(this._buffer.substring(this._sectionStart, this._index - 2));
@@ -290,22 +290,22 @@ Tokenizer.prototype.write = function(chunk){
 		/*
 		*	cdata
 		*/
-		else if(this._state === CDATA_1){
-			if(c === "C") this._state = CDATA_2;
+		else if(this._state === BEFORE_CDATA_1){
+			if(c === "C") this._state = BEFORE_CDATA_2;
 			else this._state = IN_DECLARATION;
-		} else if(this._state === CDATA_2){
-			if(c === "D") this._state = CDATA_3;
+		} else if(this._state === BEFORE_CDATA_2){
+			if(c === "D") this._state = BEFORE_CDATA_3;
 			else this._state = IN_DECLARATION;
-		} else if(this._state === CDATA_3){
-			if(c === "A") this._state = CDATA_4;
+		} else if(this._state === BEFORE_CDATA_3){
+			if(c === "A") this._state = BEFORE_CDATA_4;
 			else this._state = IN_DECLARATION;
-		} else if(this._state === CDATA_4){
-			if(c === "T") this._state = CDATA_5;
+		} else if(this._state === BEFORE_CDATA_4){
+			if(c === "T") this._state = BEFORE_CDATA_5;
 			else this._state = IN_DECLARATION;
-		} else if(this._state === CDATA_5){
-			if(c === "A") this._state = CDATA_6;
+		} else if(this._state === BEFORE_CDATA_5){
+			if(c === "A") this._state = BEFORE_CDATA_6;
 			else this._state = IN_DECLARATION;
-		} else if(this._state === CDATA_6){
+		} else if(this._state === BEFORE_CDATA_6){
 			if(c === "["){
 				this._state = IN_CDATA;
 				this._sectionStart = this._index + 1;
@@ -313,11 +313,11 @@ Tokenizer.prototype.write = function(chunk){
 				this._state = IN_DECLARATION;
 			}
 		} else if(this._state === IN_CDATA){
-			if(c === "]") this._state = CDATA_END_1;
-		} else if(this._state === CDATA_END_1){
-			if(c === "]") this._state = CDATA_END_2;
+			if(c === "]") this._state = AFTER_CDATA_1;
+		} else if(this._state === AFTER_CDATA_1){
+			if(c === "]") this._state = AFTER_CDATA_2;
 			else this._state = IN_CDATA;
-		} else if(this._state === CDATA_END_2){
+		} else if(this._state === AFTER_CDATA_2){
 			if(c === ">"){
 				//remove 2 trailing chars
 				this._cbs.oncdata(this._buffer.substring(this._sectionStart, this._index - 2));
@@ -331,20 +331,20 @@ Tokenizer.prototype.write = function(chunk){
 		/*
 		* special tags
 		*/
-		else if(this._state === SPECIAL_START){
+		else if(this._state === BEFORE_SPECIAL){
 			if(c === "c" || c === "C"){
-				this._state = SCRIPT_1;
+				this._state = BEFORE_SCRIPT_1;
 			} else if(c === "t" || c === "T"){
-				this._state = STYLE_1;
+				this._state = BEFORE_STYLE_1;
 			} else {
 				this._state = IN_TAG_NAME;
 				this._index--; //consume the token again
 			}
-		} else if(this._state === SPECIAL_END){
+		} else if(this._state === BEFORE_SPECIAL_END){
 			if(this._special === 1 && (c === "c" || c === "C")){
-				this._state = SCRIPT_END_1;
+				this._state = AFTER_SCRIPT_1;
 			} else if(this._special === 2 && (c === "t" || c === "T")){
-				this._state = STYLE_END_1;
+				this._state = AFTER_STYLE_1;
 			} 
 			else this._state = TEXT;
 		}
@@ -352,35 +352,35 @@ Tokenizer.prototype.write = function(chunk){
 		/*
 		* script
 		*/
-		else if(this._state === SCRIPT_1){
+		else if(this._state === BEFORE_SCRIPT_1){
 			if(c === "r" || c === "R"){
-				this._state = SCRIPT_2;
+				this._state = BEFORE_SCRIPT_2;
 			} else {
 				this._state = IN_TAG_NAME;
 				this._index--; //consume the token again
 			}
-		} else if(this._state === SCRIPT_2){
+		} else if(this._state === BEFORE_SCRIPT_2){
 			if(c === "i" || c === "I"){
-				this._state = SCRIPT_3;
+				this._state = BEFORE_SCRIPT_3;
 			} else {
 				this._state = IN_TAG_NAME;
 				this._index--; //consume the token again
 			}
-		} else if(this._state === SCRIPT_3){
+		} else if(this._state === BEFORE_SCRIPT_3){
 			if(c === "p" || c === "P"){
-				this._state = SCRIPT_4;
+				this._state = BEFORE_SCRIPT_4;
 			} else {
 				this._state = IN_TAG_NAME;
 				this._index--; //consume the token again
 			}
-		} else if(this._state === SCRIPT_4){
+		} else if(this._state === BEFORE_SCRIPT_4){
 			if(c === "t" || c === "T"){
-				this._state = SCRIPT_5;
+				this._state = BEFORE_SCRIPT_5;
 			} else {
 				this._state = IN_TAG_NAME;
 				this._index--; //consume the token again
 			}
-		} else if(this._state === SCRIPT_5){
+		} else if(this._state === BEFORE_SCRIPT_5){
 			if(c === "/" || c === ">" || whitespace(c)){
 				this._special = 1;
 			}
@@ -388,27 +388,27 @@ Tokenizer.prototype.write = function(chunk){
 			this._index--; //consume the token again
 		}
 
-		else if(this._state === SCRIPT_END_1){
+		else if(this._state === AFTER_SCRIPT_1){
 			if(c === "r" || c === "R"){
-				this._state = SCRIPT_END_2;
+				this._state = AFTER_SCRIPT_2;
 			} 
 			else this._state = TEXT;
-		} else if(this._state === SCRIPT_END_2){
+		} else if(this._state === AFTER_SCRIPT_2){
 			if(c === "i" || c === "I"){
-				this._state = SCRIPT_END_3;
+				this._state = AFTER_SCRIPT_3;
 			} 
 			else this._state = TEXT;
-		} else if(this._state === SCRIPT_END_3){
+		} else if(this._state === AFTER_SCRIPT_3){
 			if(c === "p" || c === "P"){
-				this._state = SCRIPT_END_4;
+				this._state = AFTER_SCRIPT_4;
 			} 
 			else this._state = TEXT;
-		} else if(this._state === SCRIPT_END_4){
+		} else if(this._state === AFTER_SCRIPT_4){
 			if(c === "t" || c === "T"){
-				this._state = SCRIPT_END_5;
+				this._state = AFTER_SCRIPT_5;
 			} 
 			else this._state = TEXT;
-		} else if(this._state === SCRIPT_END_5){
+		} else if(this._state === AFTER_SCRIPT_5){
 			if(c === ">" || whitespace(c)){
 				this._state = IN_CLOSING_TAG_NAME;
 				this._sectionStart = this._index - 6;
@@ -420,28 +420,28 @@ Tokenizer.prototype.write = function(chunk){
 		/*
 		* style
 		*/
-		else if(this._state === STYLE_1){
+		else if(this._state === BEFORE_STYLE_1){
 			if(c === "y" || c === "Y"){
-				this._state = STYLE_2;
+				this._state = BEFORE_STYLE_2;
 			} else {
 				this._state = IN_TAG_NAME;
 				this._index--; //consume the token again
 			}
-		} else if(this._state === STYLE_2){
+		} else if(this._state === BEFORE_STYLE_2){
 			if(c === "l" || c === "L"){
-				this._state = STYLE_3;
+				this._state = BEFORE_STYLE_3;
 			} else {
 				this._state = IN_TAG_NAME;
 				this._index--; //consume the token again
 			}
-		} else if(this._state === STYLE_3){
+		} else if(this._state === BEFORE_STYLE_3){
 			if(c === "e" || c === "E"){
-				this._state = STYLE_4;
+				this._state = BEFORE_STYLE_4;
 			} else {
 				this._state = IN_TAG_NAME;
 				this._index--; //consume the token again
 			}
-		} else if(this._state === STYLE_4){
+		} else if(this._state === BEFORE_STYLE_4){
 			if(c === "/" || c === ">" || whitespace(c)){
 				this._special = 2;
 			}
@@ -449,22 +449,22 @@ Tokenizer.prototype.write = function(chunk){
 			this._index--; //consume the token again
 		}
 
-		else if(this._state === STYLE_END_1){
+		else if(this._state === AFTER_STYLE_1){
 			if(c === "y" || c === "Y"){
-				this._state = STYLE_END_2;
+				this._state = AFTER_STYLE_2;
 			} 
 			else this._state = TEXT;
-		} else if(this._state === STYLE_END_2){
+		} else if(this._state === AFTER_STYLE_2){
 			if(c === "l" || c === "L"){
-				this._state = STYLE_END_3;
+				this._state = AFTER_STYLE_3;
 			} 
 			else this._state = TEXT;
-		} else if(this._state === STYLE_END_3){
+		} else if(this._state === AFTER_STYLE_3){
 			if(c === "e" || c === "E"){
-				this._state = STYLE_END_4;
+				this._state = AFTER_STYLE_4;
 			} 
 			else this._state = TEXT;
-		} else if(this._state === STYLE_END_4){
+		} else if(this._state === AFTER_STYLE_4){
 			if(c === ">" || whitespace(c)){
 				this._state = IN_CLOSING_TAG_NAME;
 				this._sectionStart = this._index - 5;
@@ -516,9 +516,9 @@ Tokenizer.prototype.end = function(chunk){
 
 	//if there is remaining data, emit it in a reasonable way
 	if(this._buffer === "" || this._sectionStart === -1 || this._sectionStart === this._index);
-	else if(this._state === IN_CDATA || this._state === CDATA_END_1 || this._state === CDATA_END_2){
+	else if(this._state === IN_CDATA || this._state === AFTER_CDATA_1 || this._state === AFTER_CDATA_2){
 		this._emitIfToken("cdata");
-	} else if(this._state === IN_COMMENT || this._state === COMMENT_END_1 || this._state === COMMENT_END_2){
+	} else if(this._state === IN_COMMENT || this._state === AFTER_COMMENT_1 || this._state === AFTER_COMMENT_2){
 		this._emitIfToken("comment");
 	} else if(this._state === IN_TAG_NAME){
 		this._emitIfToken("opentagname");

From 0b881708c3589a3e7af54bad8562699440892202 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20B=C3=B6hm?= <me@feedic.com>
Date: Mon, 15 Apr 2013 14:42:31 +0300
Subject: [PATCH 321/450] [feedhandler] recursively walk the tree

---
 lib/FeedHandler.js | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib/FeedHandler.js b/lib/FeedHandler.js
index 1f9ed60..a62e61c 100644
--- a/lib/FeedHandler.js
+++ b/lib/FeedHandler.js
@@ -12,14 +12,14 @@ require("util").inherits(FeedHandler, DomHandler);
 FeedHandler.prototype.init = DomHandler;
 
 function getElements(what, where){
-	return DomUtils.getElementsByTagName(what, where, false);
+	return DomUtils.getElementsByTagName(what, where, true);
 }
 function getOneElement(what, where){
-	return DomUtils.getElementsByTagName(what, where, false, 1)[0];
+	return DomUtils.getElementsByTagName(what, where, true, 1)[0];
 }
 function fetch(what, where, recurse){
 	return DomUtils.getText(
-		DomUtils.getElementsByTagName(what, where, !!recurse, 1)
+		DomUtils.getElementsByTagName(what, where, recurse, 1)
 	);
 }
 

From b06cb297a9f9d87bf8acd9484697c1b8d8d3e186 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Mon, 15 Apr 2013 14:03:15 +0200
Subject: [PATCH 322/450] [readme] small updates

tests pass now thanks to updates to the domhandler module
---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 2c8002b..58d8bf3 100644
--- a/README.md
+++ b/README.md
@@ -40,9 +40,9 @@ That's it?!
 Read more about the parser in the [wiki](https://github.com/fb55/htmlparser2/wiki/Parser-options).
 
 ##Get a DOM
-The `DomHandler` (known as `DefaultHandler` in the original `htmlparser` module) produces a DOM (document object model) that can be manipulated using the `DomUtils` helper.
+The `DomHandler` (known as `DefaultHandler` in the original `htmlparser` module) produces a DOM (document object model) that can be manipulated using the [`DomUtils`](https://github.com/fb55/DomUtils) helper.
 
-The `DomHandler`, while still bundled with this module, was recently moved to it's [own module](https://github.com/fb55/domhandler). Have a look at it for further information.
+The `DomHandler`, while still bundled with this module, was moved to it's [own module](https://github.com/fb55/domhandler). Have a look at it for further information.
 
 ##Parsing RSS/RDF/Atom Feeds
 

From e6f01998632782111b85cb69dd7e7b9d25946520 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Mon, 15 Apr 2013 15:18:27 +0200
Subject: [PATCH 323/450] [tokenizer] don't emit an "onopentagend" event for
 self-closing tags

---
 lib/Tokenizer.js | 2 --
 1 file changed, 2 deletions(-)

diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
index 35a17a9..0edf338 100644
--- a/lib/Tokenizer.js
+++ b/lib/Tokenizer.js
@@ -121,7 +121,6 @@ Tokenizer.prototype.write = function(chunk){
 		} else if(this._state === IN_TAG_NAME){
 			if(c === "/"){
 				this._emitToken("opentagname");
-				this._cbs.onopentagend();
 				this._cbs.onselfclosingtag();
 				this._state = AFTER_CLOSING_TAG_NAME;
 			} else if(c === ">"){
@@ -173,7 +172,6 @@ Tokenizer.prototype.write = function(chunk){
 				this._cbs.onopentagend();
 				this._sectionStart = this._index + 1;
 			} else if(c === "/"){
-				this._cbs.onopentagend();
 				this._cbs.onselfclosingtag();
 				this._state = AFTER_CLOSING_TAG_NAME;
 			} else if(!whitespace(c)){

From a3a9954f609bc6950604d8509e48ab0e61b886f2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Mon, 15 Apr 2013 15:18:38 +0200
Subject: [PATCH 324/450] [parser] fixed handling of self-closing tags

---
 lib/Parser.js | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index e8a3d47..429bc30 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -141,8 +141,18 @@ Parser.prototype.onclosetag = function(name){
 };
 
 Parser.prototype.onselfclosingtag = function(){
-	if(this._cbs.onclosetag && this._stack[this._stack.length-1] === this._tagname){
-		this._cbs.onclosetag(this._stack.pop());
+	var name = this._tagname;
+
+	this.onopentagend();
+
+	//self-closing tags won't be on the top of the stack
+	//cheaper check than before
+	if(this._stack[this._stack.length-1] === name){
+		if(this._cbs.onclosetag){
+			this._cbs.onclosetag(this._stack.pop());
+		} else {
+			this._stack.pop();
+		}
 	}
 };
 

From 9d478ea7dfe4c50e2117eeb2369e535c3401f917 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Mon, 15 Apr 2013 15:19:02 +0200
Subject: [PATCH 325/450] [tests] stream tests are run again

---
 tests/02-stream.js | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/tests/02-stream.js b/tests/02-stream.js
index 72e48fd..a32a42c 100644
--- a/tests/02-stream.js
+++ b/tests/02-stream.js
@@ -5,15 +5,17 @@ var helper = require("./test-helper.js"),
 exports.dir = "Stream";
 
 exports.test = function(test, cb){
-	var stream = new Stream(test.options),
-	    second = false,
+	var second = false,
 	    handler = helper.getEventCollector(function(err, events){
 			cb(err, events);
 			if(!second){
 				second = true;
-				stream.parseComplete(fs.readFileSync(__dirname + test.file));
+				handler.onreset();
+				stream = new Stream(handler, test.options);
+				stream.end(fs.readFileSync(__dirname + test.file));
 			}
-		});
-	
+		}),
+		stream = new Stream(handler, test.options);
+
 	fs.createReadStream(__dirname + test.file).pipe(stream);
 };
\ No newline at end of file

From e612238e1050ef67ded21c4ec2fffc0e4b42073c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Mon, 15 Apr 2013 15:26:41 +0200
Subject: [PATCH 326/450] [tests/feeds] run rdf test again

---
 tests/Feeds/{_03-rdf.js => 03-rdf.js} | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
 rename tests/Feeds/{_03-rdf.js => 03-rdf.js} (92%)

diff --git a/tests/Feeds/_03-rdf.js b/tests/Feeds/03-rdf.js
similarity index 92%
rename from tests/Feeds/_03-rdf.js
rename to tests/Feeds/03-rdf.js
index 94f5e67..726d580 100644
--- a/tests/Feeds/_03-rdf.js
+++ b/tests/Feeds/03-rdf.js
@@ -9,12 +9,12 @@ exports.expected = {
     {
       "title": " Music Equipment Repair and Consignment ",
       "link": "\nhttp://sfbay.craigslist.org/sby/muc/2681301534.html\n",
-      "description": "\nSan Jose Rock Shop offers musical instrument repair and consignment! (408) 215-2065<br> <br> We are pleased to announce our NEW LOCATION: 1199 N 5th st. San Jose, ca 95112. Please call ahead, by appointment only.<br> <br> Recently featured by Metro Newspaper in their 2011 Best of the Silicon Valley edition see it online here:<br> <a href=\"http://www.metroactive.com/best-of-silicon-valley/2011/music-nightlife/editor-picks.html\" rel=\"nofollow\">http://www.metroactive.com/best-of-silicon-valley/2011/music-nightlife/editor-picks.html</a><br> <br> Guitar Set up (acoustic and electronic) $40!<!-- END CLTAGS -->"
+      "description": "\nSan Jose Rock Shop offers musical instrument repair and consignment! (408) 215-2065<br> <br> We are pleased to announce our NEW LOCATION: 1199 N 5th st. San Jose, ca 95112. Please call ahead, by appointment only.<br> <br> Recently featured by Metro Newspaper in their 2011 Best of the Silicon Valley edition see it online here:<br> <a href=\"http://www.metroactive.com/best-of-silicon-valley/2011/music-nightlife/editor-picks.html\" rel=\"nofollow\">http://www.metroactive.com/best-of-silicon-valley/2011/music-nightlife/editor-picks.html</a><br> <br> Guitar Set up (acoustic and electronic) $40!<!-- END CLTAGS -->\n"
     },
     {
       "title": "\nRide Offered - Oakland/BART to LA/SFV - TODAY 3PM 11/04 (oakland north / temescal)\n",
       "link": "\nhttp://sfbay.craigslist.org/eby/rid/2685010755.html\n",
-      "description": "\nIm offering a lift for up to two people from Oakland (or near any BART station in the East Bay/580/880 Corridor, or San Jose/Morgan Hill, Gilroy) to the San Fernando Valley / Los Angeles area. Specifically, Im leaving from Oakland between 2:30 and 3:00pm (this is flexible, but if I leave too late my girlfriend will kill me), and heading to Woodland Hills via the 580, I-5, 405, and 101.<!-- END CLTAGS -->"
+      "description": "\nIm offering a lift for up to two people from Oakland (or near any BART station in the East Bay/580/880 Corridor, or San Jose/Morgan Hill, Gilroy) to the San Fernando Valley / Los Angeles area. Specifically, Im leaving from Oakland between 2:30 and 3:00pm (this is flexible, but if I leave too late my girlfriend will kill me), and heading to Woodland Hills via the 580, I-5, 405, and 101.<!-- END CLTAGS -->\n"
     }
   ]
 };
\ No newline at end of file

From 3b821dc5322b11b49a70a557ad8024d81385a852 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Mon, 15 Apr 2013 15:27:09 +0200
Subject: [PATCH 327/450] [tests/stream] enabled xmlMode for RSS test

---
 tests/Stream/02-RSS.json | 34 +++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/tests/Stream/02-RSS.json b/tests/Stream/02-RSS.json
index 55660a2..c615326 100644
--- a/tests/Stream/02-RSS.json
+++ b/tests/Stream/02-RSS.json
@@ -1,6 +1,6 @@
 {
   "name": "RSS feed",
-  "options": {},
+  "options": {"xmlMode": true},
   "file": "/Documents/RSS_Example.xml",
   "expected": [
     {
@@ -124,15 +124,15 @@
       ]
     },
     {
-      "event": "closetag",
+      "event": "text",
       "data": [
-        "link"
+        "http://liftoff.msfc.nasa.gov/"
       ]
     },
     {
-      "event": "text",
+      "event": "closetag",
       "data": [
-        "http://liftoff.msfc.nasa.gov/"
+        "link"
       ]
     },
     {
@@ -453,15 +453,15 @@
       ]
     },
     {
-      "event": "closetag",
+      "event": "text",
       "data": [
-        "link"
+        "http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp"
       ]
     },
     {
-      "event": "text",
+      "event": "closetag",
       "data": [
-        "http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp"
+        "link"
       ]
     },
     {
@@ -763,15 +763,15 @@
       ]
     },
     {
-      "event": "closetag",
+      "event": "text",
       "data": [
-        "link"
+        "http://liftoff.msfc.nasa.gov/news/2003/news-VASIMR.asp"
       ]
     },
     {
-      "event": "text",
+      "event": "closetag",
       "data": [
-        "http://liftoff.msfc.nasa.gov/news/2003/news-VASIMR.asp"
+        "link"
       ]
     },
     {
@@ -949,15 +949,15 @@
       ]
     },
     {
-      "event": "closetag",
+      "event": "text",
       "data": [
-        "link"
+        "http://liftoff.msfc.nasa.gov/news/2003/news-laundry.asp"
       ]
     },
     {
-      "event": "text",
+      "event": "closetag",
       "data": [
-        "http://liftoff.msfc.nasa.gov/news/2003/news-laundry.asp"
+        "link"
       ]
     },
     {

From 1bb92f71fc91f78e46d4f1203fa6086d135b2288 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Mon, 15 Apr 2013 15:52:50 +0200
Subject: [PATCH 328/450] [tests/stream] create a new handler for the second
 run

---
 tests/02-stream.js | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/tests/02-stream.js b/tests/02-stream.js
index a32a42c..fc22fed 100644
--- a/tests/02-stream.js
+++ b/tests/02-stream.js
@@ -5,17 +5,16 @@ var helper = require("./test-helper.js"),
 exports.dir = "Stream";
 
 exports.test = function(test, cb){
-	var second = false,
-	    handler = helper.getEventCollector(function(err, events){
-			cb(err, events);
-			if(!second){
-				second = true;
-				handler.onreset();
-				stream = new Stream(handler, test.options);
+	fs.createReadStream(__dirname + test.file).pipe(
+		new Stream(
+			helper.getEventCollector(function(err, events){
+				cb(err, events);
+
+				var handler = helper.getEventCollector(cb),
+				    stream = new Stream(handler, test.options);
+
 				stream.end(fs.readFileSync(__dirname + test.file));
 			}
-		}),
-		stream = new Stream(handler, test.options);
-
-	fs.createReadStream(__dirname + test.file).pipe(stream);
+		), test.options)
+	);
 };
\ No newline at end of file

From ae58e5685e7e7b9763be2b9f4d649a60c4cbd85b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Mon, 15 Apr 2013 15:53:34 +0200
Subject: [PATCH 329/450] [tests/stream] added tests for the files in
 tests/Documents

---
 tests/Stream/03-Atom.json       |  644 ++++++++++++++
 tests/Stream/04-RDF.json        | 1399 +++++++++++++++++++++++++++++++
 tests/Stream/05-Attributes.json |  354 ++++++++
 3 files changed, 2397 insertions(+)
 create mode 100644 tests/Stream/03-Atom.json
 create mode 100644 tests/Stream/04-RDF.json
 create mode 100644 tests/Stream/05-Attributes.json

diff --git a/tests/Stream/03-Atom.json b/tests/Stream/03-Atom.json
new file mode 100644
index 0000000..965a538
--- /dev/null
+++ b/tests/Stream/03-Atom.json
@@ -0,0 +1,644 @@
+{
+  "name": "Atom feed",
+  "options": {"xmlMode": true},
+  "file": "/Documents/Atom_Example.xml",
+  "expected": [
+    {
+      "event": "processinginstruction",
+      "data": [
+        "?xml",
+        "?xml version=\"1.0\" encoding=\"utf-8\"?"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n"
+      ]
+    },
+    {
+      "event": "comment",
+      "data": [
+        " http://en.wikipedia.org/wiki/Atom_%28standard%29 "
+      ]
+    },
+    {
+      "event": "commentend",
+      "data": []
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "feed"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "xmlns",
+        "http://www.w3.org/2005/Atom"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "feed",
+        {
+          "xmlns": "http://www.w3.org/2005/Atom"
+        }
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "title"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "title",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "Example Feed"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "title"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "subtitle"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "subtitle",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "A subtitle."
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "subtitle"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "link"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "href",
+        "http://example.org/feed/"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "rel",
+        "self"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "link",
+        {
+          "href": "http://example.org/feed/",
+          "rel": "self"
+        }
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "link"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "link"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "href",
+        "http://example.org/"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "link",
+        {
+          "href": "http://example.org/"
+        }
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "link"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "id"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "id",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "urn:uuid:60a76c80-d399-11d9-b91C-0003939e0af6"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "id"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "updated"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "updated",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "2003-12-13T18:30:02Z"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "updated"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "author"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "author",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "name"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "name",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "John Doe"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "name"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "email"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "email",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "johndoe@example.com"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "email"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "author"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\n\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "entry"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "entry",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "title"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "title",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "Atom-Powered Robots Run Amok"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "title"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "link"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "href",
+        "http://example.org/2003/12/13/atom03"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "link",
+        {
+          "href": "http://example.org/2003/12/13/atom03"
+        }
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "link"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "link"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "rel",
+        "alternate"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "type",
+        "text/html"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "href",
+        "http://example.org/2003/12/13/atom03.html"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "link",
+        {
+          "rel": "alternate",
+          "type": "text/html",
+          "href": "http://example.org/2003/12/13/atom03.html"
+        }
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "link"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "link"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "rel",
+        "edit"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "href",
+        "http://example.org/2003/12/13/atom03/edit"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "link",
+        {
+          "rel": "edit",
+          "href": "http://example.org/2003/12/13/atom03/edit"
+        }
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "link"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "id"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "id",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "id"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "updated"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "updated",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "2003-12-13T18:30:02Z"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "updated"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "summary"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "summary",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "Some text."
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "summary"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "entry"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\n"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "feed"
+      ]
+    }
+  ]
+}
\ No newline at end of file
diff --git a/tests/Stream/04-RDF.json b/tests/Stream/04-RDF.json
new file mode 100644
index 0000000..b4d0f18
--- /dev/null
+++ b/tests/Stream/04-RDF.json
@@ -0,0 +1,1399 @@
+{
+  "name": "RDF feed",
+  "options": {"xmlMode": true},
+  "file": "/Documents/RDF_Example.xml",
+  "expected": [
+    {
+      "event": "processinginstruction",
+      "data": [
+        "?xml",
+        "?xml version=\"1.0\" encoding=\"UTF-8\"?"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "rdf:RDF"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "xmlns:rdf",
+        "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "xmlns",
+        "http://purl.org/rss/1.0/"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "xmlns:ev",
+        "http://purl.org/rss/1.0/modules/event/"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "xmlns:content",
+        "http://purl.org/rss/1.0/modules/content/"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "xmlns:taxo",
+        "http://purl.org/rss/1.0/modules/taxonomy/"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "xmlns:dc",
+        "http://purl.org/dc/elements/1.1/"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "xmlns:syn",
+        "http://purl.org/rss/1.0/modules/syndication/"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "xmlns:dcterms",
+        "http://purl.org/dc/terms/"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "xmlns:admin",
+        "http://webns.net/mvcb/"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "rdf:RDF",
+        {
+          "xmlns:rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
+          "xmlns": "http://purl.org/rss/1.0/",
+          "xmlns:ev": "http://purl.org/rss/1.0/modules/event/",
+          "xmlns:content": "http://purl.org/rss/1.0/modules/content/",
+          "xmlns:taxo": "http://purl.org/rss/1.0/modules/taxonomy/",
+          "xmlns:dc": "http://purl.org/dc/elements/1.1/",
+          "xmlns:syn": "http://purl.org/rss/1.0/modules/syndication/",
+          "xmlns:dcterms": "http://purl.org/dc/terms/",
+          "xmlns:admin": "http://webns.net/mvcb/"
+        }
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "channel"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "rdf:about",
+        "http://sfbay.craigslist.org/ccc/"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "channel",
+        {
+          "rdf:about": "http://sfbay.craigslist.org/ccc/"
+        }
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "title"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "title",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "craigslist | all community in SF bay area"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "title"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "link"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "link",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "http://sfbay.craigslist.org/ccc/"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "link"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "description"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "description",
+        {}
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "description"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "dc:language"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "dc:language",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "en-us"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "dc:language"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "dc:rights"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "dc:rights",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "Copyright 2011 craigslist, inc."
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "dc:rights"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "dc:publisher"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "dc:publisher",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "webmaster@craigslist.org"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "dc:publisher"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "dc:creator"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "dc:creator",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "webmaster@craigslist.org"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "dc:creator"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "dc:source"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "dc:source",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "http://sfbay.craigslist.org/ccc//"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "dc:source"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "dc:title"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "dc:title",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "craigslist | all community in SF bay area"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "dc:title"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "dc:type"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "dc:type",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "Collection"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "dc:type"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "syn:updateBase"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "syn:updateBase",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "2011-11-04T09:39:10-07:00"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "syn:updateBase"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "syn:updateFrequency"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "syn:updateFrequency",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "4"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "syn:updateFrequency"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "syn:updatePeriod"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "syn:updatePeriod",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "hourly"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "syn:updatePeriod"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "items"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "items",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "rdf:Seq"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "rdf:Seq",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "rdf:li"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "rdf:resource",
+        "http://sfbay.craigslist.org/sby/muc/2681301534.html"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "rdf:li",
+        {
+          "rdf:resource": "http://sfbay.craigslist.org/sby/muc/2681301534.html"
+        }
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "rdf:li"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t\t"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "rdf:Seq"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "items"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "channel"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "item"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "rdf:about",
+        "http://sfbay.craigslist.org/sby/muc/2681301534.html"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "item",
+        {
+          "rdf:about": "http://sfbay.craigslist.org/sby/muc/2681301534.html"
+        }
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "title"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "title",
+        {}
+      ]
+    },
+    {
+      "event": "cdatastart",
+      "data": []
+    },
+    {
+      "event": "text",
+      "data": [
+        " Music Equipment Repair and Consignment "
+      ]
+    },
+    {
+      "event": "cdataend",
+      "data": []
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "title"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "link"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "link",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\nhttp://sfbay.craigslist.org/sby/muc/2681301534.html\n"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "link"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "description"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "description",
+        {}
+      ]
+    },
+    {
+      "event": "cdatastart",
+      "data": []
+    },
+    {
+      "event": "text",
+      "data": [
+        "\nSan Jose Rock Shop offers musical instrument repair and consignment! (408) 215-2065<br> <br> We are pleased to announce our NEW LOCATION: 1199 N 5th st. San Jose, ca 95112. Please call ahead, by appointment only.<br> <br> Recently featured by Metro Newspaper in their 2011 Best of the Silicon Valley edition see it online here:<br> <a href=\"http://www.metroactive.com/best-of-silicon-valley/2011/music-nightlife/editor-picks.html\" rel=\"nofollow\">http://www.metroactive.com/best-of-silicon-valley/2011/music-nightlife/editor-picks.html</a><br> <br> Guitar Set up (acoustic and electronic) $40!<!-- END CLTAGS -->\n"
+      ]
+    },
+    {
+      "event": "cdataend",
+      "data": []
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "description"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "dc:date"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "dc:date",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "2011-11-04T09:35:17-07:00"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "dc:date"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "dc:language"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "dc:language",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "en-us"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "dc:language"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "dc:rights"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "dc:rights",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "Copyright 2011 craigslist, inc."
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "dc:rights"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "dc:source"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "dc:source",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\nhttp://sfbay.craigslist.org/sby/muc/2681301534.html\n"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "dc:source"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "dc:title"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "dc:title",
+        {}
+      ]
+    },
+    {
+      "event": "cdatastart",
+      "data": []
+    },
+    {
+      "event": "text",
+      "data": [
+        " Music Equipment Repair and Consignment "
+      ]
+    },
+    {
+      "event": "cdataend",
+      "data": []
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "dc:title"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "dc:type"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "dc:type",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "text"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "dc:type"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "dcterms:issued"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "dcterms:issued",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "2011-11-04T09:35:17-07:00"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "dcterms:issued"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "item"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "item"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "rdf:about",
+        "http://sfbay.craigslist.org/eby/rid/2685010755.html"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "item",
+        {
+          "rdf:about": "http://sfbay.craigslist.org/eby/rid/2685010755.html"
+        }
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "title"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "title",
+        {}
+      ]
+    },
+    {
+      "event": "cdatastart",
+      "data": []
+    },
+    {
+      "event": "text",
+      "data": [
+        "\nRide Offered - Oakland/BART to LA/SFV - TODAY 3PM 11/04 (oakland north / temescal)\n"
+      ]
+    },
+    {
+      "event": "cdataend",
+      "data": []
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "title"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "link"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "link",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\nhttp://sfbay.craigslist.org/eby/rid/2685010755.html\n"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "link"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "description"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "description",
+        {}
+      ]
+    },
+    {
+      "event": "cdatastart",
+      "data": []
+    },
+    {
+      "event": "text",
+      "data": [
+        "\nIm offering a lift for up to two people from Oakland (or near any BART station in the East Bay/580/880 Corridor, or San Jose/Morgan Hill, Gilroy) to the San Fernando Valley / Los Angeles area. Specifically, Im leaving from Oakland between 2:30 and 3:00pm (this is flexible, but if I leave too late my girlfriend will kill me), and heading to Woodland Hills via the 580, I-5, 405, and 101.<!-- END CLTAGS -->\n"
+      ]
+    },
+    {
+      "event": "cdataend",
+      "data": []
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "description"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "dc:date"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "dc:date",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "2011-11-04T09:34:54-07:00"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "dc:date"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "dc:language"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "dc:language",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "en-us"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "dc:language"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "dc:rights"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "dc:rights",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "Copyright 2011 craigslist, inc."
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "dc:rights"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "dc:source"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "dc:source",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\nhttp://sfbay.craigslist.org/eby/rid/2685010755.html\n"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "dc:source"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "dc:title"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "dc:title",
+        {}
+      ]
+    },
+    {
+      "event": "cdatastart",
+      "data": []
+    },
+    {
+      "event": "text",
+      "data": [
+        "\nRide Offered - Oakland/BART to LA/SFV - TODAY 3PM 11/04 (oakland north / temescal)\n"
+      ]
+    },
+    {
+      "event": "cdataend",
+      "data": []
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "dc:title"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "dc:type"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "dc:type",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "text"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "dc:type"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "dcterms:issued"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "dcterms:issued",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "2011-11-04T09:34:54-07:00"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "dcterms:issued"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "item"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "rdf:RDF"
+      ]
+    }
+  ]
+}
\ No newline at end of file
diff --git a/tests/Stream/05-Attributes.json b/tests/Stream/05-Attributes.json
new file mode 100644
index 0000000..a1bd06d
--- /dev/null
+++ b/tests/Stream/05-Attributes.json
@@ -0,0 +1,354 @@
+{
+  "name": "Attributes",
+  "options": {},
+  "file": "/Documents/Attributes.html",
+  "expected": [
+    {
+      "event": "processinginstruction",
+      "data": [
+        "!doctype",
+        "!doctype html"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "html"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "html",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "head"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "head",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "title"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "title",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "Attributes test"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "title"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "head"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "body"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "body",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t"
+      ]
+    },
+    {
+      "event": "comment",
+      "data": [
+        " Normal attributes "
+      ]
+    },
+    {
+      "event": "commentend",
+      "data": []
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "button"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "id",
+        "test0"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "class",
+        "value0"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "title",
+        "value1"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "button",
+        {
+          "id": "test0",
+          "class": "value0",
+          "title": "value1"
+        }
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "class=\"value0\" title=\"value1\""
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "button"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\n\t"
+      ]
+    },
+    {
+      "event": "comment",
+      "data": [
+        " Attributes with no quotes or value "
+      ]
+    },
+    {
+      "event": "commentend",
+      "data": []
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "button"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "id",
+        "test1"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "class",
+        "value2"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "disabled",
+        ""
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "button",
+        {
+          "id": "test1",
+          "class": "value2",
+          "disabled": ""
+        }
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "class=value2 disabled"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "button"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\n\t"
+      ]
+    },
+    {
+      "event": "comment",
+      "data": [
+        " Attributes with no space between them. No valid, but accepted by the browser "
+      ]
+    },
+    {
+      "event": "commentend",
+      "data": []
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n\t"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "button"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "id",
+        "test2"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "class",
+        "value4"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "title",
+        "value5"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "button",
+        {
+          "id": "test2",
+          "class": "value4",
+          "title": "value5"
+        }
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "class=\"value4\"title=\"value5\""
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "button"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "body"
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "\n"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "html"
+      ]
+    }
+  ]
+}
\ No newline at end of file

From 83c75dc8f923f18e53428a6481ced807f90eee55 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Mon, 15 Apr 2013 15:55:33 +0200
Subject: [PATCH 330/450] 3.0.4

---
 package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package.json b/package.json
index 3ac697b..d23c3cb 100644
--- a/package.json
+++ b/package.json
@@ -1,7 +1,7 @@
 {
 	"name": "htmlparser2",
 	"description": "Performance-optimized forgiving HTML/XML/RSS parser",
-	"version": "3.0.3",
+	"version": "3.0.4",
 	"author": "Felix Boehm <me@feedic.com>",
 	"keywords": ["html", "parser", "streams", "xml", "dom", "rss", "feed", "atom"],
 	"contributors": ["Chris Winberry <chris@winberry.net>"],

From e36f3d0e4b309eea9d6366461d2704bb71e90934 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Mon, 15 Apr 2013 16:07:27 +0200
Subject: [PATCH 331/450] [parser] lowercase instruction names if lowerCaseTags
 option is set

for backwards compat
---
 lib/Parser.js | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index 429bc30..c0903e2 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -170,13 +170,17 @@ Parser.prototype.onattribvalue = function attribValue(value){
 
 Parser.prototype.ondeclaration = function(value){
 	if(this._cbs.onprocessinginstruction){
-		this._cbs.onprocessinginstruction("!" + value.split(/\s|\//, 1)[0], "!" + value);
+		var name = value.split(/\s|\//, 1)[0];
+		if(this._options.lowerCaseTags) name = name.toLowerCase();
+		this._cbs.onprocessinginstruction("!" + name, "!" + value);
 	}
 };
 
 Parser.prototype.onprocessinginstruction = function(value){
 	if(this._cbs.onprocessinginstruction){
-		this._cbs.onprocessinginstruction("?" + value.split(/\s|\//, 1)[0], "?" + value);
+		var name = value.split(/\s|\//, 1)[0];
+		if(this._options.lowerCaseTags) name = name.toLowerCase();
+		this._cbs.onprocessinginstruction("?" + name, "?" + value);
 	}
 };
 

From 61c5a802ef6b66b6d588fe734bc533da7b9ecc0a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Mon, 15 Apr 2013 16:07:31 +0200
Subject: [PATCH 332/450] 3.0.5

---
 package.json | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/package.json b/package.json
index d23c3cb..d018119 100644
--- a/package.json
+++ b/package.json
@@ -1,7 +1,7 @@
 {
 	"name": "htmlparser2",
-	"description": "Performance-optimized forgiving HTML/XML/RSS parser",
-	"version": "3.0.4",
+	"description": "Fast & forgiving HTML/XML/RSS parser",
+	"version": "3.0.5",
 	"author": "Felix Boehm <me@feedic.com>",
 	"keywords": ["html", "parser", "streams", "xml", "dom", "rss", "feed", "atom"],
 	"contributors": ["Chris Winberry <chris@winberry.net>"],

From d79b1b3965c29bd8caaafacd9ea44c5f755ebd41 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Mon, 15 Apr 2013 21:05:16 +0200
Subject: [PATCH 333/450] [tests/events] added test case for jsdom#368

---
 tests/Events/10-crazy-attrib.json | 52 +++++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)
 create mode 100644 tests/Events/10-crazy-attrib.json

diff --git a/tests/Events/10-crazy-attrib.json b/tests/Events/10-crazy-attrib.json
new file mode 100644
index 0000000..d46c436
--- /dev/null
+++ b/tests/Events/10-crazy-attrib.json
@@ -0,0 +1,52 @@
+{
+  "name": "crazy attribute",
+  "options": {
+    "handler": {},
+    "parser": {}
+  },
+  "html": "<p <='' FAIL>stuff</p>",
+  "expected": [
+    {
+      "event": "opentagname",
+      "data": [
+        "p"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "<",
+        ""
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "FAIL",
+        ""
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "p",
+        {
+          "<": "",
+          "FAIL": ""
+        }
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "stuff"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "p"
+      ]
+    }
+  ]
+}
\ No newline at end of file

From 1123da8e380761d75dfda58894c724ac8e0fec47 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 18 May 2013 10:42:45 +0200
Subject: [PATCH 334/450] changed behavior for non-xml mode
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

• lowercase tag and attribute names by default
• CDATA is now emitted as a comment (fixes tmpvar/jsdom#618)
---
 lib/Parser.js | 26 ++++++++++++++++++--------
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index c0903e2..858006a 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -94,7 +94,7 @@ Parser.prototype.ontext = function(data){
 };
 
 Parser.prototype.onopentagname = function(name){
-	if(this._options.lowerCaseTags) name = name.toLowerCase();
+	if(!this._options.xmlMode || this._options.lowerCaseTags) name = name.toLowerCase();
 
 	this._tagname = name;
 
@@ -127,7 +127,7 @@ Parser.prototype.onopentagend = function(){
 };
 
 Parser.prototype.onclosetag = function(name){
-	if(this._options.lowerCaseTags) name = name.toLowerCase();
+	if(!this._options.xmlMode || this._options.lowerCaseTags) name = name.toLowerCase();
 	if(this._stack.length && (!(name in emptyTags) || this._options.xmlMode)){
 		var pos = this._stack.lastIndexOf(name);
 		if(pos !== -1){
@@ -158,7 +158,9 @@ Parser.prototype.onselfclosingtag = function(){
 
 Parser.prototype.onattribname = function(name){
 	if(this._attribname !== "") this.onattribvalue("");
-	if(this._options.lowerCaseAttributeNames) name = name.toLowerCase();
+	if(!this._options.xmlMode || this._options.lowerCaseAttributeNames){
+		name = name.toLowerCase();
+	}
 	this._attribname = name;
 };
 
@@ -171,7 +173,9 @@ Parser.prototype.onattribvalue = function attribValue(value){
 Parser.prototype.ondeclaration = function(value){
 	if(this._cbs.onprocessinginstruction){
 		var name = value.split(/\s|\//, 1)[0];
-		if(this._options.lowerCaseTags) name = name.toLowerCase();
+		if(!this._options.xmlMode || this._options.lowerCaseTags){
+			name = name.toLowerCase();
+		}
 		this._cbs.onprocessinginstruction("!" + name, "!" + value);
 	}
 };
@@ -179,7 +183,9 @@ Parser.prototype.ondeclaration = function(value){
 Parser.prototype.onprocessinginstruction = function(value){
 	if(this._cbs.onprocessinginstruction){
 		var name = value.split(/\s|\//, 1)[0];
-		if(this._options.lowerCaseTags) name = name.toLowerCase();
+		if(!this._options.xmlMode || this._options.lowerCaseTags){
+			name = name.toLowerCase();
+		}
 		this._cbs.onprocessinginstruction("?" + name, "?" + value);
 	}
 };
@@ -190,9 +196,13 @@ Parser.prototype.oncomment = function(value){
 };
 
 Parser.prototype.oncdata = function(value){
-	if(this._cbs.oncdatastart) this._cbs.oncdatastart();
-	if(this._cbs.ontext) this._cbs.ontext(value);
-	if(this._cbs.oncdataend) this._cbs.oncdataend();
+	if(this._options.xmlMode){
+		if(this._cbs.oncdatastart) this._cbs.oncdatastart();
+		if(this._cbs.ontext) this._cbs.ontext(value);
+		if(this._cbs.oncdataend) this._cbs.oncdataend();
+	} else {
+		this.oncomment("[CDATA[" + value + "]]");
+	}
 };
 
 Parser.prototype.onerror = function(err){

From 357a825e2c1959803556a28685481fcaf7c37e1f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 18 May 2013 12:41:54 +0200
Subject: [PATCH 335/450] [tests/events] updated tests to reflect latest
 changes

---
 tests/Events/04-cdata.json        | 2 +-
 tests/Events/10-crazy-attrib.json | 4 ++--
 tests/Stream/01-basic.json        | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/Events/04-cdata.json b/tests/Events/04-cdata.json
index cf350ff..71d4860 100644
--- a/tests/Events/04-cdata.json
+++ b/tests/Events/04-cdata.json
@@ -2,7 +2,7 @@
   "name": "CDATA",
   "options": {
     "handler": {},
-    "parser": {}
+    "parser": {"xmlMode": true}
   },
   "html": "<tag><![CDATA[ asdf ><asdf></adsf><> fo]]></tag>",
   "expected": [
diff --git a/tests/Events/10-crazy-attrib.json b/tests/Events/10-crazy-attrib.json
index d46c436..baf319f 100644
--- a/tests/Events/10-crazy-attrib.json
+++ b/tests/Events/10-crazy-attrib.json
@@ -22,7 +22,7 @@
     {
       "event": "attribute",
       "data": [
-        "FAIL",
+        "fail",
         ""
       ]
     },
@@ -32,7 +32,7 @@
         "p",
         {
           "<": "",
-          "FAIL": ""
+          "fail": ""
         }
       ]
     },
diff --git a/tests/Stream/01-basic.json b/tests/Stream/01-basic.json
index 9fbe1eb..9ae3e3f 100644
--- a/tests/Stream/01-basic.json
+++ b/tests/Stream/01-basic.json
@@ -6,7 +6,7 @@
     {
       "event": "processinginstruction",
       "data": [
-        "!DOCTYPE",
+        "!doctype",
         "!DOCTYPE html"
       ]
     },

From 96c41b1243a14dc0f0adcb3fbb562a1bd9e4c86b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 18 May 2013 12:42:01 +0200
Subject: [PATCH 336/450] 3.1.0

---
 package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package.json b/package.json
index d018119..cc9333c 100644
--- a/package.json
+++ b/package.json
@@ -1,7 +1,7 @@
 {
 	"name": "htmlparser2",
 	"description": "Fast & forgiving HTML/XML/RSS parser",
-	"version": "3.0.5",
+	"version": "3.1.0",
 	"author": "Felix Boehm <me@feedic.com>",
 	"keywords": ["html", "parser", "streams", "xml", "dom", "rss", "feed", "atom"],
 	"contributors": ["Chris Winberry <chris@winberry.net>"],

From 75fb1cf174f0e6a517573ad51fa94a2d987f3222 Mon Sep 17 00:00:00 2001
From: Andreas Lind Petersen <andreas@one.com>
Date: Wed, 29 May 2013 23:50:14 +0200
Subject: [PATCH 337/450] Added missing void elements.

Source: http://www.w3.org/TR/html5/syntax.html#void-elements
---
 lib/Parser.js | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index 858006a..5f26cec 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -68,7 +68,12 @@ var emptyTags = {
 	link: true,
 	meta: true,
 	param: true,
-	embed: true
+	embed: true,
+	command: true,
+	keygen: true,
+	source: true,
+	track: true,
+	wbr: true
 };
 
 function Parser(cbs, options){

From 7ca6d22595978db6ebdc784bbcee2b8260864de9 Mon Sep 17 00:00:00 2001
From: Andreas Madsen <amwebdk@gmail.com>
Date: Wed, 5 Jun 2013 14:35:38 +0200
Subject: [PATCH 338/450] [tokenizer] text in special tags there looks like a
 tag ending

---
 lib/Tokenizer.js                      |  2 +
 tests/Events/11-script_in_script.json | 54 +++++++++++++++++++++++++++
 2 files changed, 56 insertions(+)
 create mode 100644 tests/Events/11-script_in_script.json

diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
index 0edf338..8ff5f8d 100644
--- a/lib/Tokenizer.js
+++ b/lib/Tokenizer.js
@@ -139,6 +139,8 @@ Tokenizer.prototype.write = function(chunk){
 			} else if(this._special > 0){
 				if(c === "s" || c === "S"){
 					this._state = BEFORE_SPECIAL_END;
+				} else {
+					this._state = TEXT;
 				}
 			} else {
 				this._state = IN_CLOSING_TAG_NAME;
diff --git a/tests/Events/11-script_in_script.json b/tests/Events/11-script_in_script.json
new file mode 100644
index 0000000..ddbb87c
--- /dev/null
+++ b/tests/Events/11-script_in_script.json
@@ -0,0 +1,54 @@
+{
+  "name": "Scripts creating other scripts",
+  "options": {
+    "handler": {},
+    "parser": {}
+  },
+  "html": "<p><script>var str = '<script></'+'script>';</script></p>",
+  "expected": [
+    {
+      "event": "opentagname",
+      "data": [
+        "p"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "p",
+        {}
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "script"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "script",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        "var str = '<script></'+'script>';"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "script"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "p"
+      ]
+    }
+  ]
+}
\ No newline at end of file

From 02f12e25a0023e6760d86b6fa9078bf09d32e7f3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20B=C3=B6hm?= <me@feedic.com>
Date: Wed, 5 Jun 2013 16:26:02 +0200
Subject: [PATCH 339/450] [tokenizer] consume token again

after switching from BEFORE_CLOSING_TAG_NAME to TEXT state (inside a special tag)
---
 lib/Tokenizer.js | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
index 8ff5f8d..6aff603 100644
--- a/lib/Tokenizer.js
+++ b/lib/Tokenizer.js
@@ -141,6 +141,7 @@ Tokenizer.prototype.write = function(chunk){
 					this._state = BEFORE_SPECIAL_END;
 				} else {
 					this._state = TEXT;
+					this._index--;
 				}
 			} else {
 				this._state = IN_CLOSING_TAG_NAME;
@@ -545,4 +546,4 @@ Tokenizer.prototype._emitIfToken = function(name){
 		this._cbs["on" + name](this._buffer.substring(this._sectionStart, this._index));
 	}
 	this._sectionStart = -1;
-};
\ No newline at end of file
+};

From 6e1669ffe09c4e1dbf064739b5deaf7b7d25690c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20B=C3=B6hm?= <me@feedic.com>
Date: Wed, 5 Jun 2013 17:54:34 +0300
Subject: [PATCH 340/450] [parser] still recognize other options in
 non-xml-mode

using the easiest solution (applying DeMorgan).
---
 lib/Parser.js | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index 5f26cec..22a93f0 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -99,7 +99,9 @@ Parser.prototype.ontext = function(data){
 };
 
 Parser.prototype.onopentagname = function(name){
-	if(!this._options.xmlMode || this._options.lowerCaseTags) name = name.toLowerCase();
+	if(!(this._options.xmlMode || "lowerCaseTags" in this._options) || this._options.lowerCaseTags){
+		name = name.toLowerCase();
+	}
 
 	this._tagname = name;
 
@@ -132,7 +134,9 @@ Parser.prototype.onopentagend = function(){
 };
 
 Parser.prototype.onclosetag = function(name){
-	if(!this._options.xmlMode || this._options.lowerCaseTags) name = name.toLowerCase();
+	if(!(this._options.xmlMode || "lowerCaseTags" in this._options) || this._options.lowerCaseTags){
+		name = name.toLowerCase();
+	}
 	if(this._stack.length && (!(name in emptyTags) || this._options.xmlMode)){
 		var pos = this._stack.lastIndexOf(name);
 		if(pos !== -1){
@@ -163,7 +167,7 @@ Parser.prototype.onselfclosingtag = function(){
 
 Parser.prototype.onattribname = function(name){
 	if(this._attribname !== "") this.onattribvalue("");
-	if(!this._options.xmlMode || this._options.lowerCaseAttributeNames){
+	if(!(this._options.xmlMode || "lowerCaseAttributeNames" in this._options) || this._options.lowerCaseAttributeNames){
 		name = name.toLowerCase();
 	}
 	this._attribname = name;
@@ -178,7 +182,7 @@ Parser.prototype.onattribvalue = function attribValue(value){
 Parser.prototype.ondeclaration = function(value){
 	if(this._cbs.onprocessinginstruction){
 		var name = value.split(/\s|\//, 1)[0];
-		if(!this._options.xmlMode || this._options.lowerCaseTags){
+		if(!(this._options.xmlMode || "lowerCaseTags" in this._options) || this._options.lowerCaseTags){
 			name = name.toLowerCase();
 		}
 		this._cbs.onprocessinginstruction("!" + name, "!" + value);
@@ -188,7 +192,7 @@ Parser.prototype.ondeclaration = function(value){
 Parser.prototype.onprocessinginstruction = function(value){
 	if(this._cbs.onprocessinginstruction){
 		var name = value.split(/\s|\//, 1)[0];
-		if(!this._options.xmlMode || this._options.lowerCaseTags){
+		if(!(this._options.xmlMode || "lowerCaseTags" in this._options) || this._options.lowerCaseTags){
 			name = name.toLowerCase();
 		}
 		this._cbs.onprocessinginstruction("?" + name, "?" + value);

From 231a746f8dc1e71d8aafc5a5df479254c033e93e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20B=C3=B6hm?= <me@feedic.com>
Date: Wed, 5 Jun 2013 16:55:04 +0200
Subject: [PATCH 341/450] 3.1.1

---
 package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package.json b/package.json
index cc9333c..51e859a 100644
--- a/package.json
+++ b/package.json
@@ -1,7 +1,7 @@
 {
 	"name": "htmlparser2",
 	"description": "Fast & forgiving HTML/XML/RSS parser",
-	"version": "3.1.0",
+	"version": "3.1.1",
 	"author": "Felix Boehm <me@feedic.com>",
 	"keywords": ["html", "parser", "streams", "xml", "dom", "rss", "feed", "atom"],
 	"contributors": ["Chris Winberry <chris@winberry.net>"],

From 7ef5de8c232227d109fe0bab2be626c53c69e5d7 Mon Sep 17 00:00:00 2001
From: Andreas Madsen <amwebdk@gmail.com>
Date: Thu, 6 Jun 2013 22:25:43 +0200
Subject: [PATCH 342/450] [tokenizer] don't reset comment state in case of long
 endings

---
 lib/Tokenizer.js                      |  2 ++
 tests/Events/12-long-comment-end.json | 20 ++++++++++++++++++++
 2 files changed, 22 insertions(+)
 create mode 100644 tests/Events/12-long-comment-end.json

diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
index 6aff603..69cd782 100644
--- a/lib/Tokenizer.js
+++ b/lib/Tokenizer.js
@@ -283,6 +283,8 @@ Tokenizer.prototype.write = function(chunk){
 				this._cbs.oncomment(this._buffer.substring(this._sectionStart, this._index - 2));
 				this._state = TEXT;
 				this._sectionStart = this._index + 1;
+			} else if (c === '-') {
+				// Keep the state at AFTER_COMMENT_2
 			} else {
 				this._state = IN_COMMENT;
 			}
diff --git a/tests/Events/12-long-comment-end.json b/tests/Events/12-long-comment-end.json
new file mode 100644
index 0000000..e81f307
--- /dev/null
+++ b/tests/Events/12-long-comment-end.json
@@ -0,0 +1,20 @@
+{
+  "name": "Long comment ending",
+  "options": {
+    "handler": {},
+    "parser": {}
+  },
+  "html": "<meta id='before'><!-- text ---><meta id='after'>",
+  "expected": [
+  { "event": "opentagname", "data": [ "meta" ] },
+  { "event": "attribute",   "data": [ "id", "before" ] },
+  { "event": "opentag",     "data": [ "meta", {"id": "before"} ] },
+  { "event": "closetag",    "data": [ "meta" ] },
+  { "event": "comment",     "data": [ " text -" ] },
+  { "event": "commentend",  "data": [] },
+  { "event": "opentagname", "data": [ "meta" ] },
+  { "event": "attribute",   "data": [ "id", "after" ] },
+  { "event": "opentag",     "data": [ "meta", {"id": "after"} ] },
+  { "event": "closetag",    "data": [ "meta" ] }
+  ]
+}
\ No newline at end of file

From e8dc84a21848d6ab6ccee599b5a1e5534715aff6 Mon Sep 17 00:00:00 2001
From: Andreas Madsen <amwebdk@gmail.com>
Date: Fri, 7 Jun 2013 08:48:14 +0200
Subject: [PATCH 343/450] [Tokenizer] don't reset CDATA state in case of long
 endings

---
 lib/Tokenizer.js                    |  4 +++-
 tests/Events/13-long-cdata-end.json | 22 ++++++++++++++++++++++
 2 files changed, 25 insertions(+), 1 deletion(-)
 create mode 100644 tests/Events/13-long-cdata-end.json

diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
index 69cd782..d4f0f67 100644
--- a/lib/Tokenizer.js
+++ b/lib/Tokenizer.js
@@ -326,7 +326,9 @@ Tokenizer.prototype.write = function(chunk){
 				this._cbs.oncdata(this._buffer.substring(this._sectionStart, this._index - 2));
 				this._state = TEXT;
 				this._sectionStart = this._index + 1;
-			} else {
+			} else if (c === ']') {
+				// Keep the state at AFTER_CDATA_2
+			}else {
 				this._state = IN_CDATA;
 			}
 		}
diff --git a/tests/Events/13-long-cdata-end.json b/tests/Events/13-long-cdata-end.json
new file mode 100644
index 0000000..34b7b41
--- /dev/null
+++ b/tests/Events/13-long-cdata-end.json
@@ -0,0 +1,22 @@
+{
+  "name": "Long CDATA ending",
+  "options": {
+    "handler": {},
+    "parser": {"xmlMode": true}
+  },
+  "html": "<before /><tag><![CDATA[ text ]]]></tag><after />",
+  "expected": [
+  { "event": "opentagname", "data": [ "before" ] },
+  { "event": "opentag",     "data": [ "before", {} ] },
+  { "event": "closetag",    "data": [ "before" ] },
+  { "event": "opentagname", "data": [ "tag" ] },
+  { "event": "opentag",     "data": [ "tag", {} ] },
+  { "event": "cdatastart",  "data": [] },
+  { "event": "text",        "data": [ " text ]" ] },
+  { "event": "cdataend",    "data": [] },
+  { "event": "closetag",    "data": [ "tag" ] },
+  { "event": "opentagname", "data": [ "after" ] },
+  { "event": "opentag",     "data": [ "after", {} ] },
+  { "event": "closetag",    "data": [ "after" ] }
+  ]
+}
\ No newline at end of file

From a768e88ac581ef098a2a383c9f0cb7da5d10b24f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20B=C3=B6hm?= <me@feedic.com>
Date: Fri, 7 Jun 2013 16:58:39 +0200
Subject: [PATCH 344/450] readme: added version badge

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 58d8bf3..85b4c77 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-#htmlparser2 [![Build Status](https://secure.travis-ci.org/fb55/htmlparser2.png)](http://travis-ci.org/fb55/htmlparser2)
+#htmlparser2 [![Build Status](https://secure.travis-ci.org/fb55/htmlparser2.png)](http://travis-ci.org/fb55/htmlparser2) [![NPM version](https://badge.fury.io/js/htmlparser2.png)](https://npmjs.org/package/htmlparser2)
 
 A forgiving HTML/XML/RSS parser written in JS for NodeJS. The parser can handle streams (chunked data) and supports custom handlers for writing custom DOMs/output.
 

From 40a2339aa0304665cd8d753ee8907bce1619ff62 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20B=C3=B6hm?= <me@feedic.com>
Date: Fri, 7 Jun 2013 21:05:13 +0300
Subject: [PATCH 345/450] [readme] added yet another badge (dependency
 versions)

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 85b4c77..603c4c3 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-#htmlparser2 [![Build Status](https://secure.travis-ci.org/fb55/htmlparser2.png)](http://travis-ci.org/fb55/htmlparser2) [![NPM version](https://badge.fury.io/js/htmlparser2.png)](https://npmjs.org/package/htmlparser2)
+#htmlparser2 [![NPM version](https://badge.fury.io/js/htmlparser2.png)](https://npmjs.org/package/htmlparser2) [![Build Status](https://secure.travis-ci.org/fb55/htmlparser2.png)](http://travis-ci.org/fb55/htmlparser2) [![Dependency Status](https://david-dm.org/fb55/htmlparser2.png)](https://david-dm.org/fb55/htmlparser2)
 
 A forgiving HTML/XML/RSS parser written in JS for NodeJS. The parser can handle streams (chunked data) and supports custom handlers for writing custom DOMs/output.
 

From 8b390bd088ba7f97d7a48c92fcd791f9241e4cf9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 9 Jun 2013 15:05:02 +0200
Subject: [PATCH 346/450] [bench] added the hubbub & html-parser modules

todo: update readme
---
 tests/bench.js | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/tests/bench.js b/tests/bench.js
index 462b3b3..01ceeef 100644
--- a/tests/bench.js
+++ b/tests/bench.js
@@ -70,6 +70,34 @@ try{
 	parsers.push([HtmlParser, "htmlparser"]);
 } catch(e){}
 
+
+try{
+	var hubbub = require('hubbub');
+
+	function Hubbub() {
+		var handler = new hubbub.DefaultHandler();
+		var parser = new hubbub.Parser(handler);
+		this.parse = function(s) {
+			parser.parseComplete(s);
+		};
+	}
+
+	parsers.push([Hubbub, "hubbub"]);
+} catch(e){}
+
+try{
+	var htmlParser = require("html-parser");
+
+	function HTMLParser() {
+		var cbs = {};
+		this.parse = function(s){
+			htmlParser.parse(s, cbs);
+		};
+	}
+
+	parsers.push([HTMLParser, "html-parser"]);
+} catch(e){}
+
 try{
 	var htmlparser2 = require('../lib/Parser.js');
 

From dda8df2ec3a7718d22e4675b115147cab4bc459e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 9 Jun 2013 15:05:08 +0200
Subject: [PATCH 347/450] 3.1.2

---
 package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package.json b/package.json
index 51e859a..34b2c26 100644
--- a/package.json
+++ b/package.json
@@ -1,7 +1,7 @@
 {
 	"name": "htmlparser2",
 	"description": "Fast & forgiving HTML/XML/RSS parser",
-	"version": "3.1.1",
+	"version": "3.1.2",
 	"author": "Felix Boehm <me@feedic.com>",
 	"keywords": ["html", "parser", "streams", "xml", "dom", "rss", "feed", "atom"],
 	"contributors": ["Chris Winberry <chris@winberry.net>"],

From 7fd58aaac4bfa50c2f8f1877412252d2ae0f8823 Mon Sep 17 00:00:00 2001
From: Andreas Madsen <amwebdk@gmail.com>
Date: Mon, 10 Jun 2013 21:12:36 +0200
Subject: [PATCH 348/450] [Parser] open tags before close if never opened

---
 lib/Parser.js                               |  5 +++++
 tests/Events/14-close-tag-not-in-stack.json | 18 ++++++++++++++++++
 2 files changed, 23 insertions(+)
 create mode 100644 tests/Events/14-close-tag-not-in-stack.json

diff --git a/lib/Parser.js b/lib/Parser.js
index 22a93f0..4fee3f8 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -145,6 +145,11 @@ Parser.prototype.onclosetag = function(name){
 				while(pos--) this._cbs.onclosetag(this._stack.pop());
 			}
 			else this._stack.splice(pos);
+		} else {
+			this.onopentagname(name);
+			this.onopentagend();
+			this._stack.pop();
+			if(this._cbs.onclosetag) this._cbs.onclosetag(name);
 		}
 	}
 };
diff --git a/tests/Events/14-close-tag-not-in-stack.json b/tests/Events/14-close-tag-not-in-stack.json
new file mode 100644
index 0000000..040a0a6
--- /dev/null
+++ b/tests/Events/14-close-tag-not-in-stack.json
@@ -0,0 +1,18 @@
+{
+  "name": "Close tags there are not in stack",
+  "options": {
+    "handler": {},
+    "parser": {}
+  },
+  "html": "<div>Hallo</p>World</div>",
+  "expected": [
+  	{ "event": "opentagname", "data": [ "div" ] },
+  	{ "event": "opentag",     "data": [ "div", {} ] },
+  	{ "event": "text",        "data": [ "Hallo" ] },
+  	{ "event": "opentagname", "data": [ "p" ] },
+  	{ "event": "opentag",     "data": [ "p", {} ] },
+  	{ "event": "closetag",    "data": [ "p" ] },
+  	{ "event": "text",        "data": [ "World" ] },
+  	{ "event": "closetag",    "data": [ "div" ] }
+  ]
+}
\ No newline at end of file

From 694dea7beb0a74ed8b5596b4a67bd8e0ef7a52b2 Mon Sep 17 00:00:00 2001
From: Andreas Madsen <amwebdk@gmail.com>
Date: Tue, 11 Jun 2013 13:53:08 +0200
Subject: [PATCH 349/450] [Parser] implicit open only p and br tags

---
 lib/Parser.js                               |  9 ++++----
 tests/Events/14-close-tag-not-in-stack.json | 18 ----------------
 tests/Events/14-implicit-open-tags.json     | 24 +++++++++++++++++++++
 3 files changed, 29 insertions(+), 22 deletions(-)
 delete mode 100644 tests/Events/14-close-tag-not-in-stack.json
 create mode 100644 tests/Events/14-implicit-open-tags.json

diff --git a/lib/Parser.js b/lib/Parser.js
index 4fee3f8..5b5049b 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -145,12 +145,13 @@ Parser.prototype.onclosetag = function(name){
 				while(pos--) this._cbs.onclosetag(this._stack.pop());
 			}
 			else this._stack.splice(pos);
-		} else {
+		} else if (name === "p" && !this._options.xmlMode) {
 			this.onopentagname(name);
-			this.onopentagend();
-			this._stack.pop();
-			if(this._cbs.onclosetag) this._cbs.onclosetag(name);
+			this.onselfclosingtag();
 		}
+	} else if ((name === 'br' || name === 'p') && !this._options.xmlMode) {
+		this.onopentagname(name);
+		this.onselfclosingtag();		
 	}
 };
 
diff --git a/tests/Events/14-close-tag-not-in-stack.json b/tests/Events/14-close-tag-not-in-stack.json
deleted file mode 100644
index 040a0a6..0000000
--- a/tests/Events/14-close-tag-not-in-stack.json
+++ /dev/null
@@ -1,18 +0,0 @@
-{
-  "name": "Close tags there are not in stack",
-  "options": {
-    "handler": {},
-    "parser": {}
-  },
-  "html": "<div>Hallo</p>World</div>",
-  "expected": [
-  	{ "event": "opentagname", "data": [ "div" ] },
-  	{ "event": "opentag",     "data": [ "div", {} ] },
-  	{ "event": "text",        "data": [ "Hallo" ] },
-  	{ "event": "opentagname", "data": [ "p" ] },
-  	{ "event": "opentag",     "data": [ "p", {} ] },
-  	{ "event": "closetag",    "data": [ "p" ] },
-  	{ "event": "text",        "data": [ "World" ] },
-  	{ "event": "closetag",    "data": [ "div" ] }
-  ]
-}
\ No newline at end of file
diff --git a/tests/Events/14-implicit-open-tags.json b/tests/Events/14-implicit-open-tags.json
new file mode 100644
index 0000000..096925a
--- /dev/null
+++ b/tests/Events/14-implicit-open-tags.json
@@ -0,0 +1,24 @@
+{
+  "name": "Implicit open p and br tags",
+  "options": {
+    "handler": {},
+    "parser": {}
+  },
+  "html": "<div>Hallo</p>World</br></div></p>",
+  "expected": [
+  	{ "event": "opentagname", "data": [ "div" ] },
+  	{ "event": "opentag",     "data": [ "div", {} ] },
+  	{ "event": "text",        "data": [ "Hallo" ] },
+  	{ "event": "opentagname", "data": [ "p" ] },
+  	{ "event": "opentag",     "data": [ "p", {} ] },
+  	{ "event": "closetag",    "data": [ "p" ] },
+  	{ "event": "text",        "data": [ "World" ] },
+  	{ "event": "opentagname", "data": [ "br" ] },
+  	{ "event": "opentag",     "data": [ "br", {} ] },
+  	{ "event": "closetag",    "data": [ "br" ] },
+  	{ "event": "closetag",    "data": [ "div" ] },
+  	{ "event": "opentagname", "data": [ "p" ] },
+  	{ "event": "opentag",     "data": [ "p", {} ] },
+  	{ "event": "closetag",    "data": [ "p" ] }
+  ]
+}
\ No newline at end of file

From d64986c87eb9b0af18d24d4481ac084d04c41918 Mon Sep 17 00:00:00 2001
From: abarre <a@fasterize.com>
Date: Thu, 13 Jun 2013 11:21:12 +0200
Subject: [PATCH 350/450] Fix perf regression in the Tokenizer : avoid a
 concatenation

Version 2.3.1 :
-> % node bench2.js
htmlparser2:  01.86 ms/el

Version 3.1.2 without the fix :
-> % node tests/bench.js
htmlparser2:  04.50 ms/el

Version 3.1.2 with the fix :
-> % node tests/bench.js
htmlparser2:  01.75 ms/el
---
 lib/Tokenizer.js | 46 +++++++++++++++++++++++-----------------------
 1 file changed, 23 insertions(+), 23 deletions(-)

diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
index d4f0f67..22673cf 100644
--- a/lib/Tokenizer.js
+++ b/lib/Tokenizer.js
@@ -90,7 +90,7 @@ Tokenizer.prototype.write = function(chunk){
 		var c = this._buffer.charAt(this._index);
 		if(this._state === TEXT){
 			if(c === "<"){
-				this._emitIfToken("text");
+				this._emitIfToken("ontext");
 				this._state = BEFORE_TAG_NAME;
 				this._sectionStart = this._index;
 			}
@@ -120,16 +120,16 @@ Tokenizer.prototype.write = function(chunk){
 			}
 		} else if(this._state === IN_TAG_NAME){
 			if(c === "/"){
-				this._emitToken("opentagname");
+				this._emitToken("onopentagname");
 				this._cbs.onselfclosingtag();
 				this._state = AFTER_CLOSING_TAG_NAME;
 			} else if(c === ">"){
-				this._emitToken("opentagname");
+				this._emitToken("onopentagname");
 				this._cbs.onopentagend();
 				this._state = TEXT;
 				this._sectionStart = this._index + 1;
 			} else if(whitespace(c)){
-				this._emitToken("opentagname");
+				this._emitToken("onopentagname");
 				this._state = BEFORE_ATTRIBUTE_NAME;
 			}
 		} else if(this._state === BEFORE_CLOSING_TAG_NAME){
@@ -149,12 +149,12 @@ Tokenizer.prototype.write = function(chunk){
 			}
 		} else if(this._state === IN_CLOSING_TAG_NAME){
 			if(c === ">"){
-				this._emitToken("closetag");
+				this._emitToken("onclosetag");
 				this._state = TEXT;
 				this._sectionStart = this._index + 1;
 				this._special = 0;
 			} else if(whitespace(c)){
-				this._emitToken("closetag");
+				this._emitToken("onclosetag");
 				this._state = AFTER_CLOSING_TAG_NAME;
 				this._special = 0;
 			}
@@ -183,13 +183,13 @@ Tokenizer.prototype.write = function(chunk){
 			}
 		} else if(this._state === IN_ATTRIBUTE_NAME){
 			if(c === "="){
-				this._emitIfToken("attribname");
+				this._emitIfToken("onattribname");
 				this._state = BEFORE_ATTRIBUTE_VALUE;
 			} else if(whitespace(c)){
-				this._emitIfToken("attribname");
+				this._emitIfToken("onattribname");
 				this._state = AFTER_ATTRIBUTE_NAME;
 			} else if(c === "/" || c === ">"){
-				this._emitIfToken("attribname");
+				this._emitIfToken("onattribname");
 				this._state = BEFORE_ATTRIBUTE_NAME;
 				this._index--;
 			}
@@ -216,22 +216,22 @@ Tokenizer.prototype.write = function(chunk){
 			}
 		} else if(this._state === IN_ATTRIBUTE_VALUE_DOUBLE_QUOTES){
 			if(c === "\""){
-				this._emitToken("attribvalue");
+				this._emitToken("onattribvalue");
 				this._state = BEFORE_ATTRIBUTE_NAME;
 			}
 		} else if(this._state === IN_ATTRIBUTE_VALUE_SINGLE_QUOTES){
 			if(c === "'"){
-				this._emitToken("attribvalue");
 				this._state = BEFORE_ATTRIBUTE_NAME;	
+				this._emitToken("onattribvalue");
 			}
 		} else if(this._state === IN_ATTRIBUTE_VALUE_NO_QUOTES){
 			if(c === ">"){
-				this._emitToken("attribvalue");
+				this._emitToken("onattribvalue");
 				this._state = TEXT;
 				this._cbs.onopentagend();
 				this._sectionStart = this._index + 1;
 			} else if(whitespace(c)){
-				this._emitToken("attribvalue");
+				this._emitToken("onattribvalue");
 				this._state = BEFORE_ATTRIBUTE_NAME;
 			}
 		}
@@ -245,7 +245,7 @@ Tokenizer.prototype.write = function(chunk){
 			else this._state = IN_DECLARATION;
 		} else if(this._state === IN_DECLARATION){
 			if(c === ">"){
-				this._emitToken("declaration");
+				this._emitToken("ondeclaration");
 				this._state = TEXT;
 				this._sectionStart = this._index + 1;
 			}
@@ -256,7 +256,7 @@ Tokenizer.prototype.write = function(chunk){
 		*/
 		else if(this._state === IN_PROCESSING_INSTRUCTION){
 			if(c === ">"){
-				this._emitToken("processinginstruction");
+				this._emitToken("onprocessinginstruction");
 				this._state = TEXT;
 				this._sectionStart = this._index + 1;
 			}
@@ -492,7 +492,7 @@ Tokenizer.prototype.write = function(chunk){
 		this._index = 0;
 	} else {
 		if(this._state === TEXT){
-			this._emitIfToken("text");
+			this._emitIfToken("ontext");
 			this._buffer = "";
 			this._index = 0;
 		} else if(this._sectionStart === this._index){
@@ -522,15 +522,15 @@ Tokenizer.prototype.end = function(chunk){
 	//if there is remaining data, emit it in a reasonable way
 	if(this._buffer === "" || this._sectionStart === -1 || this._sectionStart === this._index);
 	else if(this._state === IN_CDATA || this._state === AFTER_CDATA_1 || this._state === AFTER_CDATA_2){
-		this._emitIfToken("cdata");
+		this._emitIfToken("oncdata");
 	} else if(this._state === IN_COMMENT || this._state === AFTER_COMMENT_1 || this._state === AFTER_COMMENT_2){
-		this._emitIfToken("comment");
+		this._emitIfToken("oncomment");
 	} else if(this._state === IN_TAG_NAME){
-		this._emitIfToken("opentagname");
+		this._emitIfToken("onopentagname");
 	} else if(this._state === IN_CLOSING_TAG_NAME){
-		this._emitIfToken("closetag");
+		this._emitIfToken("onclosetag");
 	} else {
-		this._emitIfToken("text");
+		this._emitIfToken("ontext");
 	}
 
 	this._cbs.onend();
@@ -541,13 +541,13 @@ Tokenizer.prototype.reset = function(){
 };
 
 Tokenizer.prototype._emitToken = function(name){
-	this._cbs["on" + name](this._buffer.substring(this._sectionStart, this._index));
+	this._cbs[name](this._buffer.substring(this._sectionStart, this._index));
 	this._sectionStart = -1;
 };
 
 Tokenizer.prototype._emitIfToken = function(name){
 	if(this._index > this._sectionStart){
-		this._cbs["on" + name](this._buffer.substring(this._sectionStart, this._index));
+		this._cbs[name](this._buffer.substring(this._sectionStart, this._index));
 	}
 	this._sectionStart = -1;
 };

From eade8202c36950aee132fb798802d3cca4bd707e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 14 Jun 2013 14:09:27 +0200
Subject: [PATCH 351/450] 3.1.3

---
 package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package.json b/package.json
index 34b2c26..e80e64a 100644
--- a/package.json
+++ b/package.json
@@ -1,7 +1,7 @@
 {
 	"name": "htmlparser2",
 	"description": "Fast & forgiving HTML/XML/RSS parser",
-	"version": "3.1.2",
+	"version": "3.1.3",
 	"author": "Felix Boehm <me@feedic.com>",
 	"keywords": ["html", "parser", "streams", "xml", "dom", "rss", "feed", "atom"],
 	"contributors": ["Chris Winberry <chris@winberry.net>"],

From 0ca2c1e6ce46041e0b039eb5626a3c149bde7ad3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 14 Jun 2013 14:13:11 +0200
Subject: [PATCH 352/450] [parser] renamed emptyTags to voidElements, sorted
 them

---
 lib/Parser.js | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index 5b5049b..546328d 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -53,24 +53,24 @@ var openImpliesClose = {
 	optgroup: { optgroup:true }
 };
 
-var emptyTags = {
+var voidElements = {
 	__proto__: null,
 	area: true,
 	base: true,
 	basefont: true,
 	br: true,
 	col: true,
+	command: true,
+	embed: true,
 	frame: true,
 	hr: true,
 	img: true,
 	input: true,
 	isindex: true,
+	keygen: true,
 	link: true,
 	meta: true,
 	param: true,
-	embed: true,
-	command: true,
-	keygen: true,
 	source: true,
 	track: true,
 	wbr: true
@@ -113,7 +113,7 @@ Parser.prototype.onopentagname = function(name){
 		);
 	}
 
-	if(this._options.xmlMode || !(name in emptyTags)){
+	if(this._options.xmlMode || !(name in voidElements)){
 		this._stack.push(name);
 	}
 
@@ -127,7 +127,7 @@ Parser.prototype.onopentagend = function(){
 		if(this._cbs.onopentag) this._cbs.onopentag(this._tagname, this._attribs);
 		this._attribs = null;
 	}
-	if(!this._options.xmlMode && this._cbs.onclosetag && this._tagname in emptyTags){
+	if(!this._options.xmlMode && this._cbs.onclosetag && this._tagname in voidElements){
 		this._cbs.onclosetag(this._tagname);
 	}
 	this._tagname = "";
@@ -137,7 +137,7 @@ Parser.prototype.onclosetag = function(name){
 	if(!(this._options.xmlMode || "lowerCaseTags" in this._options) || this._options.lowerCaseTags){
 		name = name.toLowerCase();
 	}
-	if(this._stack.length && (!(name in emptyTags) || this._options.xmlMode)){
+	if(this._stack.length && (!(name in voidElements) || this._options.xmlMode)){
 		var pos = this._stack.lastIndexOf(name);
 		if(pos !== -1){
 			if(this._cbs.onclosetag){

From 26117ef30f188e06240372c59c3adbf6848bb278 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 14 Jun 2013 14:32:10 +0200
Subject: [PATCH 353/450] [parser] improved consistency & simplified

---
 lib/Parser.js | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index 546328d..de0c442 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -145,11 +145,11 @@ Parser.prototype.onclosetag = function(name){
 				while(pos--) this._cbs.onclosetag(this._stack.pop());
 			}
 			else this._stack.splice(pos);
-		} else if (name === "p" && !this._options.xmlMode) {
+		} else if(name === "p" && !this._options.xmlMode){
 			this.onopentagname(name);
 			this.onselfclosingtag();
 		}
-	} else if ((name === 'br' || name === 'p') && !this._options.xmlMode) {
+	} else if(!this._options.xmlMode && (name === "br" || name === "p")){
 		this.onopentagname(name);
 		this.onselfclosingtag();		
 	}
@@ -160,14 +160,13 @@ Parser.prototype.onselfclosingtag = function(){
 
 	this.onopentagend();
 
-	//self-closing tags won't be on the top of the stack
-	//cheaper check than before
+	//self-closing tags will be on the top of the stack
+	//(cheaper check than in onclosetag)
 	if(this._stack[this._stack.length-1] === name){
 		if(this._cbs.onclosetag){
-			this._cbs.onclosetag(this._stack.pop());
-		} else {
-			this._stack.pop();
+			this._cbs.onclosetag(name);
 		}
+		this._stack.pop();
 	}
 };
 

From 79323679343be1f1257936e40c332293cfdd85f8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 14 Jun 2013 14:42:02 +0200
Subject: [PATCH 354/450] [tokenizer] simplified `end` logic

---
 lib/Tokenizer.js | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
index 22673cf..788d0d2 100644
--- a/lib/Tokenizer.js
+++ b/lib/Tokenizer.js
@@ -520,17 +520,17 @@ Tokenizer.prototype.end = function(chunk){
 	if(chunk) this.write(chunk);
 
 	//if there is remaining data, emit it in a reasonable way
-	if(this._buffer === "" || this._sectionStart === -1 || this._sectionStart === this._index);
+	if(this._buffer === "" || this._sectionStart <= this._index);
 	else if(this._state === IN_CDATA || this._state === AFTER_CDATA_1 || this._state === AFTER_CDATA_2){
-		this._emitIfToken("oncdata");
+		this._emitToken("oncdata");
 	} else if(this._state === IN_COMMENT || this._state === AFTER_COMMENT_1 || this._state === AFTER_COMMENT_2){
-		this._emitIfToken("oncomment");
+		this._emitToken("oncomment");
 	} else if(this._state === IN_TAG_NAME){
-		this._emitIfToken("onopentagname");
+		this._emitToken("onopentagname");
 	} else if(this._state === IN_CLOSING_TAG_NAME){
-		this._emitIfToken("onclosetag");
+		this._emitToken("onclosetag");
 	} else {
-		this._emitIfToken("ontext");
+		this._emitToken("ontext");
 	}
 
 	this._cbs.onend();

From 45d90674fa184c7bfdf042a616b2e7ba0f0997ff Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 14 Jun 2013 14:48:19 +0200
Subject: [PATCH 355/450] [tokenizer] removed noop blocks in
 AFTER_{COMMENT,CDATA}_2

---
 lib/Tokenizer.js | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
index 788d0d2..7b560a5 100644
--- a/lib/Tokenizer.js
+++ b/lib/Tokenizer.js
@@ -283,11 +283,10 @@ Tokenizer.prototype.write = function(chunk){
 				this._cbs.oncomment(this._buffer.substring(this._sectionStart, this._index - 2));
 				this._state = TEXT;
 				this._sectionStart = this._index + 1;
-			} else if (c === '-') {
-				// Keep the state at AFTER_COMMENT_2
-			} else {
+			} else if (c !== "-") {
 				this._state = IN_COMMENT;
 			}
+			// else: stay in AFTER_COMMENT_2 (`--->`)
 		}
 
 		/*
@@ -326,11 +325,10 @@ Tokenizer.prototype.write = function(chunk){
 				this._cbs.oncdata(this._buffer.substring(this._sectionStart, this._index - 2));
 				this._state = TEXT;
 				this._sectionStart = this._index + 1;
-			} else if (c === ']') {
-				// Keep the state at AFTER_CDATA_2
-			}else {
+			} else if (c !== "]") {
 				this._state = IN_CDATA;
 			}
+			//else: stay in AFTER_CDATA_2 (`]]]>`)
 		}
 
 		/*

From 87c6f2b30096329b325be9fe31e5a2a03cc05596 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 14 Jun 2013 14:55:23 +0200
Subject: [PATCH 356/450] [tokenizer] use `continue` instead of decreasing the
 index

---
 lib/Tokenizer.js | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
index 7b560a5..5429417 100644
--- a/lib/Tokenizer.js
+++ b/lib/Tokenizer.js
@@ -141,7 +141,7 @@ Tokenizer.prototype.write = function(chunk){
 					this._state = BEFORE_SPECIAL_END;
 				} else {
 					this._state = TEXT;
-					this._index--;
+					continue;
 				}
 			} else {
 				this._state = IN_CLOSING_TAG_NAME;
@@ -191,14 +191,14 @@ Tokenizer.prototype.write = function(chunk){
 			} else if(c === "/" || c === ">"){
 				this._emitIfToken("onattribname");
 				this._state = BEFORE_ATTRIBUTE_NAME;
-				this._index--;
+				continue;
 			}
 		} else if(this._state === AFTER_ATTRIBUTE_NAME){
 			if(c === "="){
 				this._state = BEFORE_ATTRIBUTE_VALUE;
 			} else if(c === "/" || c === ">"){
 				this._state = BEFORE_ATTRIBUTE_NAME;
-				this._index--;
+				continue;
 			} else if(!whitespace(c)){
 				this._state = IN_ATTRIBUTE_NAME;
 				this._sectionStart = this._index;
@@ -341,7 +341,7 @@ Tokenizer.prototype.write = function(chunk){
 				this._state = BEFORE_STYLE_1;
 			} else {
 				this._state = IN_TAG_NAME;
-				this._index--; //consume the token again
+				continue; //consume the token again
 			}
 		} else if(this._state === BEFORE_SPECIAL_END){
 			if(this._special === 1 && (c === "c" || c === "C")){
@@ -360,35 +360,35 @@ Tokenizer.prototype.write = function(chunk){
 				this._state = BEFORE_SCRIPT_2;
 			} else {
 				this._state = IN_TAG_NAME;
-				this._index--; //consume the token again
+				continue; //consume the token again
 			}
 		} else if(this._state === BEFORE_SCRIPT_2){
 			if(c === "i" || c === "I"){
 				this._state = BEFORE_SCRIPT_3;
 			} else {
 				this._state = IN_TAG_NAME;
-				this._index--; //consume the token again
+				continue; //consume the token again
 			}
 		} else if(this._state === BEFORE_SCRIPT_3){
 			if(c === "p" || c === "P"){
 				this._state = BEFORE_SCRIPT_4;
 			} else {
 				this._state = IN_TAG_NAME;
-				this._index--; //consume the token again
+				continue; //consume the token again
 			}
 		} else if(this._state === BEFORE_SCRIPT_4){
 			if(c === "t" || c === "T"){
 				this._state = BEFORE_SCRIPT_5;
 			} else {
 				this._state = IN_TAG_NAME;
-				this._index--; //consume the token again
+				continue; //consume the token again
 			}
 		} else if(this._state === BEFORE_SCRIPT_5){
 			if(c === "/" || c === ">" || whitespace(c)){
 				this._special = 1;
 			}
 			this._state = IN_TAG_NAME;
-			this._index--; //consume the token again
+			continue; //consume the token again
 		}
 
 		else if(this._state === AFTER_SCRIPT_1){
@@ -415,7 +415,7 @@ Tokenizer.prototype.write = function(chunk){
 			if(c === ">" || whitespace(c)){
 				this._state = IN_CLOSING_TAG_NAME;
 				this._sectionStart = this._index - 6;
-				this._index--; //reconsume the token
+				continue; //reconsume the token
 			} 
 			else this._state = TEXT;
 		}
@@ -428,28 +428,28 @@ Tokenizer.prototype.write = function(chunk){
 				this._state = BEFORE_STYLE_2;
 			} else {
 				this._state = IN_TAG_NAME;
-				this._index--; //consume the token again
+				continue; //consume the token again
 			}
 		} else if(this._state === BEFORE_STYLE_2){
 			if(c === "l" || c === "L"){
 				this._state = BEFORE_STYLE_3;
 			} else {
 				this._state = IN_TAG_NAME;
-				this._index--; //consume the token again
+				continue; //consume the token again
 			}
 		} else if(this._state === BEFORE_STYLE_3){
 			if(c === "e" || c === "E"){
 				this._state = BEFORE_STYLE_4;
 			} else {
 				this._state = IN_TAG_NAME;
-				this._index--; //consume the token again
+				continue; //consume the token again
 			}
 		} else if(this._state === BEFORE_STYLE_4){
 			if(c === "/" || c === ">" || whitespace(c)){
 				this._special = 2;
 			}
 			this._state = IN_TAG_NAME;
-			this._index--; //consume the token again
+			continue; //consume the token again
 		}
 
 		else if(this._state === AFTER_STYLE_1){
@@ -471,7 +471,7 @@ Tokenizer.prototype.write = function(chunk){
 			if(c === ">" || whitespace(c)){
 				this._state = IN_CLOSING_TAG_NAME;
 				this._sectionStart = this._index - 5;
-				this._index--; //reconsume the token
+				continue; //reconsume the token
 			} 
 			else this._state = TEXT;
 		}

From 7608c11c2417e473ccdbf11268dec4b22843391f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 14 Jun 2013 14:55:52 +0200
Subject: [PATCH 357/450] [bench] removed unnecessary noop functions

---
 tests/99-benchmark.js | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/tests/99-benchmark.js b/tests/99-benchmark.js
index e44d76a..af58625 100644
--- a/tests/99-benchmark.js
+++ b/tests/99-benchmark.js
@@ -9,13 +9,8 @@ var multiply = function(text){
 		special: multiply("<script> THIS IS <SPECIAL> </script>"),
 		xml: multiply("<!directive><tag attr='value'> text <!--Comment<>--></tag>")
 	},
-	empty = function(){},
 	cbs = {};
 
-require("./test-helper.js").EVENTS.forEach(function(name){
-    cbs["on" + name] = empty;
-});
-
 var parser = new (require("../lib/Parser.js"))(cbs),
 	ben = require("ben");
 
@@ -23,4 +18,4 @@ Object.keys(tests).forEach(function(name){
 	console.log("Test", name, "took", ben(150, function(){
 		parser.parseComplete(tests[name]);
 	}));
-});
\ No newline at end of file
+});

From d00b39155f55ca3df4758d2414baecb71d4acc46 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 14 Jun 2013 15:10:46 +0200
Subject: [PATCH 358/450] [tokenizer] improved handling of remaining data

---
 lib/Tokenizer.js | 31 ++++++++++++++++++-------------
 1 file changed, 18 insertions(+), 13 deletions(-)

diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
index 5429417..5ef6d14 100644
--- a/lib/Tokenizer.js
+++ b/lib/Tokenizer.js
@@ -108,7 +108,7 @@ Tokenizer.prototype.write = function(chunk){
 					this._state = IN_PROCESSING_INSTRUCTION;
 					this._sectionStart = this._index + 1;
 				} else if(
-					(!this._options || !this._options.xmlMode) &&
+					!(this._options && this._options.xmlMode) &&
 					(c === "s" || c === "S")
 				){
 					this._state = BEFORE_SPECIAL;
@@ -490,7 +490,9 @@ Tokenizer.prototype.write = function(chunk){
 		this._index = 0;
 	} else {
 		if(this._state === TEXT){
-			this._emitIfToken("ontext");
+			if(this._sectionStart !== this._index){
+				this._cbs.ontext(this._buffer.substr(this._sectionStart));
+			}
 			this._buffer = "";
 			this._index = 0;
 		} else if(this._sectionStart === this._index){
@@ -518,17 +520,20 @@ Tokenizer.prototype.end = function(chunk){
 	if(chunk) this.write(chunk);
 
 	//if there is remaining data, emit it in a reasonable way
-	if(this._buffer === "" || this._sectionStart <= this._index);
-	else if(this._state === IN_CDATA || this._state === AFTER_CDATA_1 || this._state === AFTER_CDATA_2){
-		this._emitToken("oncdata");
-	} else if(this._state === IN_COMMENT || this._state === AFTER_COMMENT_1 || this._state === AFTER_COMMENT_2){
-		this._emitToken("oncomment");
-	} else if(this._state === IN_TAG_NAME){
-		this._emitToken("onopentagname");
-	} else if(this._state === IN_CLOSING_TAG_NAME){
-		this._emitToken("onclosetag");
-	} else {
-		this._emitToken("ontext");
+	if(this._sectionStart > this._index){
+		var data = this._buffer.substr(this._sectionStart);
+
+		if(this._state === IN_CDATA || this._state === AFTER_CDATA_1 || this._state === AFTER_CDATA_2){
+			this._cbs.oncdata(data);
+		} else if(this._state === IN_COMMENT || this._state === AFTER_COMMENT_1 || this._state === AFTER_COMMENT_2){
+			this._cbs.oncomment(data);
+		} else if(this._state === IN_TAG_NAME){
+			this._cbs.onopentagname(data);
+		} else if(this._state === IN_CLOSING_TAG_NAME){
+			this._cbs.onclosetag(data);
+		} else {
+			this._cbs.ontext(data);
+		}
 	}
 
 	this._cbs.onend();

From 863183a8651fb9ad05d85220d98e353704c1713e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20B=C3=B6hm?= <me@feedic.com>
Date: Sat, 15 Jun 2013 23:36:26 +0300
Subject: [PATCH 359/450] [readme] it~~'~~s

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 603c4c3..58df019 100644
--- a/README.md
+++ b/README.md
@@ -42,7 +42,7 @@ Read more about the parser in the [wiki](https://github.com/fb55/htmlparser2/wik
 ##Get a DOM
 The `DomHandler` (known as `DefaultHandler` in the original `htmlparser` module) produces a DOM (document object model) that can be manipulated using the [`DomUtils`](https://github.com/fb55/DomUtils) helper.
 
-The `DomHandler`, while still bundled with this module, was moved to it's [own module](https://github.com/fb55/domhandler). Have a look at it for further information.
+The `DomHandler`, while still bundled with this module, was moved to its [own module](https://github.com/fb55/domhandler). Have a look at it for further information.
 
 ##Parsing RSS/RDF/Atom Feeds
 

From 77bf0ae47eb9aab5b75c1c5dd65241a0e1307e28 Mon Sep 17 00:00:00 2001
From: Forbes Lindesay <forbes@lindesay.co.uk>
Date: Thu, 20 Jun 2013 07:30:33 +0200
Subject: [PATCH 360/450] Add parseDOM and parseFeed helper methods

---
 lib/index.js | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/lib/index.js b/lib/index.js
index d991259..86d2392 100644
--- a/lib/index.js
+++ b/lib/index.js
@@ -5,6 +5,18 @@ function defineProp(name, value){
 }
 
 module.exports = {
+	parseDOM: function (html) {
+		var handler = new module.exports.DomHandler();
+		var parser = new module.exports.Parser(handler);
+		parser.parseComplete(html);
+		return handler.dom;
+	},
+	parseFeed: function (feed) {
+		var handler = new module.exports.FeedHandler();
+		var parser = new module.exports.Parser(handler);
+		parser.parseComplete(feed);
+		return handler.dom;
+	},
 	get Parser(){
 		return defineProp("Parser", require("./Parser.js"));
 	},
@@ -61,4 +73,4 @@ module.exports = {
 		error: 1,
 		end: 0
 	}
-};
\ No newline at end of file
+};

From 16aef006915d17c5f3993ed7e6ace40a1b1b96b7 Mon Sep 17 00:00:00 2001
From: Forbes Lindesay <forbes@lindesay.co.uk>
Date: Thu, 20 Jun 2013 13:56:37 +0200
Subject: [PATCH 361/450] Add link to live demo

---
 README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/README.md b/README.md
index 58df019..e961e7d 100644
--- a/README.md
+++ b/README.md
@@ -4,6 +4,8 @@ A forgiving HTML/XML/RSS parser written in JS for NodeJS. The parser can handle
 
 ##Installing
 	npm install htmlparser2
+	
+A live demo of htmlparser2 is available at http://htmlparser.forbeslindesay.co.uk/
 
 ##Usage
 

From b00177f4851ee2fe88a71e9ca474461f9ee23b5a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20B=C3=B6hm?= <me@feedic.com>
Date: Sun, 23 Jun 2013 12:27:27 +0300
Subject: [PATCH 362/450] [parser] default options & cbs to empty objects

fixes #57
---
 lib/Parser.js | 54 +++++++++++++++++++++++----------------------------
 1 file changed, 24 insertions(+), 30 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index de0c442..143b46f 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -1,31 +1,27 @@
 var Tokenizer = require("./Tokenizer.js");
 
-var defaultOpts = {
-	xmlMode: false, //Special behavior for script/style tags by default
-	lowerCaseAttributeNames: false, //call .toLowerCase for each attribute name
-	lowerCaseTags: false //call .toLowerCase for each tag name
-};
-
-var defaultCbs = {
-	/*
-		This is just a plain object
-		so that the parser doesn't
-		throw if no arguments were
-		provided.
-	*/
-	/*
-		oncdataend,
-		oncdatastart,
-		onclosetag,
-		oncomment,
-		oncommentend,
-		onerror,
-		onopentag,
-		onprocessinginstruction,
-		onreset,
-		ontext
-	*/
-};
+/*
+	Options:
+	
+	xmlMode: Special behavior for script/style tags (true by default)
+	lowerCaseAttributeNames: call .toLowerCase for each attribute name (true if xmlMode is `false`)
+	lowerCaseTags: call .toLowerCase for each tag name (true if xmlMode is `false`)
+*/
+
+/*
+	Callbacks:
+	
+	oncdataend,
+	oncdatastart,
+	onclosetag,
+	oncomment,
+	oncommentend,
+	onerror,
+	onopentag,
+	onprocessinginstruction,
+	onreset,
+	ontext
+*/
 
 var formTags = {
 	input: true,
@@ -77,10 +73,8 @@ var voidElements = {
 };
 
 function Parser(cbs, options){
-	if(!options) options = defaultOpts;
-	if(!cbs) cbs = defaultCbs;
-	this._options = options;
-	this._cbs = cbs;
+	this._options = options || {};
+	this._cbs = cbs || {};
 
 	this._tagname = "";
 	this._attribname = "";

From 529f7273402157584d0a2636c9e5c4c611e3189f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 23 Jun 2013 11:36:27 +0200
Subject: [PATCH 363/450] 3.1.4

---
 package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package.json b/package.json
index e80e64a..bc73ecc 100644
--- a/package.json
+++ b/package.json
@@ -1,7 +1,7 @@
 {
 	"name": "htmlparser2",
 	"description": "Fast & forgiving HTML/XML/RSS parser",
-	"version": "3.1.3",
+	"version": "3.1.4",
 	"author": "Felix Boehm <me@feedic.com>",
 	"keywords": ["html", "parser", "streams", "xml", "dom", "rss", "feed", "atom"],
 	"contributors": ["Chris Winberry <chris@winberry.net>"],

From 9f54942c29c8527898f4a8993474b201fc0b3057 Mon Sep 17 00:00:00 2001
From: Zach Smith <zach@amicushq.com>
Date: Fri, 19 Jul 2013 11:30:18 -0400
Subject: [PATCH 364/450] [tokenizer] fix case where `<` followed by whitespace
 doesn't parse correctly

---
 lib/Tokenizer.js                   |  2 +-
 tests/Events/15-lt-whitespace.json | 16 ++++++++++++++++
 2 files changed, 17 insertions(+), 1 deletion(-)
 create mode 100644 tests/Events/15-lt-whitespace.json

diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
index 5ef6d14..75003e7 100644
--- a/lib/Tokenizer.js
+++ b/lib/Tokenizer.js
@@ -97,7 +97,7 @@ Tokenizer.prototype.write = function(chunk){
 		} else if(this._state === BEFORE_TAG_NAME){
 			if(c === "/"){
 				this._state = BEFORE_CLOSING_TAG_NAME;
-			} else if(c === ">" || this._special > 0) {
+			} else if(c === ">" || this._special > 0 || whitespace(c)) {
 				this._state = TEXT;
 			} else {
 				if(whitespace(c));
diff --git a/tests/Events/15-lt-whitespace.json b/tests/Events/15-lt-whitespace.json
new file mode 100644
index 0000000..aae6eb0
--- /dev/null
+++ b/tests/Events/15-lt-whitespace.json
@@ -0,0 +1,16 @@
+{
+  "name": "lt followed by whitespace",
+  "options": {
+    "handler": {},
+    "parser": {}
+  },
+  "html": "a < b",
+  "expected": [
+    {
+      "event": "text",
+      "data": [
+        "a < b"
+      ]
+    }
+  ]
+}

From 830c1570febfe165c07aabb4ab01bb3724782d62 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 21 Jul 2013 21:02:29 +0200
Subject: [PATCH 365/450] 3.1.5

---
 package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package.json b/package.json
index bc73ecc..0be5a0c 100644
--- a/package.json
+++ b/package.json
@@ -1,7 +1,7 @@
 {
 	"name": "htmlparser2",
 	"description": "Fast & forgiving HTML/XML/RSS parser",
-	"version": "3.1.4",
+	"version": "3.1.5",
 	"author": "Felix Boehm <me@feedic.com>",
 	"keywords": ["html", "parser", "streams", "xml", "dom", "rss", "feed", "atom"],
 	"contributors": ["Chris Winberry <chris@winberry.net>"],

From a6b68654cdc08ead2c04d0d91386cd014f845149 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20B=C3=B6hm?= <me@feedic.com>
Date: Sun, 21 Jul 2013 21:08:51 +0200
Subject: [PATCH 366/450] [parser] don't overwrite attribute values on second
 occurence

as described in #42
---
 lib/Parser.js | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index 143b46f..9dbe44f 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -174,7 +174,12 @@ Parser.prototype.onattribname = function(name){
 
 Parser.prototype.onattribvalue = function attribValue(value){
 	if(this._cbs.onattribute) this._cbs.onattribute(this._attribname, value);
-	if(this._attribs) this._attribs[this._attribname] = value;
+	if(
+		this._attribs &&
+		!Object.prototype.hasOwnProperty.call(this._attribs, this._attribname)
+	){
+		this._attribs[this._attribname] = value;
+	}
 	this._attribname = "";
 };
 

From 4d5615712a0a97d9e8c379bc4a49a3d80e2fde72 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20B=C3=B6hm?= <me@feedic.com>
Date: Sun, 21 Jul 2013 21:16:59 +0200
Subject: [PATCH 367/450] [readme] behavior of example changed due to #58

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index e961e7d..9a7d218 100644
--- a/README.md
+++ b/README.md
@@ -26,7 +26,7 @@ var parser = new htmlparser.Parser({
 		}
 	}
 });
-parser.write("Xyz <script type='text/javascript'>var foo = '<<bar>>';< /  script>");
+parser.write("Xyz <script type='text/javascript'>var foo = '<<bar>>';</ script>");
 parser.end();
 ```
 

From ca311d44a472143311571e0c200aeb748475f6e8 Mon Sep 17 00:00:00 2001
From: ForbesLindesay <forbes@lindesay.co.uk>
Date: Sun, 21 Jul 2013 21:55:08 +0100
Subject: [PATCH 368/450] Add .gitignore

---
 .gitignore | 2 ++
 1 file changed, 2 insertions(+)
 create mode 100644 .gitignore

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..0db216b
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+npm-debug.log
+node_modules

From 909a3f1a63ae1b7ccfbdf2c43e3d5ee7a00bef54 Mon Sep 17 00:00:00 2001
From: ForbesLindesay <forbes@lindesay.co.uk>
Date: Sun, 21 Jul 2013 22:12:56 +0100
Subject: [PATCH 369/450] Add .gitattributes so tests still work on windows

---
 .gitattributes | 2 ++
 1 file changed, 2 insertions(+)
 create mode 100644 .gitattributes

diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..4bb50dc
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,2 @@
+# Auto detect text files and perform LF normalization
+* text eol=lf
\ No newline at end of file

From f6f93ef5226854fad5e7f48f3c113f2afa013e9d Mon Sep 17 00:00:00 2001
From: ForbesLindesay <forbes@lindesay.co.uk>
Date: Sun, 21 Jul 2013 22:15:08 +0100
Subject: [PATCH 370/450] Normalize line endings

---
 tests/99-benchmark.js | 42 +++++++++++++++++++++---------------------
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/tests/99-benchmark.js b/tests/99-benchmark.js
index af58625..d569e42 100644
--- a/tests/99-benchmark.js
+++ b/tests/99-benchmark.js
@@ -1,21 +1,21 @@
-var multiply = function(text){
-		return Array(5e3+1).join(text);
-	},
-	tests = {
-		self_closing: multiply("<br/>"),
-		tag: multiply("<tag foo=bar foobar> Text </tag>"),
-		comment: multiply("<!-- this is <<a> comment -->"),
-		directive: multiply("<?foo bar?>"),
-		special: multiply("<script> THIS IS <SPECIAL> </script>"),
-		xml: multiply("<!directive><tag attr='value'> text <!--Comment<>--></tag>")
-	},
-	cbs = {};
-
-var parser = new (require("../lib/Parser.js"))(cbs),
-	ben = require("ben");
-
-Object.keys(tests).forEach(function(name){
-	console.log("Test", name, "took", ben(150, function(){
-		parser.parseComplete(tests[name]);
-	}));
-});
+var multiply = function(text){
+		return Array(5e3+1).join(text);
+	},
+	tests = {
+		self_closing: multiply("<br/>"),
+		tag: multiply("<tag foo=bar foobar> Text </tag>"),
+		comment: multiply("<!-- this is <<a> comment -->"),
+		directive: multiply("<?foo bar?>"),
+		special: multiply("<script> THIS IS <SPECIAL> </script>"),
+		xml: multiply("<!directive><tag attr='value'> text <!--Comment<>--></tag>")
+	},
+	cbs = {};
+
+var parser = new (require("../lib/Parser.js"))(cbs),
+	ben = require("ben");
+
+Object.keys(tests).forEach(function(name){
+	console.log("Test", name, "took", ben(150, function(){
+		parser.parseComplete(tests[name]);
+	}));
+});

From 263775f6713757ea8bd9289e17cc60b8355736fb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20B=C3=B6hm?= <me@feedic.com>
Date: Tue, 30 Jul 2013 16:30:51 +0200
Subject: [PATCH 371/450] [tokenizer] recognize the form field (U+0C), drop the
 carriage return (U+0D)

to be inline with the HTML5 spec

(recognized in MatthewMueller/cheerio#242)
---
 lib/Tokenizer.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
index 75003e7..2429841 100644
--- a/lib/Tokenizer.js
+++ b/lib/Tokenizer.js
@@ -68,7 +68,7 @@ var i = 0,
 
 
 function whitespace(c){
-	return c === " " || c === "\t" || c === "\r" || c === "\n";
+	return c === " " || c === "\n" || c === "\t" || c === "\f";
 }
 
 function Tokenizer(options, cbs){

From f8ddbe67f17ea29f7d55de6cda8990f33531ad6f Mon Sep 17 00:00:00 2001
From: Andreas Madsen <amwebdk@gmail.com>
Date: Thu, 1 Aug 2013 19:43:36 +0200
Subject: [PATCH 372/450] [Tokenizer] move if context to methods allowing
 .write to be optimized

---
 lib/Tokenizer.js | 829 +++++++++++++++++++++++++++++------------------
 1 file changed, 508 insertions(+), 321 deletions(-)

diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
index 2429841..a43c99f 100644
--- a/lib/Tokenizer.js
+++ b/lib/Tokenizer.js
@@ -80,433 +80,620 @@ function Tokenizer(options, cbs){
 	this._special = 0; // 1 for script, 2 for style
 	this._cbs = cbs;
 	this._running = true;
+	this._reconsume = false;
+	this._xmlMode = this._options && this._options.xmlMode;
 }
 
+Tokenizer.prototype._stateText = function (c) {
+	if(c === "<"){
+		this._emitIfToken("ontext");
+		this._state = BEFORE_TAG_NAME;
+		this._sectionStart = this._index;
+	}
+};
+
+Tokenizer.prototype._stateBeforeTagName = function (c) {
+	if(c === "/"){
+		this._state = BEFORE_CLOSING_TAG_NAME;
+	} else if(c === ">" || this._special > 0 || whitespace(c)) {
+		this._state = TEXT;
+	} else if(whitespace(c)) {
+		// skip
+	} else if(c === "!"){
+		this._state = BEFORE_DECLARATION;
+		this._sectionStart = this._index + 1;
+	} else if(c === "?"){
+		this._state = IN_PROCESSING_INSTRUCTION;
+		this._sectionStart = this._index + 1;
+	} else if(!this._xmlMode && (c === "s" || c === "S")){
+		this._state = BEFORE_SPECIAL;
+		this._sectionStart = this._index;
+	} else {
+		this._state = IN_TAG_NAME;
+		this._sectionStart = this._index;
+	}
+};
+
+Tokenizer.prototype._stateInTagName = function (c) {
+	if(c === "/"){
+		this._emitToken("onopentagname");
+		this._cbs.onselfclosingtag();
+		this._state = AFTER_CLOSING_TAG_NAME;
+	} else if(c === ">"){
+		this._emitToken("onopentagname");
+		this._cbs.onopentagend();
+		this._state = TEXT;
+		this._sectionStart = this._index + 1;
+	} else if(whitespace(c)){
+		this._emitToken("onopentagname");
+		this._state = BEFORE_ATTRIBUTE_NAME;
+	}
+};
+
+Tokenizer.prototype._stateBeforeCloseingTagName = function (c) {
+	if(whitespace(c));
+	else if(c === ">"){
+		this._state = TEXT;
+	} else if(this._special > 0){
+		if(c === "s" || c === "S"){
+			this._state = BEFORE_SPECIAL_END;
+		} else {
+			this._state = TEXT;
+			this._reconsume = true;
+		}
+	} else {
+		this._state = IN_CLOSING_TAG_NAME;
+		this._sectionStart = this._index;
+	}
+};
+
+Tokenizer.prototype._stateInCloseingTagName = function (c) {
+	if(c === ">"){
+		this._emitToken("onclosetag");
+		this._state = TEXT;
+		this._sectionStart = this._index + 1;
+		this._special = 0;
+	} else if(whitespace(c)){
+		this._emitToken("onclosetag");
+		this._state = AFTER_CLOSING_TAG_NAME;
+		this._special = 0;
+	}
+};
+
+Tokenizer.prototype._stateAfterCloseingTagName = function (c) {
+	//skip everything until ">"
+	if(c === ">"){
+		this._state = TEXT;
+		this._sectionStart = this._index + 1;
+	}
+};
+
+Tokenizer.prototype._stateBeforeAttributeName = function (c) {
+	if(c === ">"){
+		this._state = TEXT;
+		this._cbs.onopentagend();
+		this._sectionStart = this._index + 1;
+	} else if(c === "/"){
+		this._cbs.onselfclosingtag();
+		this._state = AFTER_CLOSING_TAG_NAME;
+	} else if(!whitespace(c)){
+		this._state = IN_ATTRIBUTE_NAME;
+		this._sectionStart = this._index;
+	}
+};
+
+Tokenizer.prototype._stateInAttributeName = function (c) {
+	if(c === "="){
+		this._emitIfToken("onattribname");
+		this._state = BEFORE_ATTRIBUTE_VALUE;
+	} else if(whitespace(c)){
+		this._emitIfToken("onattribname");
+		this._state = AFTER_ATTRIBUTE_NAME;
+	} else if(c === "/" || c === ">"){
+		this._emitIfToken("onattribname");
+		this._state = BEFORE_ATTRIBUTE_NAME;
+		this._reconsume = true;
+	}
+};
+
+Tokenizer.prototype._stateAfterAttributeName = function (c) {
+	if(c === "="){
+		this._state = BEFORE_ATTRIBUTE_VALUE;
+	} else if(c === "/" || c === ">"){
+		this._state = BEFORE_ATTRIBUTE_NAME;
+		this._reconsume = true;
+	} else if(!whitespace(c)){
+		this._state = IN_ATTRIBUTE_NAME;
+		this._sectionStart = this._index;
+	}
+};
+
+Tokenizer.prototype._stateBeforeAttributeValue = function (c) {
+	if(c === "\""){
+		this._state = IN_ATTRIBUTE_VALUE_DOUBLE_QUOTES;
+		this._sectionStart = this._index + 1;
+	} else if(c === "'"){
+		this._state = IN_ATTRIBUTE_VALUE_SINGLE_QUOTES;
+		this._sectionStart = this._index + 1;
+	} else if(!whitespace(c)){
+		this._state = IN_ATTRIBUTE_VALUE_NO_QUOTES;
+		this._sectionStart = this._index;
+	}
+};
+
+Tokenizer.prototype._stateInAttributeValueDoubleQuotes = function (c) {
+	if(c === "\""){
+		this._emitToken("onattribvalue");
+		this._state = BEFORE_ATTRIBUTE_NAME;
+	}
+};
+
+Tokenizer.prototype._stateInAttributeValueSingleQuotes = function (c) {
+	if(c === "'"){
+		this._state = BEFORE_ATTRIBUTE_NAME;
+		this._emitToken("onattribvalue");
+	}
+};
+
+Tokenizer.prototype._stateInAttributeValueNoQuotes = function (c) {
+	if(c === ">"){
+		this._emitToken("onattribvalue");
+		this._state = TEXT;
+		this._cbs.onopentagend();
+		this._sectionStart = this._index + 1;
+	} else if(whitespace(c)){
+		this._emitToken("onattribvalue");
+		this._state = BEFORE_ATTRIBUTE_NAME;
+	}
+};
+
+Tokenizer.prototype._stateBeforeDeclaration = function (c) {
+	if(c === "[") this._state = BEFORE_CDATA_1;
+	else if(c === "-") this._state = BEFORE_COMMENT;
+	else this._state = IN_DECLARATION;
+};
+
+Tokenizer.prototype._stateInDeclaration = function (c) {
+	if(c === ">"){
+		this._emitToken("ondeclaration");
+		this._state = TEXT;
+		this._sectionStart = this._index + 1;
+	}
+};
+
+Tokenizer.prototype._stateInProcessingInstruction = function (c) {
+	if(c === ">"){
+		this._emitToken("onprocessinginstruction");
+		this._state = TEXT;
+		this._sectionStart = this._index + 1;
+	}
+};
+
+Tokenizer.prototype._stateBeforeComment = function (c) {
+	if(c === "-"){
+		this._state = IN_COMMENT;
+		this._sectionStart = this._index + 1;
+	} else {
+		this._state = IN_DECLARATION;
+	}
+};
+
+Tokenizer.prototype._stateInComment = function (c) {
+	if(c === "-") this._state = AFTER_COMMENT_1;
+};
+
+Tokenizer.prototype._stateAfterComment1 = function (c) {
+	if(c === "-") this._state = AFTER_COMMENT_2;
+	else this._state = IN_COMMENT;
+};
+
+Tokenizer.prototype._stateAfterComment2 = function (c) {
+	if(c === ">"){
+		//remove 2 trailing chars
+		this._cbs.oncomment(this._buffer.substring(this._sectionStart, this._index - 2));
+		this._state = TEXT;
+		this._sectionStart = this._index + 1;
+	} else if (c !== "-") {
+		this._state = IN_COMMENT;
+	}
+	// else: stay in AFTER_COMMENT_2 (`--->`)
+};
+
+Tokenizer.prototype._stateBeforeCdata1 = function (c) {
+	if(c === "C") this._state = BEFORE_CDATA_2;
+	else this._state = IN_DECLARATION;
+};
+
+Tokenizer.prototype._stateBeforeCdata2 = function (c) {
+	if(c === "D") this._state = BEFORE_CDATA_3;
+	else this._state = IN_DECLARATION;
+};
+
+Tokenizer.prototype._stateBeforeCdata3 = function (c) {
+	if(c === "A") this._state = BEFORE_CDATA_4;
+	else this._state = IN_DECLARATION;
+};
+
+Tokenizer.prototype._stateBeforeCdata4 = function (c) {
+	if(c === "T") this._state = BEFORE_CDATA_5;
+	else this._state = IN_DECLARATION;
+};
+
+Tokenizer.prototype._stateBeforeCdata5 = function (c) {
+	if(c === "A") this._state = BEFORE_CDATA_6;
+	else this._state = IN_DECLARATION;
+};
+
+Tokenizer.prototype._stateBeforeCdata6 = function (c) {
+	if(c === "["){
+		this._state = IN_CDATA;
+		this._sectionStart = this._index + 1;
+	} else {
+		this._state = IN_DECLARATION;
+	}
+};
+
+Tokenizer.prototype._stateInCdata = function (c) {
+	if(c === "]") this._state = AFTER_CDATA_1;
+};
+
+Tokenizer.prototype._stateAfterCdata1 = function (c) {
+	if(c === "]") this._state = AFTER_CDATA_2;
+	else this._state = IN_CDATA;
+};
+
+Tokenizer.prototype._stateAfterCdata2 = function (c) {
+	if(c === ">"){
+		//remove 2 trailing chars
+		this._cbs.oncdata(this._buffer.substring(this._sectionStart, this._index - 2));
+		this._state = TEXT;
+		this._sectionStart = this._index + 1;
+	} else if (c !== "]") {
+		this._state = IN_CDATA;
+	}
+	//else: stay in AFTER_CDATA_2 (`]]]>`)
+};
+
+Tokenizer.prototype._stateBeforeSpecial = function (c) {
+  if(c === "c" || c === "C"){
+		this._state = BEFORE_SCRIPT_1;
+	} else if(c === "t" || c === "T"){
+		this._state = BEFORE_STYLE_1;
+	} else {
+		this._state = IN_TAG_NAME;
+		this._reconsume = true; //consume the token again
+	}
+};
+
+Tokenizer.prototype._stateBeforeSpecialEnd = function (c) {
+	if(this._special === 1 && (c === "c" || c === "C")){
+		this._state = AFTER_SCRIPT_1;
+	} else if(this._special === 2 && (c === "t" || c === "T")){
+		this._state = AFTER_STYLE_1;
+	}
+	else this._state = TEXT;
+};
+
+Tokenizer.prototype._stateBeforeScript1 = function (c) {
+	if(c === "r" || c === "R"){
+		this._state = BEFORE_SCRIPT_2;
+	} else {
+		this._state = IN_TAG_NAME;
+		this._reconsume = true; //consume the token again
+	}
+};
+
+Tokenizer.prototype._stateBeforeScript2 = function (c) {
+	if(c === "i" || c === "I"){
+		this._state = BEFORE_SCRIPT_3;
+	} else {
+		this._state = IN_TAG_NAME;
+		this._reconsume = true; //consume the token again
+	}
+};
+
+Tokenizer.prototype._stateBeforeScript3 = function (c) {
+	if(c === "p" || c === "P"){
+		this._state = BEFORE_SCRIPT_4;
+	} else {
+		this._state = IN_TAG_NAME;
+		this._reconsume = true; //consume the token again
+	}
+};
+
+Tokenizer.prototype._stateBeforeScript4 = function (c) {
+	if(c === "t" || c === "T"){
+		this._state = BEFORE_SCRIPT_5;
+	} else {
+		this._state = IN_TAG_NAME;
+		this._reconsume = true; //consume the token again
+	}
+};
+
+Tokenizer.prototype._stateBeforeScript5 = function (c) {
+	if(c === "/" || c === ">" || whitespace(c)){
+		this._special = 1;
+	}
+	this._state = IN_TAG_NAME;
+	this._reconsume = true; //consume the token again
+};
+
+Tokenizer.prototype._stateAfterScript1 = function (c) {
+	if(c === "r" || c === "R") this._state = AFTER_SCRIPT_2;
+	else this._state = TEXT;
+};
+
+Tokenizer.prototype._stateAfterScript2 = function (c) {
+	if(c === "i" || c === "I") this._state = AFTER_SCRIPT_3;
+	else this._state = TEXT;
+};
+
+Tokenizer.prototype._stateAfterScript3 = function (c) {
+	if(c === "p" || c === "P") this._state = AFTER_SCRIPT_4;
+	else this._state = TEXT;
+};
+
+Tokenizer.prototype._stateAfterScript4 = function (c) {
+	if(c === "t" || c === "T") this._state = AFTER_SCRIPT_5;
+	else this._state = TEXT;
+};
+
+Tokenizer.prototype._stateAfterScript5 = function (c) {
+	if(c === ">" || whitespace(c)){
+		this._state = IN_CLOSING_TAG_NAME;
+		this._sectionStart = this._index - 6;
+		this._reconsume = true; //reconsume the token
+	}
+	else this._state = TEXT;
+};
+
+Tokenizer.prototype._stateBeforeStyle1 = function (c) {
+	if(c === "y" || c === "Y"){
+		this._state = BEFORE_STYLE_2;
+	} else {
+		this._state = IN_TAG_NAME;
+		this._reconsume = true; //consume the token again
+	}
+};
+
+Tokenizer.prototype._stateBeforeStyle2 = function (c) {
+	if(c === "l" || c === "L"){
+		this._state = BEFORE_STYLE_3;
+	} else {
+		this._state = IN_TAG_NAME;
+		this._reconsume = true; //consume the token again
+	}
+};
+
+Tokenizer.prototype._stateBeforeStyle3 = function (c) {
+	if(c === "e" || c === "E"){
+		this._state = BEFORE_STYLE_4;
+	} else {
+		this._state = IN_TAG_NAME;
+		this._reconsume = true; //consume the token again
+	}
+};
+
+Tokenizer.prototype._stateBeforeStyle4 = function (c) {
+	if(c === "/" || c === ">" || whitespace(c)){
+		this._special = 2;
+	}
+	this._state = IN_TAG_NAME;
+	this._reconsume = true; //consume the token again
+};
+
+Tokenizer.prototype._stateAfterStyle1 = function (c) {
+	if(c === "y" || c === "Y") this._state = AFTER_STYLE_2;
+	else this._state = TEXT;
+};
+
+Tokenizer.prototype._stateAfterStyle2 = function (c) {
+	if(c === "l" || c === "L") this._state = AFTER_STYLE_3;
+	else this._state = TEXT;
+};
+
+Tokenizer.prototype._stateAfterStyle3 = function (c) {
+	if(c === "e" || c === "E") this._state = AFTER_STYLE_4;
+	else this._state = TEXT;
+};
+
+Tokenizer.prototype._stateAfterStyle4 = function (c) {
+	if(c === ">" || whitespace(c)){
+		this._state = IN_CLOSING_TAG_NAME;
+		this._sectionStart = this._index - 5;
+		this._reconsume = true; //reconsume the token
+	}
+	else this._state = TEXT;
+};
+
+Tokenizer.prototype._cleanup = function () {
+  if(this._sectionStart === -1){
+		this._buffer = "";
+		this._index = 0;
+	} else {
+		if(this._state === TEXT){
+			if(this._sectionStart !== this._index){
+				this._cbs.ontext(this._buffer.substr(this._sectionStart));
+			}
+			this._buffer = "";
+			this._index = 0;
+		} else if(this._sectionStart === this._index){
+			//the section just started
+			this._buffer = "";
+			this._index = 0;
+		} else if(this._sectionStart > 0){
+			//remove everything unnecessary
+			this._buffer = this._buffer.substr(this._sectionStart);
+			this._index -= this._sectionStart;
+		}
+
+		this._sectionStart = 0;
+	}
+};
+
 //TODO make events conditional
 Tokenizer.prototype.write = function(chunk){
 	this._buffer += chunk;
 
 	while(this._index < this._buffer.length && this._running){
 		var c = this._buffer.charAt(this._index);
-		if(this._state === TEXT){
-			if(c === "<"){
-				this._emitIfToken("ontext");
-				this._state = BEFORE_TAG_NAME;
-				this._sectionStart = this._index;
-			}
-		} else if(this._state === BEFORE_TAG_NAME){
-			if(c === "/"){
-				this._state = BEFORE_CLOSING_TAG_NAME;
-			} else if(c === ">" || this._special > 0 || whitespace(c)) {
-				this._state = TEXT;
-			} else {
-				if(whitespace(c));
-				else if(c === "!"){
-					this._state = BEFORE_DECLARATION;
-					this._sectionStart = this._index + 1;
-				} else if(c === "?"){
-					this._state = IN_PROCESSING_INSTRUCTION;
-					this._sectionStart = this._index + 1;
-				} else if(
-					!(this._options && this._options.xmlMode) &&
-					(c === "s" || c === "S")
-				){
-					this._state = BEFORE_SPECIAL;
-					this._sectionStart = this._index;
-				} else {
-					this._state = IN_TAG_NAME;
-					this._sectionStart = this._index;
-				}
-			}
-		} else if(this._state === IN_TAG_NAME){
-			if(c === "/"){
-				this._emitToken("onopentagname");
-				this._cbs.onselfclosingtag();
-				this._state = AFTER_CLOSING_TAG_NAME;
-			} else if(c === ">"){
-				this._emitToken("onopentagname");
-				this._cbs.onopentagend();
-				this._state = TEXT;
-				this._sectionStart = this._index + 1;
-			} else if(whitespace(c)){
-				this._emitToken("onopentagname");
-				this._state = BEFORE_ATTRIBUTE_NAME;
-			}
+		if(this._state === TEXT) {
+      this._stateText(c);
+    } else if(this._state === BEFORE_TAG_NAME){
+      this._stateBeforeTagName(c);
+		} else if(this._state === IN_TAG_NAME) {
+      this._stateInTagName(c);
 		} else if(this._state === BEFORE_CLOSING_TAG_NAME){
-			if(whitespace(c));
-			else if(c === ">"){
-				this._state = TEXT;
-			} else if(this._special > 0){
-				if(c === "s" || c === "S"){
-					this._state = BEFORE_SPECIAL_END;
-				} else {
-					this._state = TEXT;
-					continue;
-				}
-			} else {
-				this._state = IN_CLOSING_TAG_NAME;
-				this._sectionStart = this._index;
-			}
+      this._stateBeforeCloseingTagName(c);
 		} else if(this._state === IN_CLOSING_TAG_NAME){
-			if(c === ">"){
-				this._emitToken("onclosetag");
-				this._state = TEXT;
-				this._sectionStart = this._index + 1;
-				this._special = 0;
-			} else if(whitespace(c)){
-				this._emitToken("onclosetag");
-				this._state = AFTER_CLOSING_TAG_NAME;
-				this._special = 0;
-			}
+      this._stateInCloseingTagName(c);
 		} else if(this._state === AFTER_CLOSING_TAG_NAME){
-			//skip everything until ">"
-			if(c === ">"){
-				this._state = TEXT;
-				this._sectionStart = this._index + 1;
-			}
+      this._stateAfterCloseingTagName(c);
 		}
 
 		/*
 		*	attributes
 		*/
 		else if(this._state === BEFORE_ATTRIBUTE_NAME){
-			if(c === ">"){
-				this._state = TEXT;
-				this._cbs.onopentagend();
-				this._sectionStart = this._index + 1;
-			} else if(c === "/"){
-				this._cbs.onselfclosingtag();
-				this._state = AFTER_CLOSING_TAG_NAME;
-			} else if(!whitespace(c)){
-				this._state = IN_ATTRIBUTE_NAME;
-				this._sectionStart = this._index;
-			}
+      this._stateBeforeAttributeName(c);
 		} else if(this._state === IN_ATTRIBUTE_NAME){
-			if(c === "="){
-				this._emitIfToken("onattribname");
-				this._state = BEFORE_ATTRIBUTE_VALUE;
-			} else if(whitespace(c)){
-				this._emitIfToken("onattribname");
-				this._state = AFTER_ATTRIBUTE_NAME;
-			} else if(c === "/" || c === ">"){
-				this._emitIfToken("onattribname");
-				this._state = BEFORE_ATTRIBUTE_NAME;
-				continue;
-			}
+      this._stateInAttributeName(c);
 		} else if(this._state === AFTER_ATTRIBUTE_NAME){
-			if(c === "="){
-				this._state = BEFORE_ATTRIBUTE_VALUE;
-			} else if(c === "/" || c === ">"){
-				this._state = BEFORE_ATTRIBUTE_NAME;
-				continue;
-			} else if(!whitespace(c)){
-				this._state = IN_ATTRIBUTE_NAME;
-				this._sectionStart = this._index;
-			}
+      this._stateAfterAttributeName(c);
 		} else if(this._state === BEFORE_ATTRIBUTE_VALUE){
-			if(c === "\""){
-				this._state = IN_ATTRIBUTE_VALUE_DOUBLE_QUOTES;
-				this._sectionStart = this._index + 1;
-			} else if(c === "'"){
-				this._state = IN_ATTRIBUTE_VALUE_SINGLE_QUOTES;
-				this._sectionStart = this._index + 1;
-			} else if(!whitespace(c)){
-				this._state = IN_ATTRIBUTE_VALUE_NO_QUOTES;
-				this._sectionStart = this._index;
-			}
+      this._stateBeforeAttributeValue(c);
 		} else if(this._state === IN_ATTRIBUTE_VALUE_DOUBLE_QUOTES){
-			if(c === "\""){
-				this._emitToken("onattribvalue");
-				this._state = BEFORE_ATTRIBUTE_NAME;
-			}
+      this._stateInAttributeValueDoubleQuotes(c);
 		} else if(this._state === IN_ATTRIBUTE_VALUE_SINGLE_QUOTES){
-			if(c === "'"){
-				this._state = BEFORE_ATTRIBUTE_NAME;	
-				this._emitToken("onattribvalue");
-			}
+      this._stateInAttributeValueSingleQuotes(c);
 		} else if(this._state === IN_ATTRIBUTE_VALUE_NO_QUOTES){
-			if(c === ">"){
-				this._emitToken("onattribvalue");
-				this._state = TEXT;
-				this._cbs.onopentagend();
-				this._sectionStart = this._index + 1;
-			} else if(whitespace(c)){
-				this._emitToken("onattribvalue");
-				this._state = BEFORE_ATTRIBUTE_NAME;
-			}
+      this._stateInAttributeValueNoQuotes(c);
 		}
 
 		/*
 		*	declarations
 		*/
 		else if(this._state === BEFORE_DECLARATION){
-			if(c === "[") this._state = BEFORE_CDATA_1;
-			else if(c === "-") this._state = BEFORE_COMMENT;
-			else this._state = IN_DECLARATION;
+      this._stateBeforeDeclaration(c);
 		} else if(this._state === IN_DECLARATION){
-			if(c === ">"){
-				this._emitToken("ondeclaration");
-				this._state = TEXT;
-				this._sectionStart = this._index + 1;
-			}
+			this._stateInDeclaration(c);
 		}
 
 		/*
 		*	processing instructions
 		*/
 		else if(this._state === IN_PROCESSING_INSTRUCTION){
-			if(c === ">"){
-				this._emitToken("onprocessinginstruction");
-				this._state = TEXT;
-				this._sectionStart = this._index + 1;
-			}
+			this._stateInProcessingInstruction(c);
 		}
 
 		/*
 		*	comments
 		*/
 		else if(this._state === BEFORE_COMMENT){
-			if(c === "-"){
-				this._state = IN_COMMENT;
-				this._sectionStart = this._index + 1;
-			} else {
-				this._state = IN_DECLARATION;
-			}
+      this._stateBeforeComment(c);
 		} else if(this._state === IN_COMMENT){
-			if(c === "-") this._state = AFTER_COMMENT_1;
+			this._stateInComment(c);
 		} else if(this._state === AFTER_COMMENT_1){
-			if(c === "-") this._state = AFTER_COMMENT_2;
-			else this._state = IN_COMMENT;
+			this._stateAfterComment1(c);
 		} else if(this._state === AFTER_COMMENT_2){
-			if(c === ">"){
-				//remove 2 trailing chars
-				this._cbs.oncomment(this._buffer.substring(this._sectionStart, this._index - 2));
-				this._state = TEXT;
-				this._sectionStart = this._index + 1;
-			} else if (c !== "-") {
-				this._state = IN_COMMENT;
-			}
-			// else: stay in AFTER_COMMENT_2 (`--->`)
+      this._stateAfterComment2(c);
 		}
 
 		/*
 		*	cdata
 		*/
 		else if(this._state === BEFORE_CDATA_1){
-			if(c === "C") this._state = BEFORE_CDATA_2;
-			else this._state = IN_DECLARATION;
+			this._stateBeforeCdata1(c);
 		} else if(this._state === BEFORE_CDATA_2){
-			if(c === "D") this._state = BEFORE_CDATA_3;
-			else this._state = IN_DECLARATION;
+			this._stateBeforeCdata2(c);
 		} else if(this._state === BEFORE_CDATA_3){
-			if(c === "A") this._state = BEFORE_CDATA_4;
-			else this._state = IN_DECLARATION;
+			this._stateBeforeCdata3(c);
 		} else if(this._state === BEFORE_CDATA_4){
-			if(c === "T") this._state = BEFORE_CDATA_5;
-			else this._state = IN_DECLARATION;
+			this._stateBeforeCdata4(c);
 		} else if(this._state === BEFORE_CDATA_5){
-			if(c === "A") this._state = BEFORE_CDATA_6;
-			else this._state = IN_DECLARATION;
+			this._stateBeforeCdata5(c);
 		} else if(this._state === BEFORE_CDATA_6){
-			if(c === "["){
-				this._state = IN_CDATA;
-				this._sectionStart = this._index + 1;
-			} else {
-				this._state = IN_DECLARATION;
-			}
+			this._stateBeforeCdata6(c);
 		} else if(this._state === IN_CDATA){
-			if(c === "]") this._state = AFTER_CDATA_1;
+			this._stateInCdata(c);
 		} else if(this._state === AFTER_CDATA_1){
-			if(c === "]") this._state = AFTER_CDATA_2;
-			else this._state = IN_CDATA;
+			this._stateAfterCdata1(c);
 		} else if(this._state === AFTER_CDATA_2){
-			if(c === ">"){
-				//remove 2 trailing chars
-				this._cbs.oncdata(this._buffer.substring(this._sectionStart, this._index - 2));
-				this._state = TEXT;
-				this._sectionStart = this._index + 1;
-			} else if (c !== "]") {
-				this._state = IN_CDATA;
-			}
-			//else: stay in AFTER_CDATA_2 (`]]]>`)
+      this._stateAfterCdata2(c);
 		}
 
 		/*
 		* special tags
 		*/
 		else if(this._state === BEFORE_SPECIAL){
-			if(c === "c" || c === "C"){
-				this._state = BEFORE_SCRIPT_1;
-			} else if(c === "t" || c === "T"){
-				this._state = BEFORE_STYLE_1;
-			} else {
-				this._state = IN_TAG_NAME;
-				continue; //consume the token again
-			}
+			this._stateBeforeSpecial(c);
 		} else if(this._state === BEFORE_SPECIAL_END){
-			if(this._special === 1 && (c === "c" || c === "C")){
-				this._state = AFTER_SCRIPT_1;
-			} else if(this._special === 2 && (c === "t" || c === "T")){
-				this._state = AFTER_STYLE_1;
-			} 
-			else this._state = TEXT;
+      this._stateBeforeSpecialEnd(c);
 		}
 
 		/*
 		* script
 		*/
 		else if(this._state === BEFORE_SCRIPT_1){
-			if(c === "r" || c === "R"){
-				this._state = BEFORE_SCRIPT_2;
-			} else {
-				this._state = IN_TAG_NAME;
-				continue; //consume the token again
-			}
+      this._stateBeforeScript1(c);
 		} else if(this._state === BEFORE_SCRIPT_2){
-			if(c === "i" || c === "I"){
-				this._state = BEFORE_SCRIPT_3;
-			} else {
-				this._state = IN_TAG_NAME;
-				continue; //consume the token again
-			}
+      this._stateBeforeScript2(c);
 		} else if(this._state === BEFORE_SCRIPT_3){
-			if(c === "p" || c === "P"){
-				this._state = BEFORE_SCRIPT_4;
-			} else {
-				this._state = IN_TAG_NAME;
-				continue; //consume the token again
-			}
+      this._stateBeforeScript3(c);
 		} else if(this._state === BEFORE_SCRIPT_4){
-			if(c === "t" || c === "T"){
-				this._state = BEFORE_SCRIPT_5;
-			} else {
-				this._state = IN_TAG_NAME;
-				continue; //consume the token again
-			}
+      this._stateBeforeScript4(c);
 		} else if(this._state === BEFORE_SCRIPT_5){
-			if(c === "/" || c === ">" || whitespace(c)){
-				this._special = 1;
-			}
-			this._state = IN_TAG_NAME;
-			continue; //consume the token again
+      this._stateBeforeScript5(c);
 		}
 
 		else if(this._state === AFTER_SCRIPT_1){
-			if(c === "r" || c === "R"){
-				this._state = AFTER_SCRIPT_2;
-			} 
-			else this._state = TEXT;
+      this._stateAfterScript1(c);
 		} else if(this._state === AFTER_SCRIPT_2){
-			if(c === "i" || c === "I"){
-				this._state = AFTER_SCRIPT_3;
-			} 
-			else this._state = TEXT;
+      this._stateAfterScript2(c);
 		} else if(this._state === AFTER_SCRIPT_3){
-			if(c === "p" || c === "P"){
-				this._state = AFTER_SCRIPT_4;
-			} 
-			else this._state = TEXT;
+      this._stateAfterScript3(c);
 		} else if(this._state === AFTER_SCRIPT_4){
-			if(c === "t" || c === "T"){
-				this._state = AFTER_SCRIPT_5;
-			} 
-			else this._state = TEXT;
+      this._stateAfterScript4(c);
 		} else if(this._state === AFTER_SCRIPT_5){
-			if(c === ">" || whitespace(c)){
-				this._state = IN_CLOSING_TAG_NAME;
-				this._sectionStart = this._index - 6;
-				continue; //reconsume the token
-			} 
-			else this._state = TEXT;
+      this._stateAfterScript5(c);
 		}
 
 		/*
 		* style
 		*/
 		else if(this._state === BEFORE_STYLE_1){
-			if(c === "y" || c === "Y"){
-				this._state = BEFORE_STYLE_2;
-			} else {
-				this._state = IN_TAG_NAME;
-				continue; //consume the token again
-			}
+      this._stateBeforeStyle1(c);
 		} else if(this._state === BEFORE_STYLE_2){
-			if(c === "l" || c === "L"){
-				this._state = BEFORE_STYLE_3;
-			} else {
-				this._state = IN_TAG_NAME;
-				continue; //consume the token again
-			}
+      this._stateBeforeStyle2(c);
 		} else if(this._state === BEFORE_STYLE_3){
-			if(c === "e" || c === "E"){
-				this._state = BEFORE_STYLE_4;
-			} else {
-				this._state = IN_TAG_NAME;
-				continue; //consume the token again
-			}
+      this._stateBeforeStyle3(c);
 		} else if(this._state === BEFORE_STYLE_4){
-			if(c === "/" || c === ">" || whitespace(c)){
-				this._special = 2;
-			}
-			this._state = IN_TAG_NAME;
-			continue; //consume the token again
+      this._stateBeforeStyle4(c);
 		}
 
 		else if(this._state === AFTER_STYLE_1){
-			if(c === "y" || c === "Y"){
-				this._state = AFTER_STYLE_2;
-			} 
-			else this._state = TEXT;
+      this._stateAfterStyle1(c);
 		} else if(this._state === AFTER_STYLE_2){
-			if(c === "l" || c === "L"){
-				this._state = AFTER_STYLE_3;
-			} 
-			else this._state = TEXT;
+      this._stateAfterStyle2(c);
 		} else if(this._state === AFTER_STYLE_3){
-			if(c === "e" || c === "E"){
-				this._state = AFTER_STYLE_4;
-			} 
-			else this._state = TEXT;
+      this._stateAfterStyle3(c);
 		} else if(this._state === AFTER_STYLE_4){
-			if(c === ">" || whitespace(c)){
-				this._state = IN_CLOSING_TAG_NAME;
-				this._sectionStart = this._index - 5;
-				continue; //reconsume the token
-			} 
-			else this._state = TEXT;
+      this._stateAfterStyle4(c);
 		}
 
-
 		else {
-			this._cbs.onerror(Error("unknown state"), this._state);
+			this._cbs.onerror(Error("unknown _state"), this._state);
 		}
 
-		this._index++;
+    if (this._reconsume) {
+      this._reconsume = false;
+    } else {
+      this._index++;
+    }
 	}
 
 	//cleanup
-	if(this._sectionStart === -1){
-		this._buffer = "";
-		this._index = 0;
-	} else {
-		if(this._state === TEXT){
-			if(this._sectionStart !== this._index){
-				this._cbs.ontext(this._buffer.substr(this._sectionStart));
-			}
-			this._buffer = "";
-			this._index = 0;
-		} else if(this._sectionStart === this._index){
-			//the section just started
-			this._buffer = "";
-			this._index = 0;
-		} else if(this._sectionStart > 0){
-			//remove everything unnecessary
-			this._buffer = this._buffer.substr(this._sectionStart);
-			this._index -= this._sectionStart;
-		}
-
-		this._sectionStart = 0;
-	}
+  this._cleanup();
 };
 
 Tokenizer.prototype.pause = function(){

From 0219e3ab4317679355ca7d3db0b896f4fac29c7e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 2 Aug 2013 12:07:39 +0200
Subject: [PATCH 373/450] [tokenizer] don't save the options object

---
 lib/Tokenizer.js | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
index a43c99f..25d803c 100644
--- a/lib/Tokenizer.js
+++ b/lib/Tokenizer.js
@@ -76,12 +76,11 @@ function Tokenizer(options, cbs){
 	this._buffer = "";
 	this._sectionStart = 0;
 	this._index = 0;
-	this._options = options;
 	this._special = 0; // 1 for script, 2 for style
 	this._cbs = cbs;
 	this._running = true;
 	this._reconsume = false;
-	this._xmlMode = this._options && this._options.xmlMode;
+	this._xmlMode = !!(options && options.xmlMode);
 }
 
 Tokenizer.prototype._stateText = function (c) {
@@ -727,7 +726,7 @@ Tokenizer.prototype.end = function(chunk){
 };
 
 Tokenizer.prototype.reset = function(){
-	Tokenizer.call(this, this._options, this._cbs);
+	Tokenizer.call(this, {xmlMode: this._xmlMode}, this._cbs);
 };
 
 Tokenizer.prototype._emitToken = function(name){

From 2aae96f42c6436695f89456f4c0f5361461767d4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 2 Aug 2013 12:08:24 +0200
Subject: [PATCH 374/450] [tokenizer] use ternary expressions for simple states

---
 lib/Tokenizer.js | 21 +++++++--------------
 1 file changed, 7 insertions(+), 14 deletions(-)

diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
index 25d803c..0b06dad 100644
--- a/lib/Tokenizer.js
+++ b/lib/Tokenizer.js
@@ -282,8 +282,7 @@ Tokenizer.prototype._stateInComment = function (c) {
 };
 
 Tokenizer.prototype._stateAfterComment1 = function (c) {
-	if(c === "-") this._state = AFTER_COMMENT_2;
-	else this._state = IN_COMMENT;
+	this._state = c === "-" ? AFTER_COMMENT_2 : IN_COMMENT;
 };
 
 Tokenizer.prototype._stateAfterComment2 = function (c) {
@@ -299,28 +298,23 @@ Tokenizer.prototype._stateAfterComment2 = function (c) {
 };
 
 Tokenizer.prototype._stateBeforeCdata1 = function (c) {
-	if(c === "C") this._state = BEFORE_CDATA_2;
-	else this._state = IN_DECLARATION;
+	this._state = c === "C" ? BEFORE_CDATA_2 : IN_DECLARATION;
 };
 
 Tokenizer.prototype._stateBeforeCdata2 = function (c) {
-	if(c === "D") this._state = BEFORE_CDATA_3;
-	else this._state = IN_DECLARATION;
+	this._state = c === "D" ? BEFORE_CDATA_3 : IN_DECLARATION;
 };
 
 Tokenizer.prototype._stateBeforeCdata3 = function (c) {
-	if(c === "A") this._state = BEFORE_CDATA_4;
-	else this._state = IN_DECLARATION;
+	this._state = c === "A" ? BEFORE_CDATA_4 : IN_DECLARATION;
 };
 
 Tokenizer.prototype._stateBeforeCdata4 = function (c) {
-	if(c === "T") this._state = BEFORE_CDATA_5;
-	else this._state = IN_DECLARATION;
+	this._state = c === "T" ? BEFORE_CDATA_5 : IN_DECLARATION;
 };
 
 Tokenizer.prototype._stateBeforeCdata5 = function (c) {
-	if(c === "A") this._state = BEFORE_CDATA_6;
-	else this._state = IN_DECLARATION;
+	this._state = c === "A" ? BEFORE_CDATA_6 : IN_DECLARATION;
 };
 
 Tokenizer.prototype._stateBeforeCdata6 = function (c) {
@@ -337,8 +331,7 @@ Tokenizer.prototype._stateInCdata = function (c) {
 };
 
 Tokenizer.prototype._stateAfterCdata1 = function (c) {
-	if(c === "]") this._state = AFTER_CDATA_2;
-	else this._state = IN_CDATA;
+	this._state = c === "]" ? AFTER_CDATA_2 : IN_CDATA;
 };
 
 Tokenizer.prototype._stateAfterCdata2 = function (c) {

From f6e21dd6d423fa00cca84b253f388b451503b8e4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 2 Aug 2013 12:09:21 +0200
Subject: [PATCH 375/450] [tokenizer] added variables for states of _special

---
 lib/Tokenizer.js | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
index 0b06dad..8608e31 100644
--- a/lib/Tokenizer.js
+++ b/lib/Tokenizer.js
@@ -64,7 +64,11 @@ var i = 0,
     AFTER_STYLE_1 = i++, //T
     AFTER_STYLE_2 = i++, //Y
     AFTER_STYLE_3 = i++, //L
-    AFTER_STYLE_4 = i++; //E
+    AFTER_STYLE_4 = i++, //E
+
+    SPECIAL_NONE = 0,
+    SPECIAL_SCRIPT = 1,
+    SPECIAL_STYLE = 2;
 
 
 function whitespace(c){
@@ -76,7 +80,7 @@ function Tokenizer(options, cbs){
 	this._buffer = "";
 	this._sectionStart = 0;
 	this._index = 0;
-	this._special = 0; // 1 for script, 2 for style
+	this._special = SPECIAL_NONE;
 	this._cbs = cbs;
 	this._running = true;
 	this._reconsume = false;
@@ -94,7 +98,7 @@ Tokenizer.prototype._stateText = function (c) {
 Tokenizer.prototype._stateBeforeTagName = function (c) {
 	if(c === "/"){
 		this._state = BEFORE_CLOSING_TAG_NAME;
-	} else if(c === ">" || this._special > 0 || whitespace(c)) {
+	} else if(c === ">" || this._special !== SPECIAL_NONE || whitespace(c)) {
 		this._state = TEXT;
 	} else if(whitespace(c)) {
 		// skip
@@ -133,7 +137,7 @@ Tokenizer.prototype._stateBeforeCloseingTagName = function (c) {
 	if(whitespace(c));
 	else if(c === ">"){
 		this._state = TEXT;
-	} else if(this._special > 0){
+	} else if(this._special !== SPECIAL_NONE){
 		if(c === "s" || c === "S"){
 			this._state = BEFORE_SPECIAL_END;
 		} else {
@@ -151,11 +155,11 @@ Tokenizer.prototype._stateInCloseingTagName = function (c) {
 		this._emitToken("onclosetag");
 		this._state = TEXT;
 		this._sectionStart = this._index + 1;
-		this._special = 0;
+		this._special = SPECIAL_NONE;
 	} else if(whitespace(c)){
 		this._emitToken("onclosetag");
 		this._state = AFTER_CLOSING_TAG_NAME;
-		this._special = 0;
+		this._special = SPECIAL_NONE;
 	}
 };
 
@@ -358,9 +362,9 @@ Tokenizer.prototype._stateBeforeSpecial = function (c) {
 };
 
 Tokenizer.prototype._stateBeforeSpecialEnd = function (c) {
-	if(this._special === 1 && (c === "c" || c === "C")){
+	if(this._special === SPECIAL_SCRIPT && (c === "c" || c === "C")){
 		this._state = AFTER_SCRIPT_1;
-	} else if(this._special === 2 && (c === "t" || c === "T")){
+	} else if(this._special === SPECIAL_STYLE && (c === "t" || c === "T")){
 		this._state = AFTER_STYLE_1;
 	}
 	else this._state = TEXT;
@@ -404,7 +408,7 @@ Tokenizer.prototype._stateBeforeScript4 = function (c) {
 
 Tokenizer.prototype._stateBeforeScript5 = function (c) {
 	if(c === "/" || c === ">" || whitespace(c)){
-		this._special = 1;
+		this._special = SPECIAL_SCRIPT;
 	}
 	this._state = IN_TAG_NAME;
 	this._reconsume = true; //consume the token again
@@ -468,7 +472,7 @@ Tokenizer.prototype._stateBeforeStyle3 = function (c) {
 
 Tokenizer.prototype._stateBeforeStyle4 = function (c) {
 	if(c === "/" || c === ">" || whitespace(c)){
-		this._special = 2;
+		this._special = SPECIAL_STYLE;
 	}
 	this._state = IN_TAG_NAME;
 	this._reconsume = true; //consume the token again

From f3fb8d71042bc0dd571bd3a1aadc745000823325 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 2 Aug 2013 12:09:43 +0200
Subject: [PATCH 376/450] [tokenizer] fixed whitespace

---
 lib/Tokenizer.js | 87 ++++++++++++++++++++++++------------------------
 1 file changed, 43 insertions(+), 44 deletions(-)

diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
index 8608e31..83efe64 100644
--- a/lib/Tokenizer.js
+++ b/lib/Tokenizer.js
@@ -534,43 +534,43 @@ Tokenizer.prototype.write = function(chunk){
 	while(this._index < this._buffer.length && this._running){
 		var c = this._buffer.charAt(this._index);
 		if(this._state === TEXT) {
-      this._stateText(c);
-    } else if(this._state === BEFORE_TAG_NAME){
-      this._stateBeforeTagName(c);
+			this._stateText(c);
+		} else if(this._state === BEFORE_TAG_NAME){
+			this._stateBeforeTagName(c);
 		} else if(this._state === IN_TAG_NAME) {
-      this._stateInTagName(c);
+			this._stateInTagName(c);
 		} else if(this._state === BEFORE_CLOSING_TAG_NAME){
-      this._stateBeforeCloseingTagName(c);
+			this._stateBeforeCloseingTagName(c);
 		} else if(this._state === IN_CLOSING_TAG_NAME){
-      this._stateInCloseingTagName(c);
+			this._stateInCloseingTagName(c);
 		} else if(this._state === AFTER_CLOSING_TAG_NAME){
-      this._stateAfterCloseingTagName(c);
+			this._stateAfterCloseingTagName(c);
 		}
 
 		/*
 		*	attributes
 		*/
 		else if(this._state === BEFORE_ATTRIBUTE_NAME){
-      this._stateBeforeAttributeName(c);
+			this._stateBeforeAttributeName(c);
 		} else if(this._state === IN_ATTRIBUTE_NAME){
-      this._stateInAttributeName(c);
+			this._stateInAttributeName(c);
 		} else if(this._state === AFTER_ATTRIBUTE_NAME){
-      this._stateAfterAttributeName(c);
+			this._stateAfterAttributeName(c);
 		} else if(this._state === BEFORE_ATTRIBUTE_VALUE){
-      this._stateBeforeAttributeValue(c);
+			this._stateBeforeAttributeValue(c);
 		} else if(this._state === IN_ATTRIBUTE_VALUE_DOUBLE_QUOTES){
-      this._stateInAttributeValueDoubleQuotes(c);
+			this._stateInAttributeValueDoubleQuotes(c);
 		} else if(this._state === IN_ATTRIBUTE_VALUE_SINGLE_QUOTES){
-      this._stateInAttributeValueSingleQuotes(c);
+			this._stateInAttributeValueSingleQuotes(c);
 		} else if(this._state === IN_ATTRIBUTE_VALUE_NO_QUOTES){
-      this._stateInAttributeValueNoQuotes(c);
+			this._stateInAttributeValueNoQuotes(c);
 		}
 
 		/*
 		*	declarations
 		*/
 		else if(this._state === BEFORE_DECLARATION){
-      this._stateBeforeDeclaration(c);
+			this._stateBeforeDeclaration(c);
 		} else if(this._state === IN_DECLARATION){
 			this._stateInDeclaration(c);
 		}
@@ -586,13 +586,13 @@ Tokenizer.prototype.write = function(chunk){
 		*	comments
 		*/
 		else if(this._state === BEFORE_COMMENT){
-      this._stateBeforeComment(c);
+			this._stateBeforeComment(c);
 		} else if(this._state === IN_COMMENT){
 			this._stateInComment(c);
 		} else if(this._state === AFTER_COMMENT_1){
 			this._stateAfterComment1(c);
 		} else if(this._state === AFTER_COMMENT_2){
-      this._stateAfterComment2(c);
+			this._stateAfterComment2(c);
 		}
 
 		/*
@@ -615,7 +615,7 @@ Tokenizer.prototype.write = function(chunk){
 		} else if(this._state === AFTER_CDATA_1){
 			this._stateAfterCdata1(c);
 		} else if(this._state === AFTER_CDATA_2){
-      this._stateAfterCdata2(c);
+			this._stateAfterCdata2(c);
 		}
 
 		/*
@@ -624,72 +624,71 @@ Tokenizer.prototype.write = function(chunk){
 		else if(this._state === BEFORE_SPECIAL){
 			this._stateBeforeSpecial(c);
 		} else if(this._state === BEFORE_SPECIAL_END){
-      this._stateBeforeSpecialEnd(c);
+			this._stateBeforeSpecialEnd(c);
 		}
 
 		/*
 		* script
 		*/
 		else if(this._state === BEFORE_SCRIPT_1){
-      this._stateBeforeScript1(c);
+			this._stateBeforeScript1(c);
 		} else if(this._state === BEFORE_SCRIPT_2){
-      this._stateBeforeScript2(c);
+			this._stateBeforeScript2(c);
 		} else if(this._state === BEFORE_SCRIPT_3){
-      this._stateBeforeScript3(c);
+			this._stateBeforeScript3(c);
 		} else if(this._state === BEFORE_SCRIPT_4){
-      this._stateBeforeScript4(c);
+			this._stateBeforeScript4(c);
 		} else if(this._state === BEFORE_SCRIPT_5){
-      this._stateBeforeScript5(c);
+			this._stateBeforeScript5(c);
 		}
 
 		else if(this._state === AFTER_SCRIPT_1){
-      this._stateAfterScript1(c);
+			this._stateAfterScript1(c);
 		} else if(this._state === AFTER_SCRIPT_2){
-      this._stateAfterScript2(c);
+			this._stateAfterScript2(c);
 		} else if(this._state === AFTER_SCRIPT_3){
-      this._stateAfterScript3(c);
+			this._stateAfterScript3(c);
 		} else if(this._state === AFTER_SCRIPT_4){
-      this._stateAfterScript4(c);
+			this._stateAfterScript4(c);
 		} else if(this._state === AFTER_SCRIPT_5){
-      this._stateAfterScript5(c);
+			this._stateAfterScript5(c);
 		}
 
 		/*
 		* style
 		*/
 		else if(this._state === BEFORE_STYLE_1){
-      this._stateBeforeStyle1(c);
+			this._stateBeforeStyle1(c);
 		} else if(this._state === BEFORE_STYLE_2){
-      this._stateBeforeStyle2(c);
+			this._stateBeforeStyle2(c);
 		} else if(this._state === BEFORE_STYLE_3){
-      this._stateBeforeStyle3(c);
+			this._stateBeforeStyle3(c);
 		} else if(this._state === BEFORE_STYLE_4){
-      this._stateBeforeStyle4(c);
+			this._stateBeforeStyle4(c);
 		}
 
 		else if(this._state === AFTER_STYLE_1){
-      this._stateAfterStyle1(c);
+			this._stateAfterStyle1(c);
 		} else if(this._state === AFTER_STYLE_2){
-      this._stateAfterStyle2(c);
+			this._stateAfterStyle2(c);
 		} else if(this._state === AFTER_STYLE_3){
-      this._stateAfterStyle3(c);
+			this._stateAfterStyle3(c);
 		} else if(this._state === AFTER_STYLE_4){
-      this._stateAfterStyle4(c);
+			this._stateAfterStyle4(c);
 		}
 
 		else {
 			this._cbs.onerror(Error("unknown _state"), this._state);
 		}
 
-    if (this._reconsume) {
-      this._reconsume = false;
-    } else {
-      this._index++;
-    }
+		if (this._reconsume) {
+    		this._reconsume = false;
+		} else {
+			this._index++;
+		}
 	}
 
-	//cleanup
-  this._cleanup();
+	this._cleanup();
 };
 
 Tokenizer.prototype.pause = function(){

From bf0eaa4620c8b27b4dd1b45a56fed50056657c2f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 2 Aug 2013 12:10:27 +0200
Subject: [PATCH 377/450] [tokenizer] more ternaries

---
 lib/Tokenizer.js | 26 ++++++++++----------------
 1 file changed, 10 insertions(+), 16 deletions(-)

diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
index 83efe64..9cb0969 100644
--- a/lib/Tokenizer.js
+++ b/lib/Tokenizer.js
@@ -233,8 +233,8 @@ Tokenizer.prototype._stateInAttributeValueDoubleQuotes = function (c) {
 
 Tokenizer.prototype._stateInAttributeValueSingleQuotes = function (c) {
 	if(c === "'"){
-		this._state = BEFORE_ATTRIBUTE_NAME;
 		this._emitToken("onattribvalue");
+		this._state = BEFORE_ATTRIBUTE_NAME;
 	}
 };
 
@@ -251,9 +251,9 @@ Tokenizer.prototype._stateInAttributeValueNoQuotes = function (c) {
 };
 
 Tokenizer.prototype._stateBeforeDeclaration = function (c) {
-	if(c === "[") this._state = BEFORE_CDATA_1;
-	else if(c === "-") this._state = BEFORE_COMMENT;
-	else this._state = IN_DECLARATION;
+	this._state = c === "[" ? BEFORE_CDATA_1 :
+					c === "-" ? BEFORE_COMMENT :
+						IN_DECLARATION;
 };
 
 Tokenizer.prototype._stateInDeclaration = function (c) {
@@ -415,23 +415,19 @@ Tokenizer.prototype._stateBeforeScript5 = function (c) {
 };
 
 Tokenizer.prototype._stateAfterScript1 = function (c) {
-	if(c === "r" || c === "R") this._state = AFTER_SCRIPT_2;
-	else this._state = TEXT;
+	this._state = (c === "r" || c === "R") ? AFTER_SCRIPT_2 : TEXT;
 };
 
 Tokenizer.prototype._stateAfterScript2 = function (c) {
-	if(c === "i" || c === "I") this._state = AFTER_SCRIPT_3;
-	else this._state = TEXT;
+	this._state = (c === "i" || c === "I") ? AFTER_SCRIPT_3 : TEXT;
 };
 
 Tokenizer.prototype._stateAfterScript3 = function (c) {
-	if(c === "p" || c === "P") this._state = AFTER_SCRIPT_4;
-	else this._state = TEXT;
+	this._state = (c === "p" || c === "P") ? AFTER_SCRIPT_4 : TEXT;
 };
 
 Tokenizer.prototype._stateAfterScript4 = function (c) {
-	if(c === "t" || c === "T") this._state = AFTER_SCRIPT_5;
-	else this._state = TEXT;
+	this._state = (c === "t" || c === "T") ? AFTER_SCRIPT_5 : TEXT;
 };
 
 Tokenizer.prototype._stateAfterScript5 = function (c) {
@@ -479,13 +475,11 @@ Tokenizer.prototype._stateBeforeStyle4 = function (c) {
 };
 
 Tokenizer.prototype._stateAfterStyle1 = function (c) {
-	if(c === "y" || c === "Y") this._state = AFTER_STYLE_2;
-	else this._state = TEXT;
+	this._state = (c === "y" || c === "Y") ? AFTER_STYLE_2 : TEXT;
 };
 
 Tokenizer.prototype._stateAfterStyle2 = function (c) {
-	if(c === "l" || c === "L") this._state = AFTER_STYLE_3;
-	else this._state = TEXT;
+	this._state = (c === "l" || c === "L") ? AFTER_STYLE_3 : TEXT;
 };
 
 Tokenizer.prototype._stateAfterStyle3 = function (c) {

From 57eb9859a747c01e31cb3778fc18b6b81a1dcde4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 2 Aug 2013 12:11:06 +0200
Subject: [PATCH 378/450] [tokenizer] simplified _cleanup a bit

---
 lib/Tokenizer.js | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
index 9cb0969..76c0132 100644
--- a/lib/Tokenizer.js
+++ b/lib/Tokenizer.js
@@ -497,7 +497,7 @@ Tokenizer.prototype._stateAfterStyle4 = function (c) {
 };
 
 Tokenizer.prototype._cleanup = function () {
-  if(this._sectionStart === -1){
+	if(this._sectionStart < 0){
 		this._buffer = "";
 		this._index = 0;
 	} else {
@@ -511,7 +511,7 @@ Tokenizer.prototype._cleanup = function () {
 			//the section just started
 			this._buffer = "";
 			this._index = 0;
-		} else if(this._sectionStart > 0){
+		} else {
 			//remove everything unnecessary
 			this._buffer = this._buffer.substr(this._sectionStart);
 			this._index -= this._sectionStart;

From 917ecf05fce44e39cc330e5012d7530b396e0097 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 2 Aug 2013 12:12:30 +0200
Subject: [PATCH 379/450] [tokenizer] united some branches

---
 lib/Tokenizer.js | 16 +++-------------
 1 file changed, 3 insertions(+), 13 deletions(-)

diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
index 76c0132..e105c46 100644
--- a/lib/Tokenizer.js
+++ b/lib/Tokenizer.js
@@ -100,19 +100,15 @@ Tokenizer.prototype._stateBeforeTagName = function (c) {
 		this._state = BEFORE_CLOSING_TAG_NAME;
 	} else if(c === ">" || this._special !== SPECIAL_NONE || whitespace(c)) {
 		this._state = TEXT;
-	} else if(whitespace(c)) {
-		// skip
 	} else if(c === "!"){
 		this._state = BEFORE_DECLARATION;
 		this._sectionStart = this._index + 1;
 	} else if(c === "?"){
 		this._state = IN_PROCESSING_INSTRUCTION;
 		this._sectionStart = this._index + 1;
-	} else if(!this._xmlMode && (c === "s" || c === "S")){
-		this._state = BEFORE_SPECIAL;
-		this._sectionStart = this._index;
 	} else {
-		this._state = IN_TAG_NAME;
+		this._state = (!this._xmlMode && (c === "s" || c === "S")) ?
+						BEFORE_SPECIAL : IN_TAG_NAME;
 		this._sectionStart = this._index;
 	}
 };
@@ -186,15 +182,9 @@ Tokenizer.prototype._stateBeforeAttributeName = function (c) {
 };
 
 Tokenizer.prototype._stateInAttributeName = function (c) {
-	if(c === "="){
-		this._emitIfToken("onattribname");
-		this._state = BEFORE_ATTRIBUTE_VALUE;
-	} else if(whitespace(c)){
+	if(c === "=" || c === "/" || c === ">" || whitespace(c)){
 		this._emitIfToken("onattribname");
 		this._state = AFTER_ATTRIBUTE_NAME;
-	} else if(c === "/" || c === ">"){
-		this._emitIfToken("onattribname");
-		this._state = BEFORE_ATTRIBUTE_NAME;
 		this._reconsume = true;
 	}
 };

From 7f9082c81a483b37462f7d6a3224ed3e8839790e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 2 Aug 2013 12:14:12 +0200
Subject: [PATCH 380/450] [tokenizer] get rid of _reconsume

use _index-- instead
---
 lib/Tokenizer.js | 37 ++++++++++++++++---------------------
 1 file changed, 16 insertions(+), 21 deletions(-)

diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
index e105c46..85c147a 100644
--- a/lib/Tokenizer.js
+++ b/lib/Tokenizer.js
@@ -83,7 +83,6 @@ function Tokenizer(options, cbs){
 	this._special = SPECIAL_NONE;
 	this._cbs = cbs;
 	this._running = true;
-	this._reconsume = false;
 	this._xmlMode = !!(options && options.xmlMode);
 }
 
@@ -138,7 +137,7 @@ Tokenizer.prototype._stateBeforeCloseingTagName = function (c) {
 			this._state = BEFORE_SPECIAL_END;
 		} else {
 			this._state = TEXT;
-			this._reconsume = true;
+			this._index--;
 		}
 	} else {
 		this._state = IN_CLOSING_TAG_NAME;
@@ -185,7 +184,7 @@ Tokenizer.prototype._stateInAttributeName = function (c) {
 	if(c === "=" || c === "/" || c === ">" || whitespace(c)){
 		this._emitIfToken("onattribname");
 		this._state = AFTER_ATTRIBUTE_NAME;
-		this._reconsume = true;
+		this._index--;
 	}
 };
 
@@ -194,7 +193,7 @@ Tokenizer.prototype._stateAfterAttributeName = function (c) {
 		this._state = BEFORE_ATTRIBUTE_VALUE;
 	} else if(c === "/" || c === ">"){
 		this._state = BEFORE_ATTRIBUTE_NAME;
-		this._reconsume = true;
+		this._index--;
 	} else if(!whitespace(c)){
 		this._state = IN_ATTRIBUTE_NAME;
 		this._sectionStart = this._index;
@@ -347,7 +346,7 @@ Tokenizer.prototype._stateBeforeSpecial = function (c) {
 		this._state = BEFORE_STYLE_1;
 	} else {
 		this._state = IN_TAG_NAME;
-		this._reconsume = true; //consume the token again
+		this._index--; //consume the token again
 	}
 };
 
@@ -365,7 +364,7 @@ Tokenizer.prototype._stateBeforeScript1 = function (c) {
 		this._state = BEFORE_SCRIPT_2;
 	} else {
 		this._state = IN_TAG_NAME;
-		this._reconsume = true; //consume the token again
+		this._index--; //consume the token again
 	}
 };
 
@@ -374,7 +373,7 @@ Tokenizer.prototype._stateBeforeScript2 = function (c) {
 		this._state = BEFORE_SCRIPT_3;
 	} else {
 		this._state = IN_TAG_NAME;
-		this._reconsume = true; //consume the token again
+		this._index--; //consume the token again
 	}
 };
 
@@ -383,7 +382,7 @@ Tokenizer.prototype._stateBeforeScript3 = function (c) {
 		this._state = BEFORE_SCRIPT_4;
 	} else {
 		this._state = IN_TAG_NAME;
-		this._reconsume = true; //consume the token again
+		this._index--; //consume the token again
 	}
 };
 
@@ -392,7 +391,7 @@ Tokenizer.prototype._stateBeforeScript4 = function (c) {
 		this._state = BEFORE_SCRIPT_5;
 	} else {
 		this._state = IN_TAG_NAME;
-		this._reconsume = true; //consume the token again
+		this._index--; //consume the token again
 	}
 };
 
@@ -401,7 +400,7 @@ Tokenizer.prototype._stateBeforeScript5 = function (c) {
 		this._special = SPECIAL_SCRIPT;
 	}
 	this._state = IN_TAG_NAME;
-	this._reconsume = true; //consume the token again
+	this._index--; //consume the token again
 };
 
 Tokenizer.prototype._stateAfterScript1 = function (c) {
@@ -424,7 +423,7 @@ Tokenizer.prototype._stateAfterScript5 = function (c) {
 	if(c === ">" || whitespace(c)){
 		this._state = IN_CLOSING_TAG_NAME;
 		this._sectionStart = this._index - 6;
-		this._reconsume = true; //reconsume the token
+		this._index--; //reconsume the token
 	}
 	else this._state = TEXT;
 };
@@ -434,7 +433,7 @@ Tokenizer.prototype._stateBeforeStyle1 = function (c) {
 		this._state = BEFORE_STYLE_2;
 	} else {
 		this._state = IN_TAG_NAME;
-		this._reconsume = true; //consume the token again
+		this._index--; //consume the token again
 	}
 };
 
@@ -443,7 +442,7 @@ Tokenizer.prototype._stateBeforeStyle2 = function (c) {
 		this._state = BEFORE_STYLE_3;
 	} else {
 		this._state = IN_TAG_NAME;
-		this._reconsume = true; //consume the token again
+		this._index--; //consume the token again
 	}
 };
 
@@ -452,7 +451,7 @@ Tokenizer.prototype._stateBeforeStyle3 = function (c) {
 		this._state = BEFORE_STYLE_4;
 	} else {
 		this._state = IN_TAG_NAME;
-		this._reconsume = true; //consume the token again
+		this._index--; //consume the token again
 	}
 };
 
@@ -461,7 +460,7 @@ Tokenizer.prototype._stateBeforeStyle4 = function (c) {
 		this._special = SPECIAL_STYLE;
 	}
 	this._state = IN_TAG_NAME;
-	this._reconsume = true; //consume the token again
+	this._index--; //consume the token again
 };
 
 Tokenizer.prototype._stateAfterStyle1 = function (c) {
@@ -481,7 +480,7 @@ Tokenizer.prototype._stateAfterStyle4 = function (c) {
 	if(c === ">" || whitespace(c)){
 		this._state = IN_CLOSING_TAG_NAME;
 		this._sectionStart = this._index - 5;
-		this._reconsume = true; //reconsume the token
+		this._index--; //reconsume the token
 	}
 	else this._state = TEXT;
 };
@@ -665,11 +664,7 @@ Tokenizer.prototype.write = function(chunk){
 			this._cbs.onerror(Error("unknown _state"), this._state);
 		}
 
-		if (this._reconsume) {
-    		this._reconsume = false;
-		} else {
-			this._index++;
-		}
+		this._index++;
 	}
 
 	this._cleanup();

From 4bc1ec47cf4a31ce5ebe2f5628f3dcf9928a4cc5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 2 Aug 2013 12:20:14 +0200
Subject: [PATCH 381/450] [tokenizer] even more ternaries

---
 lib/Tokenizer.js | 19 +++++--------------
 1 file changed, 5 insertions(+), 14 deletions(-)

diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
index 85c147a..ca3c8d3 100644
--- a/lib/Tokenizer.js
+++ b/lib/Tokenizer.js
@@ -351,12 +351,9 @@ Tokenizer.prototype._stateBeforeSpecial = function (c) {
 };
 
 Tokenizer.prototype._stateBeforeSpecialEnd = function (c) {
-	if(this._special === SPECIAL_SCRIPT && (c === "c" || c === "C")){
-		this._state = AFTER_SCRIPT_1;
-	} else if(this._special === SPECIAL_STYLE && (c === "t" || c === "T")){
-		this._state = AFTER_STYLE_1;
-	}
-	else this._state = TEXT;
+	this._state = (this._special === SPECIAL_SCRIPT && (c === "c" || c === "C")) ?
+		AFTER_SCRIPT_1 : (this._special === SPECIAL_STYLE && (c === "t" || c === "T")) ?
+			AFTER_STYLE_1 : TEXT;
 };
 
 Tokenizer.prototype._stateBeforeScript1 = function (c) {
@@ -396,10 +393,7 @@ Tokenizer.prototype._stateBeforeScript4 = function (c) {
 };
 
 Tokenizer.prototype._stateBeforeScript5 = function (c) {
-	if(c === "/" || c === ">" || whitespace(c)){
-		this._special = SPECIAL_SCRIPT;
-	}
-	this._state = IN_TAG_NAME;
+	this._state = (c === "/" || c === ">" || whitespace(c)) ? SPECIAL_SCRIPT : IN_TAG_NAME;
 	this._index--; //consume the token again
 };
 
@@ -456,10 +450,7 @@ Tokenizer.prototype._stateBeforeStyle3 = function (c) {
 };
 
 Tokenizer.prototype._stateBeforeStyle4 = function (c) {
-	if(c === "/" || c === ">" || whitespace(c)){
-		this._special = SPECIAL_STYLE;
-	}
-	this._state = IN_TAG_NAME;
+	this._state = (c === "/" || c === ">" || whitespace(c)) ? SPECIAL_STYLE : IN_TAG_NAME;
 	this._index--; //consume the token again
 };
 

From 24bbf86e438791a50ed01bfbbc77cfa0c0353d33 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 2 Aug 2013 15:48:18 +0200
Subject: [PATCH 382/450] [tokenizer] added abstractions for common state
 types, fixed previous regression

---
 lib/Tokenizer.js | 180 ++++++++++++++++-------------------------------
 1 file changed, 62 insertions(+), 118 deletions(-)

diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
index ca3c8d3..9dc1a90 100644
--- a/lib/Tokenizer.js
+++ b/lib/Tokenizer.js
@@ -75,6 +75,33 @@ function whitespace(c){
 	return c === " " || c === "\n" || c === "\t" || c === "\f";
 }
 
+function ifElseState(upper, SUCCESS, FAILURE){
+	var lower = upper.toLowerCase();
+
+	if(upper === lower){
+		return function(c){
+			this._state = c === lower ? SUCCESS : FAILURE;
+		};
+	} else {
+		return function(c){
+			this._state = (c === lower || c === upper) ? SUCCESS : FAILURE;
+		};
+	}
+}
+
+function consumeSpecialNameChar(upper, NEXT_STATE){
+	var lower = upper.toLowerCase();
+
+	return function(c){
+		if(c === lower || c === upper){
+			this._state = NEXT_STATE;
+		} else {
+			this._state = IN_TAG_NAME;
+			this._index--; //consume the token again
+		}
+	};
+}
+
 function Tokenizer(options, cbs){
 	this._state = TEXT;
 	this._buffer = "";
@@ -274,9 +301,7 @@ Tokenizer.prototype._stateInComment = function (c) {
 	if(c === "-") this._state = AFTER_COMMENT_1;
 };
 
-Tokenizer.prototype._stateAfterComment1 = function (c) {
-	this._state = c === "-" ? AFTER_COMMENT_2 : IN_COMMENT;
-};
+Tokenizer.prototype._stateAfterComment1 = ifElseState("-", AFTER_COMMENT_2, IN_COMMENT);
 
 Tokenizer.prototype._stateAfterComment2 = function (c) {
 	if(c === ">"){
@@ -290,25 +315,11 @@ Tokenizer.prototype._stateAfterComment2 = function (c) {
 	// else: stay in AFTER_COMMENT_2 (`--->`)
 };
 
-Tokenizer.prototype._stateBeforeCdata1 = function (c) {
-	this._state = c === "C" ? BEFORE_CDATA_2 : IN_DECLARATION;
-};
-
-Tokenizer.prototype._stateBeforeCdata2 = function (c) {
-	this._state = c === "D" ? BEFORE_CDATA_3 : IN_DECLARATION;
-};
-
-Tokenizer.prototype._stateBeforeCdata3 = function (c) {
-	this._state = c === "A" ? BEFORE_CDATA_4 : IN_DECLARATION;
-};
-
-Tokenizer.prototype._stateBeforeCdata4 = function (c) {
-	this._state = c === "T" ? BEFORE_CDATA_5 : IN_DECLARATION;
-};
-
-Tokenizer.prototype._stateBeforeCdata5 = function (c) {
-	this._state = c === "A" ? BEFORE_CDATA_6 : IN_DECLARATION;
-};
+Tokenizer.prototype._stateBeforeCdata1 = ifElseState("C", BEFORE_CDATA_2, IN_DECLARATION);
+Tokenizer.prototype._stateBeforeCdata2 = ifElseState("D", BEFORE_CDATA_3, IN_DECLARATION);
+Tokenizer.prototype._stateBeforeCdata3 = ifElseState("A", BEFORE_CDATA_4, IN_DECLARATION);
+Tokenizer.prototype._stateBeforeCdata4 = ifElseState("T", BEFORE_CDATA_5, IN_DECLARATION);
+Tokenizer.prototype._stateBeforeCdata5 = ifElseState("A", BEFORE_CDATA_6, IN_DECLARATION);
 
 Tokenizer.prototype._stateBeforeCdata6 = function (c) {
 	if(c === "["){
@@ -323,9 +334,7 @@ Tokenizer.prototype._stateInCdata = function (c) {
 	if(c === "]") this._state = AFTER_CDATA_1;
 };
 
-Tokenizer.prototype._stateAfterCdata1 = function (c) {
-	this._state = c === "]" ? AFTER_CDATA_2 : IN_CDATA;
-};
+Tokenizer.prototype._stateAfterCdata1 = ifElseState("]", AFTER_CDATA_2, IN_CDATA);
 
 Tokenizer.prototype._stateAfterCdata2 = function (c) {
 	if(c === ">"){
@@ -340,7 +349,7 @@ Tokenizer.prototype._stateAfterCdata2 = function (c) {
 };
 
 Tokenizer.prototype._stateBeforeSpecial = function (c) {
-  if(c === "c" || c === "C"){
+	if(c === "c" || c === "C"){
 		this._state = BEFORE_SCRIPT_1;
 	} else if(c === "t" || c === "T"){
 		this._state = BEFORE_STYLE_1;
@@ -351,67 +360,31 @@ Tokenizer.prototype._stateBeforeSpecial = function (c) {
 };
 
 Tokenizer.prototype._stateBeforeSpecialEnd = function (c) {
-	this._state = (this._special === SPECIAL_SCRIPT && (c === "c" || c === "C")) ?
-		AFTER_SCRIPT_1 : (this._special === SPECIAL_STYLE && (c === "t" || c === "T")) ?
-			AFTER_STYLE_1 : TEXT;
-};
-
-Tokenizer.prototype._stateBeforeScript1 = function (c) {
-	if(c === "r" || c === "R"){
-		this._state = BEFORE_SCRIPT_2;
-	} else {
-		this._state = IN_TAG_NAME;
-		this._index--; //consume the token again
-	}
-};
-
-Tokenizer.prototype._stateBeforeScript2 = function (c) {
-	if(c === "i" || c === "I"){
-		this._state = BEFORE_SCRIPT_3;
-	} else {
-		this._state = IN_TAG_NAME;
-		this._index--; //consume the token again
-	}
-};
-
-Tokenizer.prototype._stateBeforeScript3 = function (c) {
-	if(c === "p" || c === "P"){
-		this._state = BEFORE_SCRIPT_4;
-	} else {
-		this._state = IN_TAG_NAME;
-		this._index--; //consume the token again
+	if(this._special === SPECIAL_SCRIPT && (c === "c" || c === "C")){
+		this._state = AFTER_SCRIPT_1;
+	} else if(this._special === SPECIAL_STYLE && (c === "t" || c === "T")){
+		this._state = AFTER_STYLE_1;
 	}
+	else this._state = TEXT;
 };
 
-Tokenizer.prototype._stateBeforeScript4 = function (c) {
-	if(c === "t" || c === "T"){
-		this._state = BEFORE_SCRIPT_5;
-	} else {
-		this._state = IN_TAG_NAME;
-		this._index--; //consume the token again
-	}
-};
+Tokenizer.prototype._stateBeforeScript1 = consumeSpecialNameChar("R", BEFORE_SCRIPT_2);
+Tokenizer.prototype._stateBeforeScript2 = consumeSpecialNameChar("I", BEFORE_SCRIPT_3);
+Tokenizer.prototype._stateBeforeScript3 = consumeSpecialNameChar("P", BEFORE_SCRIPT_4);
+Tokenizer.prototype._stateBeforeScript4 = consumeSpecialNameChar("T", BEFORE_SCRIPT_5);
 
 Tokenizer.prototype._stateBeforeScript5 = function (c) {
-	this._state = (c === "/" || c === ">" || whitespace(c)) ? SPECIAL_SCRIPT : IN_TAG_NAME;
+	if(c === "/" || c === ">" || whitespace(c)){
+		this._special = SPECIAL_SCRIPT;
+	}
+	this._state = IN_TAG_NAME;
 	this._index--; //consume the token again
 };
 
-Tokenizer.prototype._stateAfterScript1 = function (c) {
-	this._state = (c === "r" || c === "R") ? AFTER_SCRIPT_2 : TEXT;
-};
-
-Tokenizer.prototype._stateAfterScript2 = function (c) {
-	this._state = (c === "i" || c === "I") ? AFTER_SCRIPT_3 : TEXT;
-};
-
-Tokenizer.prototype._stateAfterScript3 = function (c) {
-	this._state = (c === "p" || c === "P") ? AFTER_SCRIPT_4 : TEXT;
-};
-
-Tokenizer.prototype._stateAfterScript4 = function (c) {
-	this._state = (c === "t" || c === "T") ? AFTER_SCRIPT_5 : TEXT;
-};
+Tokenizer.prototype._stateAfterScript1 = ifElseState("R", AFTER_SCRIPT_2, TEXT);
+Tokenizer.prototype._stateAfterScript2 = ifElseState("I", AFTER_SCRIPT_3, TEXT);
+Tokenizer.prototype._stateAfterScript3 = ifElseState("P", AFTER_SCRIPT_4, TEXT);
+Tokenizer.prototype._stateAfterScript4 = ifElseState("T", AFTER_SCRIPT_5, TEXT);
 
 Tokenizer.prototype._stateAfterScript5 = function (c) {
 	if(c === ">" || whitespace(c)){
@@ -422,50 +395,21 @@ Tokenizer.prototype._stateAfterScript5 = function (c) {
 	else this._state = TEXT;
 };
 
-Tokenizer.prototype._stateBeforeStyle1 = function (c) {
-	if(c === "y" || c === "Y"){
-		this._state = BEFORE_STYLE_2;
-	} else {
-		this._state = IN_TAG_NAME;
-		this._index--; //consume the token again
-	}
-};
-
-Tokenizer.prototype._stateBeforeStyle2 = function (c) {
-	if(c === "l" || c === "L"){
-		this._state = BEFORE_STYLE_3;
-	} else {
-		this._state = IN_TAG_NAME;
-		this._index--; //consume the token again
-	}
-};
-
-Tokenizer.prototype._stateBeforeStyle3 = function (c) {
-	if(c === "e" || c === "E"){
-		this._state = BEFORE_STYLE_4;
-	} else {
-		this._state = IN_TAG_NAME;
-		this._index--; //consume the token again
-	}
-};
+Tokenizer.prototype._stateBeforeStyle1 = consumeSpecialNameChar("Y", BEFORE_STYLE_2);
+Tokenizer.prototype._stateBeforeStyle2 = consumeSpecialNameChar("L", BEFORE_STYLE_3);
+Tokenizer.prototype._stateBeforeStyle3 = consumeSpecialNameChar("E", BEFORE_STYLE_4);
 
 Tokenizer.prototype._stateBeforeStyle4 = function (c) {
-	this._state = (c === "/" || c === ">" || whitespace(c)) ? SPECIAL_STYLE : IN_TAG_NAME;
+	if(c === "/" || c === ">" || whitespace(c)){
+		this._special = SPECIAL_STYLE;
+	}
+	this._state = IN_TAG_NAME;
 	this._index--; //consume the token again
 };
 
-Tokenizer.prototype._stateAfterStyle1 = function (c) {
-	this._state = (c === "y" || c === "Y") ? AFTER_STYLE_2 : TEXT;
-};
-
-Tokenizer.prototype._stateAfterStyle2 = function (c) {
-	this._state = (c === "l" || c === "L") ? AFTER_STYLE_3 : TEXT;
-};
-
-Tokenizer.prototype._stateAfterStyle3 = function (c) {
-	if(c === "e" || c === "E") this._state = AFTER_STYLE_4;
-	else this._state = TEXT;
-};
+Tokenizer.prototype._stateAfterStyle1 = ifElseState("Y", AFTER_STYLE_2, TEXT);
+Tokenizer.prototype._stateAfterStyle2 = ifElseState("L", AFTER_STYLE_3, TEXT);
+Tokenizer.prototype._stateAfterStyle3 = ifElseState("E", AFTER_STYLE_4, TEXT);
 
 Tokenizer.prototype._stateAfterStyle4 = function (c) {
 	if(c === ">" || whitespace(c)){

From ce87df1d308b211dcd9aad76eb59557513d0c069 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 2 Aug 2013 16:37:11 +0200
Subject: [PATCH 383/450] [tokenizer] added _getSection, completely inlined
 _emitIfToken, partly inlined _emitToken

---
 lib/Tokenizer.js | 30 ++++++++++++++++--------------
 1 file changed, 16 insertions(+), 14 deletions(-)

diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
index 9dc1a90..655216b 100644
--- a/lib/Tokenizer.js
+++ b/lib/Tokenizer.js
@@ -115,7 +115,9 @@ function Tokenizer(options, cbs){
 
 Tokenizer.prototype._stateText = function (c) {
 	if(c === "<"){
-		this._emitIfToken("ontext");
+		if(this._index > this._sectionStart){
+			this._cbs.ontext(this._getSection());
+		}
 		this._state = BEFORE_TAG_NAME;
 		this._sectionStart = this._index;
 	}
@@ -145,7 +147,7 @@ Tokenizer.prototype._stateInTagName = function (c) {
 		this._cbs.onselfclosingtag();
 		this._state = AFTER_CLOSING_TAG_NAME;
 	} else if(c === ">"){
-		this._emitToken("onopentagname");
+		this._cbs.onopentagname(this._getSection());
 		this._cbs.onopentagend();
 		this._state = TEXT;
 		this._sectionStart = this._index + 1;
@@ -174,7 +176,7 @@ Tokenizer.prototype._stateBeforeCloseingTagName = function (c) {
 
 Tokenizer.prototype._stateInCloseingTagName = function (c) {
 	if(c === ">"){
-		this._emitToken("onclosetag");
+		this._cbs.onclosetag(this._getSection());
 		this._state = TEXT;
 		this._sectionStart = this._index + 1;
 		this._special = SPECIAL_NONE;
@@ -209,7 +211,10 @@ Tokenizer.prototype._stateBeforeAttributeName = function (c) {
 
 Tokenizer.prototype._stateInAttributeName = function (c) {
 	if(c === "=" || c === "/" || c === ">" || whitespace(c)){
-		this._emitIfToken("onattribname");
+		if(this._index > this._sectionStart){
+			this._cbs.onattribname(this._getSection());
+		}
+		this._sectionStart = -1;
 		this._state = AFTER_ATTRIBUTE_NAME;
 		this._index--;
 	}
@@ -256,7 +261,7 @@ Tokenizer.prototype._stateInAttributeValueSingleQuotes = function (c) {
 
 Tokenizer.prototype._stateInAttributeValueNoQuotes = function (c) {
 	if(c === ">"){
-		this._emitToken("onattribvalue");
+		this._cbs.onattribvalue(this._getSection());
 		this._state = TEXT;
 		this._cbs.onopentagend();
 		this._sectionStart = this._index + 1;
@@ -274,7 +279,7 @@ Tokenizer.prototype._stateBeforeDeclaration = function (c) {
 
 Tokenizer.prototype._stateInDeclaration = function (c) {
 	if(c === ">"){
-		this._emitToken("ondeclaration");
+		this._cbs.ondeclaration(this._getSection());
 		this._state = TEXT;
 		this._sectionStart = this._index + 1;
 	}
@@ -282,7 +287,7 @@ Tokenizer.prototype._stateInDeclaration = function (c) {
 
 Tokenizer.prototype._stateInProcessingInstruction = function (c) {
 	if(c === ">"){
-		this._emitToken("onprocessinginstruction");
+		this._cbs.onprocessinginstruction(this._getSection());
 		this._state = TEXT;
 		this._sectionStart = this._index + 1;
 	}
@@ -639,14 +644,11 @@ Tokenizer.prototype.reset = function(){
 	Tokenizer.call(this, {xmlMode: this._xmlMode}, this._cbs);
 };
 
-Tokenizer.prototype._emitToken = function(name){
-	this._cbs[name](this._buffer.substring(this._sectionStart, this._index));
-	this._sectionStart = -1;
+Tokenizer.prototype._getSection = function(){
+	return this._buffer.substring(this._sectionStart, this._index);
 };
 
-Tokenizer.prototype._emitIfToken = function(name){
-	if(this._index > this._sectionStart){
-		this._cbs[name](this._buffer.substring(this._sectionStart, this._index));
-	}
+Tokenizer.prototype._emitToken = function(name){
+	this._cbs[name](this._getSection());
 	this._sectionStart = -1;
 };

From 607c81a77f9fee32f774a6c5cf44a5ccb95d3892 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 2 Aug 2013 16:45:55 +0200
Subject: [PATCH 384/450] [tokenizer] simplified _stateInTagName

---
 lib/Tokenizer.js | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
index 655216b..2b7fa40 100644
--- a/lib/Tokenizer.js
+++ b/lib/Tokenizer.js
@@ -142,18 +142,10 @@ Tokenizer.prototype._stateBeforeTagName = function (c) {
 };
 
 Tokenizer.prototype._stateInTagName = function (c) {
-	if(c === "/"){
-		this._emitToken("onopentagname");
-		this._cbs.onselfclosingtag();
-		this._state = AFTER_CLOSING_TAG_NAME;
-	} else if(c === ">"){
-		this._cbs.onopentagname(this._getSection());
-		this._cbs.onopentagend();
-		this._state = TEXT;
-		this._sectionStart = this._index + 1;
-	} else if(whitespace(c)){
+	if(c === "/" || c === ">" || whitespace(c)){
 		this._emitToken("onopentagname");
 		this._state = BEFORE_ATTRIBUTE_NAME;
+		this._index--;
 	}
 };
 

From 5b8955a50437a7a667ef65d5ad36d87e713ee45e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 2 Aug 2013 16:53:06 +0200
Subject: [PATCH 385/450] [tokenizer] simplified
 _stateInAttributeValueNoQuotes, reordered _stateBeforeAttributeName

---
 lib/Tokenizer.js | 22 ++++++++--------------
 1 file changed, 8 insertions(+), 14 deletions(-)

diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
index 2b7fa40..d5b905d 100644
--- a/lib/Tokenizer.js
+++ b/lib/Tokenizer.js
@@ -167,15 +167,11 @@ Tokenizer.prototype._stateBeforeCloseingTagName = function (c) {
 };
 
 Tokenizer.prototype._stateInCloseingTagName = function (c) {
-	if(c === ">"){
-		this._cbs.onclosetag(this._getSection());
-		this._state = TEXT;
-		this._sectionStart = this._index + 1;
-		this._special = SPECIAL_NONE;
-	} else if(whitespace(c)){
+	if(c === ">" || whitespace(c)){
 		this._emitToken("onclosetag");
 		this._state = AFTER_CLOSING_TAG_NAME;
 		this._special = SPECIAL_NONE;
+		this._index--;
 	}
 };
 
@@ -188,14 +184,16 @@ Tokenizer.prototype._stateAfterCloseingTagName = function (c) {
 };
 
 Tokenizer.prototype._stateBeforeAttributeName = function (c) {
-	if(c === ">"){
+	if(whitespace(c)){
+		/* noop */
+	} else if(c === ">"){
 		this._state = TEXT;
 		this._cbs.onopentagend();
 		this._sectionStart = this._index + 1;
 	} else if(c === "/"){
 		this._cbs.onselfclosingtag();
 		this._state = AFTER_CLOSING_TAG_NAME;
-	} else if(!whitespace(c)){
+	} else {
 		this._state = IN_ATTRIBUTE_NAME;
 		this._sectionStart = this._index;
 	}
@@ -252,14 +250,10 @@ Tokenizer.prototype._stateInAttributeValueSingleQuotes = function (c) {
 };
 
 Tokenizer.prototype._stateInAttributeValueNoQuotes = function (c) {
-	if(c === ">"){
-		this._cbs.onattribvalue(this._getSection());
-		this._state = TEXT;
-		this._cbs.onopentagend();
-		this._sectionStart = this._index + 1;
-	} else if(whitespace(c)){
+	if(whitespace(c) || c === ">"){
 		this._emitToken("onattribvalue");
 		this._state = BEFORE_ATTRIBUTE_NAME;
+		this._index--;
 	}
 };
 

From bd63b0b1a6e326e1017b36b866b269aa880b8020 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 2 Aug 2013 17:01:54 +0200
Subject: [PATCH 386/450] 3.1.6

---
 package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package.json b/package.json
index 0be5a0c..b886f6a 100644
--- a/package.json
+++ b/package.json
@@ -1,7 +1,7 @@
 {
 	"name": "htmlparser2",
 	"description": "Fast & forgiving HTML/XML/RSS parser",
-	"version": "3.1.5",
+	"version": "3.1.6",
 	"author": "Felix Boehm <me@feedic.com>",
 	"keywords": ["html", "parser", "streams", "xml", "dom", "rss", "feed", "atom"],
 	"contributors": ["Chris Winberry <chris@winberry.net>"],

From 4589ecdeb43a592994f45a078f1855b1f0de8bbf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 2 Aug 2013 17:38:15 +0200
Subject: [PATCH 387/450] [tests] added test for second occurance of same
 attribute

fixes #42
---
 tests/Events/16-double_attribs.json | 45 +++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)
 create mode 100644 tests/Events/16-double_attribs.json

diff --git a/tests/Events/16-double_attribs.json b/tests/Events/16-double_attribs.json
new file mode 100644
index 0000000..bed1d8f
--- /dev/null
+++ b/tests/Events/16-double_attribs.json
@@ -0,0 +1,45 @@
+{
+  "name": "double attribute",
+  "options": {
+    "handler": {},
+    "parser": {}
+  },
+  "html": "<h1 class=test class=boo></h1>",
+  "expected": [
+    {
+      "event": "opentagname",
+      "data": [
+        "h1"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "class",
+        "test"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "class",
+        "boo"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "h1",
+        {
+          "class": "test"
+        }
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "h1"
+      ]
+    }
+  ]
+}
\ No newline at end of file

From 9eea8988f7ea5b69a7dc433e326ad80fe5c33522 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 2 Aug 2013 20:08:45 +0200
Subject: [PATCH 388/450] [tokenizer] started adding support for HTML entities

TODO: so far, only numeric entities are decoded
---
 lib/Tokenizer.js | 142 +++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 131 insertions(+), 11 deletions(-)

diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
index d5b905d..974ca62 100644
--- a/lib/Tokenizer.js
+++ b/lib/Tokenizer.js
@@ -51,6 +51,7 @@ var i = 0,
     BEFORE_SCRIPT_3 = i++, //I
     BEFORE_SCRIPT_4 = i++, //P
     BEFORE_SCRIPT_5 = i++, //T
+    IN_SCRIPT = i++,
     AFTER_SCRIPT_1 = i++, //C
     AFTER_SCRIPT_2 = i++, //R
     AFTER_SCRIPT_3 = i++, //I
@@ -61,14 +62,17 @@ var i = 0,
     BEFORE_STYLE_2 = i++, //Y
     BEFORE_STYLE_3 = i++, //L
     BEFORE_STYLE_4 = i++, //E
+    IN_STYLE = i++,
     AFTER_STYLE_1 = i++, //T
     AFTER_STYLE_2 = i++, //Y
     AFTER_STYLE_3 = i++, //L
     AFTER_STYLE_4 = i++, //E
 
-    SPECIAL_NONE = 0,
-    SPECIAL_SCRIPT = 1,
-    SPECIAL_STYLE = 2;
+    BEFORE_ENTITY = i++,
+    BEFORE_NUMERIC_ENTITY = i++,
+    IN_NAMED_ENTITY = i++,
+    IN_NUMERIC_ENTITY = i++,
+    IN_HEX_ENTITY = i++;
 
 
 function whitespace(c){
@@ -107,10 +111,11 @@ function Tokenizer(options, cbs){
 	this._buffer = "";
 	this._sectionStart = 0;
 	this._index = 0;
-	this._special = SPECIAL_NONE;
+	this._baseState = TEXT;
 	this._cbs = cbs;
 	this._running = true;
 	this._xmlMode = !!(options && options.xmlMode);
+	this._decodeEntities = !!(options && options.decodeEntities);
 }
 
 Tokenizer.prototype._stateText = function (c) {
@@ -120,13 +125,19 @@ Tokenizer.prototype._stateText = function (c) {
 		}
 		this._state = BEFORE_TAG_NAME;
 		this._sectionStart = this._index;
+	} else if(this._decodeEntities && c === "&"){
+		if(this._index > this._sectionStart){
+			this._cbs.ontext(this._getSection());
+		}
+		this._state = BEFORE_ENTITY;
+		this._sectionStart = this._index;
 	}
 };
 
 Tokenizer.prototype._stateBeforeTagName = function (c) {
 	if(c === "/"){
 		this._state = BEFORE_CLOSING_TAG_NAME;
-	} else if(c === ">" || this._special !== SPECIAL_NONE || whitespace(c)) {
+	} else if(c === ">" || this._baseState !== TEXT || whitespace(c)) {
 		this._state = TEXT;
 	} else if(c === "!"){
 		this._state = BEFORE_DECLARATION;
@@ -153,7 +164,7 @@ Tokenizer.prototype._stateBeforeCloseingTagName = function (c) {
 	if(whitespace(c));
 	else if(c === ">"){
 		this._state = TEXT;
-	} else if(this._special !== SPECIAL_NONE){
+	} else if(this._baseState !== TEXT){
 		if(c === "s" || c === "S"){
 			this._state = BEFORE_SPECIAL_END;
 		} else {
@@ -170,7 +181,7 @@ Tokenizer.prototype._stateInCloseingTagName = function (c) {
 	if(c === ">" || whitespace(c)){
 		this._emitToken("onclosetag");
 		this._state = AFTER_CLOSING_TAG_NAME;
-		this._special = SPECIAL_NONE;
+		this._baseState = TEXT;
 		this._index--;
 	}
 };
@@ -190,6 +201,10 @@ Tokenizer.prototype._stateBeforeAttributeName = function (c) {
 		this._state = TEXT;
 		this._cbs.onopentagend();
 		this._sectionStart = this._index + 1;
+
+		if(this._baseState !== IN_SCRIPT && this._baseState !== IN_STYLE){
+			this._baseState = TEXT;
+		}
 	} else if(c === "/"){
 		this._cbs.onselfclosingtag();
 		this._state = AFTER_CLOSING_TAG_NAME;
@@ -239,6 +254,10 @@ Tokenizer.prototype._stateInAttributeValueDoubleQuotes = function (c) {
 	if(c === "\""){
 		this._emitToken("onattribvalue");
 		this._state = BEFORE_ATTRIBUTE_NAME;
+	} else if(this._decodeEntities && c === "&"){
+		this._baseState = IN_ATTRIBUTE_VALUE_DOUBLE_QUOTES;
+		this._state = BEFORE_ENTITY;
+		this._sectionStart = this._index;
 	}
 };
 
@@ -246,6 +265,10 @@ Tokenizer.prototype._stateInAttributeValueSingleQuotes = function (c) {
 	if(c === "'"){
 		this._emitToken("onattribvalue");
 		this._state = BEFORE_ATTRIBUTE_NAME;
+	} else if(this._decodeEntities && c === "&"){
+		this._baseState = IN_ATTRIBUTE_VALUE_SINGLE_QUOTES;
+		this._state = BEFORE_ENTITY;
+		this._sectionStart = this._index;
 	}
 };
 
@@ -254,6 +277,10 @@ Tokenizer.prototype._stateInAttributeValueNoQuotes = function (c) {
 		this._emitToken("onattribvalue");
 		this._state = BEFORE_ATTRIBUTE_NAME;
 		this._index--;
+	} else if(this._decodeEntities && c === "&"){
+		this._baseState = IN_ATTRIBUTE_VALUE_NO_QUOTES;
+		this._state = BEFORE_ENTITY;
+		this._sectionStart = this._index;
 	}
 };
 
@@ -351,9 +378,9 @@ Tokenizer.prototype._stateBeforeSpecial = function (c) {
 };
 
 Tokenizer.prototype._stateBeforeSpecialEnd = function (c) {
-	if(this._special === SPECIAL_SCRIPT && (c === "c" || c === "C")){
+	if(this._baseState === IN_SCRIPT && (c === "c" || c === "C")){
 		this._state = AFTER_SCRIPT_1;
-	} else if(this._special === SPECIAL_STYLE && (c === "t" || c === "T")){
+	} else if(this._baseState === IN_SCRIPT && (c === "t" || c === "T")){
 		this._state = AFTER_STYLE_1;
 	}
 	else this._state = TEXT;
@@ -366,7 +393,7 @@ Tokenizer.prototype._stateBeforeScript4 = consumeSpecialNameChar("T", BEFORE_SCR
 
 Tokenizer.prototype._stateBeforeScript5 = function (c) {
 	if(c === "/" || c === ">" || whitespace(c)){
-		this._special = SPECIAL_SCRIPT;
+		this._baseState = IN_SCRIPT;
 	}
 	this._state = IN_TAG_NAME;
 	this._index--; //consume the token again
@@ -392,7 +419,7 @@ Tokenizer.prototype._stateBeforeStyle3 = consumeSpecialNameChar("E", BEFORE_STYL
 
 Tokenizer.prototype._stateBeforeStyle4 = function (c) {
 	if(c === "/" || c === ">" || whitespace(c)){
-		this._special = SPECIAL_STYLE;
+		this._baseState = IN_STYLE;
 	}
 	this._state = IN_TAG_NAME;
 	this._index--; //consume the token again
@@ -411,6 +438,74 @@ Tokenizer.prototype._stateAfterStyle4 = function (c) {
 	else this._state = TEXT;
 };
 
+Tokenizer.prototype._stateBeforeEntity = ifElseState("#", BEFORE_NUMERIC_ENTITY, IN_NAMED_ENTITY);
+Tokenizer.prototype._stateBeforeNumericEntity = ifElseState("X", IN_HEX_ENTITY, IN_NUMERIC_ENTITY);
+
+Tokenizer.prototype._stateInNamedEntity = function(c){
+	if(c === ";"){
+		//offset 1
+		if(this._sectionStart + 1 !== this._index){
+			var entity = this._buffer.substring(this._sectionStart + 1, this._index);
+			//TODO parse entity
+		}
+		this._state = this._baseState;
+	} else if((c < "a" && c > "z") && (c < "A" && c > "Z") && (c < "0" || c > "9")){
+		if(
+			this._baseState === IN_ATTRIBUTE_VALUE_NO_QUOTES ||
+			this._baseState === IN_ATTRIBUTE_VALUE_SINGLE_QUOTES ||
+			this._baseState === IN_ATTRIBUTE_VALUE_DOUBLE_QUOTES
+		) {
+			if(c !== "="){
+				//consume only an entity containing all characters
+			}
+		} else {
+			//TODO incrementally parse entites
+		}
+		this._state = this._baseState;
+		this._index--;
+	}
+};
+
+Tokenizer.prototype._stateInNumericEntity = function(c){
+	if(c === ";"){
+		//offset 32(&#)
+		if(this._sectionStart + 2 !== this._index){
+			//parse entity
+			var entity = this._buffer.substring(this._sectionStart + 2, this._index);
+			var parsed = parseInt(entity, 10);
+
+			if(parsed === parsed){ //not NaN (TODO: when can this happen?)
+				this._cbs.ontext(String.fromCharCode((parsed)));
+				this._sectionStart = this._index + 1;
+			}
+		}
+		this._state = this._baseState;
+	} else if(c < "0" || c > "9"){
+		this._state = this._baseState;
+		this._index--;
+	}
+};
+
+Tokenizer.prototype._stateInHexEntity = function(c){
+	if(c === ";"){
+		//offset 3 (&#x)
+		if(this._sectionStart + 3 !== this._index){
+			//parse entity
+			var entity = this._buffer.substring(this._sectionStart + 3, this._index);
+			var parsed = parseInt(entity, 16);
+
+			if(parsed === parsed){ //not NaN
+				this._cbs.ontext(String.fromCharCode((parsed)));
+				this._sectionStart = this._index + 1;
+			}
+		}
+		this._state = this._baseState;
+	} else if((c < "a" && c > "f") && (c < "A" && c > "F") && (c < "0" || c > "9")){
+		this._state = this._baseState;
+		this._index--;
+	}
+};
+
 Tokenizer.prototype._cleanup = function () {
 	if(this._sectionStart < 0){
 		this._buffer = "";
@@ -551,6 +646,11 @@ Tokenizer.prototype.write = function(chunk){
 			this._stateBeforeScript5(c);
 		}
 
+		else if(this._state === IN_SCRIPT){
+			this._state = TEXT;
+			this._index--;
+		}
+
 		else if(this._state === AFTER_SCRIPT_1){
 			this._stateAfterScript1(c);
 		} else if(this._state === AFTER_SCRIPT_2){
@@ -576,6 +676,11 @@ Tokenizer.prototype.write = function(chunk){
 			this._stateBeforeStyle4(c);
 		}
 
+		else if(this._state === IN_STYLE){
+			this._state = TEXT;
+			this._index--;
+		}
+
 		else if(this._state === AFTER_STYLE_1){
 			this._stateAfterStyle1(c);
 		} else if(this._state === AFTER_STYLE_2){
@@ -586,6 +691,21 @@ Tokenizer.prototype.write = function(chunk){
 			this._stateAfterStyle4(c);
 		}
 
+		/*
+		* entities
+		*/
+		else if(this._state === BEFORE_ENTITY){
+			this._stateBeforeEntity(c);
+		} else if(this._state === BEFORE_NUMERIC_ENTITY){
+			this._stateBeforeNumericEntity(c);
+		} else if(this._state === IN_NAMED_ENTITY){
+			this._stateInNamedEntity(c);
+		} else if(this._state === IN_NUMERIC_ENTITY){
+			this._stateInNumericEntity(c);
+		} else if(this._state === IN_HEX_ENTITY){
+			this._stateInHexEntity(c);
+		}
+
 		else {
 			this._cbs.onerror(Error("unknown _state"), this._state);
 		}

From fac244946038f3d5a6ddf5b955c232ebe8873f04 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 2 Aug 2013 21:45:20 +0200
Subject: [PATCH 389/450] [tokenizer] corrected decoding of numeric entities

---
 lib/Tokenizer.js | 61 ++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 59 insertions(+), 2 deletions(-)

diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
index 974ca62..30a9e9e 100644
--- a/lib/Tokenizer.js
+++ b/lib/Tokenizer.js
@@ -74,6 +74,39 @@ var i = 0,
     IN_NUMERIC_ENTITY = i++,
     IN_HEX_ENTITY = i++;
 
+var decodeMap = {
+	// as described in mathiasbynens/he#4
+	// with character mapping to themselves removed
+	__proto__: null,
+	0: '\uFFFD',
+	128: '\u20AC',
+	130: '\u201A',
+	131: '\u0192',
+	132: '\u201E',
+	133: '\u2026',
+	134: '\u2020',
+	135: '\u2021',
+	136: '\u02C6',
+	137: '\u2030',
+	138: '\u0160',
+	139: '\u2039',
+	140: '\u0152',
+	142: '\u017D',
+	145: '\u2018',
+	146: '\u2019',
+	147: '\u201C',
+	148: '\u201D',
+	149: '\u2022',
+	150: '\u2013',
+	151: '\u2014',
+	152: '\u02DC',
+	153: '\u2122',
+	154: '\u0161',
+	155: '\u203A',
+	156: '\u0153',
+	158: '\u017E',
+	159: '\u0178'
+};
 
 function whitespace(c){
 	return c === " " || c === "\n" || c === "\t" || c === "\f";
@@ -466,6 +499,28 @@ Tokenizer.prototype._stateInNamedEntity = function(c){
 	}
 };
 
+// modified version of https://github.com/mathiasbynens/he/blob/master/src/he.js#L94-L119
+function decodeCodePoint(codePoint){
+	var output = "";
+
+	if((codePoint >= 0xD800 && codePoint <= 0xDFFF) || codePoint > 0x10FFFF){
+		return "\uFFFD"
+	}
+
+	if(codePoint in decodeMap){
+		codePoint = decodeMap[codePoint];
+	}
+
+	if(codePoint > 0xFFFF){
+		codePoint -= 0x10000;
+		output += String.fromCharCode(codePoint >>> 10 & 0x3FF | 0xD800);
+		codePoint = 0xDC00 | codePoint & 0x3FF;
+	}
+
+	output += String.fromCharCode(codePoint);
+	return output;
+}
+
 Tokenizer.prototype._stateInNumericEntity = function(c){
 	if(c === ";"){
 		//offset 32(&#)
@@ -475,12 +530,13 @@ Tokenizer.prototype._stateInNumericEntity = function(c){
 			var parsed = parseInt(entity, 10);
 
 			if(parsed === parsed){ //not NaN (TODO: when can this happen?)
-				this._cbs.ontext(String.fromCharCode((parsed)));
+				this._cbs.ontext(decodeCodePoint(parsed));
 				this._sectionStart = this._index + 1;
 			}
 		}
 		this._state = this._baseState;
 	} else if(c < "0" || c > "9"){
+		//TODO parse what we have
 		this._state = this._baseState;
 		this._index--;
 	}
@@ -495,12 +551,13 @@ Tokenizer.prototype._stateInHexEntity = function(c){
 			var parsed = parseInt(entity, 16);
 
 			if(parsed === parsed){ //not NaN
-				this._cbs.ontext(String.fromCharCode((parsed)));
+				this._cbs.ontext(decodeCodePoint(parsed));
 				this._sectionStart = this._index + 1;
 			}
 		}
 		this._state = this._baseState;
 	} else if((c < "a" && c > "f") && (c < "A" && c > "F") && (c < "0" || c > "9")){
+		//TODO parse what we have
 		this._state = this._baseState;
 		this._index--;
 	}

From e485fb2d4155f01da62987282df4fa7035bcdca4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 3 Aug 2013 01:20:33 +0200
Subject: [PATCH 390/450] [tokenizer] numeric entities are now decoded

TODO: attribute values aren't handled yet
---
 lib/Tokenizer.js | 65 ++++++++++++++++++++++++------------------------
 1 file changed, 33 insertions(+), 32 deletions(-)

diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
index 30a9e9e..d9f28ed 100644
--- a/lib/Tokenizer.js
+++ b/lib/Tokenizer.js
@@ -482,7 +482,7 @@ Tokenizer.prototype._stateInNamedEntity = function(c){
 			//TODO parse entity
 		}
 		this._state = this._baseState;
-	} else if((c < "a" && c > "z") && (c < "A" && c > "Z") && (c < "0" || c > "9")){
+	} else if((c < "a" || c > "z") && (c < "A" || c > "Z") && (c < "0" || c > "9")){
 		if(
 			this._baseState === IN_ATTRIBUTE_VALUE_NO_QUOTES ||
 			this._baseState === IN_ATTRIBUTE_VALUE_SINGLE_QUOTES ||
@@ -504,7 +504,7 @@ function decodeCodePoint(codePoint){
 	var output = "";
 
 	if((codePoint >= 0xD800 && codePoint <= 0xDFFF) || codePoint > 0x10FFFF){
-		return "\uFFFD"
+		return "\uFFFD";
 	}
 
 	if(codePoint in decodeMap){
@@ -521,44 +521,39 @@ function decodeCodePoint(codePoint){
 	return output;
 }
 
+Tokenizer.prototype._decodeNumericEntity = function(offset, base){
+	var sectionStart = this._sectionStart + offset;
+
+	if(sectionStart !== this._index){
+		//parse entity
+		var entity = this._buffer.substring(sectionStart, this._index);
+		var parsed = parseInt(entity, base);
+
+		if(parsed === parsed){ //not NaN (TODO: when can this happen?)
+			this._cbs.ontext(decodeCodePoint(parsed));
+			this._sectionStart = this._index;
+		}
+	}
+
+	this._state = this._baseState;
+};
+
 Tokenizer.prototype._stateInNumericEntity = function(c){
 	if(c === ";"){
-		//offset 32(&#)
-		if(this._sectionStart + 2 !== this._index){
-			//parse entity
-			var entity = this._buffer.substring(this._sectionStart + 2, this._index);
-			var parsed = parseInt(entity, 10);
-
-			if(parsed === parsed){ //not NaN (TODO: when can this happen?)
-				this._cbs.ontext(decodeCodePoint(parsed));
-				this._sectionStart = this._index + 1;
-			}
-		}
-		this._state = this._baseState;
+		this._decodeNumericEntity(2, 10);
+		this._sectionStart++;
 	} else if(c < "0" || c > "9"){
-		//TODO parse what we have
-		this._state = this._baseState;
+		this._decodeNumericEntity(2, 10);
 		this._index--;
 	}
 };
 
 Tokenizer.prototype._stateInHexEntity = function(c){
 	if(c === ";"){
-		//offset 3 (&#x)
-		if(this._sectionStart + 3 !== this._index){
-			//parse entity
-			var entity = this._buffer.substring(this._sectionStart + 3, this._index);
-			var parsed = parseInt(entity, 16);
-
-			if(parsed === parsed){ //not NaN
-				this._cbs.ontext(decodeCodePoint(parsed));
-				this._sectionStart = this._index + 1;
-			}
-		}
-		this._state = this._baseState;
-	} else if((c < "a" && c > "f") && (c < "A" && c > "F") && (c < "0" || c > "9")){
-		//TODO parse what we have
-		this._state = this._baseState;
+		this._decodeNumericEntity(3, 16);
+		this._sectionStart++;
+	} else if((c < "a" || c > "f") && (c < "A" || c > "F") && (c < "0" || c > "9")){
+		this._decodeNumericEntity(3, 16);
 		this._index--;
 	}
 };
@@ -795,6 +790,12 @@ Tokenizer.prototype.end = function(chunk){
 			this._cbs.onopentagname(data);
 		} else if(this._state === IN_CLOSING_TAG_NAME){
 			this._cbs.onclosetag(data);
+		} else if(this._state === IN_NAMED_ENTITY){
+			// TODO
+		} else if(this._state === IN_NUMERIC_ENTITY){
+			this._decodeNumericEntity(2, 10);
+		} else if(this._state === IN_HEX_ENTITY){
+			this._decodeNumericEntity(3, 16);
 		} else {
 			this._cbs.ontext(data);
 		}
@@ -804,7 +805,7 @@ Tokenizer.prototype.end = function(chunk){
 };
 
 Tokenizer.prototype.reset = function(){
-	Tokenizer.call(this, {xmlMode: this._xmlMode}, this._cbs);
+	Tokenizer.call(this, {xmlMode: this._xmlMode, decodeEntities: this._decodeEntities}, this._cbs);
 };
 
 Tokenizer.prototype._getSection = function(){

From a6fb99ec50ea6378bf9a02a1635fa163deceb739 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 3 Aug 2013 01:20:48 +0200
Subject: [PATCH 391/450] [tests] added test case for numeric entities

---
 tests/Events/17-numeric_entities.json | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)
 create mode 100644 tests/Events/17-numeric_entities.json

diff --git a/tests/Events/17-numeric_entities.json b/tests/Events/17-numeric_entities.json
new file mode 100644
index 0000000..865ba6d
--- /dev/null
+++ b/tests/Events/17-numeric_entities.json
@@ -0,0 +1,16 @@
+{
+  "name": "numeric entities",
+  "options": {
+    "handler": {},
+    "parser": {"decodeEntities": true}
+  },
+  "html": "&#x61;&#x62&#99;&#100!",
+  "expected": [
+    {
+      "event": "text",
+      "data": [
+        "abcd!"
+      ]
+    }
+  ]
+}
\ No newline at end of file

From bcd00ed1237195381a95353a8b4223b04a551e40 Mon Sep 17 00:00:00 2001
From: Forbes Lindesay <forbes@lindesay.co.uk>
Date: Tue, 6 Aug 2013 18:43:12 +0100
Subject: [PATCH 392/450] Update link to demo

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 9a7d218..0ea8ccb 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,7 @@ A forgiving HTML/XML/RSS parser written in JS for NodeJS. The parser can handle
 ##Installing
 	npm install htmlparser2
 	
-A live demo of htmlparser2 is available at http://htmlparser.forbeslindesay.co.uk/
+A live demo of htmlparser2 is available at http://demos.forbeslindesay.co.uk/htmlparser2/
 
 ##Usage
 

From c2db3dfdbbce88e12b02686c218a0d0631a3c25a Mon Sep 17 00:00:00 2001
From: David Rousselie <david.rousselie@happycoders.org>
Date: Wed, 7 Aug 2013 17:01:37 +0200
Subject: [PATCH 393/450] Add startIndex and endIndex positional attributes to
 the parser

---
 lib/Parser.js    | 29 +++++++++++++++++++++++------
 lib/Tokenizer.js | 12 +++++++++---
 2 files changed, 32 insertions(+), 9 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index 9dbe44f..d9adb3f 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -2,7 +2,7 @@ var Tokenizer = require("./Tokenizer.js");
 
 /*
 	Options:
-	
+
 	xmlMode: Special behavior for script/style tags (true by default)
 	lowerCaseAttributeNames: call .toLowerCase for each attribute name (true if xmlMode is `false`)
 	lowerCaseTags: call .toLowerCase for each tag name (true if xmlMode is `false`)
@@ -10,7 +10,7 @@ var Tokenizer = require("./Tokenizer.js");
 
 /*
 	Callbacks:
-	
+
 	oncdataend,
 	oncdatastart,
 	onclosetag,
@@ -81,6 +81,8 @@ function Parser(cbs, options){
 	this._attribs = null;
 	this._stack = [];
 	this._done = false;
+  this.startIndex = 0;
+  this.endIndex = undefined;
 
 	this._tokenizer = new Tokenizer(options, this);
 }
@@ -89,6 +91,9 @@ require("util").inherits(Parser, require("events").EventEmitter);
 
 //Tokenizer event handlers
 Parser.prototype.ontext = function(data){
+  this.startIndex = (this.endIndex === undefined) ? Math.max(this._tokenizer._sectionStart - 1, 0) : this.endIndex + 1;
+  this.endIndex = this._tokenizer._index - 1;
+
 	if(this._cbs.ontext) this._cbs.ontext(data);
 };
 
@@ -116,7 +121,10 @@ Parser.prototype.onopentagname = function(name){
 };
 
 Parser.prototype.onopentagend = function(){
-	if(this._attribname !== "") this.onattribvalue("");
+  this.startIndex = (this.endIndex === undefined) ? Math.max(this._tokenizer._sectionStart - 1, 0) : this.endIndex + 1;
+  this.endIndex = this._tokenizer._index;
+
+  if(this._attribname !== "") this.onattribvalue("");
 	if(this._attribs){
 		if(this._cbs.onopentag) this._cbs.onopentag(this._tagname, this._attribs);
 		this._attribs = null;
@@ -128,6 +136,9 @@ Parser.prototype.onopentagend = function(){
 };
 
 Parser.prototype.onclosetag = function(name){
+  this.startIndex = (this.endIndex === undefined) ? Math.max(this._tokenizer._sectionStart - 1, 0) : this.endIndex + 1;
+  this.endIndex = this._tokenizer._index;
+
 	if(!(this._options.xmlMode || "lowerCaseTags" in this._options) || this._options.lowerCaseTags){
 		name = name.toLowerCase();
 	}
@@ -145,7 +156,7 @@ Parser.prototype.onclosetag = function(name){
 		}
 	} else if(!this._options.xmlMode && (name === "br" || name === "p")){
 		this.onopentagname(name);
-		this.onselfclosingtag();		
+		this.onselfclosingtag();
 	}
 };
 
@@ -204,12 +215,18 @@ Parser.prototype.onprocessinginstruction = function(value){
 };
 
 Parser.prototype.oncomment = function(value){
-	if(this._cbs.oncomment) this._cbs.oncomment(value);
+  this.startIndex = (this.endIndex === undefined) ? Math.max(this._tokenizer._sectionStart - 4, 0) : this.endIndex + 1;
+  this.endIndex = this._tokenizer._index;
+
+  if(this._cbs.oncomment) this._cbs.oncomment(value);
 	if(this._cbs.oncommentend) this._cbs.oncommentend();
 };
 
 Parser.prototype.oncdata = function(value){
-	if(this._options.xmlMode){
+  this.startIndex = (this.endIndex === undefined) ? Math.max(this._tokenizer._sectionStart - 1, 0) : this.endIndex + 1;
+  this.endIndex = this._tokenizer._index;
+
+  if(this._options.xmlMode){
 		if(this._cbs.oncdatastart) this._cbs.oncdatastart();
 		if(this._cbs.ontext) this._cbs.ontext(value);
 		if(this._cbs.oncdataend) this._cbs.oncdataend();
diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
index d9f28ed..c583f0e 100644
--- a/lib/Tokenizer.js
+++ b/lib/Tokenizer.js
@@ -5,6 +5,7 @@ var i = 0,
     TEXT = i++,
     BEFORE_TAG_NAME = i++, //after <
     IN_TAG_NAME = i++,
+    BEFORE_SELF_CLOSING_TAG_NAME = i++,
     BEFORE_CLOSING_TAG_NAME = i++,
     IN_CLOSING_TAG_NAME = i++,
     AFTER_CLOSING_TAG_NAME = i++,
@@ -231,16 +232,19 @@ Tokenizer.prototype._stateBeforeAttributeName = function (c) {
 	if(whitespace(c)){
 		/* noop */
 	} else if(c === ">"){
+    if (this._state === BEFORE_SELF_CLOSING_TAG_NAME) {
+      this._cbs.onselfclosingtag();
+    } else {
+		  this._cbs.onopentagend();
+    }
 		this._state = TEXT;
-		this._cbs.onopentagend();
 		this._sectionStart = this._index + 1;
 
 		if(this._baseState !== IN_SCRIPT && this._baseState !== IN_STYLE){
 			this._baseState = TEXT;
 		}
 	} else if(c === "/"){
-		this._cbs.onselfclosingtag();
-		this._state = AFTER_CLOSING_TAG_NAME;
+		this._state = BEFORE_SELF_CLOSING_TAG_NAME;
 	} else {
 		this._state = IN_ATTRIBUTE_NAME;
 		this._sectionStart = this._index;
@@ -601,6 +605,8 @@ Tokenizer.prototype.write = function(chunk){
 			this._stateInCloseingTagName(c);
 		} else if(this._state === AFTER_CLOSING_TAG_NAME){
 			this._stateAfterCloseingTagName(c);
+		} else if(this._state === BEFORE_SELF_CLOSING_TAG_NAME){
+			this._stateBeforeAttributeName(c);
 		}
 
 		/*

From b70b28d85165532db5cf7db5889824d23b419cec Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 16 Aug 2013 21:48:12 +0200
Subject: [PATCH 394/450] [tokenizer] renamed the self-closing tags state,
 moved it to its own function

---
 lib/Tokenizer.js | 35 ++++++++++++++++++++++-------------
 1 file changed, 22 insertions(+), 13 deletions(-)

diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
index c583f0e..bda61e8 100644
--- a/lib/Tokenizer.js
+++ b/lib/Tokenizer.js
@@ -5,7 +5,7 @@ var i = 0,
     TEXT = i++,
     BEFORE_TAG_NAME = i++, //after <
     IN_TAG_NAME = i++,
-    BEFORE_SELF_CLOSING_TAG_NAME = i++,
+    IN_SELF_CLOSING_TAG = i++,
     BEFORE_CLOSING_TAG_NAME = i++,
     IN_CLOSING_TAG_NAME = i++,
     AFTER_CLOSING_TAG_NAME = i++,
@@ -229,14 +229,8 @@ Tokenizer.prototype._stateAfterCloseingTagName = function (c) {
 };
 
 Tokenizer.prototype._stateBeforeAttributeName = function (c) {
-	if(whitespace(c)){
-		/* noop */
-	} else if(c === ">"){
-    if (this._state === BEFORE_SELF_CLOSING_TAG_NAME) {
-      this._cbs.onselfclosingtag();
-    } else {
-		  this._cbs.onopentagend();
-    }
+	if(c === ">"){
+		this._cbs.onopentagend();
 		this._state = TEXT;
 		this._sectionStart = this._index + 1;
 
@@ -244,13 +238,28 @@ Tokenizer.prototype._stateBeforeAttributeName = function (c) {
 			this._baseState = TEXT;
 		}
 	} else if(c === "/"){
-		this._state = BEFORE_SELF_CLOSING_TAG_NAME;
-	} else {
+		this._state = IN_SELF_CLOSING_TAG;
+	} else if(!whitespace(c)){
 		this._state = IN_ATTRIBUTE_NAME;
 		this._sectionStart = this._index;
 	}
 };
 
+Tokenizer.prototype._stateInSelfClosingTag = function(c){
+	if(c === ">"){
+		this._cbs.onselfclosingtag();
+		this._state = TEXT;
+		this._sectionStart = this._index + 1;
+
+		if(this._baseState !== IN_SCRIPT && this._baseState !== IN_STYLE){
+			this._baseState = TEXT;
+		}
+	} else if(!whitespace(c)){
+		this._state = BEFORE_ATTRIBUTE_NAME;
+		this._index--;
+	}
+}
+
 Tokenizer.prototype._stateInAttributeName = function (c) {
 	if(c === "=" || c === "/" || c === ">" || whitespace(c)){
 		if(this._index > this._sectionStart){
@@ -605,8 +614,8 @@ Tokenizer.prototype.write = function(chunk){
 			this._stateInCloseingTagName(c);
 		} else if(this._state === AFTER_CLOSING_TAG_NAME){
 			this._stateAfterCloseingTagName(c);
-		} else if(this._state === BEFORE_SELF_CLOSING_TAG_NAME){
-			this._stateBeforeAttributeName(c);
+		} else if(this._state === IN_SELF_CLOSING_TAG){
+			this._stateInSelfClosingTag(c);
 		}
 
 		/*

From ad1d8f0f8488ee2f712770a07e468b82493a7d19 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 16 Aug 2013 21:50:15 +0200
Subject: [PATCH 395/450] [tokenizer] commented out support for entities in
 attributes

requires adding a new event to make this work, so delayed for now
---
 lib/Tokenizer.js | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
index bda61e8..1a6f67b 100644
--- a/lib/Tokenizer.js
+++ b/lib/Tokenizer.js
@@ -300,22 +300,22 @@ Tokenizer.prototype._stateInAttributeValueDoubleQuotes = function (c) {
 	if(c === "\""){
 		this._emitToken("onattribvalue");
 		this._state = BEFORE_ATTRIBUTE_NAME;
-	} else if(this._decodeEntities && c === "&"){
+	}/* else if(this._decodeEntities && c === "&"){
 		this._baseState = IN_ATTRIBUTE_VALUE_DOUBLE_QUOTES;
 		this._state = BEFORE_ENTITY;
 		this._sectionStart = this._index;
-	}
+	}*/
 };
 
 Tokenizer.prototype._stateInAttributeValueSingleQuotes = function (c) {
 	if(c === "'"){
 		this._emitToken("onattribvalue");
 		this._state = BEFORE_ATTRIBUTE_NAME;
-	} else if(this._decodeEntities && c === "&"){
+	}/* else if(this._decodeEntities && c === "&"){
 		this._baseState = IN_ATTRIBUTE_VALUE_SINGLE_QUOTES;
 		this._state = BEFORE_ENTITY;
 		this._sectionStart = this._index;
-	}
+	}*/
 };
 
 Tokenizer.prototype._stateInAttributeValueNoQuotes = function (c) {
@@ -323,11 +323,11 @@ Tokenizer.prototype._stateInAttributeValueNoQuotes = function (c) {
 		this._emitToken("onattribvalue");
 		this._state = BEFORE_ATTRIBUTE_NAME;
 		this._index--;
-	} else if(this._decodeEntities && c === "&"){
+	}/* else if(this._decodeEntities && c === "&"){
 		this._baseState = IN_ATTRIBUTE_VALUE_NO_QUOTES;
 		this._state = BEFORE_ENTITY;
 		this._sectionStart = this._index;
-	}
+	}*/
 };
 
 Tokenizer.prototype._stateBeforeDeclaration = function (c) {

From ab8926e61961807732416f6333a2cf2e97e83c6a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 16 Aug 2013 22:30:28 +0200
Subject: [PATCH 396/450] [readme] updated benchmark results

switched the results to @AndreasMadsen's htmlparser-benchmark
---
 README.md | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/README.md b/README.md
index 0ea8ccb..54cf642 100644
--- a/README.md
+++ b/README.md
@@ -55,21 +55,22 @@ new htmlparser.FeedHandler(function(<error> error, <object> feed){
 ```
 
 ##Performance
-Using a modified version of [node-expat](https://github.com/astro/node-expat)'s `bench.js`, I received the following results (on OSX):
+
+After having some artificial benchmarks for some time, __@AndreasMadsen__ published his [`htmlparser-benchmark`](https://github.com/AndreasMadsen/htmlparser-benchmark), which benchmarks HTML parses based on real-world websites.
+
+At the time of writing, the latest versions of all supported parsers show the following performance characteristics on my MacBook:
 
 ```
-node-xml:     28.03 ms/el
-libxmljs:     11.11 ms/el
-sax:          26.92 ms/el
-node-expat:   07.32 ms/el
-htmlparser:   16.40 ms/el
-htmlparser2:  06.32 ms/el
-
-Winner: htmlparser2
+gumbo-parser   : 28.9543 ms/file ± 15.9772
+html-parser    : 19.0935 ms/file ± 13.4118
+htmlparser     : 48.9674 ms/file ± 293.747
+hubbub         : 30.1816 ms/file ± 16.1811
+libxmljs       : 13.0610 ms/file ± 18.6695
+sax            : 44.5736 ms/file ± 22.6353
+htmlparser2-dom: 5.27927 ms/file ± 4.80156
+htmlparser2    : 3.56451 ms/file ± 2.51882
 ```
 
-The test can be found in [`tests/bench.js`](tests/bench.js).
-
 ##How is this different from [node-htmlparser](https://github.com/tautologistics/node-htmlparser)?
 This is a fork of the `htmlparser` module. The main difference is that this is intended to be used only with node (it runs on other platforms using [browserify](https://github.com/substack/node-browserify)). `htmlparser2` was rewritten multiple times and, while it maintains an API that's compatible with `htmlparser` in most cases, the projects don't share any code anymore.
 

From e5197b391a79e9f4fbe9ae62eb515b25cb1f034d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 16 Aug 2013 22:31:49 +0200
Subject: [PATCH 397/450] [bench] removed internal benchmarks

in favor of htmlparser-benchmark
---
 tests/99-benchmark.js |  21 -------
 tests/bench.js        | 135 ------------------------------------------
 2 files changed, 156 deletions(-)
 delete mode 100644 tests/99-benchmark.js
 delete mode 100644 tests/bench.js

diff --git a/tests/99-benchmark.js b/tests/99-benchmark.js
deleted file mode 100644
index d569e42..0000000
--- a/tests/99-benchmark.js
+++ /dev/null
@@ -1,21 +0,0 @@
-var multiply = function(text){
-		return Array(5e3+1).join(text);
-	},
-	tests = {
-		self_closing: multiply("<br/>"),
-		tag: multiply("<tag foo=bar foobar> Text </tag>"),
-		comment: multiply("<!-- this is <<a> comment -->"),
-		directive: multiply("<?foo bar?>"),
-		special: multiply("<script> THIS IS <SPECIAL> </script>"),
-		xml: multiply("<!directive><tag attr='value'> text <!--Comment<>--></tag>")
-	},
-	cbs = {};
-
-var parser = new (require("../lib/Parser.js"))(cbs),
-	ben = require("ben");
-
-Object.keys(tests).forEach(function(name){
-	console.log("Test", name, "took", ben(150, function(){
-		parser.parseComplete(tests[name]);
-	}));
-});
diff --git a/tests/bench.js b/tests/bench.js
deleted file mode 100644
index 01ceeef..0000000
--- a/tests/bench.js
+++ /dev/null
@@ -1,135 +0,0 @@
-//dependencies have to be installed manually
-
-var ben = require("ben");
-
-var parsers = [];
-
-
-try{
-	var node_xml = require("node-xml");
-
-	function NodeXmlParser() {
-		var parser = new node_xml.SaxParser(function(cb) { });
-		this.parse = function(s) {
-			parser.parseString(s);
-		};
-	}
-	parsers.push([NodeXmlParser, "node-xml"]);
-} catch(e){}
-
-try{
-	var libxml = require("libxmljs");
-
-	function LibXmlJsParser() {
-		var parser = new libxml.SaxPushParser(function(cb) { });
-		this.parse = function(s) {
-			parser.push(s, false);
-		};
-	}
-
-	parsers.push([LibXmlJsParser, "libxmljs"]);
-} catch(e){}
-
-try{
-	var sax = require('sax');
-
-	function SaxParser() {
-		var parser = sax.parser();
-		this.parse = function(s) {
-			parser.write(s);
-		};
-	}
-
-	parsers.push([SaxParser, "sax"]);
-} catch(e){}
-
-try{
-	var expat = require('node-expat');
-
-	function ExpatParser() {
-		var parser = new expat.Parser();
-		this.parse = function(s) {
-			parser.parse(s, false);
-		};
-	}
-
-	parsers.push([ExpatParser, "node-expat"]);
-} catch(e){}
-
-try{
-	var htmlparser = require('htmlparser');
-
-	function HtmlParser() {
-		var handler = new htmlparser.DefaultHandler();
-		var parser = new htmlparser.Parser(handler);
-		this.parse = function(s) {
-			parser.parseComplete(s);
-		};
-	}
-
-	parsers.push([HtmlParser, "htmlparser"]);
-} catch(e){}
-
-
-try{
-	var hubbub = require('hubbub');
-
-	function Hubbub() {
-		var handler = new hubbub.DefaultHandler();
-		var parser = new hubbub.Parser(handler);
-		this.parse = function(s) {
-			parser.parseComplete(s);
-		};
-	}
-
-	parsers.push([Hubbub, "hubbub"]);
-} catch(e){}
-
-try{
-	var htmlParser = require("html-parser");
-
-	function HTMLParser() {
-		var cbs = {};
-		this.parse = function(s){
-			htmlParser.parse(s, cbs);
-		};
-	}
-
-	parsers.push([HTMLParser, "html-parser"]);
-} catch(e){}
-
-try{
-	var htmlparser2 = require('../lib/Parser.js');
-
-	function HtmlParser2() {
-		var parser = new htmlparser2();
-		this.parse = function(s) {
-			parser.write(s);
-		};
-	}
-
-	parsers.push([HtmlParser2, "htmlparser2"]);
-} catch(e){}
-
-var results = parsers.map(function(arr){
-	var p = new arr[0]();
-	var name = arr[1];
-
-	process.stdout.write(name + ":" + Array(14-name.length).join(" "));
-
-	p.parse("<r>");
-	var num = ben(1e6, function(){
-		p.parse("<foo bar='baz'>quux</foo>");
-	});
-
-	console.log((num > 0.01 ? "" : "0") + (num * 1e3).toFixed(2), "ms/el");
-
-	return [name, num];
-});
-
-console.log(
-	"\nWinner:",
-	results.sort(function(a, b){
-		return a[1] - b[1];
-	})[0][0]
-);
\ No newline at end of file

From bc193a6504d784a7cd09874fdbe124b4b7a23f42 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Fri, 16 Aug 2013 22:42:56 +0200
Subject: [PATCH 398/450] [parser] fixed whitespace

---
 lib/Parser.js | 35 ++++++++++++++++++++---------------
 1 file changed, 20 insertions(+), 15 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index d9adb3f..eca0c12 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -32,6 +32,7 @@ var formTags = {
 	datalist: true,
 	textarea: true
 };
+
 var openImpliesClose = {
 	tr      : { tr:true, th:true, td:true },
 	th      : { th:true },
@@ -81,8 +82,9 @@ function Parser(cbs, options){
 	this._attribs = null;
 	this._stack = [];
 	this._done = false;
-  this.startIndex = 0;
-  this.endIndex = undefined;
+
+	this.startIndex = 0;
+	this.endIndex = null;
 
 	this._tokenizer = new Tokenizer(options, this);
 }
@@ -91,8 +93,8 @@ require("util").inherits(Parser, require("events").EventEmitter);
 
 //Tokenizer event handlers
 Parser.prototype.ontext = function(data){
-  this.startIndex = (this.endIndex === undefined) ? Math.max(this._tokenizer._sectionStart - 1, 0) : this.endIndex + 1;
-  this.endIndex = this._tokenizer._index - 1;
+	this.startIndex = (this.endIndex === null) ? Math.max(this._tokenizer._sectionStart - 1, 0) : this.endIndex + 1;
+	this.endIndex = this._tokenizer._index - 1;
 
 	if(this._cbs.ontext) this._cbs.ontext(data);
 };
@@ -121,23 +123,26 @@ Parser.prototype.onopentagname = function(name){
 };
 
 Parser.prototype.onopentagend = function(){
-  this.startIndex = (this.endIndex === undefined) ? Math.max(this._tokenizer._sectionStart - 1, 0) : this.endIndex + 1;
-  this.endIndex = this._tokenizer._index;
+	this.startIndex = (this.endIndex === null) ? Math.max(this._tokenizer._sectionStart - 1, 0) : this.endIndex + 1;
+	this.endIndex = this._tokenizer._index;
 
-  if(this._attribname !== "") this.onattribvalue("");
+	if(this._attribname !== "") this.onattribvalue("");
+    
 	if(this._attribs){
 		if(this._cbs.onopentag) this._cbs.onopentag(this._tagname, this._attribs);
 		this._attribs = null;
 	}
+    
 	if(!this._options.xmlMode && this._cbs.onclosetag && this._tagname in voidElements){
 		this._cbs.onclosetag(this._tagname);
 	}
+    
 	this._tagname = "";
 };
 
 Parser.prototype.onclosetag = function(name){
-  this.startIndex = (this.endIndex === undefined) ? Math.max(this._tokenizer._sectionStart - 1, 0) : this.endIndex + 1;
-  this.endIndex = this._tokenizer._index;
+	this.startIndex = (this.endIndex === null) ? Math.max(this._tokenizer._sectionStart - 1, 0) : this.endIndex + 1;
+	this.endIndex = this._tokenizer._index;
 
 	if(!(this._options.xmlMode || "lowerCaseTags" in this._options) || this._options.lowerCaseTags){
 		name = name.toLowerCase();
@@ -215,18 +220,18 @@ Parser.prototype.onprocessinginstruction = function(value){
 };
 
 Parser.prototype.oncomment = function(value){
-  this.startIndex = (this.endIndex === undefined) ? Math.max(this._tokenizer._sectionStart - 4, 0) : this.endIndex + 1;
-  this.endIndex = this._tokenizer._index;
+	this.startIndex = (this.endIndex === null) ? Math.max(this._tokenizer._sectionStart - 4, 0) : this.endIndex + 1;
+	this.endIndex = this._tokenizer._index;
 
-  if(this._cbs.oncomment) this._cbs.oncomment(value);
+	if(this._cbs.oncomment) this._cbs.oncomment(value);
 	if(this._cbs.oncommentend) this._cbs.oncommentend();
 };
 
 Parser.prototype.oncdata = function(value){
-  this.startIndex = (this.endIndex === undefined) ? Math.max(this._tokenizer._sectionStart - 1, 0) : this.endIndex + 1;
-  this.endIndex = this._tokenizer._index;
+	this.startIndex = (this.endIndex === null) ? Math.max(this._tokenizer._sectionStart - 1, 0) : this.endIndex + 1;
+	this.endIndex = this._tokenizer._index;
 
-  if(this._options.xmlMode){
+	if(this._options.xmlMode){
 		if(this._cbs.oncdatastart) this._cbs.oncdatastart();
 		if(this._cbs.ontext) this._cbs.ontext(value);
 		if(this._cbs.oncdataend) this._cbs.oncdataend();

From 2221630cf9dd215c84a9390abee7853768de1975 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 17 Aug 2013 12:56:57 +0200
Subject: [PATCH 399/450] [parser] moved common logic to _updatePosition
 function

---
 lib/Parser.js | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index eca0c12..7d3ef6d 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -91,10 +91,18 @@ function Parser(cbs, options){
 
 require("util").inherits(Parser, require("events").EventEmitter);
 
+Parser.prototype._updatePosition = function(initialOffset){
+	if(this.endIndex === null){
+		this.startIndex = this._tokenizer._sectionStart <= initialOffset ? 0 : this._tokenizer._sectionStart - initialOffset;
+	}
+	this.startIndex = this.endIndex + 1;
+	this.endIndex = this._tokenizer._index;
+};
+
 //Tokenizer event handlers
 Parser.prototype.ontext = function(data){
-	this.startIndex = (this.endIndex === null) ? Math.max(this._tokenizer._sectionStart - 1, 0) : this.endIndex + 1;
-	this.endIndex = this._tokenizer._index - 1;
+	this._updatePosition(1);
+	this.endIndex--;
 
 	if(this._cbs.ontext) this._cbs.ontext(data);
 };
@@ -123,8 +131,7 @@ Parser.prototype.onopentagname = function(name){
 };
 
 Parser.prototype.onopentagend = function(){
-	this.startIndex = (this.endIndex === null) ? Math.max(this._tokenizer._sectionStart - 1, 0) : this.endIndex + 1;
-	this.endIndex = this._tokenizer._index;
+	this._updatePosition(1);
 
 	if(this._attribname !== "") this.onattribvalue("");
     
@@ -141,8 +148,7 @@ Parser.prototype.onopentagend = function(){
 };
 
 Parser.prototype.onclosetag = function(name){
-	this.startIndex = (this.endIndex === null) ? Math.max(this._tokenizer._sectionStart - 1, 0) : this.endIndex + 1;
-	this.endIndex = this._tokenizer._index;
+	this._updatePosition(1);
 
 	if(!(this._options.xmlMode || "lowerCaseTags" in this._options) || this._options.lowerCaseTags){
 		name = name.toLowerCase();
@@ -220,16 +226,14 @@ Parser.prototype.onprocessinginstruction = function(value){
 };
 
 Parser.prototype.oncomment = function(value){
-	this.startIndex = (this.endIndex === null) ? Math.max(this._tokenizer._sectionStart - 4, 0) : this.endIndex + 1;
-	this.endIndex = this._tokenizer._index;
+	this._updatePosition(4);
 
 	if(this._cbs.oncomment) this._cbs.oncomment(value);
 	if(this._cbs.oncommentend) this._cbs.oncommentend();
 };
 
 Parser.prototype.oncdata = function(value){
-	this.startIndex = (this.endIndex === null) ? Math.max(this._tokenizer._sectionStart - 1, 0) : this.endIndex + 1;
-	this.endIndex = this._tokenizer._index;
+	this._updatePosition(1);
 
 	if(this._options.xmlMode){
 		if(this._cbs.oncdatastart) this._cbs.oncdatastart();

From d26e087043086b3fddb90cb6f295e8f271feb37f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 17 Aug 2013 13:08:14 +0200
Subject: [PATCH 400/450] [tokenizer] renamed IN_ATTRIBUTE_NAME_* states,
 improved formatting

---
 lib/Tokenizer.js | 142 +++++++++++++++++++++++------------------------
 1 file changed, 71 insertions(+), 71 deletions(-)

diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
index 1a6f67b..ac681c5 100644
--- a/lib/Tokenizer.js
+++ b/lib/Tokenizer.js
@@ -2,78 +2,78 @@ module.exports = Tokenizer;
 
 var i = 0,
 
-    TEXT = i++,
-    BEFORE_TAG_NAME = i++, //after <
-    IN_TAG_NAME = i++,
-    IN_SELF_CLOSING_TAG = i++,
-    BEFORE_CLOSING_TAG_NAME = i++,
-    IN_CLOSING_TAG_NAME = i++,
-    AFTER_CLOSING_TAG_NAME = i++,
+    TEXT                      = i++,
+    BEFORE_TAG_NAME           = i++, //after <
+    IN_TAG_NAME               = i++,
+    IN_SELF_CLOSING_TAG       = i++,
+    BEFORE_CLOSING_TAG_NAME   = i++,
+    IN_CLOSING_TAG_NAME       = i++,
+    AFTER_CLOSING_TAG_NAME    = i++,
 
     //attributes
-    BEFORE_ATTRIBUTE_NAME = i++,
-    IN_ATTRIBUTE_NAME = i++,
-    AFTER_ATTRIBUTE_NAME = i++,
-    BEFORE_ATTRIBUTE_VALUE = i++,
-    IN_ATTRIBUTE_VALUE_DOUBLE_QUOTES = i++, // "
-    IN_ATTRIBUTE_VALUE_SINGLE_QUOTES = i++, // '
-    IN_ATTRIBUTE_VALUE_NO_QUOTES = i++,
+    BEFORE_ATTRIBUTE_NAME     = i++,
+    IN_ATTRIBUTE_NAME         = i++,
+    AFTER_ATTRIBUTE_NAME      = i++,
+    BEFORE_ATTRIBUTE_VALUE    = i++,
+    IN_ATTRIBUTE_VALUE_DQ     = i++, // "
+    IN_ATTRIBUTE_VALUE_SQ     = i++, // '
+    IN_ATTRIBUTE_VALUE_NQ     = i++,
 
     //declarations
-    BEFORE_DECLARATION = i++, // !
-    IN_DECLARATION = i++,
+    BEFORE_DECLARATION        = i++, // !
+    IN_DECLARATION            = i++,
 
     //processing instructions
     IN_PROCESSING_INSTRUCTION = i++, // ?
 
     //comments
-    BEFORE_COMMENT = i++,
-    IN_COMMENT = i++,
-    AFTER_COMMENT_1 = i++,
-    AFTER_COMMENT_2 = i++,
+    BEFORE_COMMENT            = i++,
+    IN_COMMENT                = i++,
+    AFTER_COMMENT_1           = i++,
+    AFTER_COMMENT_2           = i++,
 
     //cdata
-    BEFORE_CDATA_1 = i++, // [
-    BEFORE_CDATA_2 = i++, // C
-    BEFORE_CDATA_3 = i++, // D
-    BEFORE_CDATA_4 = i++, // A
-    BEFORE_CDATA_5 = i++, // T
-    BEFORE_CDATA_6 = i++, // A
-    IN_CDATA = i++,// [
-    AFTER_CDATA_1 = i++, // ]
-    AFTER_CDATA_2 = i++, // ]
+    BEFORE_CDATA_1            = i++, // [
+    BEFORE_CDATA_2            = i++, // C
+    BEFORE_CDATA_3            = i++, // D
+    BEFORE_CDATA_4            = i++, // A
+    BEFORE_CDATA_5            = i++, // T
+    BEFORE_CDATA_6            = i++, // A
+    IN_CDATA                  = i++,// [
+    AFTER_CDATA_1             = i++, // ]
+    AFTER_CDATA_2             = i++, // ]
 
     //special tags
-    BEFORE_SPECIAL = i++, //S
-    BEFORE_SPECIAL_END = i++,   //S
-
-    BEFORE_SCRIPT_1 = i++, //C
-    BEFORE_SCRIPT_2 = i++, //R
-    BEFORE_SCRIPT_3 = i++, //I
-    BEFORE_SCRIPT_4 = i++, //P
-    BEFORE_SCRIPT_5 = i++, //T
-    IN_SCRIPT = i++,
-    AFTER_SCRIPT_1 = i++, //C
-    AFTER_SCRIPT_2 = i++, //R
-    AFTER_SCRIPT_3 = i++, //I
-    AFTER_SCRIPT_4 = i++, //P
-    AFTER_SCRIPT_5 = i++, //T
-
-    BEFORE_STYLE_1 = i++, //T
-    BEFORE_STYLE_2 = i++, //Y
-    BEFORE_STYLE_3 = i++, //L
-    BEFORE_STYLE_4 = i++, //E
-    IN_STYLE = i++,
-    AFTER_STYLE_1 = i++, //T
-    AFTER_STYLE_2 = i++, //Y
-    AFTER_STYLE_3 = i++, //L
-    AFTER_STYLE_4 = i++, //E
-
-    BEFORE_ENTITY = i++,
-    BEFORE_NUMERIC_ENTITY = i++,
-    IN_NAMED_ENTITY = i++,
-    IN_NUMERIC_ENTITY = i++,
-    IN_HEX_ENTITY = i++;
+    BEFORE_SPECIAL            = i++, //S
+    BEFORE_SPECIAL_END        = i++,   //S
+
+    BEFORE_SCRIPT_1           = i++, //C
+    BEFORE_SCRIPT_2           = i++, //R
+    BEFORE_SCRIPT_3           = i++, //I
+    BEFORE_SCRIPT_4           = i++, //P
+    BEFORE_SCRIPT_5           = i++, //T
+    IN_SCRIPT                 = i++,
+    AFTER_SCRIPT_1            = i++, //C
+    AFTER_SCRIPT_2            = i++, //R
+    AFTER_SCRIPT_3            = i++, //I
+    AFTER_SCRIPT_4            = i++, //P
+    AFTER_SCRIPT_5            = i++, //T
+
+    BEFORE_STYLE_1            = i++, //T
+    BEFORE_STYLE_2            = i++, //Y
+    BEFORE_STYLE_3            = i++, //L
+    BEFORE_STYLE_4            = i++, //E
+    IN_STYLE                  = i++,
+    AFTER_STYLE_1             = i++, //T
+    AFTER_STYLE_2             = i++, //Y
+    AFTER_STYLE_3             = i++, //L
+    AFTER_STYLE_4             = i++, //E
+
+    BEFORE_ENTITY             = i++, //&
+    BEFORE_NUMERIC_ENTITY     = i++, //#
+    IN_NAMED_ENTITY           = i++,
+    IN_NUMERIC_ENTITY         = i++,
+    IN_HEX_ENTITY             = i++; //X
 
 var decodeMap = {
 	// as described in mathiasbynens/he#4
@@ -285,13 +285,13 @@ Tokenizer.prototype._stateAfterAttributeName = function (c) {
 
 Tokenizer.prototype._stateBeforeAttributeValue = function (c) {
 	if(c === "\""){
-		this._state = IN_ATTRIBUTE_VALUE_DOUBLE_QUOTES;
+		this._state = IN_ATTRIBUTE_VALUE_DQ;
 		this._sectionStart = this._index + 1;
 	} else if(c === "'"){
-		this._state = IN_ATTRIBUTE_VALUE_SINGLE_QUOTES;
+		this._state = IN_ATTRIBUTE_VALUE_SQ;
 		this._sectionStart = this._index + 1;
 	} else if(!whitespace(c)){
-		this._state = IN_ATTRIBUTE_VALUE_NO_QUOTES;
+		this._state = IN_ATTRIBUTE_VALUE_NQ;
 		this._sectionStart = this._index;
 	}
 };
@@ -301,7 +301,7 @@ Tokenizer.prototype._stateInAttributeValueDoubleQuotes = function (c) {
 		this._emitToken("onattribvalue");
 		this._state = BEFORE_ATTRIBUTE_NAME;
 	}/* else if(this._decodeEntities && c === "&"){
-		this._baseState = IN_ATTRIBUTE_VALUE_DOUBLE_QUOTES;
+		this._baseState = IN_ATTRIBUTE_VALUE_DQ;
 		this._state = BEFORE_ENTITY;
 		this._sectionStart = this._index;
 	}*/
@@ -312,7 +312,7 @@ Tokenizer.prototype._stateInAttributeValueSingleQuotes = function (c) {
 		this._emitToken("onattribvalue");
 		this._state = BEFORE_ATTRIBUTE_NAME;
 	}/* else if(this._decodeEntities && c === "&"){
-		this._baseState = IN_ATTRIBUTE_VALUE_SINGLE_QUOTES;
+		this._baseState = IN_ATTRIBUTE_VALUE_SQ;
 		this._state = BEFORE_ENTITY;
 		this._sectionStart = this._index;
 	}*/
@@ -324,7 +324,7 @@ Tokenizer.prototype._stateInAttributeValueNoQuotes = function (c) {
 		this._state = BEFORE_ATTRIBUTE_NAME;
 		this._index--;
 	}/* else if(this._decodeEntities && c === "&"){
-		this._baseState = IN_ATTRIBUTE_VALUE_NO_QUOTES;
+		this._baseState = IN_ATTRIBUTE_VALUE_NQ;
 		this._state = BEFORE_ENTITY;
 		this._sectionStart = this._index;
 	}*/
@@ -497,9 +497,9 @@ Tokenizer.prototype._stateInNamedEntity = function(c){
 		this._state = this._baseState;
 	} else if((c < "a" || c > "z") && (c < "A" || c > "Z") && (c < "0" || c > "9")){
 		if(
-			this._baseState === IN_ATTRIBUTE_VALUE_NO_QUOTES ||
-			this._baseState === IN_ATTRIBUTE_VALUE_SINGLE_QUOTES ||
-			this._baseState === IN_ATTRIBUTE_VALUE_DOUBLE_QUOTES
+			this._baseState === IN_ATTRIBUTE_VALUE_NQ ||
+			this._baseState === IN_ATTRIBUTE_VALUE_SQ ||
+			this._baseState === IN_ATTRIBUTE_VALUE_DQ
 		) {
 			if(c !== "="){
 				//consume only an entity containing all characters
@@ -629,11 +629,11 @@ Tokenizer.prototype.write = function(chunk){
 			this._stateAfterAttributeName(c);
 		} else if(this._state === BEFORE_ATTRIBUTE_VALUE){
 			this._stateBeforeAttributeValue(c);
-		} else if(this._state === IN_ATTRIBUTE_VALUE_DOUBLE_QUOTES){
+		} else if(this._state === IN_ATTRIBUTE_VALUE_DQ){
 			this._stateInAttributeValueDoubleQuotes(c);
-		} else if(this._state === IN_ATTRIBUTE_VALUE_SINGLE_QUOTES){
+		} else if(this._state === IN_ATTRIBUTE_VALUE_SQ){
 			this._stateInAttributeValueSingleQuotes(c);
-		} else if(this._state === IN_ATTRIBUTE_VALUE_NO_QUOTES){
+		} else if(this._state === IN_ATTRIBUTE_VALUE_NQ){
 			this._stateInAttributeValueNoQuotes(c);
 		}
 

From 163a4ce593859e1b26be8fa73d56278bea13dc6b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 17 Aug 2013 13:13:59 +0200
Subject: [PATCH 401/450] [tokenizer] re-added the carriage return as
 whitespace

fixes #62

apparently Google's gumbo-parser does behave this way:
https://github.com/google/gumbo-parser/blob/101726c50e172e45be6002c51b85
e45f27f0c2c6/src/tokenizer.c#L322
---
 lib/Tokenizer.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
index ac681c5..52cb603 100644
--- a/lib/Tokenizer.js
+++ b/lib/Tokenizer.js
@@ -110,7 +110,7 @@ var decodeMap = {
 };
 
 function whitespace(c){
-	return c === " " || c === "\n" || c === "\t" || c === "\f";
+	return c === " " || c === "\n" || c === "\t" || c === "\f" || c === "\r";
 }
 
 function ifElseState(upper, SUCCESS, FAILURE){

From ea26f0e10d8be165d18aac0e3394d02721191bcc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 18 Aug 2013 11:23:02 +0200
Subject: [PATCH 402/450] [tokenizer] fixed handling of unparsed data in end(),
 added support for several states

---
 lib/Tokenizer.js | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
index 52cb603..fe6e680 100644
--- a/lib/Tokenizer.js
+++ b/lib/Tokenizer.js
@@ -794,7 +794,7 @@ Tokenizer.prototype.end = function(chunk){
 	if(chunk) this.write(chunk);
 
 	//if there is remaining data, emit it in a reasonable way
-	if(this._sectionStart > this._index){
+	if(this._sectionStart < this._index){
 		var data = this._buffer.substr(this._sectionStart);
 
 		if(this._state === IN_CDATA || this._state === AFTER_CDATA_1 || this._state === AFTER_CDATA_2){
@@ -803,6 +803,14 @@ Tokenizer.prototype.end = function(chunk){
 			this._cbs.oncomment(data);
 		} else if(this._state === IN_TAG_NAME){
 			this._cbs.onopentagname(data);
+		} else if(this._state === BEFORE_ATTRIBUTE_NAME || this._state === BEFORE_ATTRIBUTE_VALUE || this._state === AFTER_ATTRIBUTE_NAME){
+			this._cbs.onopentagend();
+		} else if(this._state === IN_ATTRIBUTE_NAME){
+			this._cbs.onattribname(data);
+		} else if(this._state === IN_ATTRIBUTE_VALUE_SQ || this._state === IN_ATTRIBUTE_VALUE_DQ){
+			this._cbs.onattribvalue(data.substr(1));
+		} else if(this._state === IN_ATTRIBUTE_VALUE_NQ){
+			this._cbs.onattribvalue(data);
 		} else if(this._state === IN_CLOSING_TAG_NAME){
 			this._cbs.onclosetag(data);
 		} else if(this._state === IN_NAMED_ENTITY){

From 3a927961e2d7c39ba06c0a5613492a1b5599e7bc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 18 Aug 2013 11:54:22 +0200
Subject: [PATCH 403/450] [entities] added maps for normal & legacy entities

---
 lib/entities/entities.json | 1 +
 lib/entities/legacy.json   | 1 +
 2 files changed, 2 insertions(+)
 create mode 100644 lib/entities/entities.json
 create mode 100644 lib/entities/legacy.json

diff --git a/lib/entities/entities.json b/lib/entities/entities.json
new file mode 100644
index 0000000..7ccfcd8
--- /dev/null
+++ b/lib/entities/entities.json
@@ -0,0 +1 @@
+{"Aacute":"\u00C1","aacute":"\u00E1","Abreve":"\u0102","abreve":"\u0103","ac":"\u223E","acd":"\u223F","acE":"\u223E\u0333","Acirc":"\u00C2","acirc":"\u00E2","acute":"\u00B4","Acy":"\u0410","acy":"\u0430","AElig":"\u00C6","aelig":"\u00E6","af":"\u2061","Afr":"\uD835\uDD04","afr":"\uD835\uDD1E","Agrave":"\u00C0","agrave":"\u00E0","alefsym":"\u2135","aleph":"\u2135","Alpha":"\u0391","alpha":"\u03B1","Amacr":"\u0100","amacr":"\u0101","amalg":"\u2A3F","amp":"&","AMP":"&","andand":"\u2A55","And":"\u2A53","and":"\u2227","andd":"\u2A5C","andslope":"\u2A58","andv":"\u2A5A","ang":"\u2220","ange":"\u29A4","angle":"\u2220","angmsdaa":"\u29A8","angmsdab":"\u29A9","angmsdac":"\u29AA","angmsdad":"\u29AB","angmsdae":"\u29AC","angmsdaf":"\u29AD","angmsdag":"\u29AE","angmsdah":"\u29AF","angmsd":"\u2221","angrt":"\u221F","angrtvb":"\u22BE","angrtvbd":"\u299D","angsph":"\u2222","angst":"\u00C5","angzarr":"\u237C","Aogon":"\u0104","aogon":"\u0105","Aopf":"\uD835\uDD38","aopf":"\uD835\uDD52","apacir":"\u2A6F","ap":"\u2248","apE":"\u2A70","ape":"\u224A","apid":"\u224B","apos":"'","ApplyFunction":"\u2061","approx":"\u2248","approxeq":"\u224A","Aring":"\u00C5","aring":"\u00E5","Ascr":"\uD835\uDC9C","ascr":"\uD835\uDCB6","Assign":"\u2254","ast":"*","asymp":"\u2248","asympeq":"\u224D","Atilde":"\u00C3","atilde":"\u00E3","Auml":"\u00C4","auml":"\u00E4","awconint":"\u2233","awint":"\u2A11","backcong":"\u224C","backepsilon":"\u03F6","backprime":"\u2035","backsim":"\u223D","backsimeq":"\u22CD","Backslash":"\u2216","Barv":"\u2AE7","barvee":"\u22BD","barwed":"\u2305","Barwed":"\u2306","barwedge":"\u2305","bbrk":"\u23B5","bbrktbrk":"\u23B6","bcong":"\u224C","Bcy":"\u0411","bcy":"\u0431","bdquo":"\u201E","becaus":"\u2235","because":"\u2235","Because":"\u2235","bemptyv":"\u29B0","bepsi":"\u03F6","bernou":"\u212C","Bernoullis":"\u212C","Beta":"\u0392","beta":"\u03B2","beth":"\u2136","between":"\u226C","Bfr":"\uD835\uDD05","bfr":"\uD835\uDD1F","bigcap":"\u22C2","bigcirc":"\u25EF","bigcup":"\u22C3","bigodot":"\u2A00","bigoplus":"\u2A01","bigotimes":"\u2A02","bigsqcup":"\u2A06","bigstar":"\u2605","bigtriangledown":"\u25BD","bigtriangleup":"\u25B3","biguplus":"\u2A04","bigvee":"\u22C1","bigwedge":"\u22C0","bkarow":"\u290D","blacklozenge":"\u29EB","blacksquare":"\u25AA","blacktriangle":"\u25B4","blacktriangledown":"\u25BE","blacktriangleleft":"\u25C2","blacktriangleright":"\u25B8","blank":"\u2423","blk12":"\u2592","blk14":"\u2591","blk34":"\u2593","block":"\u2588","bne":"=\u20E5","bnequiv":"\u2261\u20E5","bNot":"\u2AED","bnot":"\u2310","Bopf":"\uD835\uDD39","bopf":"\uD835\uDD53","bot":"\u22A5","bottom":"\u22A5","bowtie":"\u22C8","boxbox":"\u29C9","boxdl":"\u2510","boxdL":"\u2555","boxDl":"\u2556","boxDL":"\u2557","boxdr":"\u250C","boxdR":"\u2552","boxDr":"\u2553","boxDR":"\u2554","boxh":"\u2500","boxH":"\u2550","boxhd":"\u252C","boxHd":"\u2564","boxhD":"\u2565","boxHD":"\u2566","boxhu":"\u2534","boxHu":"\u2567","boxhU":"\u2568","boxHU":"\u2569","boxminus":"\u229F","boxplus":"\u229E","boxtimes":"\u22A0","boxul":"\u2518","boxuL":"\u255B","boxUl":"\u255C","boxUL":"\u255D","boxur":"\u2514","boxuR":"\u2558","boxUr":"\u2559","boxUR":"\u255A","boxv":"\u2502","boxV":"\u2551","boxvh":"\u253C","boxvH":"\u256A","boxVh":"\u256B","boxVH":"\u256C","boxvl":"\u2524","boxvL":"\u2561","boxVl":"\u2562","boxVL":"\u2563","boxvr":"\u251C","boxvR":"\u255E","boxVr":"\u255F","boxVR":"\u2560","bprime":"\u2035","breve":"\u02D8","Breve":"\u02D8","brvbar":"\u00A6","bscr":"\uD835\uDCB7","Bscr":"\u212C","bsemi":"\u204F","bsim":"\u223D","bsime":"\u22CD","bsolb":"\u29C5","bsol":"\\","bsolhsub":"\u27C8","bull":"\u2022","bullet":"\u2022","bump":"\u224E","bumpE":"\u2AAE","bumpe":"\u224F","Bumpeq":"\u224E","bumpeq":"\u224F","Cacute":"\u0106","cacute":"\u0107","capand":"\u2A44","capbrcup":"\u2A49","capcap":"\u2A4B","cap":"\u2229","Cap":"\u22D2","capcup":"\u2A47","capdot":"\u2A40","CapitalDifferentialD":"\u2145","caps":"\u2229\uFE00","caret":"\u2041","caron":"\u02C7","Cayleys":"\u212D","ccaps":"\u2A4D","Ccaron":"\u010C","ccaron":"\u010D","Ccedil":"\u00C7","ccedil":"\u00E7","Ccirc":"\u0108","ccirc":"\u0109","Cconint":"\u2230","ccups":"\u2A4C","ccupssm":"\u2A50","Cdot":"\u010A","cdot":"\u010B","cedil":"\u00B8","Cedilla":"\u00B8","cemptyv":"\u29B2","cent":"\u00A2","centerdot":"\u00B7","CenterDot":"\u00B7","cfr":"\uD835\uDD20","Cfr":"\u212D","CHcy":"\u0427","chcy":"\u0447","check":"\u2713","checkmark":"\u2713","Chi":"\u03A7","chi":"\u03C7","circ":"\u02C6","circeq":"\u2257","circlearrowleft":"\u21BA","circlearrowright":"\u21BB","circledast":"\u229B","circledcirc":"\u229A","circleddash":"\u229D","CircleDot":"\u2299","circledR":"\u00AE","circledS":"\u24C8","CircleMinus":"\u2296","CirclePlus":"\u2295","CircleTimes":"\u2297","cir":"\u25CB","cirE":"\u29C3","cire":"\u2257","cirfnint":"\u2A10","cirmid":"\u2AEF","cirscir":"\u29C2","ClockwiseContourIntegral":"\u2232","CloseCurlyDoubleQuote":"\u201D","CloseCurlyQuote":"\u2019","clubs":"\u2663","clubsuit":"\u2663","colon":":","Colon":"\u2237","Colone":"\u2A74","colone":"\u2254","coloneq":"\u2254","comma":",","commat":"@","comp":"\u2201","compfn":"\u2218","complement":"\u2201","complexes":"\u2102","cong":"\u2245","congdot":"\u2A6D","Congruent":"\u2261","conint":"\u222E","Conint":"\u222F","ContourIntegral":"\u222E","copf":"\uD835\uDD54","Copf":"\u2102","coprod":"\u2210","Coproduct":"\u2210","copy":"\u00A9","COPY":"\u00A9","copysr":"\u2117","CounterClockwiseContourIntegral":"\u2233","crarr":"\u21B5","cross":"\u2717","Cross":"\u2A2F","Cscr":"\uD835\uDC9E","cscr":"\uD835\uDCB8","csub":"\u2ACF","csube":"\u2AD1","csup":"\u2AD0","csupe":"\u2AD2","ctdot":"\u22EF","cudarrl":"\u2938","cudarrr":"\u2935","cuepr":"\u22DE","cuesc":"\u22DF","cularr":"\u21B6","cularrp":"\u293D","cupbrcap":"\u2A48","cupcap":"\u2A46","CupCap":"\u224D","cup":"\u222A","Cup":"\u22D3","cupcup":"\u2A4A","cupdot":"\u228D","cupor":"\u2A45","cups":"\u222A\uFE00","curarr":"\u21B7","curarrm":"\u293C","curlyeqprec":"\u22DE","curlyeqsucc":"\u22DF","curlyvee":"\u22CE","curlywedge":"\u22CF","curren":"\u00A4","curvearrowleft":"\u21B6","curvearrowright":"\u21B7","cuvee":"\u22CE","cuwed":"\u22CF","cwconint":"\u2232","cwint":"\u2231","cylcty":"\u232D","dagger":"\u2020","Dagger":"\u2021","daleth":"\u2138","darr":"\u2193","Darr":"\u21A1","dArr":"\u21D3","dash":"\u2010","Dashv":"\u2AE4","dashv":"\u22A3","dbkarow":"\u290F","dblac":"\u02DD","Dcaron":"\u010E","dcaron":"\u010F","Dcy":"\u0414","dcy":"\u0434","ddagger":"\u2021","ddarr":"\u21CA","DD":"\u2145","dd":"\u2146","DDotrahd":"\u2911","ddotseq":"\u2A77","deg":"\u00B0","Del":"\u2207","Delta":"\u0394","delta":"\u03B4","demptyv":"\u29B1","dfisht":"\u297F","Dfr":"\uD835\uDD07","dfr":"\uD835\uDD21","dHar":"\u2965","dharl":"\u21C3","dharr":"\u21C2","DiacriticalAcute":"\u00B4","DiacriticalDot":"\u02D9","DiacriticalDoubleAcute":"\u02DD","DiacriticalGrave":"`","DiacriticalTilde":"\u02DC","diam":"\u22C4","diamond":"\u22C4","Diamond":"\u22C4","diamondsuit":"\u2666","diams":"\u2666","die":"\u00A8","DifferentialD":"\u2146","digamma":"\u03DD","disin":"\u22F2","div":"\u00F7","divide":"\u00F7","divideontimes":"\u22C7","divonx":"\u22C7","DJcy":"\u0402","djcy":"\u0452","dlcorn":"\u231E","dlcrop":"\u230D","dollar":"$","Dopf":"\uD835\uDD3B","dopf":"\uD835\uDD55","Dot":"\u00A8","dot":"\u02D9","DotDot":"\u20DC","doteq":"\u2250","doteqdot":"\u2251","DotEqual":"\u2250","dotminus":"\u2238","dotplus":"\u2214","dotsquare":"\u22A1","doublebarwedge":"\u2306","DoubleContourIntegral":"\u222F","DoubleDot":"\u00A8","DoubleDownArrow":"\u21D3","DoubleLeftArrow":"\u21D0","DoubleLeftRightArrow":"\u21D4","DoubleLeftTee":"\u2AE4","DoubleLongLeftArrow":"\u27F8","DoubleLongLeftRightArrow":"\u27FA","DoubleLongRightArrow":"\u27F9","DoubleRightArrow":"\u21D2","DoubleRightTee":"\u22A8","DoubleUpArrow":"\u21D1","DoubleUpDownArrow":"\u21D5","DoubleVerticalBar":"\u2225","DownArrowBar":"\u2913","downarrow":"\u2193","DownArrow":"\u2193","Downarrow":"\u21D3","DownArrowUpArrow":"\u21F5","DownBreve":"\u0311","downdownarrows":"\u21CA","downharpoonleft":"\u21C3","downharpoonright":"\u21C2","DownLeftRightVector":"\u2950","DownLeftTeeVector":"\u295E","DownLeftVectorBar":"\u2956","DownLeftVector":"\u21BD","DownRightTeeVector":"\u295F","DownRightVectorBar":"\u2957","DownRightVector":"\u21C1","DownTeeArrow":"\u21A7","DownTee":"\u22A4","drbkarow":"\u2910","drcorn":"\u231F","drcrop":"\u230C","Dscr":"\uD835\uDC9F","dscr":"\uD835\uDCB9","DScy":"\u0405","dscy":"\u0455","dsol":"\u29F6","Dstrok":"\u0110","dstrok":"\u0111","dtdot":"\u22F1","dtri":"\u25BF","dtrif":"\u25BE","duarr":"\u21F5","duhar":"\u296F","dwangle":"\u29A6","DZcy":"\u040F","dzcy":"\u045F","dzigrarr":"\u27FF","Eacute":"\u00C9","eacute":"\u00E9","easter":"\u2A6E","Ecaron":"\u011A","ecaron":"\u011B","Ecirc":"\u00CA","ecirc":"\u00EA","ecir":"\u2256","ecolon":"\u2255","Ecy":"\u042D","ecy":"\u044D","eDDot":"\u2A77","Edot":"\u0116","edot":"\u0117","eDot":"\u2251","ee":"\u2147","efDot":"\u2252","Efr":"\uD835\uDD08","efr":"\uD835\uDD22","eg":"\u2A9A","Egrave":"\u00C8","egrave":"\u00E8","egs":"\u2A96","egsdot":"\u2A98","el":"\u2A99","Element":"\u2208","elinters":"\u23E7","ell":"\u2113","els":"\u2A95","elsdot":"\u2A97","Emacr":"\u0112","emacr":"\u0113","empty":"\u2205","emptyset":"\u2205","EmptySmallSquare":"\u25FB","emptyv":"\u2205","EmptyVerySmallSquare":"\u25AB","emsp13":"\u2004","emsp14":"\u2005","emsp":"\u2003","ENG":"\u014A","eng":"\u014B","ensp":"\u2002","Eogon":"\u0118","eogon":"\u0119","Eopf":"\uD835\uDD3C","eopf":"\uD835\uDD56","epar":"\u22D5","eparsl":"\u29E3","eplus":"\u2A71","epsi":"\u03B5","Epsilon":"\u0395","epsilon":"\u03B5","epsiv":"\u03F5","eqcirc":"\u2256","eqcolon":"\u2255","eqsim":"\u2242","eqslantgtr":"\u2A96","eqslantless":"\u2A95","Equal":"\u2A75","equals":"=","EqualTilde":"\u2242","equest":"\u225F","Equilibrium":"\u21CC","equiv":"\u2261","equivDD":"\u2A78","eqvparsl":"\u29E5","erarr":"\u2971","erDot":"\u2253","escr":"\u212F","Escr":"\u2130","esdot":"\u2250","Esim":"\u2A73","esim":"\u2242","Eta":"\u0397","eta":"\u03B7","ETH":"\u00D0","eth":"\u00F0","Euml":"\u00CB","euml":"\u00EB","euro":"\u20AC","excl":"!","exist":"\u2203","Exists":"\u2203","expectation":"\u2130","exponentiale":"\u2147","ExponentialE":"\u2147","fallingdotseq":"\u2252","Fcy":"\u0424","fcy":"\u0444","female":"\u2640","ffilig":"\uFB03","fflig":"\uFB00","ffllig":"\uFB04","Ffr":"\uD835\uDD09","ffr":"\uD835\uDD23","filig":"\uFB01","FilledSmallSquare":"\u25FC","FilledVerySmallSquare":"\u25AA","fjlig":"fj","flat":"\u266D","fllig":"\uFB02","fltns":"\u25B1","fnof":"\u0192","Fopf":"\uD835\uDD3D","fopf":"\uD835\uDD57","forall":"\u2200","ForAll":"\u2200","fork":"\u22D4","forkv":"\u2AD9","Fouriertrf":"\u2131","fpartint":"\u2A0D","frac12":"\u00BD","frac13":"\u2153","frac14":"\u00BC","frac15":"\u2155","frac16":"\u2159","frac18":"\u215B","frac23":"\u2154","frac25":"\u2156","frac34":"\u00BE","frac35":"\u2157","frac38":"\u215C","frac45":"\u2158","frac56":"\u215A","frac58":"\u215D","frac78":"\u215E","frasl":"\u2044","frown":"\u2322","fscr":"\uD835\uDCBB","Fscr":"\u2131","gacute":"\u01F5","Gamma":"\u0393","gamma":"\u03B3","Gammad":"\u03DC","gammad":"\u03DD","gap":"\u2A86","Gbreve":"\u011E","gbreve":"\u011F","Gcedil":"\u0122","Gcirc":"\u011C","gcirc":"\u011D","Gcy":"\u0413","gcy":"\u0433","Gdot":"\u0120","gdot":"\u0121","ge":"\u2265","gE":"\u2267","gEl":"\u2A8C","gel":"\u22DB","geq":"\u2265","geqq":"\u2267","geqslant":"\u2A7E","gescc":"\u2AA9","ges":"\u2A7E","gesdot":"\u2A80","gesdoto":"\u2A82","gesdotol":"\u2A84","gesl":"\u22DB\uFE00","gesles":"\u2A94","Gfr":"\uD835\uDD0A","gfr":"\uD835\uDD24","gg":"\u226B","Gg":"\u22D9","ggg":"\u22D9","gimel":"\u2137","GJcy":"\u0403","gjcy":"\u0453","gla":"\u2AA5","gl":"\u2277","glE":"\u2A92","glj":"\u2AA4","gnap":"\u2A8A","gnapprox":"\u2A8A","gne":"\u2A88","gnE":"\u2269","gneq":"\u2A88","gneqq":"\u2269","gnsim":"\u22E7","Gopf":"\uD835\uDD3E","gopf":"\uD835\uDD58","grave":"`","GreaterEqual":"\u2265","GreaterEqualLess":"\u22DB","GreaterFullEqual":"\u2267","GreaterGreater":"\u2AA2","GreaterLess":"\u2277","GreaterSlantEqual":"\u2A7E","GreaterTilde":"\u2273","Gscr":"\uD835\uDCA2","gscr":"\u210A","gsim":"\u2273","gsime":"\u2A8E","gsiml":"\u2A90","gtcc":"\u2AA7","gtcir":"\u2A7A","gt":">","GT":">","Gt":"\u226B","gtdot":"\u22D7","gtlPar":"\u2995","gtquest":"\u2A7C","gtrapprox":"\u2A86","gtrarr":"\u2978","gtrdot":"\u22D7","gtreqless":"\u22DB","gtreqqless":"\u2A8C","gtrless":"\u2277","gtrsim":"\u2273","gvertneqq":"\u2269\uFE00","gvnE":"\u2269\uFE00","Hacek":"\u02C7","hairsp":"\u200A","half":"\u00BD","hamilt":"\u210B","HARDcy":"\u042A","hardcy":"\u044A","harrcir":"\u2948","harr":"\u2194","hArr":"\u21D4","harrw":"\u21AD","Hat":"^","hbar":"\u210F","Hcirc":"\u0124","hcirc":"\u0125","hearts":"\u2665","heartsuit":"\u2665","hellip":"\u2026","hercon":"\u22B9","hfr":"\uD835\uDD25","Hfr":"\u210C","HilbertSpace":"\u210B","hksearow":"\u2925","hkswarow":"\u2926","hoarr":"\u21FF","homtht":"\u223B","hookleftarrow":"\u21A9","hookrightarrow":"\u21AA","hopf":"\uD835\uDD59","Hopf":"\u210D","horbar":"\u2015","HorizontalLine":"\u2500","hscr":"\uD835\uDCBD","Hscr":"\u210B","hslash":"\u210F","Hstrok":"\u0126","hstrok":"\u0127","HumpDownHump":"\u224E","HumpEqual":"\u224F","hybull":"\u2043","hyphen":"\u2010","Iacute":"\u00CD","iacute":"\u00ED","ic":"\u2063","Icirc":"\u00CE","icirc":"\u00EE","Icy":"\u0418","icy":"\u0438","Idot":"\u0130","IEcy":"\u0415","iecy":"\u0435","iexcl":"\u00A1","iff":"\u21D4","ifr":"\uD835\uDD26","Ifr":"\u2111","Igrave":"\u00CC","igrave":"\u00EC","ii":"\u2148","iiiint":"\u2A0C","iiint":"\u222D","iinfin":"\u29DC","iiota":"\u2129","IJlig":"\u0132","ijlig":"\u0133","Imacr":"\u012A","imacr":"\u012B","image":"\u2111","ImaginaryI":"\u2148","imagline":"\u2110","imagpart":"\u2111","imath":"\u0131","Im":"\u2111","imof":"\u22B7","imped":"\u01B5","Implies":"\u21D2","incare":"\u2105","in":"\u2208","infin":"\u221E","infintie":"\u29DD","inodot":"\u0131","intcal":"\u22BA","int":"\u222B","Int":"\u222C","integers":"\u2124","Integral":"\u222B","intercal":"\u22BA","Intersection":"\u22C2","intlarhk":"\u2A17","intprod":"\u2A3C","InvisibleComma":"\u2063","InvisibleTimes":"\u2062","IOcy":"\u0401","iocy":"\u0451","Iogon":"\u012E","iogon":"\u012F","Iopf":"\uD835\uDD40","iopf":"\uD835\uDD5A","Iota":"\u0399","iota":"\u03B9","iprod":"\u2A3C","iquest":"\u00BF","iscr":"\uD835\uDCBE","Iscr":"\u2110","isin":"\u2208","isindot":"\u22F5","isinE":"\u22F9","isins":"\u22F4","isinsv":"\u22F3","isinv":"\u2208","it":"\u2062","Itilde":"\u0128","itilde":"\u0129","Iukcy":"\u0406","iukcy":"\u0456","Iuml":"\u00CF","iuml":"\u00EF","Jcirc":"\u0134","jcirc":"\u0135","Jcy":"\u0419","jcy":"\u0439","Jfr":"\uD835\uDD0D","jfr":"\uD835\uDD27","jmath":"\u0237","Jopf":"\uD835\uDD41","jopf":"\uD835\uDD5B","Jscr":"\uD835\uDCA5","jscr":"\uD835\uDCBF","Jsercy":"\u0408","jsercy":"\u0458","Jukcy":"\u0404","jukcy":"\u0454","Kappa":"\u039A","kappa":"\u03BA","kappav":"\u03F0","Kcedil":"\u0136","kcedil":"\u0137","Kcy":"\u041A","kcy":"\u043A","Kfr":"\uD835\uDD0E","kfr":"\uD835\uDD28","kgreen":"\u0138","KHcy":"\u0425","khcy":"\u0445","KJcy":"\u040C","kjcy":"\u045C","Kopf":"\uD835\uDD42","kopf":"\uD835\uDD5C","Kscr":"\uD835\uDCA6","kscr":"\uD835\uDCC0","lAarr":"\u21DA","Lacute":"\u0139","lacute":"\u013A","laemptyv":"\u29B4","lagran":"\u2112","Lambda":"\u039B","lambda":"\u03BB","lang":"\u27E8","Lang":"\u27EA","langd":"\u2991","langle":"\u27E8","lap":"\u2A85","Laplacetrf":"\u2112","laquo":"\u00AB","larrb":"\u21E4","larrbfs":"\u291F","larr":"\u2190","Larr":"\u219E","lArr":"\u21D0","larrfs":"\u291D","larrhk":"\u21A9","larrlp":"\u21AB","larrpl":"\u2939","larrsim":"\u2973","larrtl":"\u21A2","latail":"\u2919","lAtail":"\u291B","lat":"\u2AAB","late":"\u2AAD","lates":"\u2AAD\uFE00","lbarr":"\u290C","lBarr":"\u290E","lbbrk":"\u2772","lbrace":"{","lbrack":"[","lbrke":"\u298B","lbrksld":"\u298F","lbrkslu":"\u298D","Lcaron":"\u013D","lcaron":"\u013E","Lcedil":"\u013B","lcedil":"\u013C","lceil":"\u2308","lcub":"{","Lcy":"\u041B","lcy":"\u043B","ldca":"\u2936","ldquo":"\u201C","ldquor":"\u201E","ldrdhar":"\u2967","ldrushar":"\u294B","ldsh":"\u21B2","le":"\u2264","lE":"\u2266","LeftAngleBracket":"\u27E8","LeftArrowBar":"\u21E4","leftarrow":"\u2190","LeftArrow":"\u2190","Leftarrow":"\u21D0","LeftArrowRightArrow":"\u21C6","leftarrowtail":"\u21A2","LeftCeiling":"\u2308","LeftDoubleBracket":"\u27E6","LeftDownTeeVector":"\u2961","LeftDownVectorBar":"\u2959","LeftDownVector":"\u21C3","LeftFloor":"\u230A","leftharpoondown":"\u21BD","leftharpoonup":"\u21BC","leftleftarrows":"\u21C7","leftrightarrow":"\u2194","LeftRightArrow":"\u2194","Leftrightarrow":"\u21D4","leftrightarrows":"\u21C6","leftrightharpoons":"\u21CB","leftrightsquigarrow":"\u21AD","LeftRightVector":"\u294E","LeftTeeArrow":"\u21A4","LeftTee":"\u22A3","LeftTeeVector":"\u295A","leftthreetimes":"\u22CB","LeftTriangleBar":"\u29CF","LeftTriangle":"\u22B2","LeftTriangleEqual":"\u22B4","LeftUpDownVector":"\u2951","LeftUpTeeVector":"\u2960","LeftUpVectorBar":"\u2958","LeftUpVector":"\u21BF","LeftVectorBar":"\u2952","LeftVector":"\u21BC","lEg":"\u2A8B","leg":"\u22DA","leq":"\u2264","leqq":"\u2266","leqslant":"\u2A7D","lescc":"\u2AA8","les":"\u2A7D","lesdot":"\u2A7F","lesdoto":"\u2A81","lesdotor":"\u2A83","lesg":"\u22DA\uFE00","lesges":"\u2A93","lessapprox":"\u2A85","lessdot":"\u22D6","lesseqgtr":"\u22DA","lesseqqgtr":"\u2A8B","LessEqualGreater":"\u22DA","LessFullEqual":"\u2266","LessGreater":"\u2276","lessgtr":"\u2276","LessLess":"\u2AA1","lesssim":"\u2272","LessSlantEqual":"\u2A7D","LessTilde":"\u2272","lfisht":"\u297C","lfloor":"\u230A","Lfr":"\uD835\uDD0F","lfr":"\uD835\uDD29","lg":"\u2276","lgE":"\u2A91","lHar":"\u2962","lhard":"\u21BD","lharu":"\u21BC","lharul":"\u296A","lhblk":"\u2584","LJcy":"\u0409","ljcy":"\u0459","llarr":"\u21C7","ll":"\u226A","Ll":"\u22D8","llcorner":"\u231E","Lleftarrow":"\u21DA","llhard":"\u296B","lltri":"\u25FA","Lmidot":"\u013F","lmidot":"\u0140","lmoustache":"\u23B0","lmoust":"\u23B0","lnap":"\u2A89","lnapprox":"\u2A89","lne":"\u2A87","lnE":"\u2268","lneq":"\u2A87","lneqq":"\u2268","lnsim":"\u22E6","loang":"\u27EC","loarr":"\u21FD","lobrk":"\u27E6","longleftarrow":"\u27F5","LongLeftArrow":"\u27F5","Longleftarrow":"\u27F8","longleftrightarrow":"\u27F7","LongLeftRightArrow":"\u27F7","Longleftrightarrow":"\u27FA","longmapsto":"\u27FC","longrightarrow":"\u27F6","LongRightArrow":"\u27F6","Longrightarrow":"\u27F9","looparrowleft":"\u21AB","looparrowright":"\u21AC","lopar":"\u2985","Lopf":"\uD835\uDD43","lopf":"\uD835\uDD5D","loplus":"\u2A2D","lotimes":"\u2A34","lowast":"\u2217","lowbar":"_","LowerLeftArrow":"\u2199","LowerRightArrow":"\u2198","loz":"\u25CA","lozenge":"\u25CA","lozf":"\u29EB","lpar":"(","lparlt":"\u2993","lrarr":"\u21C6","lrcorner":"\u231F","lrhar":"\u21CB","lrhard":"\u296D","lrm":"\u200E","lrtri":"\u22BF","lsaquo":"\u2039","lscr":"\uD835\uDCC1","Lscr":"\u2112","lsh":"\u21B0","Lsh":"\u21B0","lsim":"\u2272","lsime":"\u2A8D","lsimg":"\u2A8F","lsqb":"[","lsquo":"\u2018","lsquor":"\u201A","Lstrok":"\u0141","lstrok":"\u0142","ltcc":"\u2AA6","ltcir":"\u2A79","lt":"<","LT":"<","Lt":"\u226A","ltdot":"\u22D6","lthree":"\u22CB","ltimes":"\u22C9","ltlarr":"\u2976","ltquest":"\u2A7B","ltri":"\u25C3","ltrie":"\u22B4","ltrif":"\u25C2","ltrPar":"\u2996","lurdshar":"\u294A","luruhar":"\u2966","lvertneqq":"\u2268\uFE00","lvnE":"\u2268\uFE00","macr":"\u00AF","male":"\u2642","malt":"\u2720","maltese":"\u2720","Map":"\u2905","map":"\u21A6","mapsto":"\u21A6","mapstodown":"\u21A7","mapstoleft":"\u21A4","mapstoup":"\u21A5","marker":"\u25AE","mcomma":"\u2A29","Mcy":"\u041C","mcy":"\u043C","mdash":"\u2014","mDDot":"\u223A","measuredangle":"\u2221","MediumSpace":"\u205F","Mellintrf":"\u2133","Mfr":"\uD835\uDD10","mfr":"\uD835\uDD2A","mho":"\u2127","micro":"\u00B5","midast":"*","midcir":"\u2AF0","mid":"\u2223","middot":"\u00B7","minusb":"\u229F","minus":"\u2212","minusd":"\u2238","minusdu":"\u2A2A","MinusPlus":"\u2213","mlcp":"\u2ADB","mldr":"\u2026","mnplus":"\u2213","models":"\u22A7","Mopf":"\uD835\uDD44","mopf":"\uD835\uDD5E","mp":"\u2213","mscr":"\uD835\uDCC2","Mscr":"\u2133","mstpos":"\u223E","Mu":"\u039C","mu":"\u03BC","multimap":"\u22B8","mumap":"\u22B8","nabla":"\u2207","Nacute":"\u0143","nacute":"\u0144","nang":"\u2220\u20D2","nap":"\u2249","napE":"\u2A70\u0338","napid":"\u224B\u0338","napos":"\u0149","napprox":"\u2249","natural":"\u266E","naturals":"\u2115","natur":"\u266E","nbsp":"\u00A0","nbump":"\u224E\u0338","nbumpe":"\u224F\u0338","ncap":"\u2A43","Ncaron":"\u0147","ncaron":"\u0148","Ncedil":"\u0145","ncedil":"\u0146","ncong":"\u2247","ncongdot":"\u2A6D\u0338","ncup":"\u2A42","Ncy":"\u041D","ncy":"\u043D","ndash":"\u2013","nearhk":"\u2924","nearr":"\u2197","neArr":"\u21D7","nearrow":"\u2197","ne":"\u2260","nedot":"\u2250\u0338","NegativeMediumSpace":"\u200B","NegativeThickSpace":"\u200B","NegativeThinSpace":"\u200B","NegativeVeryThinSpace":"\u200B","nequiv":"\u2262","nesear":"\u2928","nesim":"\u2242\u0338","NestedGreaterGreater":"\u226B","NestedLessLess":"\u226A","NewLine":"\n","nexist":"\u2204","nexists":"\u2204","Nfr":"\uD835\uDD11","nfr":"\uD835\uDD2B","ngE":"\u2267\u0338","nge":"\u2271","ngeq":"\u2271","ngeqq":"\u2267\u0338","ngeqslant":"\u2A7E\u0338","nges":"\u2A7E\u0338","nGg":"\u22D9\u0338","ngsim":"\u2275","nGt":"\u226B\u20D2","ngt":"\u226F","ngtr":"\u226F","nGtv":"\u226B\u0338","nharr":"\u21AE","nhArr":"\u21CE","nhpar":"\u2AF2","ni":"\u220B","nis":"\u22FC","nisd":"\u22FA","niv":"\u220B","NJcy":"\u040A","njcy":"\u045A","nlarr":"\u219A","nlArr":"\u21CD","nldr":"\u2025","nlE":"\u2266\u0338","nle":"\u2270","nleftarrow":"\u219A","nLeftarrow":"\u21CD","nleftrightarrow":"\u21AE","nLeftrightarrow":"\u21CE","nleq":"\u2270","nleqq":"\u2266\u0338","nleqslant":"\u2A7D\u0338","nles":"\u2A7D\u0338","nless":"\u226E","nLl":"\u22D8\u0338","nlsim":"\u2274","nLt":"\u226A\u20D2","nlt":"\u226E","nltri":"\u22EA","nltrie":"\u22EC","nLtv":"\u226A\u0338","nmid":"\u2224","NoBreak":"\u2060","NonBreakingSpace":"\u00A0","nopf":"\uD835\uDD5F","Nopf":"\u2115","Not":"\u2AEC","not":"\u00AC","NotCongruent":"\u2262","NotCupCap":"\u226D","NotDoubleVerticalBar":"\u2226","NotElement":"\u2209","NotEqual":"\u2260","NotEqualTilde":"\u2242\u0338","NotExists":"\u2204","NotGreater":"\u226F","NotGreaterEqual":"\u2271","NotGreaterFullEqual":"\u2267\u0338","NotGreaterGreater":"\u226B\u0338","NotGreaterLess":"\u2279","NotGreaterSlantEqual":"\u2A7E\u0338","NotGreaterTilde":"\u2275","NotHumpDownHump":"\u224E\u0338","NotHumpEqual":"\u224F\u0338","notin":"\u2209","notindot":"\u22F5\u0338","notinE":"\u22F9\u0338","notinva":"\u2209","notinvb":"\u22F7","notinvc":"\u22F6","NotLeftTriangleBar":"\u29CF\u0338","NotLeftTriangle":"\u22EA","NotLeftTriangleEqual":"\u22EC","NotLess":"\u226E","NotLessEqual":"\u2270","NotLessGreater":"\u2278","NotLessLess":"\u226A\u0338","NotLessSlantEqual":"\u2A7D\u0338","NotLessTilde":"\u2274","NotNestedGreaterGreater":"\u2AA2\u0338","NotNestedLessLess":"\u2AA1\u0338","notni":"\u220C","notniva":"\u220C","notnivb":"\u22FE","notnivc":"\u22FD","NotPrecedes":"\u2280","NotPrecedesEqual":"\u2AAF\u0338","NotPrecedesSlantEqual":"\u22E0","NotReverseElement":"\u220C","NotRightTriangleBar":"\u29D0\u0338","NotRightTriangle":"\u22EB","NotRightTriangleEqual":"\u22ED","NotSquareSubset":"\u228F\u0338","NotSquareSubsetEqual":"\u22E2","NotSquareSuperset":"\u2290\u0338","NotSquareSupersetEqual":"\u22E3","NotSubset":"\u2282\u20D2","NotSubsetEqual":"\u2288","NotSucceeds":"\u2281","NotSucceedsEqual":"\u2AB0\u0338","NotSucceedsSlantEqual":"\u22E1","NotSucceedsTilde":"\u227F\u0338","NotSuperset":"\u2283\u20D2","NotSupersetEqual":"\u2289","NotTilde":"\u2241","NotTildeEqual":"\u2244","NotTildeFullEqual":"\u2247","NotTildeTilde":"\u2249","NotVerticalBar":"\u2224","nparallel":"\u2226","npar":"\u2226","nparsl":"\u2AFD\u20E5","npart":"\u2202\u0338","npolint":"\u2A14","npr":"\u2280","nprcue":"\u22E0","nprec":"\u2280","npreceq":"\u2AAF\u0338","npre":"\u2AAF\u0338","nrarrc":"\u2933\u0338","nrarr":"\u219B","nrArr":"\u21CF","nrarrw":"\u219D\u0338","nrightarrow":"\u219B","nRightarrow":"\u21CF","nrtri":"\u22EB","nrtrie":"\u22ED","nsc":"\u2281","nsccue":"\u22E1","nsce":"\u2AB0\u0338","Nscr":"\uD835\uDCA9","nscr":"\uD835\uDCC3","nshortmid":"\u2224","nshortparallel":"\u2226","nsim":"\u2241","nsime":"\u2244","nsimeq":"\u2244","nsmid":"\u2224","nspar":"\u2226","nsqsube":"\u22E2","nsqsupe":"\u22E3","nsub":"\u2284","nsubE":"\u2AC5\u0338","nsube":"\u2288","nsubset":"\u2282\u20D2","nsubseteq":"\u2288","nsubseteqq":"\u2AC5\u0338","nsucc":"\u2281","nsucceq":"\u2AB0\u0338","nsup":"\u2285","nsupE":"\u2AC6\u0338","nsupe":"\u2289","nsupset":"\u2283\u20D2","nsupseteq":"\u2289","nsupseteqq":"\u2AC6\u0338","ntgl":"\u2279","Ntilde":"\u00D1","ntilde":"\u00F1","ntlg":"\u2278","ntriangleleft":"\u22EA","ntrianglelefteq":"\u22EC","ntriangleright":"\u22EB","ntrianglerighteq":"\u22ED","Nu":"\u039D","nu":"\u03BD","num":"#","numero":"\u2116","numsp":"\u2007","nvap":"\u224D\u20D2","nvdash":"\u22AC","nvDash":"\u22AD","nVdash":"\u22AE","nVDash":"\u22AF","nvge":"\u2265\u20D2","nvgt":">\u20D2","nvHarr":"\u2904","nvinfin":"\u29DE","nvlArr":"\u2902","nvle":"\u2264\u20D2","nvlt":"<\u20D2","nvltrie":"\u22B4\u20D2","nvrArr":"\u2903","nvrtrie":"\u22B5\u20D2","nvsim":"\u223C\u20D2","nwarhk":"\u2923","nwarr":"\u2196","nwArr":"\u21D6","nwarrow":"\u2196","nwnear":"\u2927","Oacute":"\u00D3","oacute":"\u00F3","oast":"\u229B","Ocirc":"\u00D4","ocirc":"\u00F4","ocir":"\u229A","Ocy":"\u041E","ocy":"\u043E","odash":"\u229D","Odblac":"\u0150","odblac":"\u0151","odiv":"\u2A38","odot":"\u2299","odsold":"\u29BC","OElig":"\u0152","oelig":"\u0153","ofcir":"\u29BF","Ofr":"\uD835\uDD12","ofr":"\uD835\uDD2C","ogon":"\u02DB","Ograve":"\u00D2","ograve":"\u00F2","ogt":"\u29C1","ohbar":"\u29B5","ohm":"\u03A9","oint":"\u222E","olarr":"\u21BA","olcir":"\u29BE","olcross":"\u29BB","oline":"\u203E","olt":"\u29C0","Omacr":"\u014C","omacr":"\u014D","Omega":"\u03A9","omega":"\u03C9","Omicron":"\u039F","omicron":"\u03BF","omid":"\u29B6","ominus":"\u2296","Oopf":"\uD835\uDD46","oopf":"\uD835\uDD60","opar":"\u29B7","OpenCurlyDoubleQuote":"\u201C","OpenCurlyQuote":"\u2018","operp":"\u29B9","oplus":"\u2295","orarr":"\u21BB","Or":"\u2A54","or":"\u2228","ord":"\u2A5D","order":"\u2134","orderof":"\u2134","ordf":"\u00AA","ordm":"\u00BA","origof":"\u22B6","oror":"\u2A56","orslope":"\u2A57","orv":"\u2A5B","oS":"\u24C8","Oscr":"\uD835\uDCAA","oscr":"\u2134","Oslash":"\u00D8","oslash":"\u00F8","osol":"\u2298","Otilde":"\u00D5","otilde":"\u00F5","otimesas":"\u2A36","Otimes":"\u2A37","otimes":"\u2297","Ouml":"\u00D6","ouml":"\u00F6","ovbar":"\u233D","OverBar":"\u203E","OverBrace":"\u23DE","OverBracket":"\u23B4","OverParenthesis":"\u23DC","para":"\u00B6","parallel":"\u2225","par":"\u2225","parsim":"\u2AF3","parsl":"\u2AFD","part":"\u2202","PartialD":"\u2202","Pcy":"\u041F","pcy":"\u043F","percnt":"%","period":".","permil":"\u2030","perp":"\u22A5","pertenk":"\u2031","Pfr":"\uD835\uDD13","pfr":"\uD835\uDD2D","Phi":"\u03A6","phi":"\u03C6","phiv":"\u03D5","phmmat":"\u2133","phone":"\u260E","Pi":"\u03A0","pi":"\u03C0","pitchfork":"\u22D4","piv":"\u03D6","planck":"\u210F","planckh":"\u210E","plankv":"\u210F","plusacir":"\u2A23","plusb":"\u229E","pluscir":"\u2A22","plus":"+","plusdo":"\u2214","plusdu":"\u2A25","pluse":"\u2A72","PlusMinus":"\u00B1","plusmn":"\u00B1","plussim":"\u2A26","plustwo":"\u2A27","pm":"\u00B1","Poincareplane":"\u210C","pointint":"\u2A15","popf":"\uD835\uDD61","Popf":"\u2119","pound":"\u00A3","prap":"\u2AB7","Pr":"\u2ABB","pr":"\u227A","prcue":"\u227C","precapprox":"\u2AB7","prec":"\u227A","preccurlyeq":"\u227C","Precedes":"\u227A","PrecedesEqual":"\u2AAF","PrecedesSlantEqual":"\u227C","PrecedesTilde":"\u227E","preceq":"\u2AAF","precnapprox":"\u2AB9","precneqq":"\u2AB5","precnsim":"\u22E8","pre":"\u2AAF","prE":"\u2AB3","precsim":"\u227E","prime":"\u2032","Prime":"\u2033","primes":"\u2119","prnap":"\u2AB9","prnE":"\u2AB5","prnsim":"\u22E8","prod":"\u220F","Product":"\u220F","profalar":"\u232E","profline":"\u2312","profsurf":"\u2313","prop":"\u221D","Proportional":"\u221D","Proportion":"\u2237","propto":"\u221D","prsim":"\u227E","prurel":"\u22B0","Pscr":"\uD835\uDCAB","pscr":"\uD835\uDCC5","Psi":"\u03A8","psi":"\u03C8","puncsp":"\u2008","Qfr":"\uD835\uDD14","qfr":"\uD835\uDD2E","qint":"\u2A0C","qopf":"\uD835\uDD62","Qopf":"\u211A","qprime":"\u2057","Qscr":"\uD835\uDCAC","qscr":"\uD835\uDCC6","quaternions":"\u210D","quatint":"\u2A16","quest":"?","questeq":"\u225F","quot":"\"","QUOT":"\"","rAarr":"\u21DB","race":"\u223D\u0331","Racute":"\u0154","racute":"\u0155","radic":"\u221A","raemptyv":"\u29B3","rang":"\u27E9","Rang":"\u27EB","rangd":"\u2992","range":"\u29A5","rangle":"\u27E9","raquo":"\u00BB","rarrap":"\u2975","rarrb":"\u21E5","rarrbfs":"\u2920","rarrc":"\u2933","rarr":"\u2192","Rarr":"\u21A0","rArr":"\u21D2","rarrfs":"\u291E","rarrhk":"\u21AA","rarrlp":"\u21AC","rarrpl":"\u2945","rarrsim":"\u2974","Rarrtl":"\u2916","rarrtl":"\u21A3","rarrw":"\u219D","ratail":"\u291A","rAtail":"\u291C","ratio":"\u2236","rationals":"\u211A","rbarr":"\u290D","rBarr":"\u290F","RBarr":"\u2910","rbbrk":"\u2773","rbrace":"}","rbrack":"]","rbrke":"\u298C","rbrksld":"\u298E","rbrkslu":"\u2990","Rcaron":"\u0158","rcaron":"\u0159","Rcedil":"\u0156","rcedil":"\u0157","rceil":"\u2309","rcub":"}","Rcy":"\u0420","rcy":"\u0440","rdca":"\u2937","rdldhar":"\u2969","rdquo":"\u201D","rdquor":"\u201D","rdsh":"\u21B3","real":"\u211C","realine":"\u211B","realpart":"\u211C","reals":"\u211D","Re":"\u211C","rect":"\u25AD","reg":"\u00AE","REG":"\u00AE","ReverseElement":"\u220B","ReverseEquilibrium":"\u21CB","ReverseUpEquilibrium":"\u296F","rfisht":"\u297D","rfloor":"\u230B","rfr":"\uD835\uDD2F","Rfr":"\u211C","rHar":"\u2964","rhard":"\u21C1","rharu":"\u21C0","rharul":"\u296C","Rho":"\u03A1","rho":"\u03C1","rhov":"\u03F1","RightAngleBracket":"\u27E9","RightArrowBar":"\u21E5","rightarrow":"\u2192","RightArrow":"\u2192","Rightarrow":"\u21D2","RightArrowLeftArrow":"\u21C4","rightarrowtail":"\u21A3","RightCeiling":"\u2309","RightDoubleBracket":"\u27E7","RightDownTeeVector":"\u295D","RightDownVectorBar":"\u2955","RightDownVector":"\u21C2","RightFloor":"\u230B","rightharpoondown":"\u21C1","rightharpoonup":"\u21C0","rightleftarrows":"\u21C4","rightleftharpoons":"\u21CC","rightrightarrows":"\u21C9","rightsquigarrow":"\u219D","RightTeeArrow":"\u21A6","RightTee":"\u22A2","RightTeeVector":"\u295B","rightthreetimes":"\u22CC","RightTriangleBar":"\u29D0","RightTriangle":"\u22B3","RightTriangleEqual":"\u22B5","RightUpDownVector":"\u294F","RightUpTeeVector":"\u295C","RightUpVectorBar":"\u2954","RightUpVector":"\u21BE","RightVectorBar":"\u2953","RightVector":"\u21C0","ring":"\u02DA","risingdotseq":"\u2253","rlarr":"\u21C4","rlhar":"\u21CC","rlm":"\u200F","rmoustache":"\u23B1","rmoust":"\u23B1","rnmid":"\u2AEE","roang":"\u27ED","roarr":"\u21FE","robrk":"\u27E7","ropar":"\u2986","ropf":"\uD835\uDD63","Ropf":"\u211D","roplus":"\u2A2E","rotimes":"\u2A35","RoundImplies":"\u2970","rpar":")","rpargt":"\u2994","rppolint":"\u2A12","rrarr":"\u21C9","Rrightarrow":"\u21DB","rsaquo":"\u203A","rscr":"\uD835\uDCC7","Rscr":"\u211B","rsh":"\u21B1","Rsh":"\u21B1","rsqb":"]","rsquo":"\u2019","rsquor":"\u2019","rthree":"\u22CC","rtimes":"\u22CA","rtri":"\u25B9","rtrie":"\u22B5","rtrif":"\u25B8","rtriltri":"\u29CE","RuleDelayed":"\u29F4","ruluhar":"\u2968","rx":"\u211E","Sacute":"\u015A","sacute":"\u015B","sbquo":"\u201A","scap":"\u2AB8","Scaron":"\u0160","scaron":"\u0161","Sc":"\u2ABC","sc":"\u227B","sccue":"\u227D","sce":"\u2AB0","scE":"\u2AB4","Scedil":"\u015E","scedil":"\u015F","Scirc":"\u015C","scirc":"\u015D","scnap":"\u2ABA","scnE":"\u2AB6","scnsim":"\u22E9","scpolint":"\u2A13","scsim":"\u227F","Scy":"\u0421","scy":"\u0441","sdotb":"\u22A1","sdot":"\u22C5","sdote":"\u2A66","searhk":"\u2925","searr":"\u2198","seArr":"\u21D8","searrow":"\u2198","sect":"\u00A7","semi":";","seswar":"\u2929","setminus":"\u2216","setmn":"\u2216","sext":"\u2736","Sfr":"\uD835\uDD16","sfr":"\uD835\uDD30","sfrown":"\u2322","sharp":"\u266F","SHCHcy":"\u0429","shchcy":"\u0449","SHcy":"\u0428","shcy":"\u0448","ShortDownArrow":"\u2193","ShortLeftArrow":"\u2190","shortmid":"\u2223","shortparallel":"\u2225","ShortRightArrow":"\u2192","ShortUpArrow":"\u2191","shy":"\u00AD","Sigma":"\u03A3","sigma":"\u03C3","sigmaf":"\u03C2","sigmav":"\u03C2","sim":"\u223C","simdot":"\u2A6A","sime":"\u2243","simeq":"\u2243","simg":"\u2A9E","simgE":"\u2AA0","siml":"\u2A9D","simlE":"\u2A9F","simne":"\u2246","simplus":"\u2A24","simrarr":"\u2972","slarr":"\u2190","SmallCircle":"\u2218","smallsetminus":"\u2216","smashp":"\u2A33","smeparsl":"\u29E4","smid":"\u2223","smile":"\u2323","smt":"\u2AAA","smte":"\u2AAC","smtes":"\u2AAC\uFE00","SOFTcy":"\u042C","softcy":"\u044C","solbar":"\u233F","solb":"\u29C4","sol":"/","Sopf":"\uD835\uDD4A","sopf":"\uD835\uDD64","spades":"\u2660","spadesuit":"\u2660","spar":"\u2225","sqcap":"\u2293","sqcaps":"\u2293\uFE00","sqcup":"\u2294","sqcups":"\u2294\uFE00","Sqrt":"\u221A","sqsub":"\u228F","sqsube":"\u2291","sqsubset":"\u228F","sqsubseteq":"\u2291","sqsup":"\u2290","sqsupe":"\u2292","sqsupset":"\u2290","sqsupseteq":"\u2292","square":"\u25A1","Square":"\u25A1","SquareIntersection":"\u2293","SquareSubset":"\u228F","SquareSubsetEqual":"\u2291","SquareSuperset":"\u2290","SquareSupersetEqual":"\u2292","SquareUnion":"\u2294","squarf":"\u25AA","squ":"\u25A1","squf":"\u25AA","srarr":"\u2192","Sscr":"\uD835\uDCAE","sscr":"\uD835\uDCC8","ssetmn":"\u2216","ssmile":"\u2323","sstarf":"\u22C6","Star":"\u22C6","star":"\u2606","starf":"\u2605","straightepsilon":"\u03F5","straightphi":"\u03D5","strns":"\u00AF","sub":"\u2282","Sub":"\u22D0","subdot":"\u2ABD","subE":"\u2AC5","sube":"\u2286","subedot":"\u2AC3","submult":"\u2AC1","subnE":"\u2ACB","subne":"\u228A","subplus":"\u2ABF","subrarr":"\u2979","subset":"\u2282","Subset":"\u22D0","subseteq":"\u2286","subseteqq":"\u2AC5","SubsetEqual":"\u2286","subsetneq":"\u228A","subsetneqq":"\u2ACB","subsim":"\u2AC7","subsub":"\u2AD5","subsup":"\u2AD3","succapprox":"\u2AB8","succ":"\u227B","succcurlyeq":"\u227D","Succeeds":"\u227B","SucceedsEqual":"\u2AB0","SucceedsSlantEqual":"\u227D","SucceedsTilde":"\u227F","succeq":"\u2AB0","succnapprox":"\u2ABA","succneqq":"\u2AB6","succnsim":"\u22E9","succsim":"\u227F","SuchThat":"\u220B","sum":"\u2211","Sum":"\u2211","sung":"\u266A","sup1":"\u00B9","sup2":"\u00B2","sup3":"\u00B3","sup":"\u2283","Sup":"\u22D1","supdot":"\u2ABE","supdsub":"\u2AD8","supE":"\u2AC6","supe":"\u2287","supedot":"\u2AC4","Superset":"\u2283","SupersetEqual":"\u2287","suphsol":"\u27C9","suphsub":"\u2AD7","suplarr":"\u297B","supmult":"\u2AC2","supnE":"\u2ACC","supne":"\u228B","supplus":"\u2AC0","supset":"\u2283","Supset":"\u22D1","supseteq":"\u2287","supseteqq":"\u2AC6","supsetneq":"\u228B","supsetneqq":"\u2ACC","supsim":"\u2AC8","supsub":"\u2AD4","supsup":"\u2AD6","swarhk":"\u2926","swarr":"\u2199","swArr":"\u21D9","swarrow":"\u2199","swnwar":"\u292A","szlig":"\u00DF","Tab":"\t","target":"\u2316","Tau":"\u03A4","tau":"\u03C4","tbrk":"\u23B4","Tcaron":"\u0164","tcaron":"\u0165","Tcedil":"\u0162","tcedil":"\u0163","Tcy":"\u0422","tcy":"\u0442","tdot":"\u20DB","telrec":"\u2315","Tfr":"\uD835\uDD17","tfr":"\uD835\uDD31","there4":"\u2234","therefore":"\u2234","Therefore":"\u2234","Theta":"\u0398","theta":"\u03B8","thetasym":"\u03D1","thetav":"\u03D1","thickapprox":"\u2248","thicksim":"\u223C","ThickSpace":"\u205F\u200A","ThinSpace":"\u2009","thinsp":"\u2009","thkap":"\u2248","thksim":"\u223C","THORN":"\u00DE","thorn":"\u00FE","tilde":"\u02DC","Tilde":"\u223C","TildeEqual":"\u2243","TildeFullEqual":"\u2245","TildeTilde":"\u2248","timesbar":"\u2A31","timesb":"\u22A0","times":"\u00D7","timesd":"\u2A30","tint":"\u222D","toea":"\u2928","topbot":"\u2336","topcir":"\u2AF1","top":"\u22A4","Topf":"\uD835\uDD4B","topf":"\uD835\uDD65","topfork":"\u2ADA","tosa":"\u2929","tprime":"\u2034","trade":"\u2122","TRADE":"\u2122","triangle":"\u25B5","triangledown":"\u25BF","triangleleft":"\u25C3","trianglelefteq":"\u22B4","triangleq":"\u225C","triangleright":"\u25B9","trianglerighteq":"\u22B5","tridot":"\u25EC","trie":"\u225C","triminus":"\u2A3A","TripleDot":"\u20DB","triplus":"\u2A39","trisb":"\u29CD","tritime":"\u2A3B","trpezium":"\u23E2","Tscr":"\uD835\uDCAF","tscr":"\uD835\uDCC9","TScy":"\u0426","tscy":"\u0446","TSHcy":"\u040B","tshcy":"\u045B","Tstrok":"\u0166","tstrok":"\u0167","twixt":"\u226C","twoheadleftarrow":"\u219E","twoheadrightarrow":"\u21A0","Uacute":"\u00DA","uacute":"\u00FA","uarr":"\u2191","Uarr":"\u219F","uArr":"\u21D1","Uarrocir":"\u2949","Ubrcy":"\u040E","ubrcy":"\u045E","Ubreve":"\u016C","ubreve":"\u016D","Ucirc":"\u00DB","ucirc":"\u00FB","Ucy":"\u0423","ucy":"\u0443","udarr":"\u21C5","Udblac":"\u0170","udblac":"\u0171","udhar":"\u296E","ufisht":"\u297E","Ufr":"\uD835\uDD18","ufr":"\uD835\uDD32","Ugrave":"\u00D9","ugrave":"\u00F9","uHar":"\u2963","uharl":"\u21BF","uharr":"\u21BE","uhblk":"\u2580","ulcorn":"\u231C","ulcorner":"\u231C","ulcrop":"\u230F","ultri":"\u25F8","Umacr":"\u016A","umacr":"\u016B","uml":"\u00A8","UnderBar":"_","UnderBrace":"\u23DF","UnderBracket":"\u23B5","UnderParenthesis":"\u23DD","Union":"\u22C3","UnionPlus":"\u228E","Uogon":"\u0172","uogon":"\u0173","Uopf":"\uD835\uDD4C","uopf":"\uD835\uDD66","UpArrowBar":"\u2912","uparrow":"\u2191","UpArrow":"\u2191","Uparrow":"\u21D1","UpArrowDownArrow":"\u21C5","updownarrow":"\u2195","UpDownArrow":"\u2195","Updownarrow":"\u21D5","UpEquilibrium":"\u296E","upharpoonleft":"\u21BF","upharpoonright":"\u21BE","uplus":"\u228E","UpperLeftArrow":"\u2196","UpperRightArrow":"\u2197","upsi":"\u03C5","Upsi":"\u03D2","upsih":"\u03D2","Upsilon":"\u03A5","upsilon":"\u03C5","UpTeeArrow":"\u21A5","UpTee":"\u22A5","upuparrows":"\u21C8","urcorn":"\u231D","urcorner":"\u231D","urcrop":"\u230E","Uring":"\u016E","uring":"\u016F","urtri":"\u25F9","Uscr":"\uD835\uDCB0","uscr":"\uD835\uDCCA","utdot":"\u22F0","Utilde":"\u0168","utilde":"\u0169","utri":"\u25B5","utrif":"\u25B4","uuarr":"\u21C8","Uuml":"\u00DC","uuml":"\u00FC","uwangle":"\u29A7","vangrt":"\u299C","varepsilon":"\u03F5","varkappa":"\u03F0","varnothing":"\u2205","varphi":"\u03D5","varpi":"\u03D6","varpropto":"\u221D","varr":"\u2195","vArr":"\u21D5","varrho":"\u03F1","varsigma":"\u03C2","varsubsetneq":"\u228A\uFE00","varsubsetneqq":"\u2ACB\uFE00","varsupsetneq":"\u228B\uFE00","varsupsetneqq":"\u2ACC\uFE00","vartheta":"\u03D1","vartriangleleft":"\u22B2","vartriangleright":"\u22B3","vBar":"\u2AE8","Vbar":"\u2AEB","vBarv":"\u2AE9","Vcy":"\u0412","vcy":"\u0432","vdash":"\u22A2","vDash":"\u22A8","Vdash":"\u22A9","VDash":"\u22AB","Vdashl":"\u2AE6","veebar":"\u22BB","vee":"\u2228","Vee":"\u22C1","veeeq":"\u225A","vellip":"\u22EE","verbar":"|","Verbar":"\u2016","vert":"|","Vert":"\u2016","VerticalBar":"\u2223","VerticalLine":"|","VerticalSeparator":"\u2758","VerticalTilde":"\u2240","VeryThinSpace":"\u200A","Vfr":"\uD835\uDD19","vfr":"\uD835\uDD33","vltri":"\u22B2","vnsub":"\u2282\u20D2","vnsup":"\u2283\u20D2","Vopf":"\uD835\uDD4D","vopf":"\uD835\uDD67","vprop":"\u221D","vrtri":"\u22B3","Vscr":"\uD835\uDCB1","vscr":"\uD835\uDCCB","vsubnE":"\u2ACB\uFE00","vsubne":"\u228A\uFE00","vsupnE":"\u2ACC\uFE00","vsupne":"\u228B\uFE00","Vvdash":"\u22AA","vzigzag":"\u299A","Wcirc":"\u0174","wcirc":"\u0175","wedbar":"\u2A5F","wedge":"\u2227","Wedge":"\u22C0","wedgeq":"\u2259","weierp":"\u2118","Wfr":"\uD835\uDD1A","wfr":"\uD835\uDD34","Wopf":"\uD835\uDD4E","wopf":"\uD835\uDD68","wp":"\u2118","wr":"\u2240","wreath":"\u2240","Wscr":"\uD835\uDCB2","wscr":"\uD835\uDCCC","xcap":"\u22C2","xcirc":"\u25EF","xcup":"\u22C3","xdtri":"\u25BD","Xfr":"\uD835\uDD1B","xfr":"\uD835\uDD35","xharr":"\u27F7","xhArr":"\u27FA","Xi":"\u039E","xi":"\u03BE","xlarr":"\u27F5","xlArr":"\u27F8","xmap":"\u27FC","xnis":"\u22FB","xodot":"\u2A00","Xopf":"\uD835\uDD4F","xopf":"\uD835\uDD69","xoplus":"\u2A01","xotime":"\u2A02","xrarr":"\u27F6","xrArr":"\u27F9","Xscr":"\uD835\uDCB3","xscr":"\uD835\uDCCD","xsqcup":"\u2A06","xuplus":"\u2A04","xutri":"\u25B3","xvee":"\u22C1","xwedge":"\u22C0","Yacute":"\u00DD","yacute":"\u00FD","YAcy":"\u042F","yacy":"\u044F","Ycirc":"\u0176","ycirc":"\u0177","Ycy":"\u042B","ycy":"\u044B","yen":"\u00A5","Yfr":"\uD835\uDD1C","yfr":"\uD835\uDD36","YIcy":"\u0407","yicy":"\u0457","Yopf":"\uD835\uDD50","yopf":"\uD835\uDD6A","Yscr":"\uD835\uDCB4","yscr":"\uD835\uDCCE","YUcy":"\u042E","yucy":"\u044E","yuml":"\u00FF","Yuml":"\u0178","Zacute":"\u0179","zacute":"\u017A","Zcaron":"\u017D","zcaron":"\u017E","Zcy":"\u0417","zcy":"\u0437","Zdot":"\u017B","zdot":"\u017C","zeetrf":"\u2128","ZeroWidthSpace":"\u200B","Zeta":"\u0396","zeta":"\u03B6","zfr":"\uD835\uDD37","Zfr":"\u2128","ZHcy":"\u0416","zhcy":"\u0436","zigrarr":"\u21DD","zopf":"\uD835\uDD6B","Zopf":"\u2124","Zscr":"\uD835\uDCB5","zscr":"\uD835\uDCCF","zwj":"\u200D","zwnj":"\u200C"}
\ No newline at end of file
diff --git a/lib/entities/legacy.json b/lib/entities/legacy.json
new file mode 100644
index 0000000..f0e82a4
--- /dev/null
+++ b/lib/entities/legacy.json
@@ -0,0 +1 @@
+{"Aacute":"\u00C1","aacute":"\u00E1","Acirc":"\u00C2","acirc":"\u00E2","acute":"\u00B4","AElig":"\u00C6","aelig":"\u00E6","Agrave":"\u00C0","agrave":"\u00E0","amp":"&","AMP":"&","Aring":"\u00C5","aring":"\u00E5","Atilde":"\u00C3","atilde":"\u00E3","Auml":"\u00C4","auml":"\u00E4","brvbar":"\u00A6","Ccedil":"\u00C7","ccedil":"\u00E7","cedil":"\u00B8","cent":"\u00A2","copy":"\u00A9","COPY":"\u00A9","curren":"\u00A4","deg":"\u00B0","divide":"\u00F7","Eacute":"\u00C9","eacute":"\u00E9","Ecirc":"\u00CA","ecirc":"\u00EA","Egrave":"\u00C8","egrave":"\u00E8","ETH":"\u00D0","eth":"\u00F0","Euml":"\u00CB","euml":"\u00EB","frac12":"\u00BD","frac14":"\u00BC","frac34":"\u00BE","gt":">","GT":">","Iacute":"\u00CD","iacute":"\u00ED","Icirc":"\u00CE","icirc":"\u00EE","iexcl":"\u00A1","Igrave":"\u00CC","igrave":"\u00EC","iquest":"\u00BF","Iuml":"\u00CF","iuml":"\u00EF","laquo":"\u00AB","lt":"<","LT":"<","macr":"\u00AF","micro":"\u00B5","middot":"\u00B7","nbsp":"\u00A0","not":"\u00AC","Ntilde":"\u00D1","ntilde":"\u00F1","Oacute":"\u00D3","oacute":"\u00F3","Ocirc":"\u00D4","ocirc":"\u00F4","Ograve":"\u00D2","ograve":"\u00F2","ordf":"\u00AA","ordm":"\u00BA","Oslash":"\u00D8","oslash":"\u00F8","Otilde":"\u00D5","otilde":"\u00F5","Ouml":"\u00D6","ouml":"\u00F6","para":"\u00B6","plusmn":"\u00B1","pound":"\u00A3","quot":"\"","QUOT":"\"","raquo":"\u00BB","reg":"\u00AE","REG":"\u00AE","sect":"\u00A7","shy":"\u00AD","sup1":"\u00B9","sup2":"\u00B2","sup3":"\u00B3","szlig":"\u00DF","THORN":"\u00DE","thorn":"\u00FE","times":"\u00D7","Uacute":"\u00DA","uacute":"\u00FA","Ucirc":"\u00DB","ucirc":"\u00FB","Ugrave":"\u00D9","ugrave":"\u00F9","uml":"\u00A8","Uuml":"\u00DC","uuml":"\u00FC","Yacute":"\u00DD","yacute":"\u00FD","yen":"\u00A5","yuml":"\u00FF"}
\ No newline at end of file

From ba3c1c73515cc3c5955ecf57df9eedccddeb5517 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 18 Aug 2013 11:59:27 +0200
Subject: [PATCH 404/450] [tokenizer] added support for decoding HTML entities
 in `ontext` events
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There is still a number of TODOs:
• support decoding entities in attributes
• when in XML mode, only decode XML entities (skip legacy entities)
• move the decodeMap to a JSON file
---
 lib/Tokenizer.js | 71 +++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 58 insertions(+), 13 deletions(-)

diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
index fe6e680..a63af95 100644
--- a/lib/Tokenizer.js
+++ b/lib/Tokenizer.js
@@ -1,6 +1,9 @@
 module.exports = Tokenizer;
 
-var i = 0,
+var entityMap = require("./entities/entities.json"),
+    legacyMap = require("./entities/legacy.json"),
+
+    i = 0,
 
     TEXT                      = i++,
     BEFORE_TAG_NAME           = i++, //after <
@@ -77,7 +80,6 @@ var i = 0,
 
 var decodeMap = {
 	// as described in mathiasbynens/he#4
-	// with character mapping to themselves removed
 	__proto__: null,
 	0: '\uFFFD',
 	128: '\u20AC',
@@ -258,7 +260,7 @@ Tokenizer.prototype._stateInSelfClosingTag = function(c){
 		this._state = BEFORE_ATTRIBUTE_NAME;
 		this._index--;
 	}
-}
+};
 
 Tokenizer.prototype._stateInAttributeName = function (c) {
 	if(c === "=" || c === "/" || c === ">" || whitespace(c)){
@@ -487,25 +489,63 @@ Tokenizer.prototype._stateAfterStyle4 = function (c) {
 Tokenizer.prototype._stateBeforeEntity = ifElseState("#", BEFORE_NUMERIC_ENTITY, IN_NAMED_ENTITY);
 Tokenizer.prototype._stateBeforeNumericEntity = ifElseState("X", IN_HEX_ENTITY, IN_NUMERIC_ENTITY);
 
+//for entities within attributes
+Tokenizer.prototype._parseNamedEntityStrict = function(){
+	//offset = 1
+	if(this._sectionStart + 1 < this._index){
+		var entity = this._buffer.substring(this._sectionStart + 1, this._index);
+
+		if(entityMap.hasOwnProperty(entity)){
+			//TODO attributes
+			this._cbs.ontext(entityMap[entity]);
+			this._sectionStart = this._index;
+		}
+	}
+};
+
+
+//parses legacy entities (without trailing semicolon)
+Tokenizer.prototype._parseLegacyEntity = function(){
+	var start = this._sectionStart + 1,
+	    limit = this._index - start;
+
+	if(limit > 6) limit = 6; //the max length of legacy entities is 6
+
+	while(limit >= 2){ //the min length of legacy entities is 2
+		var entity = this._buffer.substr(start, limit);
+
+		if(legacyMap.hasOwnProperty(entity)){
+			this._cbs.ontext(legacyMap[entity]);
+			this._sectionStart += limit + 1;
+			break;
+		} else {
+			limit--;
+		}
+	}
+};
+
+//first tries to parse a strict entity, otherwise tries to parse a legacy entity
+Tokenizer.prototype._parseNamedEntity = function(){
+	this._parseNamedEntityStrict();
+	this._parseLegacyEntity();
+};
+
 Tokenizer.prototype._stateInNamedEntity = function(c){
 	if(c === ";"){
-		//offset 1
-		if(this._sectionStart + 1 !== this._index){
-			var entity = this._buffer.substring(this._sectionStart + 1, this._index);
-			//TODO parse entity
-		}
+		this._parseNamedEntity();
 		this._state = this._baseState;
+		this._sectionStart++;
 	} else if((c < "a" || c > "z") && (c < "A" || c > "Z") && (c < "0" || c > "9")){
-		if(
+		/*if(
 			this._baseState === IN_ATTRIBUTE_VALUE_NQ ||
 			this._baseState === IN_ATTRIBUTE_VALUE_SQ ||
 			this._baseState === IN_ATTRIBUTE_VALUE_DQ
 		) {
 			if(c !== "="){
-				//consume only an entity containing all characters
+				this._parseNamedEntityStrict(); //TODO
 			}
-		} else {
-			//TODO incrementally parse entites
+		} else*/ {
+			this._parseLegacyEntity();
 		}
 		this._state = this._baseState;
 		this._index--;
@@ -543,6 +583,7 @@ Tokenizer.prototype._decodeNumericEntity = function(offset, base){
 		var parsed = parseInt(entity, base);
 
 		if(parsed === parsed){ //not NaN (TODO: when can this happen?)
+			//TODO what about attributes?
 			this._cbs.ontext(decodeCodePoint(parsed));
 			this._sectionStart = this._index;
 		}
@@ -814,7 +855,11 @@ Tokenizer.prototype.end = function(chunk){
 		} else if(this._state === IN_CLOSING_TAG_NAME){
 			this._cbs.onclosetag(data);
 		} else if(this._state === IN_NAMED_ENTITY){
-			// TODO
+			this._parseLegacyEntity();
+			if(this._sectionStart < this._index){
+				this._state = this._baseState;
+				return this.end();
+			}
 		} else if(this._state === IN_NUMERIC_ENTITY){
 			this._decodeNumericEntity(2, 10);
 		} else if(this._state === IN_HEX_ENTITY){

From e9a8496239d6edbc7c196800a43659fdd0071fc8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 18 Aug 2013 12:00:45 +0200
Subject: [PATCH 405/450] [tests] added test cases for decoding legacy & named
 entities

both containing one of the longest available entities, to ensure they
are propperly decoded (especially relevant for legacy entities)
---
 tests/Events/18-legacy_entities.json | 16 ++++++++++++++++
 tests/Events/19-named_entities.json  | 16 ++++++++++++++++
 2 files changed, 32 insertions(+)
 create mode 100644 tests/Events/18-legacy_entities.json
 create mode 100644 tests/Events/19-named_entities.json

diff --git a/tests/Events/18-legacy_entities.json b/tests/Events/18-legacy_entities.json
new file mode 100644
index 0000000..e0be699
--- /dev/null
+++ b/tests/Events/18-legacy_entities.json
@@ -0,0 +1,16 @@
+{
+  "name": "legacy entities",
+  "options": {
+    "handler": {},
+    "parser": {"decodeEntities": true}
+  },
+  "html": "&AMPel&iacutees&lter",
+  "expected": [
+    {
+      "event": "text",
+      "data": [
+        "&el\u00EDes<er"
+      ]
+    }
+  ]
+}
\ No newline at end of file
diff --git a/tests/Events/19-named_entities.json b/tests/Events/19-named_entities.json
new file mode 100644
index 0000000..33db898
--- /dev/null
+++ b/tests/Events/19-named_entities.json
@@ -0,0 +1,16 @@
+{
+  "name": "named entities",
+  "options": {
+    "handler": {},
+    "parser": {"decodeEntities": true}
+  },
+  "html": "&amp;el&lt;er&CounterClockwiseContourIntegral;foo",
+  "expected": [
+    {
+      "event": "text",
+      "data": [
+        "&el<er\u2233foo"
+      ]
+    }
+  ]
+}
\ No newline at end of file

From 927a9e990ea13b2e6148d7ff01dca095307940c9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 18 Aug 2013 12:33:31 +0200
Subject: [PATCH 406/450] [entities] added map for XML entities

---
 lib/entities/xml.json | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 lib/entities/xml.json

diff --git a/lib/entities/xml.json b/lib/entities/xml.json
new file mode 100644
index 0000000..de8db10
--- /dev/null
+++ b/lib/entities/xml.json
@@ -0,0 +1 @@
+{"amp":"&","apos":"'","gt":">","lt":"<","quot":"\""}

From 7adb0537f1d03df27c688905f2eddedd7250787e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 18 Aug 2013 12:34:48 +0200
Subject: [PATCH 407/450] [tokenizer] added support for XML entities

also moved handling of trailing data to _handleTrailingData() (as it
has to be called recursively now)
---
 lib/Tokenizer.js | 108 ++++++++++++++++++++++++++++-------------------
 1 file changed, 64 insertions(+), 44 deletions(-)

diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
index a63af95..ffeaa46 100644
--- a/lib/Tokenizer.js
+++ b/lib/Tokenizer.js
@@ -2,6 +2,7 @@ module.exports = Tokenizer;
 
 var entityMap = require("./entities/entities.json"),
     legacyMap = require("./entities/legacy.json"),
+    xmlMap    = require("./entities/xml.json"),
 
     i = 0,
 
@@ -493,11 +494,12 @@ Tokenizer.prototype._stateBeforeNumericEntity = ifElseState("X", IN_HEX_ENTITY,
 Tokenizer.prototype._parseNamedEntityStrict = function(){
 	//offset = 1
 	if(this._sectionStart + 1 < this._index){
-		var entity = this._buffer.substring(this._sectionStart + 1, this._index);
+		var entity = this._buffer.substring(this._sectionStart + 1, this._index),
+		    map = this._xmlMode ? xmlMap : entityMap;
 
-		if(entityMap.hasOwnProperty(entity)){
+		if(map.hasOwnProperty(entity)){
 			//TODO attributes
-			this._cbs.ontext(entityMap[entity]);
+			this._cbs.ontext(map[entity]);
 			this._sectionStart = this._index;
 		}
 	}
@@ -524,19 +526,17 @@ Tokenizer.prototype._parseLegacyEntity = function(){
 	}
 };
 
-//first tries to parse a strict entity, otherwise tries to parse a legacy entity
-Tokenizer.prototype._parseNamedEntity = function(){
-	this._parseNamedEntityStrict();
-	this._parseLegacyEntity();
-};
-
 Tokenizer.prototype._stateInNamedEntity = function(c){
 	if(c === ";"){
-		this._parseNamedEntity();
+		this._parseNamedEntityStrict();
+		if(this._sectionStart + 1 < this._index && !this._xmlMode){
+			this._parseLegacyEntity();
+		}
 		this._state = this._baseState;
 		this._sectionStart++;
 	} else if((c < "a" || c > "z") && (c < "A" || c > "Z") && (c < "0" || c > "9")){
-		/*if(
+		if(this._xmlMode);
+		/*else if(
 			this._baseState === IN_ATTRIBUTE_VALUE_NQ ||
 			this._baseState === IN_ATTRIBUTE_VALUE_SQ ||
 			this._baseState === IN_ATTRIBUTE_VALUE_DQ
@@ -597,7 +597,11 @@ Tokenizer.prototype._stateInNumericEntity = function(c){
 		this._decodeNumericEntity(2, 10);
 		this._sectionStart++;
 	} else if(c < "0" || c > "9"){
-		this._decodeNumericEntity(2, 10);
+		if(!this._xmlMode){
+			this._decodeNumericEntity(2, 10);
+		} else {
+			this._state = this._baseState;
+		}
 		this._index--;
 	}
 };
@@ -607,7 +611,11 @@ Tokenizer.prototype._stateInHexEntity = function(c){
 		this._decodeNumericEntity(3, 16);
 		this._sectionStart++;
 	} else if((c < "a" || c > "f") && (c < "A" || c > "F") && (c < "0" || c > "9")){
-		this._decodeNumericEntity(3, 16);
+		if(!this._xmlMode){
+			this._decodeNumericEntity(3, 16);
+		} else {
+			this._state = this._baseState;
+		}
 		this._index--;
 	}
 };
@@ -836,42 +844,54 @@ Tokenizer.prototype.end = function(chunk){
 
 	//if there is remaining data, emit it in a reasonable way
 	if(this._sectionStart < this._index){
-		var data = this._buffer.substr(this._sectionStart);
-
-		if(this._state === IN_CDATA || this._state === AFTER_CDATA_1 || this._state === AFTER_CDATA_2){
-			this._cbs.oncdata(data);
-		} else if(this._state === IN_COMMENT || this._state === AFTER_COMMENT_1 || this._state === AFTER_COMMENT_2){
-			this._cbs.oncomment(data);
-		} else if(this._state === IN_TAG_NAME){
-			this._cbs.onopentagname(data);
-		} else if(this._state === BEFORE_ATTRIBUTE_NAME || this._state === BEFORE_ATTRIBUTE_VALUE || this._state === AFTER_ATTRIBUTE_NAME){
-			this._cbs.onopentagend();
-		} else if(this._state === IN_ATTRIBUTE_NAME){
-			this._cbs.onattribname(data);
-		} else if(this._state === IN_ATTRIBUTE_VALUE_SQ || this._state === IN_ATTRIBUTE_VALUE_DQ){
-			this._cbs.onattribvalue(data.substr(1));
-		} else if(this._state === IN_ATTRIBUTE_VALUE_NQ){
-			this._cbs.onattribvalue(data);
-		} else if(this._state === IN_CLOSING_TAG_NAME){
-			this._cbs.onclosetag(data);
-		} else if(this._state === IN_NAMED_ENTITY){
-			this._parseLegacyEntity();
-			if(this._sectionStart < this._index){
-				this._state = this._baseState;
-				return this.end();
-			}
-		} else if(this._state === IN_NUMERIC_ENTITY){
-			this._decodeNumericEntity(2, 10);
-		} else if(this._state === IN_HEX_ENTITY){
-			this._decodeNumericEntity(3, 16);
-		} else {
-			this._cbs.ontext(data);
-		}
+		this._handleTrailingData();
 	}
 
 	this._cbs.onend();
 };
 
+Tokenizer.prototype._handleTrailingData = function(){
+	var data = this._buffer.substr(this._sectionStart);
+
+	if(this._state === IN_CDATA || this._state === AFTER_CDATA_1 || this._state === AFTER_CDATA_2){
+		this._cbs.oncdata(data);
+	} else if(this._state === IN_COMMENT || this._state === AFTER_COMMENT_1 || this._state === AFTER_COMMENT_2){
+		this._cbs.oncomment(data);
+	} else if(this._state === IN_TAG_NAME){
+		this._cbs.onopentagname(data);
+	} else if(this._state === BEFORE_ATTRIBUTE_NAME || this._state === BEFORE_ATTRIBUTE_VALUE || this._state === AFTER_ATTRIBUTE_NAME){
+		this._cbs.onopentagend();
+	} else if(this._state === IN_ATTRIBUTE_NAME){
+		this._cbs.onattribname(data);
+	} else if(this._state === IN_ATTRIBUTE_VALUE_SQ || this._state === IN_ATTRIBUTE_VALUE_DQ){
+		this._cbs.onattribvalue(data.substr(1));
+	} else if(this._state === IN_ATTRIBUTE_VALUE_NQ){
+		this._cbs.onattribvalue(data);
+	} else if(this._state === IN_CLOSING_TAG_NAME){
+		this._cbs.onclosetag(data);
+	} else if(this._state === IN_NAMED_ENTITY && !this._xmlMode){
+		this._parseLegacyEntity();
+		if(this._sectionStart < this._index){
+			this._state = this._baseState;
+			this._handleTrailingData();
+		}
+	} else if(this._state === IN_NUMERIC_ENTITY && !this._xmlMode){
+		this._decodeNumericEntity(2, 10);
+		if(this._sectionStart < this._index){
+			this._state = this._baseState;
+			this._handleTrailingData();
+		}
+	} else if(this._state === IN_HEX_ENTITY && !this._xmlMode){
+		this._decodeNumericEntity(3, 16);
+		if(this._sectionStart < this._index){
+			this._state = this._baseState;
+			this._handleTrailingData();
+		}
+	} else {
+		this._cbs.ontext(data);
+	}
+};
+
 Tokenizer.prototype.reset = function(){
 	Tokenizer.call(this, {xmlMode: this._xmlMode, decodeEntities: this._decodeEntities}, this._cbs);
 };

From b60cf0492e304515af216dac9e3c8634fc173885 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 18 Aug 2013 12:35:25 +0200
Subject: [PATCH 408/450] [tests] also test trailing data support in the
 numeric entity test

---
 tests/Events/17-numeric_entities.json | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/Events/17-numeric_entities.json b/tests/Events/17-numeric_entities.json
index 865ba6d..c76221c 100644
--- a/tests/Events/17-numeric_entities.json
+++ b/tests/Events/17-numeric_entities.json
@@ -4,12 +4,12 @@
     "handler": {},
     "parser": {"decodeEntities": true}
   },
-  "html": "&#x61;&#x62&#99;&#100!",
+  "html": "&#x61;&#x62&#99;&#100",
   "expected": [
     {
       "event": "text",
       "data": [
-        "abcd!"
+        "abcd"
       ]
     }
   ]

From e45e4ecf76b1a94bfec87e70846c268518a76484 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 18 Aug 2013 13:01:13 +0200
Subject: [PATCH 409/450] [tokenizer] fixed handling non-existent entities

---
 lib/Tokenizer.js | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
index ffeaa46..8b58cc4 100644
--- a/lib/Tokenizer.js
+++ b/lib/Tokenizer.js
@@ -500,7 +500,7 @@ Tokenizer.prototype._parseNamedEntityStrict = function(){
 		if(map.hasOwnProperty(entity)){
 			//TODO attributes
 			this._cbs.ontext(map[entity]);
-			this._sectionStart = this._index;
+			this._sectionStart = this._index + 1;
 		}
 	}
 };
@@ -518,7 +518,7 @@ Tokenizer.prototype._parseLegacyEntity = function(){
 
 		if(legacyMap.hasOwnProperty(entity)){
 			this._cbs.ontext(legacyMap[entity]);
-			this._sectionStart += limit + 1;
+			this._sectionStart += limit + 2;
 			break;
 		} else {
 			limit--;
@@ -533,7 +533,6 @@ Tokenizer.prototype._stateInNamedEntity = function(c){
 			this._parseLegacyEntity();
 		}
 		this._state = this._baseState;
-		this._sectionStart++;
 	} else if((c < "a" || c > "z") && (c < "A" || c > "Z") && (c < "0" || c > "9")){
 		if(this._xmlMode);
 		/*else if(
@@ -543,9 +542,11 @@ Tokenizer.prototype._stateInNamedEntity = function(c){
 		) {
 			if(c !== "="){
 				this._parseNamedEntityStrict(); //TODO
+				this._sectionStart--; //include the current character in the section
 			}
-		} else*/ {
+		}*/ else {
 			this._parseLegacyEntity();
+			this._sectionStart--;
 		}
 		this._state = this._baseState;
 		this._index--;
@@ -871,7 +872,7 @@ Tokenizer.prototype._handleTrailingData = function(){
 		this._cbs.onclosetag(data);
 	} else if(this._state === IN_NAMED_ENTITY && !this._xmlMode){
 		this._parseLegacyEntity();
-		if(this._sectionStart < this._index){
+		if(--this._sectionStart < this._index){
 			this._state = this._baseState;
 			this._handleTrailingData();
 		}

From 12edc94d31ce39644b67c937cdd78af2b82b6a6e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 18 Aug 2013 13:01:43 +0200
Subject: [PATCH 410/450] [tests] added test case for XML entities

---
 tests/Events/20-xml_entities.json | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)
 create mode 100644 tests/Events/20-xml_entities.json

diff --git a/tests/Events/20-xml_entities.json b/tests/Events/20-xml_entities.json
new file mode 100644
index 0000000..0f7dece
--- /dev/null
+++ b/tests/Events/20-xml_entities.json
@@ -0,0 +1,16 @@
+{
+  "name": "xml entities",
+  "options": {
+    "handler": {},
+    "parser": {"decodeEntities": true, "xmlMode": true}
+  },
+  "html": "&amp;&gt;&amp&lt;&uuml;&#x61;&#x62&#99;&#100",
+  "expected": [
+    {
+      "event": "text",
+      "data": [
+        "&>&amp<&uuml;a&#x62c&#100"
+      ]
+    }
+  ]
+}
\ No newline at end of file

From 271dee283a7ec71315b461e978ca1ba0c4fd518b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 18 Aug 2013 13:18:06 +0200
Subject: [PATCH 411/450] [tokenizer] added _emitEntity

as a preparation for supporting decoding entities in attribute values
---
 lib/Tokenizer.js | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
index 8b58cc4..9d940b3 100644
--- a/lib/Tokenizer.js
+++ b/lib/Tokenizer.js
@@ -498,8 +498,7 @@ Tokenizer.prototype._parseNamedEntityStrict = function(){
 		    map = this._xmlMode ? xmlMap : entityMap;
 
 		if(map.hasOwnProperty(entity)){
-			//TODO attributes
-			this._cbs.ontext(map[entity]);
+			this._emitEntity(map[entity]);
 			this._sectionStart = this._index + 1;
 		}
 	}
@@ -517,7 +516,7 @@ Tokenizer.prototype._parseLegacyEntity = function(){
 		var entity = this._buffer.substr(start, limit);
 
 		if(legacyMap.hasOwnProperty(entity)){
-			this._cbs.ontext(legacyMap[entity]);
+			this._emitEntity(legacyMap[entity]);
 			this._sectionStart += limit + 2;
 			break;
 		} else {
@@ -541,7 +540,7 @@ Tokenizer.prototype._stateInNamedEntity = function(c){
 			this._baseState === IN_ATTRIBUTE_VALUE_DQ
 		) {
 			if(c !== "="){
-				this._parseNamedEntityStrict(); //TODO
+				this._parseNamedEntityStrict();
 				this._sectionStart--; //include the current character in the section
 			}
 		}*/ else {
@@ -584,8 +583,7 @@ Tokenizer.prototype._decodeNumericEntity = function(offset, base){
 		var parsed = parseInt(entity, base);
 
 		if(parsed === parsed){ //not NaN (TODO: when can this happen?)
-			//TODO what about attributes?
-			this._cbs.ontext(decodeCodePoint(parsed));
+			this._emitEntity(decodeCodePoint(parsed));
 			this._sectionStart = this._index;
 		}
 	}
@@ -905,3 +903,16 @@ Tokenizer.prototype._emitToken = function(name){
 	this._cbs[name](this._getSection());
 	this._sectionStart = -1;
 };
+
+Tokenizer.prototype._emitEntity = function(value){
+	/*if(
+		this._baseState === IN_ATTRIBUTE_VALUE_NQ ||
+		this._baseState === IN_ATTRIBUTE_VALUE_SQ ||
+		this._baseState === IN_ATTRIBUTE_VALUE_DQ
+	) {
+		this._cbs.onattributedata(value); //TODO implement the new event
+	} else*/
+	{
+		this._cbs.ontext(value);
+	}
+};

From 076fcf7b7450c632d8aee7e274bf89fc7bc9d63a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 18 Aug 2013 13:18:32 +0200
Subject: [PATCH 412/450] 3.2.0

---
 package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package.json b/package.json
index b886f6a..0bb44d6 100644
--- a/package.json
+++ b/package.json
@@ -1,7 +1,7 @@
 {
 	"name": "htmlparser2",
 	"description": "Fast & forgiving HTML/XML/RSS parser",
-	"version": "3.1.6",
+	"version": "3.2.0",
 	"author": "Felix Boehm <me@feedic.com>",
 	"keywords": ["html", "parser", "streams", "xml", "dom", "rss", "feed", "atom"],
 	"contributors": ["Chris Winberry <chris@winberry.net>"],

From f46765d9eb3bbe0c4c88393c0e1ad20bb8d431d4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 18 Aug 2013 13:49:04 +0200
Subject: [PATCH 413/450] [tokenizer] moved decodeMap to entities/decode.json

---
 lib/Tokenizer.js         | 34 +---------------------------------
 lib/entities/decode.json |  1 +
 2 files changed, 2 insertions(+), 33 deletions(-)
 create mode 100644 lib/entities/decode.json

diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
index 9d940b3..380521a 100644
--- a/lib/Tokenizer.js
+++ b/lib/Tokenizer.js
@@ -3,6 +3,7 @@ module.exports = Tokenizer;
 var entityMap = require("./entities/entities.json"),
     legacyMap = require("./entities/legacy.json"),
     xmlMap    = require("./entities/xml.json"),
+    decodeMap = require("./entities/decode.json"),
 
     i = 0,
 
@@ -79,39 +80,6 @@ var entityMap = require("./entities/entities.json"),
     IN_NUMERIC_ENTITY         = i++,
     IN_HEX_ENTITY             = i++; //X
 
-var decodeMap = {
-	// as described in mathiasbynens/he#4
-	__proto__: null,
-	0: '\uFFFD',
-	128: '\u20AC',
-	130: '\u201A',
-	131: '\u0192',
-	132: '\u201E',
-	133: '\u2026',
-	134: '\u2020',
-	135: '\u2021',
-	136: '\u02C6',
-	137: '\u2030',
-	138: '\u0160',
-	139: '\u2039',
-	140: '\u0152',
-	142: '\u017D',
-	145: '\u2018',
-	146: '\u2019',
-	147: '\u201C',
-	148: '\u201D',
-	149: '\u2022',
-	150: '\u2013',
-	151: '\u2014',
-	152: '\u02DC',
-	153: '\u2122',
-	154: '\u0161',
-	155: '\u203A',
-	156: '\u0153',
-	158: '\u017E',
-	159: '\u0178'
-};
-
 function whitespace(c){
 	return c === " " || c === "\n" || c === "\t" || c === "\f" || c === "\r";
 }
diff --git a/lib/entities/decode.json b/lib/entities/decode.json
new file mode 100644
index 0000000..4ed2e8b
--- /dev/null
+++ b/lib/entities/decode.json
@@ -0,0 +1 @@
+{"0":"\uFFFD","128":"\u20AC","130":"\u201A","131":"\u0192","132":"\u201E","133":"\u2026","134":"\u2020","135":"\u2021","136":"\u02C6","137":"\u2030","138":"\u0160","139":"\u2039","140":"\u0152","142":"\u017D","145":"\u2018","146":"\u2019","147":"\u201C","148":"\u201D","149":"\u2022","150":"\u2013","151":"\u2014","152":"\u02DC","153":"\u2122","154":"\u0161","155":"\u203A","156":"\u0153","158":"\u017E","159":"\u0178"}

From 389102d30e188141f909097db190beeed4f1c656 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 18 Aug 2013 13:55:31 +0200
Subject: [PATCH 414/450] [tokenizer] renamed _emitEntity to _emitPartial

---
 lib/Tokenizer.js | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
index 380521a..2768a36 100644
--- a/lib/Tokenizer.js
+++ b/lib/Tokenizer.js
@@ -466,7 +466,7 @@ Tokenizer.prototype._parseNamedEntityStrict = function(){
 		    map = this._xmlMode ? xmlMap : entityMap;
 
 		if(map.hasOwnProperty(entity)){
-			this._emitEntity(map[entity]);
+			this._emitPartial(map[entity]);
 			this._sectionStart = this._index + 1;
 		}
 	}
@@ -484,7 +484,7 @@ Tokenizer.prototype._parseLegacyEntity = function(){
 		var entity = this._buffer.substr(start, limit);
 
 		if(legacyMap.hasOwnProperty(entity)){
-			this._emitEntity(legacyMap[entity]);
+			this._emitPartial(legacyMap[entity]);
 			this._sectionStart += limit + 2;
 			break;
 		} else {
@@ -551,7 +551,7 @@ Tokenizer.prototype._decodeNumericEntity = function(offset, base){
 		var parsed = parseInt(entity, base);
 
 		if(parsed === parsed){ //not NaN (TODO: when can this happen?)
-			this._emitEntity(decodeCodePoint(parsed));
+			this._emitPartial(decodeCodePoint(parsed));
 			this._sectionStart = this._index;
 		}
 	}
@@ -872,7 +872,7 @@ Tokenizer.prototype._emitToken = function(name){
 	this._sectionStart = -1;
 };
 
-Tokenizer.prototype._emitEntity = function(value){
+Tokenizer.prototype._emitPartial = function(value){
 	/*if(
 		this._baseState === IN_ATTRIBUTE_VALUE_NQ ||
 		this._baseState === IN_ATTRIBUTE_VALUE_SQ ||

From 6ca87fff5279c2590fed6e0103d84c99c373e7df Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 18 Aug 2013 18:48:39 +0200
Subject: [PATCH 415/450] [index] statically export Parser, Tokenizer and
 DomHandler

---
 lib/index.js | 52 +++++++++++++++++++++++-----------------------------
 1 file changed, 23 insertions(+), 29 deletions(-)

diff --git a/lib/index.js b/lib/index.js
index 86d2392..7f6f649 100644
--- a/lib/index.js
+++ b/lib/index.js
@@ -1,3 +1,6 @@
+var Parser = require("./Parser.js"),
+    DomHandler = require("domhandler");
+
 function defineProp(name, value){
 	delete module.exports[name];
 	module.exports[name] = value;
@@ -5,33 +8,13 @@ function defineProp(name, value){
 }
 
 module.exports = {
-	parseDOM: function (html) {
-		var handler = new module.exports.DomHandler();
-		var parser = new module.exports.Parser(handler);
-		parser.parseComplete(html);
-		return handler.dom;
-	},
-	parseFeed: function (feed) {
-		var handler = new module.exports.FeedHandler();
-		var parser = new module.exports.Parser(handler);
-		parser.parseComplete(feed);
-		return handler.dom;
-	},
-	get Parser(){
-		return defineProp("Parser", require("./Parser.js"));
-	},
-	get DomHandler(){
-		return defineProp("DomHandler", require("domhandler"));
-	},
+	Parser: Parser,
+	Tokenizer: require("./Tokenizer.js"),
+	ElementType: require("domelementtype"),
+	DomHandler: DomHandler,
 	get FeedHandler(){
 		return defineProp("FeedHandler", require("./FeedHandler.js"));
 	},
-	get Tokenizer(){
-		return defineProp("Tokenizer", require("./Tokenizer.js"));
-	},
-	get ElementType(){
-		return defineProp("ElementType", require("domelementtype"));
-	},
 	get Stream(){
 		return defineProp("Stream", require("./Stream.js"));
 	},
@@ -48,15 +31,26 @@ module.exports = {
 		return defineProp("CollectingHandler", require("./CollectingHandler.js"));
 	},
 	// For legacy support
-	get DefaultHandler(){
-		return defineProp("DefaultHandler", this.DomHandler);
-	},
+	DefaultHandler: DomHandler,
 	get RssHandler(){
 		return defineProp("RssHandler", this.FeedHandler);
 	},
+	//helper methods
+	parseDOM: function(data, options) {
+		var handler = new DomHandler(options);
+		var parser = new Parser(handler, options);
+		parser.end(data);
+		return handler.dom;
+	},
+	parseFeed: function(feed, options){
+		var handler = new module.exports.FeedHandler();
+		var parser = new Parser(handler);
+		parser.end(feed);
+		return handler.dom;
+	},
 	createDomStream: function(cb, options, elementCb){
-		var handler = new module.exports.DomHandler(cb, options, elementCb);
-		return new module.exports.Parser(handler, options);
+		var handler = new DomHandler(cb, options, elementCb);
+		return new Parser(handler, options);
 	},
 	// List of all events that the parser emits
 	EVENTS: { /* Format: eventname: number of arguments */

From 1c8600b2ed78dbf95a1c9e21eff18848968b632e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 18 Aug 2013 18:50:05 +0200
Subject: [PATCH 416/450] [parser] use String#search and String#substr instead
 of String#split

vastly improves performance of onprocessinginstruction and ondeclaration
---
 lib/Parser.js | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index 7d3ef6d..e5fad7a 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -73,6 +73,8 @@ var voidElements = {
 	wbr: true
 };
 
+var re_nameEnd = /\s|\//;
+
 function Parser(cbs, options){
 	this._options = options || {};
 	this._cbs = cbs || {};
@@ -207,7 +209,9 @@ Parser.prototype.onattribvalue = function attribValue(value){
 
 Parser.prototype.ondeclaration = function(value){
 	if(this._cbs.onprocessinginstruction){
-		var name = value.split(/\s|\//, 1)[0];
+		var idx = value.search(re_nameEnd),
+		    name = idx < 0 ? value : value.substr(0, idx);
+
 		if(!(this._options.xmlMode || "lowerCaseTags" in this._options) || this._options.lowerCaseTags){
 			name = name.toLowerCase();
 		}
@@ -217,7 +221,9 @@ Parser.prototype.ondeclaration = function(value){
 
 Parser.prototype.onprocessinginstruction = function(value){
 	if(this._cbs.onprocessinginstruction){
-		var name = value.split(/\s|\//, 1)[0];
+		var idx = value.search(re_nameEnd),
+		    name = idx < 0 ? value : value.substr(0, idx);
+
 		if(!(this._options.xmlMode || "lowerCaseTags" in this._options) || this._options.lowerCaseTags){
 			name = name.toLowerCase();
 		}

From e3a75dd476d7597954e9d33bee38d282d01c1a1a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 18 Aug 2013 19:29:45 +0200
Subject: [PATCH 417/450] [parser] added onattribdata and onattribend events,
 dropped onattribvalue

---
 lib/Parser.js | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index e5fad7a..30e4cd2 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -81,6 +81,7 @@ function Parser(cbs, options){
 
 	this._tagname = "";
 	this._attribname = "";
+	this._attribvalue = "";
 	this._attribs = null;
 	this._stack = [];
 	this._done = false;
@@ -134,8 +135,6 @@ Parser.prototype.onopentagname = function(name){
 
 Parser.prototype.onopentagend = function(){
 	this._updatePosition(1);
-
-	if(this._attribname !== "") this.onattribvalue("");
     
 	if(this._attribs){
 		if(this._cbs.onopentag) this._cbs.onopentag(this._tagname, this._attribs);
@@ -189,22 +188,26 @@ Parser.prototype.onselfclosingtag = function(){
 };
 
 Parser.prototype.onattribname = function(name){
-	if(this._attribname !== "") this.onattribvalue("");
 	if(!(this._options.xmlMode || "lowerCaseAttributeNames" in this._options) || this._options.lowerCaseAttributeNames){
 		name = name.toLowerCase();
 	}
 	this._attribname = name;
 };
 
-Parser.prototype.onattribvalue = function attribValue(value){
-	if(this._cbs.onattribute) this._cbs.onattribute(this._attribname, value);
+Parser.prototype.onattribdata = function(value){
+	this._attribvalue += value;
+};
+
+Parser.prototype.onattribend = function(){
+	if(this._cbs.onattribute) this._cbs.onattribute(this._attribname, this._attribvalue);
 	if(
 		this._attribs &&
 		!Object.prototype.hasOwnProperty.call(this._attribs, this._attribname)
 	){
-		this._attribs[this._attribname] = value;
+		this._attribs[this._attribname] = this._attribvalue;
 	}
 	this._attribname = "";
+	this._attribvalue = "";
 };
 
 Parser.prototype.ondeclaration = function(value){

From 8494b03ddcd1634f2f18ddc7855608f376e0ab64 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 18 Aug 2013 19:30:38 +0200
Subject: [PATCH 418/450] [tokenizer] enable support for decoding entities in
 attributes, added onattribend and onattribdata events, removed onattribvalue

---
 lib/Tokenizer.js | 45 ++++++++++++++++++++++++++-------------------
 1 file changed, 26 insertions(+), 19 deletions(-)

diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
index 2768a36..b80cbef 100644
--- a/lib/Tokenizer.js
+++ b/lib/Tokenizer.js
@@ -246,9 +246,11 @@ Tokenizer.prototype._stateAfterAttributeName = function (c) {
 	if(c === "="){
 		this._state = BEFORE_ATTRIBUTE_VALUE;
 	} else if(c === "/" || c === ">"){
+		this._cbs.onattribend();
 		this._state = BEFORE_ATTRIBUTE_NAME;
 		this._index--;
 	} else if(!whitespace(c)){
+		this._cbs.onattribend();
 		this._state = IN_ATTRIBUTE_NAME;
 		this._sectionStart = this._index;
 	}
@@ -269,36 +271,42 @@ Tokenizer.prototype._stateBeforeAttributeValue = function (c) {
 
 Tokenizer.prototype._stateInAttributeValueDoubleQuotes = function (c) {
 	if(c === "\""){
-		this._emitToken("onattribvalue");
+		this._emitToken("onattribdata");
+		this._cbs.onattribend();
 		this._state = BEFORE_ATTRIBUTE_NAME;
-	}/* else if(this._decodeEntities && c === "&"){
-		this._baseState = IN_ATTRIBUTE_VALUE_DQ;
+	} else if(this._decodeEntities && c === "&"){
+		this._emitToken("onattribdata");
+		this._baseState = this._state;
 		this._state = BEFORE_ENTITY;
 		this._sectionStart = this._index;
-	}*/
+	}
 };
 
 Tokenizer.prototype._stateInAttributeValueSingleQuotes = function (c) {
 	if(c === "'"){
-		this._emitToken("onattribvalue");
+		this._emitToken("onattribdata");
+		this._cbs.onattribend();
 		this._state = BEFORE_ATTRIBUTE_NAME;
-	}/* else if(this._decodeEntities && c === "&"){
-		this._baseState = IN_ATTRIBUTE_VALUE_SQ;
+	} else if(this._decodeEntities && c === "&"){
+		this._emitToken("onattribdata");
+		this._baseState = this._state;
 		this._state = BEFORE_ENTITY;
 		this._sectionStart = this._index;
-	}*/
+	}
 };
 
 Tokenizer.prototype._stateInAttributeValueNoQuotes = function (c) {
 	if(whitespace(c) || c === ">"){
-		this._emitToken("onattribvalue");
+		this._emitToken("onattribdata");
+		this._cbs.onattribend();
 		this._state = BEFORE_ATTRIBUTE_NAME;
 		this._index--;
-	}/* else if(this._decodeEntities && c === "&"){
-		this._baseState = IN_ATTRIBUTE_VALUE_NQ;
+	} else if(this._decodeEntities && c === "&"){
+		this._emitToken("onattribdata");
+		this._baseState = this._state;
 		this._state = BEFORE_ENTITY;
 		this._sectionStart = this._index;
-	}*/
+	}
 };
 
 Tokenizer.prototype._stateBeforeDeclaration = function (c) {
@@ -344,7 +352,7 @@ Tokenizer.prototype._stateAfterComment2 = function (c) {
 		this._cbs.oncomment(this._buffer.substring(this._sectionStart, this._index - 2));
 		this._state = TEXT;
 		this._sectionStart = this._index + 1;
-	} else if (c !== "-") {
+	} else if(c !== "-"){
 		this._state = IN_COMMENT;
 	}
 	// else: stay in AFTER_COMMENT_2 (`--->`)
@@ -502,7 +510,7 @@ Tokenizer.prototype._stateInNamedEntity = function(c){
 		this._state = this._baseState;
 	} else if((c < "a" || c > "z") && (c < "A" || c > "Z") && (c < "0" || c > "9")){
 		if(this._xmlMode);
-		/*else if(
+		else if(
 			this._baseState === IN_ATTRIBUTE_VALUE_NQ ||
 			this._baseState === IN_ATTRIBUTE_VALUE_SQ ||
 			this._baseState === IN_ATTRIBUTE_VALUE_DQ
@@ -511,7 +519,7 @@ Tokenizer.prototype._stateInNamedEntity = function(c){
 				this._parseNamedEntityStrict();
 				this._sectionStart--; //include the current character in the section
 			}
-		}*/ else {
+		} else {
 			this._parseLegacyEntity();
 			this._sectionStart--;
 		}
@@ -873,14 +881,13 @@ Tokenizer.prototype._emitToken = function(name){
 };
 
 Tokenizer.prototype._emitPartial = function(value){
-	/*if(
+	if(
 		this._baseState === IN_ATTRIBUTE_VALUE_NQ ||
 		this._baseState === IN_ATTRIBUTE_VALUE_SQ ||
 		this._baseState === IN_ATTRIBUTE_VALUE_DQ
 	) {
-		this._cbs.onattributedata(value); //TODO implement the new event
-	} else*/
-	{
+		this._cbs.onattribdata(value); //TODO implement the new event
+	} else {
 		this._cbs.ontext(value);
 	}
 };

From feafd9dda9c2808f8c59d4534c354b3d21383778 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 18 Aug 2013 19:30:52 +0200
Subject: [PATCH 419/450] [tests] added test case for entities in attributes

---
 tests/Events/21-entity_in_attribute.json | 38 ++++++++++++++++++++++++
 1 file changed, 38 insertions(+)
 create mode 100644 tests/Events/21-entity_in_attribute.json

diff --git a/tests/Events/21-entity_in_attribute.json b/tests/Events/21-entity_in_attribute.json
new file mode 100644
index 0000000..77eb4f3
--- /dev/null
+++ b/tests/Events/21-entity_in_attribute.json
@@ -0,0 +1,38 @@
+{
+  "name": "entity in attribute",
+  "options": {
+    "handler": {},
+    "parser": {"decodeEntities": true}
+  },
+  "html": "<a href='http://example.com/p&#x61;ge?param=value&param2=&lt;val'>",
+  "expected": [
+    {
+      "event": "opentagname",
+      "data": [
+        "a"
+      ]
+    },
+    {
+      "event": "attribute",
+      "data": [
+        "href",
+        "http://example.com/page?param=value&param2=<val"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "a",
+        {
+          "href": "http://example.com/page?param=value&param2=<val"
+        }
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "a"
+      ]
+    }
+  ]
+}
\ No newline at end of file

From 311e48e29f8cb9e01493c967c2f3368271b17614 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 18 Aug 2013 19:31:14 +0200
Subject: [PATCH 420/450] 3.2.1

---
 package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package.json b/package.json
index 0bb44d6..b3d5d63 100644
--- a/package.json
+++ b/package.json
@@ -1,7 +1,7 @@
 {
 	"name": "htmlparser2",
 	"description": "Fast & forgiving HTML/XML/RSS parser",
-	"version": "3.2.0",
+	"version": "3.2.1",
 	"author": "Felix Boehm <me@feedic.com>",
 	"keywords": ["html", "parser", "streams", "xml", "dom", "rss", "feed", "atom"],
 	"contributors": ["Chris Winberry <chris@winberry.net>"],

From e2fa4854d9e334d99d4220c51469d88340997822 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 18 Aug 2013 19:58:21 +0200
Subject: [PATCH 421/450] [tokenizer] don't decode entities in special tags

---
 lib/Tokenizer.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
index b80cbef..f6834d3 100644
--- a/lib/Tokenizer.js
+++ b/lib/Tokenizer.js
@@ -130,7 +130,7 @@ Tokenizer.prototype._stateText = function (c) {
 		}
 		this._state = BEFORE_TAG_NAME;
 		this._sectionStart = this._index;
-	} else if(this._decodeEntities && c === "&"){
+	} else if(this._decodeEntities && c === "&" && this._baseState === TEXT){
 		if(this._index > this._sectionStart){
 			this._cbs.ontext(this._getSection());
 		}

From 36ee76e96e454e10bd6a82c576b16af36a554f91 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 18 Aug 2013 20:07:22 +0200
Subject: [PATCH 422/450] 3.2.2

---
 package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package.json b/package.json
index b3d5d63..ed0396b 100644
--- a/package.json
+++ b/package.json
@@ -1,7 +1,7 @@
 {
 	"name": "htmlparser2",
 	"description": "Fast & forgiving HTML/XML/RSS parser",
-	"version": "3.2.1",
+	"version": "3.2.2",
 	"author": "Felix Boehm <me@feedic.com>",
 	"keywords": ["html", "parser", "streams", "xml", "dom", "rss", "feed", "atom"],
 	"contributors": ["Chris Winberry <chris@winberry.net>"],

From cce466cf8475d8164276480a07ea3c600b4ff286 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 18 Aug 2013 22:16:10 +0200
Subject: [PATCH 423/450] [tokenizer] reintroduced _special, removed IN_SCRIPT
 and IN_STYLE

also fixed some semantics
---
 lib/Tokenizer.js | 115 ++++++++++++++++++++---------------------------
 1 file changed, 48 insertions(+), 67 deletions(-)

diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
index f6834d3..afd20ee 100644
--- a/lib/Tokenizer.js
+++ b/lib/Tokenizer.js
@@ -57,7 +57,6 @@ var entityMap = require("./entities/entities.json"),
     BEFORE_SCRIPT_3           = i++, //I
     BEFORE_SCRIPT_4           = i++, //P
     BEFORE_SCRIPT_5           = i++, //T
-    IN_SCRIPT                 = i++,
     AFTER_SCRIPT_1            = i++, //C
     AFTER_SCRIPT_2            = i++, //R
     AFTER_SCRIPT_3            = i++, //I
@@ -68,7 +67,6 @@ var entityMap = require("./entities/entities.json"),
     BEFORE_STYLE_2            = i++, //Y
     BEFORE_STYLE_3            = i++, //L
     BEFORE_STYLE_4            = i++, //E
-    IN_STYLE                  = i++,
     AFTER_STYLE_1             = i++, //T
     AFTER_STYLE_2             = i++, //Y
     AFTER_STYLE_3             = i++, //L
@@ -78,7 +76,13 @@ var entityMap = require("./entities/entities.json"),
     BEFORE_NUMERIC_ENTITY     = i++, //#
     IN_NAMED_ENTITY           = i++,
     IN_NUMERIC_ENTITY         = i++,
-    IN_HEX_ENTITY             = i++; //X
+    IN_HEX_ENTITY             = i++, //X
+
+    j = 0,
+
+    SPECIAL_NONE              = j++,
+    SPECIAL_SCRIPT            = j++,
+    SPECIAL_STYLE             = j++;
 
 function whitespace(c){
 	return c === " " || c === "\n" || c === "\t" || c === "\f" || c === "\r";
@@ -117,32 +121,34 @@ function Tokenizer(options, cbs){
 	this._sectionStart = 0;
 	this._index = 0;
 	this._baseState = TEXT;
+	this._special = SPECIAL_NONE;
 	this._cbs = cbs;
 	this._running = true;
 	this._xmlMode = !!(options && options.xmlMode);
 	this._decodeEntities = !!(options && options.decodeEntities);
 }
 
-Tokenizer.prototype._stateText = function (c) {
+Tokenizer.prototype._stateText = function(c){
 	if(c === "<"){
 		if(this._index > this._sectionStart){
 			this._cbs.ontext(this._getSection());
 		}
 		this._state = BEFORE_TAG_NAME;
 		this._sectionStart = this._index;
-	} else if(this._decodeEntities && c === "&" && this._baseState === TEXT){
+	} else if(this._decodeEntities && this._special === SPECIAL_NONE && c === "&"){
 		if(this._index > this._sectionStart){
 			this._cbs.ontext(this._getSection());
 		}
+		this._baseState = TEXT;
 		this._state = BEFORE_ENTITY;
 		this._sectionStart = this._index;
 	}
 };
 
-Tokenizer.prototype._stateBeforeTagName = function (c) {
+Tokenizer.prototype._stateBeforeTagName = function(c){
 	if(c === "/"){
 		this._state = BEFORE_CLOSING_TAG_NAME;
-	} else if(c === ">" || this._baseState !== TEXT || whitespace(c)) {
+	} else if(c === ">" || this._special !== SPECIAL_NONE || whitespace(c)) {
 		this._state = TEXT;
 	} else if(c === "!"){
 		this._state = BEFORE_DECLARATION;
@@ -157,7 +163,7 @@ Tokenizer.prototype._stateBeforeTagName = function (c) {
 	}
 };
 
-Tokenizer.prototype._stateInTagName = function (c) {
+Tokenizer.prototype._stateInTagName = function(c){
 	if(c === "/" || c === ">" || whitespace(c)){
 		this._emitToken("onopentagname");
 		this._state = BEFORE_ATTRIBUTE_NAME;
@@ -165,11 +171,11 @@ Tokenizer.prototype._stateInTagName = function (c) {
 	}
 };
 
-Tokenizer.prototype._stateBeforeCloseingTagName = function (c) {
+Tokenizer.prototype._stateBeforeCloseingTagName = function(c){
 	if(whitespace(c));
 	else if(c === ">"){
 		this._state = TEXT;
-	} else if(this._baseState !== TEXT){
+	} else if(this._special !== SPECIAL_NONE){
 		if(c === "s" || c === "S"){
 			this._state = BEFORE_SPECIAL_END;
 		} else {
@@ -182,16 +188,15 @@ Tokenizer.prototype._stateBeforeCloseingTagName = function (c) {
 	}
 };
 
-Tokenizer.prototype._stateInCloseingTagName = function (c) {
+Tokenizer.prototype._stateInCloseingTagName = function(c){
 	if(c === ">" || whitespace(c)){
 		this._emitToken("onclosetag");
 		this._state = AFTER_CLOSING_TAG_NAME;
-		this._baseState = TEXT;
 		this._index--;
 	}
 };
 
-Tokenizer.prototype._stateAfterCloseingTagName = function (c) {
+Tokenizer.prototype._stateAfterCloseingTagName = function(c){
 	//skip everything until ">"
 	if(c === ">"){
 		this._state = TEXT;
@@ -199,15 +204,11 @@ Tokenizer.prototype._stateAfterCloseingTagName = function (c) {
 	}
 };
 
-Tokenizer.prototype._stateBeforeAttributeName = function (c) {
+Tokenizer.prototype._stateBeforeAttributeName = function(c){
 	if(c === ">"){
 		this._cbs.onopentagend();
 		this._state = TEXT;
 		this._sectionStart = this._index + 1;
-
-		if(this._baseState !== IN_SCRIPT && this._baseState !== IN_STYLE){
-			this._baseState = TEXT;
-		}
 	} else if(c === "/"){
 		this._state = IN_SELF_CLOSING_TAG;
 	} else if(!whitespace(c)){
@@ -221,17 +222,13 @@ Tokenizer.prototype._stateInSelfClosingTag = function(c){
 		this._cbs.onselfclosingtag();
 		this._state = TEXT;
 		this._sectionStart = this._index + 1;
-
-		if(this._baseState !== IN_SCRIPT && this._baseState !== IN_STYLE){
-			this._baseState = TEXT;
-		}
 	} else if(!whitespace(c)){
 		this._state = BEFORE_ATTRIBUTE_NAME;
 		this._index--;
 	}
 };
 
-Tokenizer.prototype._stateInAttributeName = function (c) {
+Tokenizer.prototype._stateInAttributeName = function(c){
 	if(c === "=" || c === "/" || c === ">" || whitespace(c)){
 		if(this._index > this._sectionStart){
 			this._cbs.onattribname(this._getSection());
@@ -242,7 +239,7 @@ Tokenizer.prototype._stateInAttributeName = function (c) {
 	}
 };
 
-Tokenizer.prototype._stateAfterAttributeName = function (c) {
+Tokenizer.prototype._stateAfterAttributeName = function(c){
 	if(c === "="){
 		this._state = BEFORE_ATTRIBUTE_VALUE;
 	} else if(c === "/" || c === ">"){
@@ -256,7 +253,7 @@ Tokenizer.prototype._stateAfterAttributeName = function (c) {
 	}
 };
 
-Tokenizer.prototype._stateBeforeAttributeValue = function (c) {
+Tokenizer.prototype._stateBeforeAttributeValue = function(c){
 	if(c === "\""){
 		this._state = IN_ATTRIBUTE_VALUE_DQ;
 		this._sectionStart = this._index + 1;
@@ -269,7 +266,7 @@ Tokenizer.prototype._stateBeforeAttributeValue = function (c) {
 	}
 };
 
-Tokenizer.prototype._stateInAttributeValueDoubleQuotes = function (c) {
+Tokenizer.prototype._stateInAttributeValueDoubleQuotes = function(c){
 	if(c === "\""){
 		this._emitToken("onattribdata");
 		this._cbs.onattribend();
@@ -282,7 +279,7 @@ Tokenizer.prototype._stateInAttributeValueDoubleQuotes = function (c) {
 	}
 };
 
-Tokenizer.prototype._stateInAttributeValueSingleQuotes = function (c) {
+Tokenizer.prototype._stateInAttributeValueSingleQuotes = function(c){
 	if(c === "'"){
 		this._emitToken("onattribdata");
 		this._cbs.onattribend();
@@ -295,7 +292,7 @@ Tokenizer.prototype._stateInAttributeValueSingleQuotes = function (c) {
 	}
 };
 
-Tokenizer.prototype._stateInAttributeValueNoQuotes = function (c) {
+Tokenizer.prototype._stateInAttributeValueNoQuotes = function(c){
 	if(whitespace(c) || c === ">"){
 		this._emitToken("onattribdata");
 		this._cbs.onattribend();
@@ -309,13 +306,13 @@ Tokenizer.prototype._stateInAttributeValueNoQuotes = function (c) {
 	}
 };
 
-Tokenizer.prototype._stateBeforeDeclaration = function (c) {
+Tokenizer.prototype._stateBeforeDeclaration = function(c){
 	this._state = c === "[" ? BEFORE_CDATA_1 :
 					c === "-" ? BEFORE_COMMENT :
 						IN_DECLARATION;
 };
 
-Tokenizer.prototype._stateInDeclaration = function (c) {
+Tokenizer.prototype._stateInDeclaration = function(c){
 	if(c === ">"){
 		this._cbs.ondeclaration(this._getSection());
 		this._state = TEXT;
@@ -323,7 +320,7 @@ Tokenizer.prototype._stateInDeclaration = function (c) {
 	}
 };
 
-Tokenizer.prototype._stateInProcessingInstruction = function (c) {
+Tokenizer.prototype._stateInProcessingInstruction = function(c){
 	if(c === ">"){
 		this._cbs.onprocessinginstruction(this._getSection());
 		this._state = TEXT;
@@ -331,7 +328,7 @@ Tokenizer.prototype._stateInProcessingInstruction = function (c) {
 	}
 };
 
-Tokenizer.prototype._stateBeforeComment = function (c) {
+Tokenizer.prototype._stateBeforeComment = function(c){
 	if(c === "-"){
 		this._state = IN_COMMENT;
 		this._sectionStart = this._index + 1;
@@ -340,13 +337,13 @@ Tokenizer.prototype._stateBeforeComment = function (c) {
 	}
 };
 
-Tokenizer.prototype._stateInComment = function (c) {
+Tokenizer.prototype._stateInComment = function(c){
 	if(c === "-") this._state = AFTER_COMMENT_1;
 };
 
 Tokenizer.prototype._stateAfterComment1 = ifElseState("-", AFTER_COMMENT_2, IN_COMMENT);
 
-Tokenizer.prototype._stateAfterComment2 = function (c) {
+Tokenizer.prototype._stateAfterComment2 = function(c){
 	if(c === ">"){
 		//remove 2 trailing chars
 		this._cbs.oncomment(this._buffer.substring(this._sectionStart, this._index - 2));
@@ -364,7 +361,7 @@ Tokenizer.prototype._stateBeforeCdata3 = ifElseState("A", BEFORE_CDATA_4, IN_DEC
 Tokenizer.prototype._stateBeforeCdata4 = ifElseState("T", BEFORE_CDATA_5, IN_DECLARATION);
 Tokenizer.prototype._stateBeforeCdata5 = ifElseState("A", BEFORE_CDATA_6, IN_DECLARATION);
 
-Tokenizer.prototype._stateBeforeCdata6 = function (c) {
+Tokenizer.prototype._stateBeforeCdata6 = function(c){
 	if(c === "["){
 		this._state = IN_CDATA;
 		this._sectionStart = this._index + 1;
@@ -373,13 +370,13 @@ Tokenizer.prototype._stateBeforeCdata6 = function (c) {
 	}
 };
 
-Tokenizer.prototype._stateInCdata = function (c) {
+Tokenizer.prototype._stateInCdata = function(c){
 	if(c === "]") this._state = AFTER_CDATA_1;
 };
 
 Tokenizer.prototype._stateAfterCdata1 = ifElseState("]", AFTER_CDATA_2, IN_CDATA);
 
-Tokenizer.prototype._stateAfterCdata2 = function (c) {
+Tokenizer.prototype._stateAfterCdata2 = function(c){
 	if(c === ">"){
 		//remove 2 trailing chars
 		this._cbs.oncdata(this._buffer.substring(this._sectionStart, this._index - 2));
@@ -391,7 +388,7 @@ Tokenizer.prototype._stateAfterCdata2 = function (c) {
 	//else: stay in AFTER_CDATA_2 (`]]]>`)
 };
 
-Tokenizer.prototype._stateBeforeSpecial = function (c) {
+Tokenizer.prototype._stateBeforeSpecial = function(c){
 	if(c === "c" || c === "C"){
 		this._state = BEFORE_SCRIPT_1;
 	} else if(c === "t" || c === "T"){
@@ -402,10 +399,10 @@ Tokenizer.prototype._stateBeforeSpecial = function (c) {
 	}
 };
 
-Tokenizer.prototype._stateBeforeSpecialEnd = function (c) {
-	if(this._baseState === IN_SCRIPT && (c === "c" || c === "C")){
+Tokenizer.prototype._stateBeforeSpecialEnd = function(c){
+	if(this._special === SPECIAL_SCRIPT && (c === "c" || c === "C")){
 		this._state = AFTER_SCRIPT_1;
-	} else if(this._baseState === IN_SCRIPT && (c === "t" || c === "T")){
+	} else if(this._special === SPECIAL_STYLE && (c === "t" || c === "T")){
 		this._state = AFTER_STYLE_1;
 	}
 	else this._state = TEXT;
@@ -416,9 +413,9 @@ Tokenizer.prototype._stateBeforeScript2 = consumeSpecialNameChar("I", BEFORE_SCR
 Tokenizer.prototype._stateBeforeScript3 = consumeSpecialNameChar("P", BEFORE_SCRIPT_4);
 Tokenizer.prototype._stateBeforeScript4 = consumeSpecialNameChar("T", BEFORE_SCRIPT_5);
 
-Tokenizer.prototype._stateBeforeScript5 = function (c) {
+Tokenizer.prototype._stateBeforeScript5 = function(c){
 	if(c === "/" || c === ">" || whitespace(c)){
-		this._baseState = IN_SCRIPT;
+		this._special = SPECIAL_SCRIPT;
 	}
 	this._state = IN_TAG_NAME;
 	this._index--; //consume the token again
@@ -429,8 +426,9 @@ Tokenizer.prototype._stateAfterScript2 = ifElseState("I", AFTER_SCRIPT_3, TEXT);
 Tokenizer.prototype._stateAfterScript3 = ifElseState("P", AFTER_SCRIPT_4, TEXT);
 Tokenizer.prototype._stateAfterScript4 = ifElseState("T", AFTER_SCRIPT_5, TEXT);
 
-Tokenizer.prototype._stateAfterScript5 = function (c) {
+Tokenizer.prototype._stateAfterScript5 = function(c){
 	if(c === ">" || whitespace(c)){
+		this._special = SPECIAL_NONE;
 		this._state = IN_CLOSING_TAG_NAME;
 		this._sectionStart = this._index - 6;
 		this._index--; //reconsume the token
@@ -442,9 +440,9 @@ Tokenizer.prototype._stateBeforeStyle1 = consumeSpecialNameChar("Y", BEFORE_STYL
 Tokenizer.prototype._stateBeforeStyle2 = consumeSpecialNameChar("L", BEFORE_STYLE_3);
 Tokenizer.prototype._stateBeforeStyle3 = consumeSpecialNameChar("E", BEFORE_STYLE_4);
 
-Tokenizer.prototype._stateBeforeStyle4 = function (c) {
+Tokenizer.prototype._stateBeforeStyle4 = function(c){
 	if(c === "/" || c === ">" || whitespace(c)){
-		this._baseState = IN_STYLE;
+		this._special = SPECIAL_STYLE;
 	}
 	this._state = IN_TAG_NAME;
 	this._index--; //consume the token again
@@ -454,8 +452,9 @@ Tokenizer.prototype._stateAfterStyle1 = ifElseState("Y", AFTER_STYLE_2, TEXT);
 Tokenizer.prototype._stateAfterStyle2 = ifElseState("L", AFTER_STYLE_3, TEXT);
 Tokenizer.prototype._stateAfterStyle3 = ifElseState("E", AFTER_STYLE_4, TEXT);
 
-Tokenizer.prototype._stateAfterStyle4 = function (c) {
+Tokenizer.prototype._stateAfterStyle4 = function(c){
 	if(c === ">" || whitespace(c)){
+		this._special = SPECIAL_NONE;
 		this._state = IN_CLOSING_TAG_NAME;
 		this._sectionStart = this._index - 5;
 		this._index--; //reconsume the token
@@ -510,11 +509,7 @@ Tokenizer.prototype._stateInNamedEntity = function(c){
 		this._state = this._baseState;
 	} else if((c < "a" || c > "z") && (c < "A" || c > "Z") && (c < "0" || c > "9")){
 		if(this._xmlMode);
-		else if(
-			this._baseState === IN_ATTRIBUTE_VALUE_NQ ||
-			this._baseState === IN_ATTRIBUTE_VALUE_SQ ||
-			this._baseState === IN_ATTRIBUTE_VALUE_DQ
-		) {
+		else if(this._baseState !== TEXT){
 			if(c !== "="){
 				this._parseNamedEntityStrict();
 				this._sectionStart--; //include the current character in the section
@@ -737,11 +732,6 @@ Tokenizer.prototype.write = function(chunk){
 			this._stateBeforeScript5(c);
 		}
 
-		else if(this._state === IN_SCRIPT){
-			this._state = TEXT;
-			this._index--;
-		}
-
 		else if(this._state === AFTER_SCRIPT_1){
 			this._stateAfterScript1(c);
 		} else if(this._state === AFTER_SCRIPT_2){
@@ -767,11 +757,6 @@ Tokenizer.prototype.write = function(chunk){
 			this._stateBeforeStyle4(c);
 		}
 
-		else if(this._state === IN_STYLE){
-			this._state = TEXT;
-			this._index--;
-		}
-
 		else if(this._state === AFTER_STYLE_1){
 			this._stateAfterStyle1(c);
 		} else if(this._state === AFTER_STYLE_2){
@@ -881,11 +866,7 @@ Tokenizer.prototype._emitToken = function(name){
 };
 
 Tokenizer.prototype._emitPartial = function(value){
-	if(
-		this._baseState === IN_ATTRIBUTE_VALUE_NQ ||
-		this._baseState === IN_ATTRIBUTE_VALUE_SQ ||
-		this._baseState === IN_ATTRIBUTE_VALUE_DQ
-	) {
+	if(this._baseState !== TEXT){
 		this._cbs.onattribdata(value); //TODO implement the new event
 	} else {
 		this._cbs.ontext(value);

From effc3a9820a91ac34eb41ffca76d9947ff4dba1a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 18 Aug 2013 22:23:46 +0200
Subject: [PATCH 424/450] 3.2.3

---
 package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package.json b/package.json
index ed0396b..01af48c 100644
--- a/package.json
+++ b/package.json
@@ -1,7 +1,7 @@
 {
 	"name": "htmlparser2",
 	"description": "Fast & forgiving HTML/XML/RSS parser",
-	"version": "3.2.2",
+	"version": "3.2.3",
 	"author": "Felix Boehm <me@feedic.com>",
 	"keywords": ["html", "parser", "streams", "xml", "dom", "rss", "feed", "atom"],
 	"contributors": ["Chris Winberry <chris@winberry.net>"],

From e4fb613ae17888d74545601e3054d6058b672f8a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20B=C3=B6hm?= <me@feedic.com>
Date: Wed, 21 Aug 2013 15:04:47 +0200
Subject: [PATCH 425/450] only respect self-closing tags in XML mode

---
 lib/Parser.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index 30e4cd2..04de718 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -179,7 +179,7 @@ Parser.prototype.onselfclosingtag = function(){
 
 	//self-closing tags will be on the top of the stack
 	//(cheaper check than in onclosetag)
-	if(this._stack[this._stack.length-1] === name){
+	if(!this._options.xmlMode && this._stack[this._stack.length-1] === name){
 		if(this._cbs.onclosetag){
 			this._cbs.onclosetag(name);
 		}

From 80a1ecb72abd7a985cafb7145421389ab6a45021 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Thu, 22 Aug 2013 10:56:33 +0200
Subject: [PATCH 426/450] [parser] properly removed self-closing tag support

also replaced call to `Array#slice` with setting the stack's `length`
property
---
 lib/Parser.js | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/lib/Parser.js b/lib/Parser.js
index 04de718..e94e346 100644
--- a/lib/Parser.js
+++ b/lib/Parser.js
@@ -154,6 +154,7 @@ Parser.prototype.onclosetag = function(name){
 	if(!(this._options.xmlMode || "lowerCaseTags" in this._options) || this._options.lowerCaseTags){
 		name = name.toLowerCase();
 	}
+
 	if(this._stack.length && (!(name in voidElements) || this._options.xmlMode)){
 		var pos = this._stack.lastIndexOf(name);
 		if(pos !== -1){
@@ -161,25 +162,33 @@ Parser.prototype.onclosetag = function(name){
 				pos = this._stack.length - pos;
 				while(pos--) this._cbs.onclosetag(this._stack.pop());
 			}
-			else this._stack.splice(pos);
+			else this._stack.length = pos;
 		} else if(name === "p" && !this._options.xmlMode){
 			this.onopentagname(name);
-			this.onselfclosingtag();
+			this._closeCurrentTag();
 		}
 	} else if(!this._options.xmlMode && (name === "br" || name === "p")){
 		this.onopentagname(name);
-		this.onselfclosingtag();
+		this._closeCurrentTag();
 	}
 };
 
 Parser.prototype.onselfclosingtag = function(){
+	if(this._options.xmlMode){
+		this._closeCurrentTag();
+	} else {
+		this.onopentagend();
+	}
+};
+
+Parser.prototype._closeCurrentTag = function(){
 	var name = this._tagname;
 
 	this.onopentagend();
 
 	//self-closing tags will be on the top of the stack
 	//(cheaper check than in onclosetag)
-	if(!this._options.xmlMode && this._stack[this._stack.length-1] === name){
+	if(this._stack[this._stack.length-1] === name){
 		if(this._cbs.onclosetag){
 			this._cbs.onclosetag(name);
 		}

From 0347cd7c7631250916ca5a0942c1b7ef939431b7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Thu, 22 Aug 2013 11:59:04 +0200
Subject: [PATCH 427/450] [tests] read files in the tests file, improved os
 interoperability of stream test

---
 tests/00-runtests.js            | 20 +++++---------------
 tests/01-events.js              |  8 ++++----
 tests/02-stream.js              | 21 +++++++++++++--------
 tests/03-feed.js                |  9 +++++----
 tests/Stream/01-basic.json      |  2 +-
 tests/Stream/02-RSS.json        |  2 +-
 tests/Stream/03-Atom.json       |  2 +-
 tests/Stream/04-RDF.json        |  2 +-
 tests/Stream/05-Attributes.json |  2 +-
 tests/test-helper.js            | 33 +++++++++++++++++++++++++++++----
 10 files changed, 61 insertions(+), 40 deletions(-)

diff --git a/tests/00-runtests.js b/tests/00-runtests.js
index c8c51f7..ac2d100 100644
--- a/tests/00-runtests.js
+++ b/tests/00-runtests.js
@@ -1,5 +1,4 @@
-var fs = require("fs"),
-    path = require("path"),
+var helper = require("./test-helper.js"),
     assert = require("assert");
 
 var runCount = 0,
@@ -13,27 +12,18 @@ var runCount = 0,
 ]
 .map(require)
 .forEach(function (test){
-	console.log("\nStarting", test.dir, "\n----");
+	console.log("\nStarting", test.name, "\n----");
 
-	var dir = path.resolve(__dirname, test.dir);
-
-	//read files, load them, run them
-	fs
-	.readdirSync(dir)
-	.filter(RegExp.prototype.test, /^[^\._]/) //ignore all files with a leading dot or underscore
-	.map(function(name){
-		return path.resolve(dir, name);
-	})
-	.map(require)
+	test.files
 	.forEach(function(file){
 		runCount++;
 		
 		console.log("Testing:", file.name);
 		
 		var second = false; //every test runs twice
-		test.test(file, function(err, dom){
+		test(file, function(err, dom){
 			assert.ifError(err);
-			assert.deepEqual(file.expected, dom, "didn't get expected output");
+			helper.deepEqual(file.expected, dom, "didn't get expected output");
 						
 			if(second){
 				testCount++;
diff --git a/tests/01-events.js b/tests/01-events.js
index 16dbfc7..bf41671 100644
--- a/tests/01-events.js
+++ b/tests/01-events.js
@@ -1,11 +1,11 @@
 var helper = require("./test-helper.js");
 
-exports.dir = "Events";
-
-exports.test = function(test, cb){
+module.exports = function events(test, cb){
 	helper.writeToParser(
 		helper.getEventCollector(cb),
 		test.options.parser,
 		test.html
 	);
-};
\ No newline at end of file
+};
+
+module.exports.files = helper.readFiles(__dirname, "Events");
\ No newline at end of file
diff --git a/tests/02-stream.js b/tests/02-stream.js
index fc22fed..9f1accd 100644
--- a/tests/02-stream.js
+++ b/tests/02-stream.js
@@ -1,11 +1,11 @@
 var helper = require("./test-helper.js"),
 	Stream = require("..").WritableStream,
-	fs = require("fs");
+	fs = require("fs"),
+	path = require("path");
 
-exports.dir = "Stream";
-
-exports.test = function(test, cb){
-	fs.createReadStream(__dirname + test.file).pipe(
+module.exports = function streams(test, cb){
+	var filePath = path.join(__dirname, "Documents", test.file);
+	fs.createReadStream(filePath).pipe(
 		new Stream(
 			helper.getEventCollector(function(err, events){
 				cb(err, events);
@@ -13,8 +13,13 @@ exports.test = function(test, cb){
 				var handler = helper.getEventCollector(cb),
 				    stream = new Stream(handler, test.options);
 
-				stream.end(fs.readFileSync(__dirname + test.file));
+				fs.readFile(filePath, function(err, data){
+					if(err) throw err;
+					else stream.end(data);
+				});
 			}
 		), test.options)
-	);
-};
\ No newline at end of file
+	).on("error", cb);
+};
+
+module.exports.files = helper.readFiles(__dirname, "Stream");
\ No newline at end of file
diff --git a/tests/03-feed.js b/tests/03-feed.js
index 230f925..bf6f9c6 100644
--- a/tests/03-feed.js
+++ b/tests/03-feed.js
@@ -3,17 +3,18 @@
 var helper = require("./test-helper.js"),
 	FeedHandler = require("../lib/FeedHandler.js"),
 	fs = require("fs"),
+	files = helper.readFiles(__dirname, "Feeds"),
 	parserOpts = {
 		xmlMode: true
 	};
 
-exports.dir = "Feeds";
-
-exports.test = function(test, cb){
+module.exports = function feeds(test, cb){
 	var handler = new FeedHandler(function(err, dom){
 		if(err) cb(err, 0); //return the error
 		else cb(null, dom);
 	});
 	var file = fs.readFileSync(__dirname + "/Documents/" + test.file).toString();
 	helper.writeToParser(handler, parserOpts, file);
-};
\ No newline at end of file
+};
+
+module.exports.files = files;
\ No newline at end of file
diff --git a/tests/Stream/01-basic.json b/tests/Stream/01-basic.json
index 9ae3e3f..e0766e7 100644
--- a/tests/Stream/01-basic.json
+++ b/tests/Stream/01-basic.json
@@ -1,7 +1,7 @@
 {
   "name": "Basic html",
   "options": {},
-  "file": "/Documents/Basic.html",
+  "file": "Basic.html",
   "expected": [
     {
       "event": "processinginstruction",
diff --git a/tests/Stream/02-RSS.json b/tests/Stream/02-RSS.json
index c615326..0d5921c 100644
--- a/tests/Stream/02-RSS.json
+++ b/tests/Stream/02-RSS.json
@@ -1,7 +1,7 @@
 {
   "name": "RSS feed",
   "options": {"xmlMode": true},
-  "file": "/Documents/RSS_Example.xml",
+  "file": "RSS_Example.xml",
   "expected": [
     {
       "event": "processinginstruction",
diff --git a/tests/Stream/03-Atom.json b/tests/Stream/03-Atom.json
index 965a538..4c81b24 100644
--- a/tests/Stream/03-Atom.json
+++ b/tests/Stream/03-Atom.json
@@ -1,7 +1,7 @@
 {
   "name": "Atom feed",
   "options": {"xmlMode": true},
-  "file": "/Documents/Atom_Example.xml",
+  "file": "Atom_Example.xml",
   "expected": [
     {
       "event": "processinginstruction",
diff --git a/tests/Stream/04-RDF.json b/tests/Stream/04-RDF.json
index b4d0f18..7ebf516 100644
--- a/tests/Stream/04-RDF.json
+++ b/tests/Stream/04-RDF.json
@@ -1,7 +1,7 @@
 {
   "name": "RDF feed",
   "options": {"xmlMode": true},
-  "file": "/Documents/RDF_Example.xml",
+  "file": "RDF_Example.xml",
   "expected": [
     {
       "event": "processinginstruction",
diff --git a/tests/Stream/05-Attributes.json b/tests/Stream/05-Attributes.json
index a1bd06d..ad364c0 100644
--- a/tests/Stream/05-Attributes.json
+++ b/tests/Stream/05-Attributes.json
@@ -1,7 +1,7 @@
 {
   "name": "Attributes",
   "options": {},
-  "file": "/Documents/Attributes.html",
+  "file": "Attributes.html",
   "expected": [
     {
       "event": "processinginstruction",
diff --git a/tests/test-helper.js b/tests/test-helper.js
index ccf9804..6b27395 100644
--- a/tests/test-helper.js
+++ b/tests/test-helper.js
@@ -1,6 +1,9 @@
-var htmlparser = require(".."),
-	Parser = htmlparser.Parser,
-	CollectingHandler = htmlparser.CollectingHandler,
+var htmlparser2 = require(".."),
+    fs = require("fs"),
+    path = require("path"),
+    assert = require("assert"),
+	Parser = htmlparser2.Parser,
+	CollectingHandler = htmlparser2.CollectingHandler,
 	chunkSize = 5;
 
 exports.writeToParser = function(handler, options, data){
@@ -24,7 +27,7 @@ exports.getEventCollector = function(cb){
 					events[events.length-1].data[0] += arr[1];
 				} else {
 					events.push({
-						event: arr[0].slice(2),
+						event: arr[0].substr(2),
 						data: arr.slice(1)
 					});
 				}
@@ -35,4 +38,26 @@ exports.getEventCollector = function(cb){
 	}});
 
 	return handler;
+};
+
+exports.readFiles = function(root, folder){
+	var dir = path.join(root, folder);
+
+	return fs
+			.readdirSync(dir)
+			.filter(RegExp.prototype.test, /^[^\._]/) //ignore all files with a leading dot or underscore
+			.map(function(name){
+				return path.join(dir, name);
+			})
+			.map(require);
+};
+
+exports.deepEqual = function(expected, actual, message){
+	try {
+		assert.deepEqual(expected, actual, message);
+	} catch(e){
+		e.expected = JSON.stringify(expected, null, 2);
+		e.actual = JSON.stringify(actual, null, 2);
+		throw e;
+	}
 };
\ No newline at end of file

From be0dafa77e9b90821e2f9ed538f626b45995ad00 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Thu, 22 Aug 2013 12:33:51 +0200
Subject: [PATCH 428/450] [tests] added helper.getCallback method

---
 tests/00-runtests.js | 16 +++++-----------
 tests/test-helper.js | 16 +++++++++++++++-
 2 files changed, 20 insertions(+), 12 deletions(-)

diff --git a/tests/00-runtests.js b/tests/00-runtests.js
index ac2d100..78aec97 100644
--- a/tests/00-runtests.js
+++ b/tests/00-runtests.js
@@ -21,18 +21,12 @@ var runCount = 0,
 		console.log("Testing:", file.name);
 		
 		var second = false; //every test runs twice
-		test(file, function(err, dom){
-			assert.ifError(err);
-			helper.deepEqual(file.expected, dom, "didn't get expected output");
-						
-			if(second){
-				testCount++;
-				if(!--runCount && done){
-					console.log("Total tests:", testCount);
-				}
+		test(file, helper.getCallback(file.expected, function(){
+			testCount++;
+			if(!--runCount && done){
+				console.log("Total tests:", testCount);
 			}
-			else second = true;
-		});
+		}));
 	});
 });
 
diff --git a/tests/test-helper.js b/tests/test-helper.js
index 6b27395..b78b15f 100644
--- a/tests/test-helper.js
+++ b/tests/test-helper.js
@@ -52,7 +52,7 @@ exports.readFiles = function(root, folder){
 			.map(require);
 };
 
-exports.deepEqual = function(expected, actual, message){
+function deepEqual(expected, actual, message){
 	try {
 		assert.deepEqual(expected, actual, message);
 	} catch(e){
@@ -60,4 +60,18 @@ exports.deepEqual = function(expected, actual, message){
 		e.actual = JSON.stringify(actual, null, 2);
 		throw e;
 	}
+}
+
+exports.deepEqual = deepEqual;
+
+exports.getCallback = function(expected, done){
+	var repeated = false;
+
+	return function(err, dom){
+		assert.ifError(err);
+		deepEqual(expected, dom, "didn't get expected output");
+
+		if(repeated) done();
+		else repeated = true;
+	};
 };
\ No newline at end of file

From b948e864297a619cff373ebf0a397eb677b8cc6d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 25 Aug 2013 14:13:33 +0200
Subject: [PATCH 429/450] [tests] converted tests to mocha

---
 tests/01-events.js   |  6 ++---
 tests/02-stream.js   |  6 ++---
 tests/03-feed.js     | 24 +++++++++---------
 tests/test-helper.js | 59 ++++++++++++++++++++++++--------------------
 4 files changed, 49 insertions(+), 46 deletions(-)

diff --git a/tests/01-events.js b/tests/01-events.js
index bf41671..a3c7cf3 100644
--- a/tests/01-events.js
+++ b/tests/01-events.js
@@ -1,11 +1,9 @@
 var helper = require("./test-helper.js");
 
-module.exports = function events(test, cb){
+helper.mochaTest("Events", __dirname, function(test, cb){
 	helper.writeToParser(
 		helper.getEventCollector(cb),
 		test.options.parser,
 		test.html
 	);
-};
-
-module.exports.files = helper.readFiles(__dirname, "Events");
\ No newline at end of file
+});
\ No newline at end of file
diff --git a/tests/02-stream.js b/tests/02-stream.js
index 9f1accd..3403980 100644
--- a/tests/02-stream.js
+++ b/tests/02-stream.js
@@ -3,7 +3,7 @@ var helper = require("./test-helper.js"),
 	fs = require("fs"),
 	path = require("path");
 
-module.exports = function streams(test, cb){
+helper.mochaTest("Stream", __dirname, function(test, cb){
 	var filePath = path.join(__dirname, "Documents", test.file);
 	fs.createReadStream(filePath).pipe(
 		new Stream(
@@ -20,6 +20,4 @@ module.exports = function streams(test, cb){
 			}
 		), test.options)
 	).on("error", cb);
-};
-
-module.exports.files = helper.readFiles(__dirname, "Stream");
\ No newline at end of file
+});
\ No newline at end of file
diff --git a/tests/03-feed.js b/tests/03-feed.js
index bf6f9c6..ca1e048 100644
--- a/tests/03-feed.js
+++ b/tests/03-feed.js
@@ -3,18 +3,20 @@
 var helper = require("./test-helper.js"),
 	FeedHandler = require("../lib/FeedHandler.js"),
 	fs = require("fs"),
-	files = helper.readFiles(__dirname, "Feeds"),
+	path = require("path"),
 	parserOpts = {
 		xmlMode: true
 	};
 
-module.exports = function feeds(test, cb){
-	var handler = new FeedHandler(function(err, dom){
-		if(err) cb(err, 0); //return the error
-		else cb(null, dom);
-	});
-	var file = fs.readFileSync(__dirname + "/Documents/" + test.file).toString();
-	helper.writeToParser(handler, parserOpts, file);
-};
-
-module.exports.files = files;
\ No newline at end of file
+helper.mochaTest("Feeds", __dirname, function(test, cb){
+	var file = fs.readFile(
+		path.join(__dirname, "Documents", test.file),
+		function(err, file){
+			helper.writeToParser(
+				new FeedHandler(cb),
+				parserOpts,
+				file.toString()
+			);
+		}
+	);
+});
\ No newline at end of file
diff --git a/tests/test-helper.js b/tests/test-helper.js
index b78b15f..db1a623 100644
--- a/tests/test-helper.js
+++ b/tests/test-helper.js
@@ -40,38 +40,43 @@ exports.getEventCollector = function(cb){
 	return handler;
 };
 
-exports.readFiles = function(root, folder){
-	var dir = path.join(root, folder);
-
-	return fs
-			.readdirSync(dir)
-			.filter(RegExp.prototype.test, /^[^\._]/) //ignore all files with a leading dot or underscore
-			.map(function(name){
-				return path.join(dir, name);
-			})
-			.map(require);
-};
-
-function deepEqual(expected, actual, message){
-	try {
-		assert.deepEqual(expected, actual, message);
-	} catch(e){
-		e.expected = JSON.stringify(expected, null, 2);
-		e.actual = JSON.stringify(actual, null, 2);
-		throw e;
-	}
-}
-
-exports.deepEqual = deepEqual;
-
-exports.getCallback = function(expected, done){
+function getCallback(expected, done){
 	var repeated = false;
 
-	return function(err, dom){
+	return function(err, actual){
 		assert.ifError(err);
-		deepEqual(expected, dom, "didn't get expected output");
+		try {
+			assert.deepEqual(expected, actual, "didn't get expected output");
+		} catch(e){
+			e.expected = JSON.stringify(expected, null, 2);
+			e.actual = JSON.stringify(actual, null, 2);
+			throw e;
+		}
 
 		if(repeated) done();
 		else repeated = true;
 	};
+}
+
+exports.mochaTest = function(name, root, test){
+	describe(name, readDir);
+
+	function readDir(cb){
+		var dir = path.join(root, name);
+
+		fs
+		.readdirSync(dir)
+		.filter(RegExp.prototype.test, /^[^\._]/) //ignore all files with a leading dot or underscore
+		.map(function(name){
+			return path.join(dir, name);
+		})
+		.map(require)
+		.forEach(runTest);
+	}
+
+	function runTest(file){
+		it(file.name, function(done){
+			test(file, getCallback(file.expected, done));
+		});
+	}
 };
\ No newline at end of file

From 8737bf11ce7592446fb28d6a48ccee618d5ae9aa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 25 Aug 2013 14:15:09 +0200
Subject: [PATCH 430/450] [tests] renamed tests dir to `test`

as required by mocha
---
 .DS_Store                                     | Bin 0 -> 6148 bytes
 test/.DS_Store                                | Bin 0 -> 6148 bytes
 {tests => test}/01-events.js                  |   0
 {tests => test}/02-stream.js                  |   0
 {tests => test}/03-feed.js                    |   0
 {tests => test}/Documents/Atom_Example.xml    |   0
 {tests => test}/Documents/Attributes.html     |   0
 {tests => test}/Documents/Basic.html          |   0
 {tests => test}/Documents/RDF_Example.xml     |   0
 {tests => test}/Documents/RSS_Example.xml     |   0
 {tests => test}/Events/01-simple.json         |   0
 {tests => test}/Events/02-template.json       |   0
 {tests => test}/Events/03-lowercase_tags.json |   0
 {tests => test}/Events/04-cdata.json          |   0
 {tests => test}/Events/05-cdata-special.json  |   0
 {tests => test}/Events/06-leading-lt.json     |   0
 {tests => test}/Events/07-self-closing.json   |   0
 .../Events/08-implicit-close-tags.json        |   0
 {tests => test}/Events/09-attributes.json     |   0
 {tests => test}/Events/10-crazy-attrib.json   |   0
 .../Events/11-script_in_script.json           |   0
 .../Events/12-long-comment-end.json           |   0
 {tests => test}/Events/13-long-cdata-end.json |   0
 .../Events/14-implicit-open-tags.json         |   0
 {tests => test}/Events/15-lt-whitespace.json  |   0
 {tests => test}/Events/16-double_attribs.json |   0
 .../Events/17-numeric_entities.json           |   0
 .../Events/18-legacy_entities.json            |   0
 {tests => test}/Events/19-named_entities.json |   0
 {tests => test}/Events/20-xml_entities.json   |   0
 .../Events/21-entity_in_attribute.json        |   0
 {tests => test}/Feeds/01-rss.js               |   0
 {tests => test}/Feeds/02-atom.js              |   0
 {tests => test}/Feeds/03-rdf.js               |   0
 {tests => test}/Stream/01-basic.json          |   0
 {tests => test}/Stream/02-RSS.json            |   0
 {tests => test}/Stream/03-Atom.json           |   0
 {tests => test}/Stream/04-RDF.json            |   0
 {tests => test}/Stream/05-Attributes.json     |   0
 {tests => test}/test-helper.js                |   0
 tests/00-runtests.js                          |  33 ------------------
 41 files changed, 33 deletions(-)
 create mode 100644 .DS_Store
 create mode 100644 test/.DS_Store
 rename {tests => test}/01-events.js (100%)
 rename {tests => test}/02-stream.js (100%)
 rename {tests => test}/03-feed.js (100%)
 rename {tests => test}/Documents/Atom_Example.xml (100%)
 rename {tests => test}/Documents/Attributes.html (100%)
 rename {tests => test}/Documents/Basic.html (100%)
 rename {tests => test}/Documents/RDF_Example.xml (100%)
 rename {tests => test}/Documents/RSS_Example.xml (100%)
 rename {tests => test}/Events/01-simple.json (100%)
 rename {tests => test}/Events/02-template.json (100%)
 rename {tests => test}/Events/03-lowercase_tags.json (100%)
 rename {tests => test}/Events/04-cdata.json (100%)
 rename {tests => test}/Events/05-cdata-special.json (100%)
 rename {tests => test}/Events/06-leading-lt.json (100%)
 rename {tests => test}/Events/07-self-closing.json (100%)
 rename {tests => test}/Events/08-implicit-close-tags.json (100%)
 rename {tests => test}/Events/09-attributes.json (100%)
 rename {tests => test}/Events/10-crazy-attrib.json (100%)
 rename {tests => test}/Events/11-script_in_script.json (100%)
 rename {tests => test}/Events/12-long-comment-end.json (100%)
 rename {tests => test}/Events/13-long-cdata-end.json (100%)
 rename {tests => test}/Events/14-implicit-open-tags.json (100%)
 rename {tests => test}/Events/15-lt-whitespace.json (100%)
 rename {tests => test}/Events/16-double_attribs.json (100%)
 rename {tests => test}/Events/17-numeric_entities.json (100%)
 rename {tests => test}/Events/18-legacy_entities.json (100%)
 rename {tests => test}/Events/19-named_entities.json (100%)
 rename {tests => test}/Events/20-xml_entities.json (100%)
 rename {tests => test}/Events/21-entity_in_attribute.json (100%)
 rename {tests => test}/Feeds/01-rss.js (100%)
 rename {tests => test}/Feeds/02-atom.js (100%)
 rename {tests => test}/Feeds/03-rdf.js (100%)
 rename {tests => test}/Stream/01-basic.json (100%)
 rename {tests => test}/Stream/02-RSS.json (100%)
 rename {tests => test}/Stream/03-Atom.json (100%)
 rename {tests => test}/Stream/04-RDF.json (100%)
 rename {tests => test}/Stream/05-Attributes.json (100%)
 rename {tests => test}/test-helper.js (100%)
 delete mode 100644 tests/00-runtests.js

diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..5008ddfcf53c02e82d7eee2e57c38e5672ef89f6
GIT binary patch
literal 6148
zcmeH~Jr2S!425mzP>H1@V-^m;4Wg<&0T*E43hX&L&p$$qDprKhvt+--jT7}7np#A3
zem<@ulZcFPQ@L2!n>{z**<q8>++&mCkOWA81W14cNZ<zv;LbK1Poaz?KmsK2CSc!(
z0ynLxE!0092;Krf2c+FF_Fe*7ECH>lEfg7;MkzE(HCqgga^y>{tEnwC%0;vJ&^%eQ
zLs35+`xjp>T0<F0fCPF1$Cyrb|F7^5{eNG?83~ZUUlGt@xh*qZDeu<Z%US-OSsOPv
j)R!Z4KLME7ReXlK;d!wEw5GODWMKRea10D2@KpjYNUI8I

literal 0
HcmV?d00001

diff --git a/test/.DS_Store b/test/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..5008ddfcf53c02e82d7eee2e57c38e5672ef89f6
GIT binary patch
literal 6148
zcmeH~Jr2S!425mzP>H1@V-^m;4Wg<&0T*E43hX&L&p$$qDprKhvt+--jT7}7np#A3
zem<@ulZcFPQ@L2!n>{z**<q8>++&mCkOWA81W14cNZ<zv;LbK1Poaz?KmsK2CSc!(
z0ynLxE!0092;Krf2c+FF_Fe*7ECH>lEfg7;MkzE(HCqgga^y>{tEnwC%0;vJ&^%eQ
zLs35+`xjp>T0<F0fCPF1$Cyrb|F7^5{eNG?83~ZUUlGt@xh*qZDeu<Z%US-OSsOPv
j)R!Z4KLME7ReXlK;d!wEw5GODWMKRea10D2@KpjYNUI8I

literal 0
HcmV?d00001

diff --git a/tests/01-events.js b/test/01-events.js
similarity index 100%
rename from tests/01-events.js
rename to test/01-events.js
diff --git a/tests/02-stream.js b/test/02-stream.js
similarity index 100%
rename from tests/02-stream.js
rename to test/02-stream.js
diff --git a/tests/03-feed.js b/test/03-feed.js
similarity index 100%
rename from tests/03-feed.js
rename to test/03-feed.js
diff --git a/tests/Documents/Atom_Example.xml b/test/Documents/Atom_Example.xml
similarity index 100%
rename from tests/Documents/Atom_Example.xml
rename to test/Documents/Atom_Example.xml
diff --git a/tests/Documents/Attributes.html b/test/Documents/Attributes.html
similarity index 100%
rename from tests/Documents/Attributes.html
rename to test/Documents/Attributes.html
diff --git a/tests/Documents/Basic.html b/test/Documents/Basic.html
similarity index 100%
rename from tests/Documents/Basic.html
rename to test/Documents/Basic.html
diff --git a/tests/Documents/RDF_Example.xml b/test/Documents/RDF_Example.xml
similarity index 100%
rename from tests/Documents/RDF_Example.xml
rename to test/Documents/RDF_Example.xml
diff --git a/tests/Documents/RSS_Example.xml b/test/Documents/RSS_Example.xml
similarity index 100%
rename from tests/Documents/RSS_Example.xml
rename to test/Documents/RSS_Example.xml
diff --git a/tests/Events/01-simple.json b/test/Events/01-simple.json
similarity index 100%
rename from tests/Events/01-simple.json
rename to test/Events/01-simple.json
diff --git a/tests/Events/02-template.json b/test/Events/02-template.json
similarity index 100%
rename from tests/Events/02-template.json
rename to test/Events/02-template.json
diff --git a/tests/Events/03-lowercase_tags.json b/test/Events/03-lowercase_tags.json
similarity index 100%
rename from tests/Events/03-lowercase_tags.json
rename to test/Events/03-lowercase_tags.json
diff --git a/tests/Events/04-cdata.json b/test/Events/04-cdata.json
similarity index 100%
rename from tests/Events/04-cdata.json
rename to test/Events/04-cdata.json
diff --git a/tests/Events/05-cdata-special.json b/test/Events/05-cdata-special.json
similarity index 100%
rename from tests/Events/05-cdata-special.json
rename to test/Events/05-cdata-special.json
diff --git a/tests/Events/06-leading-lt.json b/test/Events/06-leading-lt.json
similarity index 100%
rename from tests/Events/06-leading-lt.json
rename to test/Events/06-leading-lt.json
diff --git a/tests/Events/07-self-closing.json b/test/Events/07-self-closing.json
similarity index 100%
rename from tests/Events/07-self-closing.json
rename to test/Events/07-self-closing.json
diff --git a/tests/Events/08-implicit-close-tags.json b/test/Events/08-implicit-close-tags.json
similarity index 100%
rename from tests/Events/08-implicit-close-tags.json
rename to test/Events/08-implicit-close-tags.json
diff --git a/tests/Events/09-attributes.json b/test/Events/09-attributes.json
similarity index 100%
rename from tests/Events/09-attributes.json
rename to test/Events/09-attributes.json
diff --git a/tests/Events/10-crazy-attrib.json b/test/Events/10-crazy-attrib.json
similarity index 100%
rename from tests/Events/10-crazy-attrib.json
rename to test/Events/10-crazy-attrib.json
diff --git a/tests/Events/11-script_in_script.json b/test/Events/11-script_in_script.json
similarity index 100%
rename from tests/Events/11-script_in_script.json
rename to test/Events/11-script_in_script.json
diff --git a/tests/Events/12-long-comment-end.json b/test/Events/12-long-comment-end.json
similarity index 100%
rename from tests/Events/12-long-comment-end.json
rename to test/Events/12-long-comment-end.json
diff --git a/tests/Events/13-long-cdata-end.json b/test/Events/13-long-cdata-end.json
similarity index 100%
rename from tests/Events/13-long-cdata-end.json
rename to test/Events/13-long-cdata-end.json
diff --git a/tests/Events/14-implicit-open-tags.json b/test/Events/14-implicit-open-tags.json
similarity index 100%
rename from tests/Events/14-implicit-open-tags.json
rename to test/Events/14-implicit-open-tags.json
diff --git a/tests/Events/15-lt-whitespace.json b/test/Events/15-lt-whitespace.json
similarity index 100%
rename from tests/Events/15-lt-whitespace.json
rename to test/Events/15-lt-whitespace.json
diff --git a/tests/Events/16-double_attribs.json b/test/Events/16-double_attribs.json
similarity index 100%
rename from tests/Events/16-double_attribs.json
rename to test/Events/16-double_attribs.json
diff --git a/tests/Events/17-numeric_entities.json b/test/Events/17-numeric_entities.json
similarity index 100%
rename from tests/Events/17-numeric_entities.json
rename to test/Events/17-numeric_entities.json
diff --git a/tests/Events/18-legacy_entities.json b/test/Events/18-legacy_entities.json
similarity index 100%
rename from tests/Events/18-legacy_entities.json
rename to test/Events/18-legacy_entities.json
diff --git a/tests/Events/19-named_entities.json b/test/Events/19-named_entities.json
similarity index 100%
rename from tests/Events/19-named_entities.json
rename to test/Events/19-named_entities.json
diff --git a/tests/Events/20-xml_entities.json b/test/Events/20-xml_entities.json
similarity index 100%
rename from tests/Events/20-xml_entities.json
rename to test/Events/20-xml_entities.json
diff --git a/tests/Events/21-entity_in_attribute.json b/test/Events/21-entity_in_attribute.json
similarity index 100%
rename from tests/Events/21-entity_in_attribute.json
rename to test/Events/21-entity_in_attribute.json
diff --git a/tests/Feeds/01-rss.js b/test/Feeds/01-rss.js
similarity index 100%
rename from tests/Feeds/01-rss.js
rename to test/Feeds/01-rss.js
diff --git a/tests/Feeds/02-atom.js b/test/Feeds/02-atom.js
similarity index 100%
rename from tests/Feeds/02-atom.js
rename to test/Feeds/02-atom.js
diff --git a/tests/Feeds/03-rdf.js b/test/Feeds/03-rdf.js
similarity index 100%
rename from tests/Feeds/03-rdf.js
rename to test/Feeds/03-rdf.js
diff --git a/tests/Stream/01-basic.json b/test/Stream/01-basic.json
similarity index 100%
rename from tests/Stream/01-basic.json
rename to test/Stream/01-basic.json
diff --git a/tests/Stream/02-RSS.json b/test/Stream/02-RSS.json
similarity index 100%
rename from tests/Stream/02-RSS.json
rename to test/Stream/02-RSS.json
diff --git a/tests/Stream/03-Atom.json b/test/Stream/03-Atom.json
similarity index 100%
rename from tests/Stream/03-Atom.json
rename to test/Stream/03-Atom.json
diff --git a/tests/Stream/04-RDF.json b/test/Stream/04-RDF.json
similarity index 100%
rename from tests/Stream/04-RDF.json
rename to test/Stream/04-RDF.json
diff --git a/tests/Stream/05-Attributes.json b/test/Stream/05-Attributes.json
similarity index 100%
rename from tests/Stream/05-Attributes.json
rename to test/Stream/05-Attributes.json
diff --git a/tests/test-helper.js b/test/test-helper.js
similarity index 100%
rename from tests/test-helper.js
rename to test/test-helper.js
diff --git a/tests/00-runtests.js b/tests/00-runtests.js
deleted file mode 100644
index 78aec97..0000000
--- a/tests/00-runtests.js
+++ /dev/null
@@ -1,33 +0,0 @@
-var helper = require("./test-helper.js"),
-    assert = require("assert");
-
-var runCount = 0,
-	testCount = 0,
-	done = false;
-
-[
- "./01-events.js",
- "./02-stream.js",
- "./03-feed.js"
-]
-.map(require)
-.forEach(function (test){
-	console.log("\nStarting", test.name, "\n----");
-
-	test.files
-	.forEach(function(file){
-		runCount++;
-		
-		console.log("Testing:", file.name);
-		
-		var second = false; //every test runs twice
-		test(file, helper.getCallback(file.expected, function(){
-			testCount++;
-			if(!--runCount && done){
-				console.log("Total tests:", testCount);
-			}
-		}));
-	});
-});
-
-var done = true; //started all tests
\ No newline at end of file

From 96a00fb0990d435160a8ea59e65a13b05b50db6c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sun, 25 Aug 2013 14:18:05 +0200
Subject: [PATCH 431/450] [package] run mocha as the test script

---
 package.json | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/package.json b/package.json
index 01af48c..f50f38e 100644
--- a/package.json
+++ b/package.json
@@ -18,7 +18,7 @@
 	},
 	"main": "lib/index.js",
 	"scripts": {
-		"test": "node tests/00-runtests.js"
+		"test": "mocha -R spec"
 	},
 	"dependencies": {
 		"domhandler": "2.0",
@@ -26,6 +26,9 @@
 		"domelementtype": "1",
 		"readable-stream": "1.0"
 	},
+	"devDependencies": {
+		"mocha": "1"
+	},
 	"licenses": [{
 		"type": "MIT",
 		"url": "http://github.com/fb55/htmlparser2/raw/master/LICENSE"

From 41ad9141faf148c5b30729b21d84e58bd413bcb4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20B=C3=B6hm?= <me@feedic.com>
Date: Mon, 26 Aug 2013 14:01:24 +0200
Subject: [PATCH 432/450] Delete .DS_Store

---
 .DS_Store | Bin 6148 -> 0 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)
 delete mode 100644 .DS_Store

diff --git a/.DS_Store b/.DS_Store
deleted file mode 100644
index 5008ddfcf53c02e82d7eee2e57c38e5672ef89f6..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 6148
zcmeH~Jr2S!425mzP>H1@V-^m;4Wg<&0T*E43hX&L&p$$qDprKhvt+--jT7}7np#A3
zem<@ulZcFPQ@L2!n>{z**<q8>++&mCkOWA81W14cNZ<zv;LbK1Poaz?KmsK2CSc!(
z0ynLxE!0092;Krf2c+FF_Fe*7ECH>lEfg7;MkzE(HCqgga^y>{tEnwC%0;vJ&^%eQ
zLs35+`xjp>T0<F0fCPF1$Cyrb|F7^5{eNG?83~ZUUlGt@xh*qZDeu<Z%US-OSsOPv
j)R!Z4KLME7ReXlK;d!wEw5GODWMKRea10D2@KpjYNUI8I


From fc22b7d3d4f73e02125721aefd48aaa39cca3f28 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20B=C3=B6hm?= <me@feedic.com>
Date: Thu, 29 Aug 2013 00:24:02 +0200
Subject: [PATCH 433/450] [tokenizer] emit `onattribdata` in
 `_handleTrailingData`

fixes #66
---
 lib/Tokenizer.js | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
index afd20ee..904737c 100644
--- a/lib/Tokenizer.js
+++ b/lib/Tokenizer.js
@@ -823,10 +823,9 @@ Tokenizer.prototype._handleTrailingData = function(){
 		this._cbs.onopentagend();
 	} else if(this._state === IN_ATTRIBUTE_NAME){
 		this._cbs.onattribname(data);
-	} else if(this._state === IN_ATTRIBUTE_VALUE_SQ || this._state === IN_ATTRIBUTE_VALUE_DQ){
-		this._cbs.onattribvalue(data.substr(1));
-	} else if(this._state === IN_ATTRIBUTE_VALUE_NQ){
-		this._cbs.onattribvalue(data);
+	} else if(this._state === IN_ATTRIBUTE_VALUE_SQ || this._state === IN_ATTRIBUTE_VALUE_DQ || this._state === IN_ATTRIBUTE_VALUE_NQ){
+		this._cbs.onattribdata(data);
+		this._cbs.onattribend();
 	} else if(this._state === IN_CLOSING_TAG_NAME){
 		this._cbs.onclosetag(data);
 	} else if(this._state === IN_NAMED_ENTITY && !this._xmlMode){

From 336af9bd97ce92478c6a2ac55d34447da9a9e472 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Mon, 26 Aug 2013 18:49:20 +0200
Subject: [PATCH 434/450] [tests] simplifications

---
 test/03-feed.js     |  9 +++------
 test/test-helper.js | 30 +++++++++++++++---------------
 2 files changed, 18 insertions(+), 21 deletions(-)

diff --git a/test/03-feed.js b/test/03-feed.js
index ca1e048..8e78eb8 100644
--- a/test/03-feed.js
+++ b/test/03-feed.js
@@ -3,18 +3,15 @@
 var helper = require("./test-helper.js"),
 	FeedHandler = require("../lib/FeedHandler.js"),
 	fs = require("fs"),
-	path = require("path"),
-	parserOpts = {
-		xmlMode: true
-	};
+	path = require("path");
 
 helper.mochaTest("Feeds", __dirname, function(test, cb){
-	var file = fs.readFile(
+	fs.readFile(
 		path.join(__dirname, "Documents", test.file),
 		function(err, file){
 			helper.writeToParser(
 				new FeedHandler(cb),
-				parserOpts,
+				{ xmlMode: true },
 				file.toString()
 			);
 		}
diff --git a/test/test-helper.js b/test/test-helper.js
index db1a623..4ed993b 100644
--- a/test/test-helper.js
+++ b/test/test-helper.js
@@ -20,26 +20,26 @@ exports.writeToParser = function(handler, options, data){
 //returns a tree structure
 exports.getEventCollector = function(cb){
 	var handler = new CollectingHandler({onerror: cb, onend: function(){
-		cb(null, handler.events
-			.reduce(function(events, arr){
-				if(arr[0] === "onerror" || arr[0] === "onend");
-				else if(arr[0] === "ontext" && events.length && events[events.length-1].event === "text"){
-					events[events.length-1].data[0] += arr[1];
-				} else {
-					events.push({
-						event: arr[0].substr(2),
-						data: arr.slice(1)
-					});
-				}
-
-				return events;
-			}, [])
-		);
+		cb(null, handler.events.reduce(eventReducer, []));
 	}});
 
 	return handler;
 };
 
+function eventReducer(events, arr){
+	if(arr[0] === "onerror" || arr[0] === "onend");
+	else if(arr[0] === "ontext" && events.length && events[events.length-1].event === "text"){
+		events[events.length-1].data[0] += arr[1];
+	} else {
+		events.push({
+			event: arr[0].substr(2),
+			data: arr.slice(1)
+		});
+	}
+
+	return events;
+}
+
 function getCallback(expected, done){
 	var repeated = false;
 

From fc0918c0c202bbc8613cf3bb2e3df895aaa07711 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Thu, 29 Aug 2013 11:34:17 +0200
Subject: [PATCH 435/450] 3.2.4

---
 package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package.json b/package.json
index f50f38e..e5e22c6 100644
--- a/package.json
+++ b/package.json
@@ -1,7 +1,7 @@
 {
 	"name": "htmlparser2",
 	"description": "Fast & forgiving HTML/XML/RSS parser",
-	"version": "3.2.3",
+	"version": "3.2.4",
 	"author": "Felix Boehm <me@feedic.com>",
 	"keywords": ["html", "parser", "streams", "xml", "dom", "rss", "feed", "atom"],
 	"contributors": ["Chris Winberry <chris@winberry.net>"],

From 7b1e4c994b2e6dd5705a66ca8edea772861ba8fd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20B=C3=B6hm?= <me@feedic.com>
Date: Fri, 30 Aug 2013 17:47:27 +0200
Subject: [PATCH 436/450] [readme] updated performance characteristics

---
 README.md | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/README.md b/README.md
index 54cf642..9ed236d 100644
--- a/README.md
+++ b/README.md
@@ -58,17 +58,19 @@ new htmlparser.FeedHandler(function(<error> error, <object> feed){
 
 After having some artificial benchmarks for some time, __@AndreasMadsen__ published his [`htmlparser-benchmark`](https://github.com/AndreasMadsen/htmlparser-benchmark), which benchmarks HTML parses based on real-world websites.
 
-At the time of writing, the latest versions of all supported parsers show the following performance characteristics on my MacBook:
+At the time of writing, the latest versions of all supported parsers show the following performance characteristics on [Travis CI](https://travis-ci.org/AndreasMadsen/htmlparser-benchmark/builds/10805007) (please note that Travis doesn't guarantee equal conditions for all tests):
 
 ```
-gumbo-parser   : 28.9543 ms/file ± 15.9772
-html-parser    : 19.0935 ms/file ± 13.4118
-htmlparser     : 48.9674 ms/file ± 293.747
-hubbub         : 30.1816 ms/file ± 16.1811
-libxmljs       : 13.0610 ms/file ± 18.6695
-sax            : 44.5736 ms/file ± 22.6353
-htmlparser2-dom: 5.27927 ms/file ± 4.80156
-htmlparser2    : 3.56451 ms/file ± 2.51882
+gumbo-parser   : 34.9208 ms/file ± 21.4238
+html-parser    : 24.8224 ms/file ± 15.8703
+html5          : 419.597 ms/file ± 264.265
+htmlparser     : 60.0722 ms/file ± 384.844
+htmlparser2-dom: 12.0749 ms/file ± 6.49474
+htmlparser2    : 7.49130 ms/file ± 5.74368
+hubbub         : 30.4980 ms/file ± 16.4682
+libxmljs       : 14.1338 ms/file ± 18.6541
+parse5         : 22.0439 ms/file ± 15.3743
+sax            : 49.6513 ms/file ± 26.6032
 ```
 
 ##How is this different from [node-htmlparser](https://github.com/tautologistics/node-htmlparser)?

From 76643d33451fc40901390535409f848d7de2af45 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20B=C3=B6hm?= <me@feedic.com>
Date: Fri, 30 Aug 2013 19:16:49 +0200
Subject: [PATCH 437/450] [tokenizer] handle `<<` correctly

fixes MatthewMueller/cheerio#247
---
 lib/Tokenizer.js | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/lib/Tokenizer.js b/lib/Tokenizer.js
index 904737c..ef98766 100644
--- a/lib/Tokenizer.js
+++ b/lib/Tokenizer.js
@@ -156,6 +156,9 @@ Tokenizer.prototype._stateBeforeTagName = function(c){
 	} else if(c === "?"){
 		this._state = IN_PROCESSING_INSTRUCTION;
 		this._sectionStart = this._index + 1;
+	} else if(c === "<"){
+		this._cbs.ontext(this._getSection());
+		this._sectionStart = this._index;
 	} else {
 		this._state = (!this._xmlMode && (c === "s" || c === "S")) ?
 						BEFORE_SPECIAL : IN_TAG_NAME;

From 2f2449183ee0d69be16ba4e8ccab447406627af7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20B=C3=B6hm?= <me@feedic.com>
Date: Fri, 30 Aug 2013 19:17:58 +0200
Subject: [PATCH 438/450] 3.2.5

---
 package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package.json b/package.json
index e5e22c6..3c5e9cb 100644
--- a/package.json
+++ b/package.json
@@ -1,7 +1,7 @@
 {
 	"name": "htmlparser2",
 	"description": "Fast & forgiving HTML/XML/RSS parser",
-	"version": "3.2.4",
+	"version": "3.2.5",
 	"author": "Felix Boehm <me@feedic.com>",
 	"keywords": ["html", "parser", "streams", "xml", "dom", "rss", "feed", "atom"],
 	"contributors": ["Chris Winberry <chris@winberry.net>"],

From 834d6d259a94a61e8b280be5999d77ca1711262a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Sat, 31 Aug 2013 00:30:42 +0200
Subject: [PATCH 439/450] [tests] added test case for
 MatthewMueller/cheerio#247

---
 test/Events/22-double_brackets.json | 41 +++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)
 create mode 100644 test/Events/22-double_brackets.json

diff --git a/test/Events/22-double_brackets.json b/test/Events/22-double_brackets.json
new file mode 100644
index 0000000..38a513b
--- /dev/null
+++ b/test/Events/22-double_brackets.json
@@ -0,0 +1,41 @@
+{
+  "name": "double brackets",
+  "options": {
+    "handler": {},
+    "parser": {}
+  },
+  "html": "<<princess-purpose>>testing</princess-purpose>",
+  "expected": [
+    {
+      "event": "text",
+      "data": [
+        "<"
+      ]
+    },
+    {
+      "event": "opentagname",
+      "data": [
+        "princess-purpose"
+      ]
+    },
+    {
+      "event": "opentag",
+      "data": [
+        "princess-purpose",
+        {}
+      ]
+    },
+    {
+      "event": "text",
+      "data": [
+        ">testing"
+      ]
+    },
+    {
+      "event": "closetag",
+      "data": [
+        "princess-purpose"
+      ]
+    }
+  ]
+}

From 994cfda4f65a2ba4ca3d566546758ea5afbf0b12 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Wed, 4 Sep 2013 17:02:29 +0200
Subject: [PATCH 440/450] update to DomHandler@2.1, updated FeedHandler
 accordingly, bump

---
 lib/FeedHandler.js   | 41 +++++++++++++++++++++++------------------
 package.json         |  4 ++--
 test/Feeds/03-rdf.js | 12 ++++++------
 3 files changed, 31 insertions(+), 26 deletions(-)

diff --git a/lib/FeedHandler.js b/lib/FeedHandler.js
index a62e61c..8b7d32c 100644
--- a/lib/FeedHandler.js
+++ b/lib/FeedHandler.js
@@ -3,8 +3,8 @@ var index = require("./index.js"),
 	DomUtils = index.DomUtils;
 
 //TODO: make this a streamable handler
-function FeedHandler(callback){
-	this.init(callback, { ignoreWhitespace: true });
+function FeedHandler(callback, options){
+	this.init(callback, options);
 }
 
 require("util").inherits(FeedHandler, DomHandler);
@@ -20,7 +20,12 @@ function getOneElement(what, where){
 function fetch(what, where, recurse){
 	return DomUtils.getText(
 		DomUtils.getElementsByTagName(what, where, recurse, 1)
-	);
+	).trim();
+}
+
+function addConditionally(obj, prop, what, where, recurse){
+	var tmp = fetch(what, where, recurse);
+	if(tmp) obj[prop] = tmp;
 }
 
 var isValidFeed = function(value) {
@@ -37,22 +42,22 @@ FeedHandler.prototype.onend = function() {
 			childs = feedRoot.children;
 
 			feed.type = "atom";
-			if(tmp = fetch("id", childs)) feed.id = tmp;
-			if(tmp = fetch("title", childs)) feed.title = tmp;
+			addConditionally(feed, "id", "id", childs);
+			addConditionally(feed, "title", "title", childs);
 			if((tmp = getOneElement("link", childs)) && (tmp = tmp.attribs) && (tmp = tmp.href)) feed.link = tmp;
-			if(tmp = fetch("subtitle", childs)) feed.description = tmp;
+			addConditionally(feed, "description", "subtitle", childs);
 			if(tmp = fetch("updated", childs)) feed.updated = new Date(tmp);
-			if(tmp = fetch("email", childs, true)) feed.author = tmp;
+			addConditionally(feed, "author", "email", childs, true);
 
 			feed.items = getElements("entry", childs).map(function(item){
 				var entry = {}, tmp;
 
 				item = item.children;
 
-				if(tmp = fetch("id", item)) entry.id = tmp;
-				if(tmp = fetch("title", item)) entry.title = tmp;
+				addConditionally(entry, "id", "id", item);
+				addConditionally(entry, "title", "title", item);
 				if((tmp = getOneElement("link", item)) && (tmp = tmp.attribs) && (tmp = tmp.href)) entry.link = tmp;
-				if(tmp = fetch("summary", item)) entry.description = tmp;
+				addConditionally(entry, "description", "summary", item);
 				if(tmp = fetch("updated", item)) entry.pubDate = new Date(tmp);
 				return entry;
 			});
@@ -61,21 +66,21 @@ FeedHandler.prototype.onend = function() {
 
 			feed.type = feedRoot.name.substr(0, 3);
 			feed.id = "";
-			if(tmp = fetch("title", childs)) feed.title = tmp;
-			if(tmp = fetch("link", childs)) feed.link = tmp;
-			if(tmp = fetch("description", childs)) feed.description = tmp;
+			addConditionally(feed, "title", "title", childs);
+			addConditionally(feed, "link", "link", childs);
+			addConditionally(feed, "description", "description", childs);
 			if(tmp = fetch("lastBuildDate", childs)) feed.updated = new Date(tmp);
-			if(tmp = fetch("managingEditor", childs)) feed.author = tmp;
+			addConditionally(feed, "author", "managingEditor", childs, true);
 
 			feed.items = getElements("item", feedRoot.children).map(function(item){
 				var entry = {}, tmp;
 
 				item = item.children;
 
-				if(tmp = fetch("guid", item)) entry.id = tmp;
-				if(tmp = fetch("title", item)) entry.title = tmp;
-				if(tmp = fetch("link", item)) entry.link = tmp;
-				if(tmp = fetch("description", item)) entry.description = tmp;
+				addConditionally(entry, "id", "guid", item);
+				addConditionally(entry, "title", "title", item);
+				addConditionally(entry, "link", "link", item);
+				addConditionally(entry, "description", "description", item);
 				if(tmp = fetch("pubDate", item)) entry.pubDate = new Date(tmp);
 				return entry;
 			});
diff --git a/package.json b/package.json
index 3c5e9cb..8de5bf8 100644
--- a/package.json
+++ b/package.json
@@ -1,7 +1,7 @@
 {
 	"name": "htmlparser2",
 	"description": "Fast & forgiving HTML/XML/RSS parser",
-	"version": "3.2.5",
+	"version": "3.3.0",
 	"author": "Felix Boehm <me@feedic.com>",
 	"keywords": ["html", "parser", "streams", "xml", "dom", "rss", "feed", "atom"],
 	"contributors": ["Chris Winberry <chris@winberry.net>"],
@@ -21,7 +21,7 @@
 		"test": "mocha -R spec"
 	},
 	"dependencies": {
-		"domhandler": "2.0",
+		"domhandler": "2.1",
 		"domutils": "1.1",
 		"domelementtype": "1",
 		"readable-stream": "1.0"
diff --git a/test/Feeds/03-rdf.js b/test/Feeds/03-rdf.js
index 726d580..0f1cbc4 100644
--- a/test/Feeds/03-rdf.js
+++ b/test/Feeds/03-rdf.js
@@ -7,14 +7,14 @@ exports.expected = {
   "link": "http://sfbay.craigslist.org/ccc/",
   "items": [
     {
-      "title": " Music Equipment Repair and Consignment ",
-      "link": "\nhttp://sfbay.craigslist.org/sby/muc/2681301534.html\n",
-      "description": "\nSan Jose Rock Shop offers musical instrument repair and consignment! (408) 215-2065<br> <br> We are pleased to announce our NEW LOCATION: 1199 N 5th st. San Jose, ca 95112. Please call ahead, by appointment only.<br> <br> Recently featured by Metro Newspaper in their 2011 Best of the Silicon Valley edition see it online here:<br> <a href=\"http://www.metroactive.com/best-of-silicon-valley/2011/music-nightlife/editor-picks.html\" rel=\"nofollow\">http://www.metroactive.com/best-of-silicon-valley/2011/music-nightlife/editor-picks.html</a><br> <br> Guitar Set up (acoustic and electronic) $40!<!-- END CLTAGS -->\n"
+      "title": "Music Equipment Repair and Consignment",
+      "link": "http://sfbay.craigslist.org/sby/muc/2681301534.html",
+      "description": "San Jose Rock Shop offers musical instrument repair and consignment! (408) 215-2065<br> <br> We are pleased to announce our NEW LOCATION: 1199 N 5th st. San Jose, ca 95112. Please call ahead, by appointment only.<br> <br> Recently featured by Metro Newspaper in their 2011 Best of the Silicon Valley edition see it online here:<br> <a href=\"http://www.metroactive.com/best-of-silicon-valley/2011/music-nightlife/editor-picks.html\" rel=\"nofollow\">http://www.metroactive.com/best-of-silicon-valley/2011/music-nightlife/editor-picks.html</a><br> <br> Guitar Set up (acoustic and electronic) $40!<!-- END CLTAGS -->"
     },
     {
-      "title": "\nRide Offered - Oakland/BART to LA/SFV - TODAY 3PM 11/04 (oakland north / temescal)\n",
-      "link": "\nhttp://sfbay.craigslist.org/eby/rid/2685010755.html\n",
-      "description": "\nIm offering a lift for up to two people from Oakland (or near any BART station in the East Bay/580/880 Corridor, or San Jose/Morgan Hill, Gilroy) to the San Fernando Valley / Los Angeles area. Specifically, Im leaving from Oakland between 2:30 and 3:00pm (this is flexible, but if I leave too late my girlfriend will kill me), and heading to Woodland Hills via the 580, I-5, 405, and 101.<!-- END CLTAGS -->\n"
+      "title": "Ride Offered - Oakland/BART to LA/SFV - TODAY 3PM 11/04 (oakland north / temescal)",
+      "link": "http://sfbay.craigslist.org/eby/rid/2685010755.html",
+      "description": "Im offering a lift for up to two people from Oakland (or near any BART station in the East Bay/580/880 Corridor, or San Jose/Morgan Hill, Gilroy) to the San Fernando Valley / Los Angeles area. Specifically, Im leaving from Oakland between 2:30 and 3:00pm (this is flexible, but if I leave too late my girlfriend will kill me), and heading to Woodland Hills via the 580, I-5, 405, and 101.<!-- END CLTAGS -->"
     }
   ]
 };
\ No newline at end of file

From 11eba281352940aef167b3c9ae852cde2f5ed380 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20Bo=CC=88hm?= <me@feedic.com>
Date: Wed, 4 Sep 2013 17:04:15 +0200
Subject: [PATCH 441/450] [tests] write only single characters for testing
 chunked data

failed previously (only for FeedHandler tests), fixed now due to
DomHandler upgrade (which removed the `ignoreWhitespace` option)
---
 test/test-helper.js | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/test/test-helper.js b/test/test-helper.js
index 4ed993b..5111259 100644
--- a/test/test-helper.js
+++ b/test/test-helper.js
@@ -3,14 +3,13 @@ var htmlparser2 = require(".."),
     path = require("path"),
     assert = require("assert"),
 	Parser = htmlparser2.Parser,
-	CollectingHandler = htmlparser2.CollectingHandler,
-	chunkSize = 5;
+	CollectingHandler = htmlparser2.CollectingHandler;
 
 exports.writeToParser = function(handler, options, data){
 	var parser = new Parser(handler, options);
 	//first, try to run the test via chunks
-	for(var i = 0; i < data.length; i += chunkSize){
-		parser.write(data.substr(i, chunkSize));
+	for(var i = 0; i < data.length; i++){
+		parser.write(data.charAt(i));
 	}
 	parser.end();
 	//then parse everything

From 029c5659f96b722e1dad282931e940672c41494e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20B=C3=B6hm?= <me@feedic.com>
Date: Sun, 20 Oct 2013 19:42:52 +0200
Subject: [PATCH 442/450] [package] require domutils@1.2

as requested in fb55/CSSselect#11
---
 package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package.json b/package.json
index 8de5bf8..0f0fab5 100644
--- a/package.json
+++ b/package.json
@@ -22,7 +22,7 @@
 	},
 	"dependencies": {
 		"domhandler": "2.1",
-		"domutils": "1.1",
+		"domutils": "1.2",
 		"domelementtype": "1",
 		"readable-stream": "1.0"
 	},

From e6418c2a1245316cb6d6b2f2f83233768736939c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20B=C3=B6hm?= <me@feedic.com>
Date: Fri, 22 Nov 2013 11:05:31 +0100
Subject: [PATCH 443/450] package: update readable-stream

---
 package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package.json b/package.json
index 0f0fab5..d216f24 100644
--- a/package.json
+++ b/package.json
@@ -24,7 +24,7 @@
 		"domhandler": "2.1",
 		"domutils": "1.2",
 		"domelementtype": "1",
-		"readable-stream": "1.0"
+		"readable-stream": "1.1"
 	},
 	"devDependencies": {
 		"mocha": "1"

From 0e5775cca6c39c0366847f312075a3c696be0393 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20B=C3=B6hm?= <me@feedic.com>
Date: Fri, 22 Nov 2013 11:09:01 +0100
Subject: [PATCH 444/450] package: use simple `license` field

---
 package.json | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/package.json b/package.json
index d216f24..375fc1c 100644
--- a/package.json
+++ b/package.json
@@ -29,8 +29,5 @@
 	"devDependencies": {
 		"mocha": "1"
 	},
-	"licenses": [{
-		"type": "MIT",
-		"url": "http://github.com/fb55/htmlparser2/raw/master/LICENSE"
-	}]
+	"license": "MIT"
 }

From 2c568d36d4af04de7754e3194429c238f3793acf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20B=C3=B6hm?= <me@feedic.com>
Date: Tue, 26 Nov 2013 17:01:14 +0100
Subject: [PATCH 445/450] replace non-breaking space with regular space

as requested in #70
---
 lib/CollectingHandler.js | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/CollectingHandler.js b/lib/CollectingHandler.js
index 8d63305..93d97a7 100644
--- a/lib/CollectingHandler.js
+++ b/lib/CollectingHandler.js
@@ -1,7 +1,7 @@
 module.exports = CollectingHandler;
 
 function CollectingHandler(cbs){
-	this._cbs = cbs || {};
+	this._cbs = cbs || {};
 	this.events = [];
 }
 
@@ -52,4 +52,4 @@ CollectingHandler.prototype.restart = function(){
 			}
 		}
 	}
-};
\ No newline at end of file
+};

From c9d4abee59902a9c5321c21c3a6411ef1d4ad4c7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20B=C3=B6hm?= <me@feedic.com>
Date: Tue, 10 Dec 2013 12:53:57 +0100
Subject: [PATCH 446/450] index: pass `options` argument to constructors

---
 lib/index.js | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/index.js b/lib/index.js
index 7f6f649..9fa2761 100644
--- a/lib/index.js
+++ b/lib/index.js
@@ -43,8 +43,8 @@ module.exports = {
 		return handler.dom;
 	},
 	parseFeed: function(feed, options){
-		var handler = new module.exports.FeedHandler();
-		var parser = new Parser(handler);
+		var handler = new module.exports.FeedHandler(options);
+		var parser = new Parser(handler, options);
 		parser.end(feed);
 		return handler.dom;
 	},

From 298546c26bbddb94d53c84b2dc07b8ed81e9175a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20B=C3=B6hm?= <me@feedic.com>
Date: Tue, 10 Dec 2013 12:56:33 +0100
Subject: [PATCH 447/450] tests: remove unused `cb` argument

---
 test/test-helper.js | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/test-helper.js b/test/test-helper.js
index 5111259..6a33cd6 100644
--- a/test/test-helper.js
+++ b/test/test-helper.js
@@ -60,7 +60,7 @@ function getCallback(expected, done){
 exports.mochaTest = function(name, root, test){
 	describe(name, readDir);
 
-	function readDir(cb){
+	function readDir(){
 		var dir = path.join(root, name);
 
 		fs
@@ -78,4 +78,4 @@ exports.mochaTest = function(name, root, test){
 			test(file, getCallback(file.expected, done));
 		});
 	}
-};
\ No newline at end of file
+};

From f9bc72fac2a89ab69d519c963a6c75e46a7d2dc5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20B=C3=B6hm?= <me@feedic.com>
Date: Tue, 10 Dec 2013 13:01:58 +0100
Subject: [PATCH 448/450] feedhandler: wrap assignments

---
 lib/FeedHandler.js | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib/FeedHandler.js b/lib/FeedHandler.js
index 8b7d32c..72a044b 100644
--- a/lib/FeedHandler.js
+++ b/lib/FeedHandler.js
@@ -46,7 +46,7 @@ FeedHandler.prototype.onend = function() {
 			addConditionally(feed, "title", "title", childs);
 			if((tmp = getOneElement("link", childs)) && (tmp = tmp.attribs) && (tmp = tmp.href)) feed.link = tmp;
 			addConditionally(feed, "description", "subtitle", childs);
-			if(tmp = fetch("updated", childs)) feed.updated = new Date(tmp);
+			if((tmp = fetch("updated", childs))) feed.updated = new Date(tmp);
 			addConditionally(feed, "author", "email", childs, true);
 
 			feed.items = getElements("entry", childs).map(function(item){
@@ -58,7 +58,7 @@ FeedHandler.prototype.onend = function() {
 				addConditionally(entry, "title", "title", item);
 				if((tmp = getOneElement("link", item)) && (tmp = tmp.attribs) && (tmp = tmp.href)) entry.link = tmp;
 				addConditionally(entry, "description", "summary", item);
-				if(tmp = fetch("updated", item)) entry.pubDate = new Date(tmp);
+				if((tmp = fetch("updated", item))) entry.pubDate = new Date(tmp);
 				return entry;
 			});
 		} else{
@@ -69,7 +69,7 @@ FeedHandler.prototype.onend = function() {
 			addConditionally(feed, "title", "title", childs);
 			addConditionally(feed, "link", "link", childs);
 			addConditionally(feed, "description", "description", childs);
-			if(tmp = fetch("lastBuildDate", childs)) feed.updated = new Date(tmp);
+			if((tmp = fetch("lastBuildDate", childs))) feed.updated = new Date(tmp);
 			addConditionally(feed, "author", "managingEditor", childs, true);
 
 			feed.items = getElements("item", feedRoot.children).map(function(item){
@@ -81,7 +81,7 @@ FeedHandler.prototype.onend = function() {
 				addConditionally(entry, "title", "title", item);
 				addConditionally(entry, "link", "link", item);
 				addConditionally(entry, "description", "description", item);
-				if(tmp = fetch("pubDate", item)) entry.pubDate = new Date(tmp);
+				if((tmp = fetch("pubDate", item))) entry.pubDate = new Date(tmp);
 				return entry;
 			});
 		}

From 5f244df1ac8daffb0c141584165c3c8ab734bae5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20B=C3=B6hm?= <me@feedic.com>
Date: Tue, 10 Dec 2013 13:04:34 +0100
Subject: [PATCH 449/450] tests: changed indentation to tabs

---
 test/Feeds/03-rdf.js | 34 +++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/test/Feeds/03-rdf.js b/test/Feeds/03-rdf.js
index 0f1cbc4..b38ee13 100644
--- a/test/Feeds/03-rdf.js
+++ b/test/Feeds/03-rdf.js
@@ -1,20 +1,20 @@
 exports.name = "RDF test";
 exports.file = "/RDF_Example.xml";
 exports.expected = {
-  "type": "rdf",
-  "id": "",
-  "title": "craigslist | all community in SF bay area",
-  "link": "http://sfbay.craigslist.org/ccc/",
-  "items": [
-    {
-      "title": "Music Equipment Repair and Consignment",
-      "link": "http://sfbay.craigslist.org/sby/muc/2681301534.html",
-      "description": "San Jose Rock Shop offers musical instrument repair and consignment! (408) 215-2065<br> <br> We are pleased to announce our NEW LOCATION: 1199 N 5th st. San Jose, ca 95112. Please call ahead, by appointment only.<br> <br> Recently featured by Metro Newspaper in their 2011 Best of the Silicon Valley edition see it online here:<br> <a href=\"http://www.metroactive.com/best-of-silicon-valley/2011/music-nightlife/editor-picks.html\" rel=\"nofollow\">http://www.metroactive.com/best-of-silicon-valley/2011/music-nightlife/editor-picks.html</a><br> <br> Guitar Set up (acoustic and electronic) $40!<!-- END CLTAGS -->"
-    },
-    {
-      "title": "Ride Offered - Oakland/BART to LA/SFV - TODAY 3PM 11/04 (oakland north / temescal)",
-      "link": "http://sfbay.craigslist.org/eby/rid/2685010755.html",
-      "description": "Im offering a lift for up to two people from Oakland (or near any BART station in the East Bay/580/880 Corridor, or San Jose/Morgan Hill, Gilroy) to the San Fernando Valley / Los Angeles area. Specifically, Im leaving from Oakland between 2:30 and 3:00pm (this is flexible, but if I leave too late my girlfriend will kill me), and heading to Woodland Hills via the 580, I-5, 405, and 101.<!-- END CLTAGS -->"
-    }
-  ]
-};
\ No newline at end of file
+	"type": "rdf",
+	"id": "",
+	"title": "craigslist | all community in SF bay area",
+	"link": "http://sfbay.craigslist.org/ccc/",
+	"items": [
+		{
+			"title": "Music Equipment Repair and Consignment",
+			"link": "http://sfbay.craigslist.org/sby/muc/2681301534.html",
+			"description": "San Jose Rock Shop offers musical instrument repair and consignment! (408) 215-2065<br> <br> We are pleased to announce our NEW LOCATION: 1199 N 5th st. San Jose, ca 95112. Please call ahead, by appointment only.<br> <br> Recently featured by Metro Newspaper in their 2011 Best of the Silicon Valley edition see it online here:<br> <a href=\"http://www.metroactive.com/best-of-silicon-valley/2011/music-nightlife/editor-picks.html\" rel=\"nofollow\">http://www.metroactive.com/best-of-silicon-valley/2011/music-nightlife/editor-picks.html</a><br> <br> Guitar Set up (acoustic and electronic) $40!<!-- END CLTAGS -->"
+		},
+		{
+			"title": "Ride Offered - Oakland/BART to LA/SFV - TODAY 3PM 11/04 (oakland north / temescal)",
+			"link": "http://sfbay.craigslist.org/eby/rid/2685010755.html",
+			"description": "Im offering a lift for up to two people from Oakland (or near any BART station in the East Bay/580/880 Corridor, or San Jose/Morgan Hill, Gilroy) to the San Fernando Valley / Los Angeles area. Specifically, Im leaving from Oakland between 2:30 and 3:00pm (this is flexible, but if I leave too late my girlfriend will kill me), and heading to Woodland Hills via the 580, I-5, 405, and 101.<!-- END CLTAGS -->"
+		}
+	]
+};

From 7153b2711d6d298396dbd16987c5a34b81f9b999 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20B=C3=B6hm?= <me@feedic.com>
Date: Thu, 12 Dec 2013 15:18:02 +0100
Subject: [PATCH 450/450] package: updated dom module versions, 3.4.0

---
 package.json | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/package.json b/package.json
index 375fc1c..45d49d2 100644
--- a/package.json
+++ b/package.json
@@ -1,7 +1,7 @@
 {
 	"name": "htmlparser2",
 	"description": "Fast & forgiving HTML/XML/RSS parser",
-	"version": "3.3.0",
+	"version": "3.4.0",
 	"author": "Felix Boehm <me@feedic.com>",
 	"keywords": ["html", "parser", "streams", "xml", "dom", "rss", "feed", "atom"],
 	"contributors": ["Chris Winberry <chris@winberry.net>"],
@@ -21,8 +21,8 @@
 		"test": "mocha -R spec"
 	},
 	"dependencies": {
-		"domhandler": "2.1",
-		"domutils": "1.2",
+		"domhandler": "2.2",
+		"domutils": "1.3",
 		"domelementtype": "1",
 		"readable-stream": "1.1"
 	},