From f6acb50ee8654f1fa739a69b948a5282ce9541a5 Mon Sep 17 00:00:00 2001 From: Albert Yu Date: Fri, 17 Jul 2015 13:05:14 -0700 Subject: [PATCH 1/3] remove unnecessary end tag reset in matchEndTagWithStartTag --- src/context-parser.js | 15 ++++++++++++++- tests/unit/run-functions-spec.js | 4 ++-- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/src/context-parser.js b/src/context-parser.js index e08a809..83a2ed8 100644 --- a/src/context-parser.js +++ b/src/context-parser.js @@ -200,6 +200,7 @@ FastParser.prototype.walk = function(i, input, endsWithEOF) { FastParser.prototype.createStartTag = function (ch) { this.tagIdx = 0; this.tags[0] = ch; + this.tags[1] = ''; }; FastParser.prototype.createEndTag = function (ch) { @@ -225,8 +226,20 @@ FastParser.prototype.matchEndTagWithStartTag = function (symbol) { GREATER-THAN SIGN (>): If the current end tag token is an appropriate end tag token, then switch to the data state and emit the current tag token. Otherwise, treat it as per the 'anything else' entry below. */ + + /* + The reason we reset tags[0] (start tag) is that there is no trivial way to reset RCDATA/RAWTEXT/SCRIPT state to DATA state. + By setting tags[0] = '', we would expect processTagName() to transit the state back the DATA state + + The actual logic is described in https://html.spec.whatwg.org/multipage/syntax.html#tree-construction, + "As each token is emitted from the tokenizer, the user agent must follow the appropriate steps..." + + That implies state transition happens when a start or end tag token is emitted. + We captured all of those with state 11 (Switch state based on tag name). + + */ + this.tags[0] = ''; - this.tags[1] = ''; switch (symbol) { case stateMachine.Symbol.SPACE: /** Whitespaces */ diff --git a/tests/unit/run-functions-spec.js b/tests/unit/run-functions-spec.js index f3b4f85..c8a234b 100644 --- a/tests/unit/run-functions-spec.js +++ b/tests/unit/run-functions-spec.js @@ -222,8 +222,8 @@ Authors: Nera Liu [ { html: "
", tag0: 'div', tag1: 'div', index: 1}, { html: "
", tag0: 'div', tag1: 'div', index: 1}, - { html: "
", tag0: 'img', tag1: 'div', index: 0}, - { html: "
", tag0: 'img', tag1: '', index: 0}, + { html: "
Date: Fri, 17 Jul 2015 13:38:33 -0700 Subject: [PATCH 2/3] remove unnecessary end tag reset in matchEndTagWithStartTag --- src/context-parser.js | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/context-parser.js b/src/context-parser.js index 83a2ed8..212e238 100644 --- a/src/context-parser.js +++ b/src/context-parser.js @@ -180,6 +180,20 @@ FastParser.prototype.walk = function(i, input, endsWithEOF) { if(this.tags[0].toLowerCase() === this.tags[1].toLowerCase()) { reconsume = 0; /* see 12.2.4.13 - switch state for the following case, otherwise, reconsume. */ this.matchEndTagWithStartTag(symbol); + + /* + The reason we reset tags[0] (start tag) is that there is no trivial way to reset RCDATA/RAWTEXT/SCRIPT state to DATA state. + By setting tags[0] = '', we would expect processTagName() to transit the state back the DATA state + + The actual logic is described in https://html.spec.whatwg.org/multipage/syntax.html#tree-construction, + "As each token is emitted from the tokenizer, the user agent must follow the appropriate steps..." + + That implies state transition happens when a start or end tag token is emitted. + We captured all of those with state 11 (Switch state based on tag name). + + */ + + this.tags[0] = ""; } break; case 8: this.matchEscapedScriptTag(ch); break; @@ -227,20 +241,6 @@ FastParser.prototype.matchEndTagWithStartTag = function (symbol) { Otherwise, treat it as per the 'anything else' entry below. */ - /* - The reason we reset tags[0] (start tag) is that there is no trivial way to reset RCDATA/RAWTEXT/SCRIPT state to DATA state. - By setting tags[0] = '', we would expect processTagName() to transit the state back the DATA state - - The actual logic is described in https://html.spec.whatwg.org/multipage/syntax.html#tree-construction, - "As each token is emitted from the tokenizer, the user agent must follow the appropriate steps..." 
- - That implies state transition happens when a start or end tag token is emitted. - We captured all of those with state 11 (Switch state based on tag name). - - */ - - this.tags[0] = ''; - switch (symbol) { case stateMachine.Symbol.SPACE: /** Whitespaces */ this.state = stateMachine.State.STATE_BEFORE_ATTRIBUTE_NAME; From 2558978eebe2a74fdcacc4ae92bb5212e61def49 Mon Sep 17 00:00:00 2001 From: Albert Yu Date: Mon, 3 Aug 2015 09:39:52 -0700 Subject: [PATCH 3/3] Update context-parser.js --- src/context-parser.js | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/src/context-parser.js b/src/context-parser.js index 212e238..793f151 100644 --- a/src/context-parser.js +++ b/src/context-parser.js @@ -180,17 +180,11 @@ FastParser.prototype.walk = function(i, input, endsWithEOF) { if(this.tags[0].toLowerCase() === this.tags[1].toLowerCase()) { reconsume = 0; /* see 12.2.4.13 - switch state for the following case, otherwise, reconsume. */ this.matchEndTagWithStartTag(symbol); - /* - The reason we reset tags[0] (start tag) is that there is no trivial way to reset RCDATA/RAWTEXT/SCRIPT state to DATA state. - By setting tags[0] = '', we would expect processTagName() to transit the state back the DATA state - - The actual logic is described in https://html.spec.whatwg.org/multipage/syntax.html#tree-construction, - "As each token is emitted from the tokenizer, the user agent must follow the appropriate steps..." - - That implies state transition happens when a start or end tag token is emitted. - We captured all of those with state 11 (Switch state based on tag name). - + After matchEndTagWithStartTag (with start tag name == end tag name), + the state will transition back to DATA. + + Thus we need to reset the start tag variable (tags[0]) back to an empty string. */ this.tags[0] = "";