30 changes: 30 additions & 0 deletions parser/parser.go
@@ -22,6 +22,10 @@ func (c Config) parseTokens(tokens []Token) (ASTNode, Error) { // nolint: gocyclo
node *ASTBlock
ap *[]ASTNode
}
type WhitespaceControl struct {
stack []ASTNode // stack to buffer ws for trimming
trimRight bool
}
var (
g = c.Grammar
root = &ASTSeq{} // root of AST; will be returned
@@ -32,8 +36,18 @@ func (c Config) parseTokens(tokens []Token) (ASTNode, Error) { // nolint: gocyclo
rawTag *ASTRaw // current raw tag
inComment = false
inRaw = false
wsc = &WhitespaceControl{}
)
for _, tok := range tokens {
// Check the current token's TrimLeft flag; if set, drop any stacked
// whitespace, otherwise flush the stack to the output.
if tok.Type != WhitespaceTokenType {
if !tok.TrimLeft {
*ap = append(*ap, wsc.stack...)
}
wsc.stack = wsc.stack[:0]
wsc.trimRight = tok.TrimRight
}
switch {
// The parser needs to know about comment and raw, because tags inside
// needn't match each other e.g. {%comment%}{%if%}{%endcomment%}
@@ -56,6 +70,19 @@ func (c Config) parseTokens(tokens []Token) (ASTNode, Error) { // nolint: gocyclo
*ap = append(*ap, &ASTObject{tok, expr})
case tok.Type == TextTokenType:
*ap = append(*ap, &ASTText{Token: tok})
case tok.Type == WhitespaceTokenType:
// Append to the whitespace stack unless the previous token
// requested that trailing whitespace be trimmed.
if !wsc.trimRight {
wsc.stack = append(wsc.stack, &ASTText{Token: tok})
}
if tok.Name == "New Line" {
// trimming should only occur up to the first newline
// so it is safe to append the stack now
*ap = append(*ap, wsc.stack...)
wsc.stack = wsc.stack[:0]
wsc.trimRight = false
}
case tok.Type == TagTokenType:
if cs, ok := g.BlockSyntax(tok.Name); ok {
switch {
@@ -101,5 +128,8 @@ func (c Config) parseTokens(tokens []Token) (ASTNode, Error) { // nolint: gocyclo
if bn != nil {
return nil, Errorf(bn, "unterminated %q block", bn.Name)
}

// append any whitespace still queued
*ap = append(*ap, wsc.stack...)
return root, nil
}
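
The buffering rule is easiest to see in isolation. Below is a minimal, self-contained sketch of the same strategy — the tok type and trim function are illustrative stand-ins, not part of this diff: whitespace tokens accumulate on a stack until either a newline forces a flush, or the next non-whitespace token uses its TrimLeft flag to decide whether the buffered run is kept or dropped.

package main

import "fmt"

// tok is a simplified stand-in for parser.Token.
type tok struct {
	text                string
	isSpace, isNewline  bool
	trimLeft, trimRight bool // set by "{{-"/"{%-" and "-}}"/"-%}"
}

// trim mirrors the parser's buffering: whitespace is held on a stack
// until a newline flushes it or the next tag decides its fate.
func trim(tokens []tok) (out []string) {
	var stack []string
	trimRight := false // requested by the previous non-whitespace token
	for _, t := range tokens {
		if !t.isSpace {
			if !t.trimLeft {
				out = append(out, stack...) // keep buffered whitespace
			}
			stack, trimRight = stack[:0], t.trimRight
			out = append(out, t.text)
			continue
		}
		if !trimRight {
			stack = append(stack, t.text)
		}
		if t.isNewline {
			// Trimming reaches only to the first newline; flush survivors.
			out = append(out, stack...)
			stack, trimRight = stack[:0], false
		}
	}
	return append(out, stack...) // whitespace still queued at end of input
}

func main() {
	// "{{ obj -}} \t\n\t {{ obj }}" keeps only the whitespace after the newline.
	fmt.Printf("%q\n", trim([]tok{
		{text: "{{ obj -}}", trimRight: true},
		{text: " \t", isSpace: true},
		{text: "\n", isSpace: true, isNewline: true},
		{text: "\t ", isSpace: true},
		{text: "{{ obj }}"},
	})) // ["{{ obj -}}" "\t " "{{ obj }}"] — three nodes, as the tests expect
}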
34 changes: 33 additions & 1 deletion parser/parser_test.go
@@ -43,7 +43,6 @@ var parserTests = []struct{ in string }{
{`{% unless test %}{% endunless %}`},
{`{% for item in list %}{% if test %}{% else %}{% endif %}{% endfor %}`},
{`{% if true %}{% raw %}{% endraw %}{% endif %}`},

{`{% comment %}{% if true %}{% endcomment %}`},
{`{% raw %}{% if true %}{% endraw %}`},
}
@@ -68,3 +67,36 @@ func TestParser(t *testing.T) {
})
}
}

var parseWhitespaceTests = []struct {
in string
expected int
}{
// expected counts include object tokens
{"{{ obj -}} \t\n\t {{ obj }}", 3},
{"{{ obj }} \t\n\t {{- obj }}", 4},
{"{{ obj -}} \t\n\t {{- obj }}", 2},
{"{{ obj -}} \t\n\t\n\t {{- obj }}", 4}, // preseves mid whitespace

// expected counts are for the clause body only and do not include the if tags
{"{% if test -%} \t\n\t {% endif %}", 1},
{"{% if test %} \t\n\t {%- endif %}", 2},
{"{% if test -%} \t\n\t {%- endif %}", 0},
{"{% if test -%} \t\n\t\n\t {%- endif %}", 2},
}

func TestParseWhitespace(t *testing.T) {
cfg := Config{Grammar: grammarFake{}}
for i, test := range parseWhitespaceTests {
t.Run(fmt.Sprintf("%02d", i+1), func(t *testing.T) {
ast, _ := cfg.Parse(test.in, SourceLoc{})
children := ast.(*ASTSeq).Children
switch children[0].(type) {
case *ASTSeq:
require.Equal(t, len(children), test.expected)
case *ASTBlock:
require.Equal(t, len(children[0].(*ASTBlock).Body), test.expected)
}
})
}
}
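
To make the expected counts concrete, here is a worked trace of case 4 above, "{{ obj -}} \t\n\t\n\t {{- obj }}" (expects 4): the first object's TrimRight drops " \t" and the first newline; the following "\t" and "\n" are buffered and flushed at the second newline, surviving as two ASTText nodes; the trailing "\t " is buffered and then dropped by the second object's TrimLeft. The clause's children are therefore ASTObject, ASTText("\t"), ASTText("\n"), ASTObject — four nodes.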
29 changes: 17 additions & 12 deletions parser/scanner.go
@@ -4,6 +4,7 @@ import (
"fmt"
"regexp"
"strings"
"unicode"
)

// Scan breaks a string into a sequence of Tokens.
@@ -20,12 +21,18 @@ func Scan(data string, loc SourceLoc, delims []string) (tokens []Token) {
p, pe := 0, len(data)
for _, m := range tokenMatcher.FindAllStringSubmatchIndex(data, -1) {
ts, te := m[0], m[1]
source := data[ts:te]
if p < ts {
tokens = append(tokens, Token{Type: TextTokenType, SourceLoc: loc, Source: data[p:ts]})
loc.LineNo += strings.Count(data[p:ts], "\n")
}
source := data[ts:te]
switch {
case rune(data[ts]) == '\n':
tok := Token{Type: WhitespaceTokenType, Name: "New Line", SourceLoc: loc, Source: source}
loc.LineNo++
tokens = append(tokens, tok)
case unicode.IsSpace(rune(data[ts])):
tok := Token{Type: WhitespaceTokenType, Name: "Whitespace", SourceLoc: loc, Source: source}
tokens = append(tokens, tok)
case data[ts:ts+len(delims[0])] == delims[0]:
tok := Token{
Type: ObjTokenType,
@@ -41,16 +48,15 @@ func Scan(data string, loc SourceLoc, delims []string) (tokens []Token) {
Type: TagTokenType,
SourceLoc: loc,
Source: source,
Name: data[m[4]:m[5]],
Name: data[m[8]:m[9]],
TrimLeft: source[2] == '-',
TrimRight: source[len(source)-3] == '-',
}
if m[6] > 0 {
tok.Args = data[m[6]:m[7]]
if m[10] > 0 {
tok.Args = data[m[10]:m[11]]
}
tokens = append(tokens, tok)
}
loc.LineNo += strings.Count(source, "\n")
p = te
}
if p < pe {
@@ -73,13 +79,12 @@ func formTokenMatcher(delims []string) *regexp.Regexp {
}
}

tokenMatcher := regexp.MustCompile(
fmt.Sprintf(`%s-?\s*(.+?)\s*-?%s|%s-?\s*(\w+)(?:\s+((?:%v)+?))?\s*-?%s`,
// QuoteMeta escapes any of these that are regex metacharacters
regexp.QuoteMeta(delims[0]), regexp.QuoteMeta(delims[1]),
regexp.QuoteMeta(delims[2]), strings.Join(exclusion, "|"), regexp.QuoteMeta(delims[3]),
),
p := fmt.Sprintf(`%s-?\s*(.+?)\s*-?%s|([ \t]+)|(\n)|%s-?\s*(\w+)(?:\s+((?:%v)+?))?\s*-?%s`,
// QuoteMeta escapes any of these that are regex metacharacters
regexp.QuoteMeta(delims[0]), regexp.QuoteMeta(delims[1]),
regexp.QuoteMeta(delims[2]), strings.Join(exclusion, "|"), regexp.QuoteMeta(delims[3]),
)
tokenMatcher := regexp.MustCompile(p)

return tokenMatcher
}
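
The change from m[4] to m[8] (and m[6] to m[10]) follows directly from the two new capture groups: FindAllStringSubmatchIndex returns index pairs in the source order of the groups, so inserting ([ \t]+) and (\n) ahead of the tag alternative shifts the tag name from group 2 to group 4 (pair m[8], m[9]) and the args from group 3 to group 5 (pair m[10], m[11]). A quick standalone check — hard-coding the default delimiters and a simplified (.+?) args pattern, both assumptions rather than the generated pattern itself:

package main

import (
	"fmt"
	"regexp"
)

func main() {
	// Groups: 1 = object args, 2 = spaces/tabs, 3 = newline,
	// 4 = tag name, 5 = tag args.
	p := regexp.MustCompile(
		`\{\{-?\s*(.+?)\s*-?\}\}|([ \t]+)|(\n)|\{%-?\s*(\w+)(?:\s+(.+?))?\s*-?%\}`)
	data := "{% assign x = 1 %}"
	m := p.FindAllStringSubmatchIndex(data, -1)[0]
	fmt.Println(data[m[8]:m[9]])   // "assign" — group 4
	fmt.Println(data[m[10]:m[11]]) // "x = 1"  — group 5
}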
113 changes: 76 additions & 37 deletions parser/scanner_test.go
@@ -25,39 +25,29 @@ var scannerCountTests = []struct {
func TestScan(t *testing.T) {
scan := func(src string) []Token { return Scan(src, SourceLoc{}, nil) }
tokens := scan("12")
require.NotNil(t, tokens)
require.Len(t, tokens, 1)
require.Equal(t, TextTokenType, tokens[0].Type)
verifyTokens(t, TextTokenType, 1, tokens)
require.Equal(t, "12", tokens[0].Source)

tokens = scan("{{obj}}")
require.NotNil(t, tokens)
require.Len(t, tokens, 1)
require.Equal(t, ObjTokenType, tokens[0].Type)
verifyTokens(t, ObjTokenType, 1, tokens)
require.Equal(t, "obj", tokens[0].Args)

tokens = scan("{{ obj }}")
require.NotNil(t, tokens)
require.Len(t, tokens, 1)
require.Equal(t, ObjTokenType, tokens[0].Type)
verifyTokens(t, ObjTokenType, 1, tokens)
require.Equal(t, "obj", tokens[0].Args)

tokens = scan("{%tag args%}")
require.NotNil(t, tokens)
require.Len(t, tokens, 1)
require.Equal(t, TagTokenType, tokens[0].Type)
verifyTokens(t, TagTokenType, 1, tokens)
require.Equal(t, "tag", tokens[0].Name)
require.Equal(t, "args", tokens[0].Args)

tokens = scan("{% tag args %}")
require.NotNil(t, tokens)
require.Len(t, tokens, 1)
require.Equal(t, TagTokenType, tokens[0].Type)
verifyTokens(t, TagTokenType, 1, tokens)
require.Equal(t, "tag", tokens[0].Name)
require.Equal(t, "args", tokens[0].Args)

tokens = scan("pre{% tag args %}mid{{ object }}post")
require.Equal(t, `[TextTokenType{"pre"} TagTokenType{Tag:"tag", Args:"args"} TextTokenType{"mid"} ObjTokenType{"object"} TextTokenType{"post"}]`, fmt.Sprint(tokens))
require.Equal(t, `[TextTokenType{"pre"} TagTokenType{Tag:"tag", Args:"args", l: false, r: false} TextTokenType{"mid"} ObjTokenType{"object"} TextTokenType{"post"}]`, fmt.Sprint(tokens))

for i, test := range scannerCountTests {
t.Run(fmt.Sprintf("%02d", i), func(t *testing.T) {
@@ -78,14 +68,11 @@ func TestScan_ws(t *testing.T) {
{`{{ expr }}`, "expr", false, false},
{`{{- expr }}`, "expr", true, false},
{`{{ expr -}}`, "expr", false, true},
{`{% tag arg %}`, "tag", false, false},
{`{%- tag arg %}`, "tag", true, false},
{`{% tag arg -%}`, "tag", false, true},
{`{{- expr -}}`, "expr", true, true},
}
for i, test := range wsTests {
t.Run(fmt.Sprintf("%02d", i), func(t *testing.T) {
tokens := scan(test.in)
require.Len(t, tokens, 1)
tok := tokens[0]
if test.expect == "tag" {
require.Equalf(t, "tag", tok.Name, test.in)
Expand All @@ -99,6 +86,61 @@ func TestScan_ws(t *testing.T) {
}
}

func TestScanWhiteSpaceTokens(t *testing.T) {
// whitespace tokenization
scan := func(src string) []Token { return Scan(src, SourceLoc{}, nil) }

wsTests := []struct {
in string
numTokens int
expected []TokenType
}{
{" ", 1, []TokenType{WhitespaceTokenType}},
{" ", 1, []TokenType{WhitespaceTokenType}},
{"\n", 1, []TokenType{WhitespaceTokenType}},
{"\t", 1, []TokenType{WhitespaceTokenType}},
{"\t\t\t\t", 1, []TokenType{WhitespaceTokenType}},
{"\t\n\t", 3, []TokenType{WhitespaceTokenType, WhitespaceTokenType, WhitespaceTokenType}},
{"{{ expr }} {{ expr }}", 3, []TokenType{ObjTokenType, WhitespaceTokenType, ObjTokenType}},
{"{{ expr }}\t\n\t{{ expr }}", 5, []TokenType{ObjTokenType, WhitespaceTokenType, WhitespaceTokenType, WhitespaceTokenType, ObjTokenType}},
{"{{ expr }}\t \t\n\t \t{{ expr }}", 5, []TokenType{ObjTokenType, WhitespaceTokenType, WhitespaceTokenType, WhitespaceTokenType, ObjTokenType}},
{"{{ expr }}\t \t\nSomeText\n\t \t{{ expr }}", 7, []TokenType{ObjTokenType, WhitespaceTokenType, WhitespaceTokenType, TextTokenType, WhitespaceTokenType, WhitespaceTokenType, ObjTokenType}},
}
for i, test := range wsTests {
t.Run(fmt.Sprintf("%02d", i), func(t *testing.T) {
tokens := scan(test.in)
require.Len(t, tokens, test.numTokens)
for x, tok := range tokens {
require.Equal(t, test.expected[x], tok.Type)
}
})
}
}

func TestScanTokenLocationParsing(t *testing.T) {
// source-location (line number) tracking
scan := func(src string) []Token { return Scan(src, SourceLoc{LineNo: 1}, nil) }

wsTests := []struct {
in string
expectedLineNos []int
}{
{"\t \t \tsometext", []int{1, 1}},
{"\t\n\t", []int{1, 1, 2}},
{"\nsometext", []int{1, 2}},
{"{{ expr }}\t \t\nSomeText\n\t \t{{ expr }}", []int{1, 1, 1, 2, 2, 3, 3}},
}
for i, test := range wsTests {
t.Run(fmt.Sprintf("%02d", i), func(t *testing.T) {
tokens := scan(test.in)
require.Len(t, tokens, len(test.expectedLineNos))
for x, tok := range tokens {
require.Equal(t, test.expectedLineNos[x], tok.SourceLoc.LineNo)
}
})
}
}
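
The expected line numbers above follow from Scan's bookkeeping: each token records the location at which it starts, and only a newline advances loc.LineNo. So for "\t\n\t" with a starting LineNo of 1, the tab and the newline are both recorded on line 1, while the trailing tab lands on line 2.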

var scannerCountTestsDelims = []struct {
in string
len int
@@ -119,39 +161,29 @@ func TestScan_delims(t *testing.T) {
return Scan(src, SourceLoc{}, []string{"OBJECT@LEFT", "OBJECT#RIGHT", "TAG*LEFT", "TAG!RIGHT"})
}
tokens := scan("12")
require.NotNil(t, tokens)
require.Len(t, tokens, 1)
require.Equal(t, TextTokenType, tokens[0].Type)
verifyTokens(t, TextTokenType, 1, tokens)
require.Equal(t, "12", tokens[0].Source)

tokens = scan("OBJECT@LEFTobjOBJECT#RIGHT")
require.NotNil(t, tokens)
require.Len(t, tokens, 1)
require.Equal(t, ObjTokenType, tokens[0].Type)
verifyTokens(t, ObjTokenType, 1, tokens)
require.Equal(t, "obj", tokens[0].Args)

tokens = scan("OBJECT@LEFT obj OBJECT#RIGHT")
require.NotNil(t, tokens)
require.Len(t, tokens, 1)
require.Equal(t, ObjTokenType, tokens[0].Type)
verifyTokens(t, ObjTokenType, 1, tokens)
require.Equal(t, "obj", tokens[0].Args)

tokens = scan("TAG*LEFTtag argsTAG!RIGHT")
require.NotNil(t, tokens)
require.Len(t, tokens, 1)
require.Equal(t, TagTokenType, tokens[0].Type)
verifyTokens(t, TagTokenType, 1, tokens)
require.Equal(t, "tag", tokens[0].Name)
require.Equal(t, "args", tokens[0].Args)

tokens = scan("TAG*LEFT tag args TAG!RIGHT")
require.NotNil(t, tokens)
require.Len(t, tokens, 1)
require.Equal(t, TagTokenType, tokens[0].Type)
verifyTokens(t, TagTokenType, 1, tokens)
require.Equal(t, "tag", tokens[0].Name)
require.Equal(t, "args", tokens[0].Args)

tokens = scan("preTAG*LEFT tag args TAG!RIGHTmidOBJECT@LEFT object OBJECT#RIGHTpost")
require.Equal(t, `[TextTokenType{"pre"} TagTokenType{Tag:"tag", Args:"args"} TextTokenType{"mid"} ObjTokenType{"object"} TextTokenType{"post"}]`, fmt.Sprint(tokens))
tokens = scan("\npreTAG*LEFT tag args TAG!RIGHTmidOBJECT@LEFT object OBJECT#RIGHTpost\t")
require.Equal(t, `[WhitespaceTokenType{"New Line"} TextTokenType{"pre"} TagTokenType{Tag:"tag", Args:"args", l: false, r: false} TextTokenType{"mid"} ObjTokenType{"object"} TextTokenType{"post"} WhitespaceTokenType{"Whitespace"}]`, fmt.Sprint(tokens))

for i, test := range scannerCountTestsDelims {
t.Run(fmt.Sprintf("%02d", i), func(t *testing.T) {
@@ -160,3 +192,10 @@ func TestScan_delims(t *testing.T) {
})
}
}

func verifyTokens(t require.TestingT, tokenType TokenType, length int, tokens []Token) []Token {
require.NotNil(t, tokens)
require.Len(t, tokens, length)
require.Equal(t, tokenType, tokens[0].Type)
return tokens
}
8 changes: 6 additions & 2 deletions parser/token.go
@@ -15,7 +15,7 @@ type Token struct {
// TokenType is the type of a Chunk
type TokenType int

////go:generate stringer -type=TokenType
//go:generate stringer -type=TokenType

const (
// TextTokenType is the type of a text Chunk
@@ -24,6 +24,8 @@ const (
TagTokenType
// ObjTokenType is the type of an object Chunk "{{…}}"
ObjTokenType
// WhitespaceTokenType represents whitespace
WhitespaceTokenType
)

// SourceLoc contains a Token's source location.
@@ -48,9 +50,11 @@ func (c Token) String() string {
case TextTokenType:
return fmt.Sprintf("%v{%#v}", c.Type, c.Source)
case TagTokenType:
return fmt.Sprintf("%v{Tag:%#v, Args:%#v}", c.Type, c.Name, c.Args)
return fmt.Sprintf("%v{Tag:%#v, Args:%#v, l: %#v, r: %#v}", c.Type, c.Name, c.Args, c.TrimLeft, c.TrimRight)
case ObjTokenType:
return fmt.Sprintf("%v{%#v}", c.Type, c.Args)
case WhitespaceTokenType:
return fmt.Sprintf("%v{%#v}", c.Type, c.Name)
default:
return fmt.Sprintf("%v{%#v}", c.Type, c.Source)
}
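
With the stringer directive restored (go generate only recognizes the single-slash //go:generate form), the new branch prints a whitespace token by its Name. A minimal sketch, written as if added to the package's test file (which already imports fmt) and assuming the stringer-generated TokenType names:

func ExampleToken_String() {
	t := Token{Type: WhitespaceTokenType, Name: "New Line", Source: "\n"}
	fmt.Println(t)
	// Output: WhitespaceTokenType{"New Line"}
}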