From baefbf866753ae690b5c187c1c7123c5d7e25e1b Mon Sep 17 00:00:00 2001 From: Shivansh Rai Date: Thu, 10 May 2018 01:18:02 +0530 Subject: [PATCH 1/3] parser: Add debug statements to demo unexpected behavior in recovery --- example/errorrecovery/Makefile | 2 +- example/errorrecovery/er.bnf | 2 +- example/errorrecovery/er_test.go | 2 +- internal/parser/gen/golang/parser.go | 29 ++++++++++++++++++++++++++++ 4 files changed, 32 insertions(+), 3 deletions(-) diff --git a/example/errorrecovery/Makefile b/example/errorrecovery/Makefile index 5831fc7d..bd7996c1 100644 --- a/example/errorrecovery/Makefile +++ b/example/errorrecovery/Makefile @@ -1,2 +1,2 @@ regenerate: - gocc er.bnf \ No newline at end of file + gocc -v -debug_parser er.bnf diff --git a/example/errorrecovery/er.bnf b/example/errorrecovery/er.bnf index 47ec84b6..f423f706 100644 --- a/example/errorrecovery/er.bnf +++ b/example/errorrecovery/er.bnf @@ -22,4 +22,4 @@ StmtList : Stmt : id << ast.NewStmt($0) >> | error -; \ No newline at end of file +; diff --git a/example/errorrecovery/er_test.go b/example/errorrecovery/er_test.go index 8af87bb3..de8550ea 100644 --- a/example/errorrecovery/er_test.go +++ b/example/errorrecovery/er_test.go @@ -11,7 +11,7 @@ import ( ) func TestFail(t *testing.T) { - sml, err := test([]byte("a b ; d e f")) + sml, err := test([]byte("a ; b")) if err != nil { t.Fail() } diff --git a/internal/parser/gen/golang/parser.go b/internal/parser/gen/golang/parser.go index 3eaf788f..79302f1b 100644 --- a/internal/parser/gen/golang/parser.go +++ b/internal/parser/gen/golang/parser.go @@ -183,6 +183,12 @@ func (p *Parser) Error(err error, scanner Scanner) (recovered bool, errorAttrib } if action := actionTab[p.stack.top()].actions[token.TokMap.Type("error")]; action != nil { + {{- if .Debug }} + fmt.Printf("Next input symbol: %s\n", p.nextToken.Lit) + // NOTE: In this case we push the errorAttrib and not the + // invalid symbol. + fmt.Println("Pushing errorAttrib, look below for modified stack top\n") + {{- end }} p.stack.push(int(action.(shift)), errorAttrib) // action can only be shift } else { return @@ -203,6 +209,9 @@ func (p *Parser) Error(err error, scanner Scanner) (recovered bool, errorAttrib func (p *Parser) popNonRecoveryStates() (removedAttribs []parseError.ErrorSymbol) { if rs, ok := p.firstRecoveryState(); ok { + {{- if .Debug }} + fmt.Printf("Number of pops performed to reach the next recovery state: %d\n", p.stack.topIndex() - rs) + {{- end }} errorSymbols := p.stack.popN(p.stack.topIndex() - rs) removedAttribs = make([]parseError.ErrorSymbol, len(errorSymbols)) for i, e := range errorSymbols { @@ -211,6 +220,19 @@ func (p *Parser) popNonRecoveryStates() (removedAttribs []parseError.ErrorSymbol } else { removedAttribs = []parseError.ErrorSymbol{} } + + {{- if .Debug }} + fmt.Printf("Attribute corresponding to the popped non-recovery states: ") + for _, v := range removedAttribs { + switch attr := v.(type) { + case *token.Token: + fmt.Printf("%s ", attr.Lit) + default: + fmt.Printf("%v ", attr) + } + } + fmt.Println() + {{- end }} return } @@ -221,6 +243,7 @@ func (p *Parser) firstRecoveryState() (recoveryState int, canRecover bool) { recoveryState-- canRecover = actionTab[p.stack.peek(recoveryState)].canRecover } + fmt.Printf("Next recovery state: %d\n", recoveryState) return } @@ -243,8 +266,14 @@ func (p *Parser) Parse(scanner Scanner) (res interface{}, err error) { p.Reset() p.nextToken = scanner.Scan() for acc := false; !acc; { + {{- if .Debug }} + fmt.Println(p.stack.String()) + {{- end }} action := actionTab[p.stack.top()].actions[p.nextToken.Type] if action == nil { + {{- if .Debug }} + fmt.Println("~~ No action exists for the current stack top and next input symbol ~~") + {{- end }} if recovered, errAttrib := p.Error(nil, scanner); !recovered { p.nextToken = errAttrib.ErrorToken return nil, p.newError(nil) From 2fd6b665337a1b5b80ec6814e71c3f00d852b357 Mon Sep 17 00:00:00 2001 From: Shivansh Rai Date: Thu, 10 May 2018 09:19:44 +0530 Subject: [PATCH 2/3] parser: Update heuristic for error-recovery The following steps are performed when an invalid input symbol is encountered - * In case the input reaches an invalid symbol, we save its error attribute and keep discarding all the immediately following error symbols until a valid symbol is found. * If the action corresponding to the found valid symbol is reduce, it performed. In case it is a shift, it is deferred until the error attribute is pushed on the stack. * It should be noted that the current heuristic cannot report multiple errors for invalid symbols occurring one after the other. If such a case arises, the error for only the first invalid symbol is reported. This is also the case with the currently implemented error recovery heuristic in gocc. Demo ~~~~ >> go test -v === RUN TestFail input: a ; b output: [ a Error in S0: INVALID(0,;), Pos(offset=2, line=1, column=3), expected one of: $ id error b ] --- PASS: TestFail (0.00s) PASS ok github.com/goccmack/gocc/example/errorrecovery 0.005s Updates #76 --- example/errorrecovery/Makefile | 2 +- internal/parser/gen/golang/parser.go | 50 +++++++++++++++++++++++++--- 2 files changed, 47 insertions(+), 5 deletions(-) diff --git a/example/errorrecovery/Makefile b/example/errorrecovery/Makefile index bd7996c1..ceb61ede 100644 --- a/example/errorrecovery/Makefile +++ b/example/errorrecovery/Makefile @@ -1,2 +1,2 @@ regenerate: - gocc -v -debug_parser er.bnf + gocc er.bnf diff --git a/internal/parser/gen/golang/parser.go b/internal/parser/gen/golang/parser.go index 79302f1b..327517e7 100644 --- a/internal/parser/gen/golang/parser.go +++ b/internal/parser/gen/golang/parser.go @@ -173,7 +173,7 @@ func (p *Parser) Error(err error, scanner Scanner) (recovered bool, errorAttrib errorAttrib = &parseError.Error{ Err: err, ErrorToken: p.nextToken, - ErrorSymbols: p.popNonRecoveryStates(), + ErrorSymbols: []parseError.ErrorSymbol{}, ExpectedTokens: make([]string, 0, 8), } for t, action := range actionTab[p.stack.top()].actions { @@ -182,14 +182,56 @@ func (p *Parser) Error(err error, scanner Scanner) (recovered bool, errorAttrib } } + lostSymbols := []*token.Token{} + // The current input symbol is invalid. Keep scanning the input symbols + // until the next valid symbol is encountered. + for i := 0; ; i++ { + if action := actionTab[p.stack.top()].actions[p.nextToken.Type]; action != nil { + break + } else { + if i > 0 { + lostSymbols = append(lostSymbols, p.nextToken) + } + p.nextToken = scanner.Scan() + } + } + {{- if .Debug }} + fmt.Printf("Lost %d error symbol(s)\n", len(lostSymbols)) + fmt.Printf("Next input symbol: %s\n", p.nextToken.Lit) + {{- end }} + + // If the action corresponding to the found valid input symbol is a + // reduce, perform it. If it is a shift, defer performing the action + // until the error attribute is pushed to the stack. + for breakHere := false; !breakHere; { + action := actionTab[p.stack.top()].actions[p.nextToken.Type] + switch act := action.(type) { + case reduce: + prod := productionsTable[int(act)] + attrib, err := prod.ReduceFunc(p.stack.popN(prod.NumSymbols)) + if err != nil { + return false, errorAttrib + } else { + p.stack.push(gotoTab[p.stack.top()][prod.NTType], attrib) + } + case shift: + // Defer pushing the valid input symbol. + breakHere = true + case accept: + breakHere = true + // TODO: Combine the above two case clauses. + } + } + + if action := actionTab[p.stack.top()].actions[token.TokMap.Type("error")]; action != nil { {{- if .Debug }} - fmt.Printf("Next input symbol: %s\n", p.nextToken.Lit) // NOTE: In this case we push the errorAttrib and not the // invalid symbol. fmt.Println("Pushing errorAttrib, look below for modified stack top\n") {{- end }} - p.stack.push(int(action.(shift)), errorAttrib) // action can only be shift + // Action corresponding to errorAttrib can only be a shift. + p.stack.push(int(action.(shift)), errorAttrib) } else { return } @@ -210,7 +252,7 @@ func (p *Parser) Error(err error, scanner Scanner) (recovered bool, errorAttrib func (p *Parser) popNonRecoveryStates() (removedAttribs []parseError.ErrorSymbol) { if rs, ok := p.firstRecoveryState(); ok { {{- if .Debug }} - fmt.Printf("Number of pops performed to reach the next recovery state: %d\n", p.stack.topIndex() - rs) + fmt.Printf("Number of pops performed to reach the next recovery state: %d\n", p.stack.topIndex()-rs) {{- end }} errorSymbols := p.stack.popN(p.stack.topIndex() - rs) removedAttribs = make([]parseError.ErrorSymbol, len(errorSymbols)) From 06519f5d3a540e31b28a1b0209285d0cbb04f295 Mon Sep 17 00:00:00 2001 From: Shivansh Rai Date: Thu, 10 May 2018 19:35:12 +0530 Subject: [PATCH 3/3] parser: Fix edge case when input contains only invalid symbols >> go test -v === RUN TestFail input: ; ; output: [ Error in S0: INVALID(0,;), Pos(offset=0, line=1, column=1), expected one of: id error ] --- PASS: TestFail (0.00s) PASS ok github.com/goccmack/gocc/example/errorrecovery 0.002s --- internal/parser/gen/golang/parser.go | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/internal/parser/gen/golang/parser.go b/internal/parser/gen/golang/parser.go index 327517e7..bd11d540 100644 --- a/internal/parser/gen/golang/parser.go +++ b/internal/parser/gen/golang/parser.go @@ -185,26 +185,28 @@ func (p *Parser) Error(err error, scanner Scanner) (recovered bool, errorAttrib lostSymbols := []*token.Token{} // The current input symbol is invalid. Keep scanning the input symbols // until the next valid symbol is encountered. - for i := 0; ; i++ { + p.nextToken = scanner.Scan() + for len(p.nextToken.Lit) > 0 { if action := actionTab[p.stack.top()].actions[p.nextToken.Type]; action != nil { break } else { - if i > 0 { - lostSymbols = append(lostSymbols, p.nextToken) - } + lostSymbols = append(lostSymbols, p.nextToken) p.nextToken = scanner.Scan() } } {{- if .Debug }} fmt.Printf("Lost %d error symbol(s)\n", len(lostSymbols)) - fmt.Printf("Next input symbol: %s\n", p.nextToken.Lit) + fmt.Printf("Next valid input symbol: %s\n", p.nextToken.Lit) {{- end }} // If the action corresponding to the found valid input symbol is a // reduce, perform it. If it is a shift, defer performing the action // until the error attribute is pushed to the stack. - for breakHere := false; !breakHere; { + for again := true; again; { action := actionTab[p.stack.top()].actions[p.nextToken.Type] + if action == nil { + break + } switch act := action.(type) { case reduce: prod := productionsTable[int(act)] @@ -216,14 +218,14 @@ func (p *Parser) Error(err error, scanner Scanner) (recovered bool, errorAttrib } case shift: // Defer pushing the valid input symbol. - breakHere = true + again = false case accept: - breakHere = true - // TODO: Combine the above two case clauses. + again = false + default: + panic("unknown action: " + action.String()) } } - if action := actionTab[p.stack.top()].actions[token.TokMap.Type("error")]; action != nil { {{- if .Debug }} // NOTE: In this case we push the errorAttrib and not the