From cc54ed2189f1a8fb3adc7916b82df9cf8072b559 Mon Sep 17 00:00:00 2001 From: mingodad Date: Fri, 28 May 2021 10:09:56 +0200 Subject: [PATCH 01/21] Fix for detecting left recursion in rules like: indexing_list = indexing_element | indexing_list ',' indexing_element . --- src/Tab.cs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Tab.cs b/src/Tab.cs index 220417f..d11403b 100644 --- a/src/Tab.cs +++ b/src/Tab.cs @@ -911,17 +911,17 @@ public CNode (Symbol l, Symbol r) { } } - void GetSingles(Node p, ArrayList singles) { + void GetSingles(Node p, ArrayList singles, Node rule) { if (p == null) return; // end of graph if (p.typ == Node.nt) { - if (p.up || DelGraph(p.next)) singles.Add(p.sym); + if (p.up || DelGraph(p.next) || p.sym.graph == rule) singles.Add(p.sym); } else if (p.typ == Node.alt || p.typ == Node.iter || p.typ == Node.opt) { if (p.up || DelGraph(p.next)) { - GetSingles(p.sub, singles); - if (p.typ == Node.alt) GetSingles(p.down, singles); + GetSingles(p.sub, singles, rule); + if (p.typ == Node.alt) GetSingles(p.down, singles, rule); } } - if (!p.up && DelNode(p)) GetSingles(p.next, singles); + if (!p.up && DelNode(p)) GetSingles(p.next, singles, rule); } public bool NoCircularProductions() { @@ -929,7 +929,7 @@ public bool NoCircularProductions() { ArrayList list = new ArrayList(); foreach (Symbol sym in nonterminals) { ArrayList singles = new ArrayList(); - GetSingles(sym.graph, singles); // get nonterminals s such that sym-->s + GetSingles(sym.graph, singles, sym.graph); // get nonterminals s such that sym-->s foreach (Symbol s in singles) list.Add(new CNode(sym, s)); } do { From a54e9a550fbaf11c02b58bdd7cee7bede7409546 Mon Sep 17 00:00:00 2001 From: mingodad Date: Fri, 28 May 2021 10:24:59 +0200 Subject: [PATCH 02/21] Fix possible mistake mixing boolean comparison with bitwise operation --- src/DFA.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/DFA.cs b/src/DFA.cs index fa0fada..51333d0 100644 
--- a/src/DFA.cs +++ b/src/DFA.cs @@ -478,7 +478,7 @@ void DeleteRedundantStates() { for (State s1 = firstState.next; s1 != null; s1 = s1.next) // firstState cannot be final if (used[s1.nr] && s1.endOf != null && s1.firstAction == null && !s1.ctx) for (State s2 = s1.next; s2 != null; s2 = s2.next) - if (used[s2.nr] && s1.endOf == s2.endOf && s2.firstAction == null & !s2.ctx) { + if (used[s2.nr] && s1.endOf == s2.endOf && s2.firstAction == null && !s2.ctx) { used[s2.nr] = false; newState[s2.nr] = s1; } for (State state = firstState; state != null; state = state.next) From a356063940018c415354b71a6a231c4c49add45a Mon Sep 17 00:00:00 2001 From: mingodad Date: Mon, 7 Jun 2021 20:05:11 +0200 Subject: [PATCH 03/21] Added a kind of TreeView for LL1 errors/warnings. Added an option to ignore errors for development purposes (like rules not reachable). Added a TERMINALS section to create custom terminal tokens not managed by the Scanner (from cocoxml). Added column info to Symbols and Nodes to enhance error/warning messages. Added terminal name/value info inside comments to the Scanner to make it easier to read/understand. --- src/Coco.atg | 97 +++++----- src/Coco.cs | 36 ++-- src/Copyright.frame | 26 +++ src/DFA.cs | 30 ++- src/Parser.cs | 435 +++++++++++++++++++++++--------------------- src/Parser.frame | 40 ++-- src/ParserGen.cs | 101 ++++++---- src/Scanner.cs | 131 +++++++------ src/Tab.cs | 394 ++++++++++++++++++++++++--------------- src/build.sh | 1 + 10 files changed, 752 insertions(+), 539 deletions(-) create mode 100644 src/Copyright.frame create mode 100644 src/build.sh diff --git a/src/Coco.atg b/src/Coco.atg index 1312f64..6ea58d0 100644 --- a/src/Coco.atg +++ b/src/Coco.atg @@ -5,24 +5,24 @@ Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz extended by M. Loeberbauer & A. Woess, Univ. 
of Linz with improvements by Pat Terry, Rhodes University -This program is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 2, or (at your option) any +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any later version. -This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. -If not otherwise stated, any source code generated by Coco/R (other than +If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. 
-------------------------------------------------------------------------*/ /*------------------------------------------------------------------------- @@ -37,15 +37,17 @@ COMPILER Coco const int id = 0; const int str = 1; - + public TextWriter trace; // other Coco objects referenced in this ATG public Tab tab; public DFA dfa; public ParserGen pgen; bool genScanner; + bool ignoreGammarErrors = false; string tokenString; // used in declarations of literal tokens string noString = "-none-"; // used in declarations of literal tokens + string gramName; // grammar name /*-------------------------------------------------------------------------*/ @@ -85,25 +87,32 @@ IGNORE cr + lf + tab PRODUCTIONS -Coco (. Symbol sym; Graph g, g1, g2; string gramName; CharSet s; int beg, line; .) +Coco (. Symbol sym; Graph g, g1, g2; CharSet s; int beg, line; .) = [ // using statements ANY (. beg = t.pos; line = t.line; .) { ANY } (. pgen.usingPos = new Position(beg, la.pos, 0, line); .) ] - "COMPILER" (. genScanner = true; - tab.ignored = new CharSet(); .) + "COMPILER" (. genScanner = true; + tab.ignored = new CharSet(); .) ident (. gramName = t.val; beg = la.pos; line = la.line; .) { ANY } (. tab.semDeclPos = new Position(beg, la.pos, 0, line); .) [ "IGNORECASE" (. dfa.ignoreCase = true; .) ] /* pdt */ + [ "TERMINALS" { ident (.sym = tab.FindSym(t.val); + if (sym != null) SemErr("name declared twice"); + else { + sym = tab.NewSym(Node.t, t.val, t.line, t.col); + sym.tokenKind = Symbol.fixedToken; + }.) + } ] /*from cocoxml*/ [ "CHARACTERS" { SetDecl }] [ "TOKENS" { TokenDecl }] [ "PRAGMAS" { TokenDecl }] { "COMMENTS" (. bool nested = false; .) - "FROM" TokenExpr + "FROM" TokenExpr "TO" TokenExpr [ "NESTED" (. nested = true; .) ] (. dfa.NewComment(g1.l, g2.l, nested); .) @@ -111,13 +120,13 @@ Coco (. Symbol sym; Graph g, g1, g2; string gramName; { "IGNORE" Set (. tab.ignored.Or(s); .) } - SYNC + SYNC "PRODUCTIONS" (. if (genScanner) dfa.MakeDeterministic(); tab.DeleteNodes(); .) 
{ ident (. sym = tab.FindSym(t.val); bool undef = sym == null; - if (undef) sym = tab.NewSym(Node.nt, t.val, t.line); + if (undef) sym = tab.NewSym(Node.nt, t.val, t.line, t.col); else { if (sym.typ == Node.nt) { if (sym.graph != null) SemErr("name declared twice"); @@ -149,7 +158,7 @@ Coco (. Symbol sym; Graph g, g1, g2; string gramName; if (sym.attrPos != null) SemErr("grammar symbol must not have attributes"); } - tab.noSym = tab.NewSym(Node.t, "???", 0); // noSym gets highest number + tab.noSym = tab.NewSym(Node.t, "???", 0, 0); // noSym gets highest number tab.SetupAnys(); tab.RenumberPragmas(); if (tab.ddt[2]) tab.PrintNodes(); @@ -157,7 +166,13 @@ Coco (. Symbol sym; Graph g, g1, g2; string gramName; Console.WriteLine("checking"); tab.CompSymbolSets(); if (tab.ddt[7]) tab.XRef(); - if (tab.GrammarOk()) { + bool doGenCode = false; + if(ignoreGammarErrors) { + doGenCode = true; + tab.GrammarCheckAll(); + } + else doGenCode = tab.GrammarOk(); + if (doGenCode) { Console.Write("parser"); pgen.WriteParser(); if (genScanner) { @@ -205,8 +220,7 @@ SimSet (. int n1, n2; .) ( ident (. CharClass c = tab.FindCharClass(t.val); if (c == null) SemErr("undefined name"); else s.Or(c.set); .) -| string (. string name = t.val; - name = tab.Unescape(name.Substring(1, name.Length-2)); +| string (. string name = tab.Unstring(t.val); foreach (char ch in name) if (dfa.ignoreCase) s.Set(char.ToLower(ch)); else s.Set(ch); .) @@ -221,8 +235,7 @@ SimSet (. int n1, n2; .) Char = - char (. string name = t.val; n = 0; - name = tab.Unescape(name.Substring(1, name.Length-2)); + char (. string name = tab.Unstring(t.val); n = 0; if (name.Length == 1) n = name[0]; else SemErr("unacceptable character value"); if (dfa.ignoreCase && (char)n >= 'A' && (char)n <= 'Z') n += 32; @@ -236,7 +249,7 @@ TokenDecl (. string name; int kind; Symbol sym; Graph g; . Sym (. 
sym = tab.FindSym(name); if (sym != null) SemErr("name declared twice"); else { - sym = tab.NewSym(typ, name, t.line); + sym = tab.NewSym(typ, name, t.line, t.col); sym.tokenKind = Symbol.fixedToken; } tokenString = null; @@ -282,7 +295,7 @@ AttrDecl /*------------------------------------------------------------------------------------*/ Expression (. Graph g2; .) -= += Term (. bool first = true; .) { WEAK '|' @@ -296,7 +309,7 @@ Expression (. Graph g2; .) Term (. Graph g2; Node rslv = null; g = null; .) = -( [ (. rslv = tab.NewNode(Node.rslv, null, la.line); .) +( [ (. rslv = tab.NewNode(Node.rslv, null, la.line, la.col); .) Resolver (. g = new Graph(rslv); .) ] Factor (. if (rslv != null) tab.MakeSequence(g, g2); @@ -304,15 +317,15 @@ Term (. Graph g2; Node rslv = null; g = null; .) .) { Factor (. tab.MakeSequence(g, g2); .) } -| (. g = new Graph(tab.NewNode(Node.eps, null, 0)); .) +| (. g = new Graph(tab.NewNode(Node.eps, null, t.line, t.col)); .) ) (. if (g == null) // invalid start of Term - g = new Graph(tab.NewNode(Node.eps, null, 0)); + g = new Graph(tab.NewNode(Node.eps, null, t.line, t.col)); .) . /*------------------------------------------------------------------------------------*/ -Factor (. string name; int kind; Position pos; bool weak = false; +Factor (. string name; int kind; Position pos; bool weak = false; g = null; .) = @@ -324,9 +337,9 @@ Factor (. string name; int kind; Position pos; bool wea bool undef = sym == null; if (undef) { if (kind == id) - sym = tab.NewSym(Node.nt, name, 0); // forward nt - else if (genScanner) { - sym = tab.NewSym(Node.t, name, t.line); + sym = tab.NewSym(Node.nt, name, 0, 0); // forward nt + else if (genScanner) { + sym = tab.NewSym(Node.t, name, t.line, t.col); dfa.MatchLiteral(sym.name, sym); } else { // undefined string in production SemErr("undefined string in production"); @@ -339,7 +352,7 @@ Factor (. 
string name; int kind; Position pos; bool wea if (weak) if (typ == Node.t) typ = Node.wt; else SemErr("only terminals may be weak"); - Node p = tab.NewNode(typ, sym, t.line); + Node p = tab.NewNode(typ, sym, t.line, t.col); g = new Graph(p); .) [ Attribs

(. if (kind != id) SemErr("a literal must not have attributes"); .) @@ -351,24 +364,24 @@ Factor (. string name; int kind; Position pos; bool wea | '(' Expression ')' | '[' Expression ']' (. tab.MakeOption(g); .) | '{' Expression '}' (. tab.MakeIteration(g); .) -| SemText (. Node p = tab.NewNode(Node.sem, null, 0); +| SemText (. Node p = tab.NewNode(Node.sem, null, t.line, t.col); p.pos = pos; g = new Graph(p); .) -| "ANY" (. Node p = tab.NewNode(Node.any, null, 0); // p.set is set in tab.SetupAnys +| "ANY" (. Node p = tab.NewNode(Node.any, null, t.line, t.col); // p.set is set in tab.SetupAnys g = new Graph(p); .) -| "SYNC" (. Node p = tab.NewNode(Node.sync, null, 0); +| "SYNC" (. Node p = tab.NewNode(Node.sync, null, t.line, t.col); g = new Graph(p); .) ) (. if (g == null) // invalid start of Factor - g = new Graph(tab.NewNode(Node.eps, null, 0)); + g = new Graph(tab.NewNode(Node.eps, null, t.line, t.col)); .) . /*------------------------------------------------------------------------------------*/ -Resolver +Resolver = "IF" "(" (. int beg = la.pos; int col = la.col; int line = la.line; .) Condition (. pos = new Position(beg, t.pos, col, line); .) @@ -413,10 +426,10 @@ TokenFactor (. string name; int kind; .) ( Sym (. if (kind == id) { CharClass c = tab.FindCharClass(name); if (c == null) { - SemErr("undefined name"); + SemErr("undefined name: " + name); c = tab.NewCharClass(name, new CharSet()); } - Node p = tab.NewNode(Node.clas, null, 0); p.val = c.n; + Node p = tab.NewNode(Node.clas, null, t.line, t.col); p.val = c.n; g = new Graph(p); tokenString = noString; } else { // str @@ -429,7 +442,7 @@ TokenFactor (. string name; int kind; .) | '[' TokenExpr ']' (. tab.MakeOption(g); tokenString = noString; .) | '{' TokenExpr '}' (. tab.MakeIteration(g); tokenString = noString; .) ) (. if (g == null) // invalid start of TokenFactor - g = new Graph(tab.NewNode(Node.eps, null, 0)); .) + g = new Graph(tab.NewNode(Node.eps, null, t.line, t.col)); .) . 
/*------------------------------------------------------------------------------------*/ @@ -451,12 +464,12 @@ Sym Attribs = '<' (. int beg = la.pos; int col = la.col; int line = la.line; .) - { ANY + { ANY | badString (. SemErr("bad string in attributes"); .) } '>' (. if (t.pos > beg) p.pos = new Position(beg, t.pos, col, line); .) | "<." (. int beg = la.pos; int col = la.col; int line = la.line; .) - { ANY + { ANY | badString (. SemErr("bad string in attributes"); .) } ".>" (. if (t.pos > beg) p.pos = new Position(beg, t.pos, col, line); .) diff --git a/src/Coco.cs b/src/Coco.cs index 385c3c6..438b8c3 100644 --- a/src/Coco.cs +++ b/src/Coco.cs @@ -4,24 +4,24 @@ extended by M. Loeberbauer & A. Woess, Univ. of Linz with improvements by Pat Terry, Rhodes University -This program is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 2, or (at your option) any +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any later version. -This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
-You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. -If not otherwise stated, any source code generated by Coco/R (other than +If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. -------------------------------------------------------------------------*/ /*------------------------------------------------------------------------- @@ -34,7 +34,7 @@ Trace output options 6 | S: prints the symbol table (terminals, nonterminals, pragmas) 7 | X: prints a cross reference list of all syntax symbols 8 | P: prints statistics about the Coco run - + Trace output can be switched on by the pragma $ { digit | letter } in the attributed grammar or as a command-line option @@ -46,12 +46,12 @@ in the attributed grammar or as a command-line option namespace at.jku.ssw.Coco { public class Coco { - + public static int Main (string[] arg) { Console.WriteLine("Coco/R (Apr 19, 2011)"); string srcName = null, nsName = null, frameDir = null, ddtString = null, traceFileName = null, outDir = null; - bool emitLines = false; + bool emitLines = false, ignoreErrors = false, genAST = false; int retVal = 1; for (int i = 0; i < arg.Length; i++) { if (arg[i] == "-namespace" && i < arg.Length - 1) nsName = arg[++i].Trim(); @@ -59,18 +59,22 @@ public static int Main (string[] arg) { else if (arg[i] == "-trace" && i < arg.Length - 1) ddtString = arg[++i].Trim(); else if (arg[i] == "-o" && i < arg.Length - 1) outDir = arg[++i].Trim(); else if (arg[i] == "-lines") emitLines = true; + else if (arg[i] == "-genAST") genAST 
= true; + else if (arg[i] == "-ignoreErrors") ignoreErrors = true; else srcName = arg[i]; } if (arg.Length > 0 && srcName != null) { try { string srcDir = Path.GetDirectoryName(srcName); - + Scanner scanner = new Scanner(srcName); Parser parser = new Parser(scanner); traceFileName = Path.Combine(srcDir, "trace.txt"); parser.trace = new StreamWriter(new FileStream(traceFileName, FileMode.Create)); parser.tab = new Tab(parser); + if(genAST) parser.tab.genAST = true; + if(ignoreErrors) parser.tab.ignoreErrors = true; parser.dfa = new DFA(parser); parser.pgen = new ParserGen(parser); @@ -88,7 +92,7 @@ public static int Main (string[] arg) { FileInfo f = new FileInfo(traceFileName); if (f.Length == 0) f.Delete(); else Console.WriteLine("trace output is in " + traceFileName); - Console.WriteLine("{0} errors detected", parser.errors.count); + Console.WriteLine("{0} error(s) detected", parser.errors.count); if (parser.errors.count == 0) { retVal = 0; } } catch (IOException) { Console.WriteLine("-- could not open " + traceFileName); @@ -103,6 +107,8 @@ public static int Main (string[] arg) { + " -trace {0}" + " -o {0}" + " -lines{0}" + + " -genAST" + + " -ignoreErrors ignore grammar errors for developing purposes" + "Valid characters in the trace string:{0}" + " A trace automaton{0}" + " F list first/follow sets{0}" @@ -118,7 +124,7 @@ public static int Main (string[] arg) { } return retVal; } - + } // end Coco } // end namespace diff --git a/src/Copyright.frame b/src/Copyright.frame new file mode 100644 index 0000000..a792aa6 --- /dev/null +++ b/src/Copyright.frame @@ -0,0 +1,26 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. 
of Linz +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. +-----------------------------------------------------------------------*/ diff --git a/src/DFA.cs b/src/DFA.cs index 51333d0..66174df 100644 --- a/src/DFA.cs +++ b/src/DFA.cs @@ -177,10 +177,30 @@ public class Range { public int from, to; public Range next; public Range(int from, int to) { this.from = from; this.to = to; } + + public override string ToString() { + if (from == to) + return from.ToString("X"); + if (from <= 256 && to <= 256) + return string.Format("{0:X2}-{1:X2}", from, to); + return string.Format("{0:X4}-{1:X4}", from, to); + } } public Range head; + public override string ToString() { + if (head == null) return "[]"; + StringBuilder sb = new StringBuilder(); + sb.Append('['); + for (Range cur = head; cur != null; cur = cur.next) { + if (cur != head) sb.Append('|'); + sb.Append(cur.ToString()); + } + sb.Append(']'); + return sb.ToString(); + } + public bool this[int i] { get { for (Range p = head; p != null; p = p.next) @@ -881,17 +901,15 @@ void GenComment(Comment com, int 
i) { gen.WriteLine(); gen.Write ("\tbool Comment{0}() ", i); gen.WriteLine("{"); gen.WriteLine("\t\tint level = 1, pos0 = pos, line0 = line, col0 = col, charPos0 = charPos;"); + gen.WriteLine("\t\tNextCh();"); if (com.start.Length == 1) { - gen.WriteLine("\t\tNextCh();"); GenComBody(com); } else { - gen.WriteLine("\t\tNextCh();"); gen.Write ("\t\tif ({0}) ", ChCond(com.start[1])); gen.WriteLine("{"); gen.WriteLine("\t\t\tNextCh();"); GenComBody(com); - gen.WriteLine("\t\t} else {"); - gen.WriteLine("\t\t\tbuffer.Pos = pos0; NextCh(); line = line0; col = col0; charPos = charPos0;"); gen.WriteLine("\t\t}"); + gen.WriteLine("\t\tbuffer.Pos = pos0; NextCh(); line = line0; col = col0; charPos = charPos0;"); gen.WriteLine("\t\treturn false;"); } gen.WriteLine("\t}"); @@ -929,7 +947,7 @@ void WriteState(State state) { Symbol endOf = state.endOf; gen.WriteLine("\t\t\tcase {0}:", state.nr); if (endOf != null && state.firstAction != null) { - gen.WriteLine("\t\t\t\trecEnd = pos; recKind = {0};", endOf.n); + gen.WriteLine("\t\t\t\trecEnd = pos; recKind = {0} /* {1} */;", endOf.n, endOf.name); } bool ctxEnd = state.ctx; for (Action action = state.firstAction; action != null; action = action.next) { @@ -958,7 +976,7 @@ void WriteState(State state) { if (endOf == null) { gen.WriteLine("goto case 0;}"); } else { - gen.Write("t.kind = {0}; ", endOf.n); + gen.Write("t.kind = {0} /* {1} */; ", endOf.n, endOf.name); if (endOf.tokenKind == Symbol.classLitToken) { gen.WriteLine("t.val = new String(tval, 0, tlen); CheckLiteral(); return t;}"); } else { diff --git a/src/Parser.cs b/src/Parser.cs index bf5713f..77370a8 100644 --- a/src/Parser.cs +++ b/src/Parser.cs @@ -4,24 +4,24 @@ extended by M. Loeberbauer & A. Woess, Univ. 
of Linz with improvements by Pat Terry, Rhodes University -This program is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 2, or (at your option) any +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any later version. -This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. -If not otherwise stated, any source code generated by Coco/R (other than +If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. 
-----------------------------------------------------------------------*/ using System.IO; @@ -41,14 +41,14 @@ public class Parser { public const int _string = 3; public const int _badString = 4; public const int _char = 5; - public const int maxT = 41; - public const int _ddtSym = 42; - public const int _optionSym = 43; + public const int maxT = 42; + public const int _ddtSym = 43; + public const int _optionSym = 44; const bool _T = true; const bool _x = false; const int minErrDist = 2; - + public Scanner scanner; public Errors errors; @@ -58,15 +58,17 @@ public class Parser { const int id = 0; const int str = 1; - + public TextWriter trace; // other Coco objects referenced in this ATG public Tab tab; public DFA dfa; public ParserGen pgen; bool genScanner; + bool ignoreGammarErrors = false; string tokenString; // used in declarations of literal tokens string noString = "-none-"; // used in declarations of literal tokens + string gramName; // grammar name /*-------------------------------------------------------------------------*/ @@ -86,31 +88,31 @@ public void SemErr (string msg) { if (errDist >= minErrDist) errors.SemErr(t.line, t.col, msg); errDist = 0; } - + void Get () { for (;;) { t = la; la = scanner.Scan(); if (la.kind <= maxT) { ++errDist; break; } - if (la.kind == 42) { + if (la.kind == _ddtSym) { tab.SetDDT(la.val); - } - if (la.kind == 43) { + } + if (la.kind == _optionSym) { tab.SetOption(la.val); - } + } la = t; } } - + void Expect (int n) { if (la.kind==n) Get(); else { SynErr(n); } } - + bool StartOf (int s) { return set[s, la.kind]; } - + void ExpectWeak (int n, int follow) { if (la.kind == n) Get(); else { @@ -134,9 +136,9 @@ bool WeakSeparator(int n, int syFol, int repFol) { } } - + void Coco() { - Symbol sym; Graph g, g1, g2; string gramName; CharSet s; int beg, line; + Symbol sym; Graph g, g1, g2; CharSet s; int beg, line; if (StartOf(1)) { Get(); beg = t.pos; line = t.line; @@ -145,10 +147,10 @@ void Coco() { } pgen.usingPos = new 
Position(beg, la.pos, 0, line); } - Expect(6); - genScanner = true; + Expect(6 /* "COMPILER" */); + genScanner = true; tab.ignored = new CharSet(); - Expect(1); + Expect(_ident); gramName = t.val; beg = la.pos; line = la.line; @@ -156,56 +158,68 @@ void Coco() { Get(); } tab.semDeclPos = new Position(beg, la.pos, 0, line); - if (la.kind == 7) { + if (la.kind == 7 /* "IGNORECASE" */) { Get(); dfa.ignoreCase = true; } - if (la.kind == 8) { + if (la.kind == 8 /* "TERMINALS" */) { + Get(); + while (la.kind == _ident) { + Get(); + sym = tab.FindSym(t.val); + if (sym != null) SemErr("name declared twice"); + else { + sym = tab.NewSym(Node.t, t.val, t.line, t.col); + sym.tokenKind = Symbol.fixedToken; + } + } + } + if (la.kind == 9 /* "CHARACTERS" */) { Get(); - while (la.kind == 1) { + while (la.kind == _ident) { SetDecl(); } } - if (la.kind == 9) { + if (la.kind == 10 /* "TOKENS" */) { Get(); - while (la.kind == 1 || la.kind == 3 || la.kind == 5) { + while (la.kind == _ident || la.kind == _string || la.kind == _char) { TokenDecl(Node.t); } } - if (la.kind == 10) { + if (la.kind == 11 /* "PRAGMAS" */) { Get(); - while (la.kind == 1 || la.kind == 3 || la.kind == 5) { + while (la.kind == _ident || la.kind == _string || la.kind == _char) { TokenDecl(Node.pr); } } - while (la.kind == 11) { + while (la.kind == 12 /* "COMMENTS" */) { Get(); bool nested = false; - Expect(12); + Expect(13 /* "FROM" */); TokenExpr(out g1); - Expect(13); + Expect(14 /* "TO" */); TokenExpr(out g2); - if (la.kind == 14) { + if (la.kind == 15 /* "NESTED" */) { Get(); nested = true; } dfa.NewComment(g1.l, g2.l, nested); } - while (la.kind == 15) { + while (la.kind == 16 /* "IGNORE" */) { Get(); Set(out s); tab.ignored.Or(s); } - while (!(la.kind == 0 || la.kind == 16)) {SynErr(42); Get();} - Expect(16); + while (!(la.kind == _EOF || la.kind == 17 /* "PRODUCTIONS" */)) {SynErr(43); Get();} + Expect(17 /* "PRODUCTIONS" */); if (genScanner) dfa.MakeDeterministic(); tab.DeleteNodes(); - while (la.kind == 
1) { + while (la.kind == _ident) { Get(); sym = tab.FindSym(t.val); bool undef = sym == null; - if (undef) sym = tab.NewSym(Node.nt, t.val, t.line); + if (undef) sym = tab.NewSym(Node.nt, t.val, t.line, t.col); else { if (sym.typ == Node.nt) { if (sym.graph != null) SemErr("name declared twice"); @@ -215,25 +229,25 @@ void Coco() { bool noAttrs = sym.attrPos == null; sym.attrPos = null; - if (la.kind == 24 || la.kind == 26) { + if (la.kind == 25 /* "<" */ || la.kind == 27 /* "<." */) { AttrDecl(sym); } if (!undef) if (noAttrs != (sym.attrPos == null)) SemErr("attribute mismatch between declaration and use of this symbol"); - if (la.kind == 39) { + if (la.kind == 40 /* "(." */) { SemText(out sym.semPos); } - ExpectWeak(17, 3); + ExpectWeak(18 /* "=" */, 3); Expression(out g); sym.graph = g.l; tab.Finish(g); - ExpectWeak(18, 4); + ExpectWeak(19 /* "." */, 4); } - Expect(19); - Expect(1); + Expect(20 /* "END" */); + Expect(_ident); if (gramName != t.val) SemErr("name does not match grammar name"); tab.gramSy = tab.FindSym(gramName); @@ -244,7 +258,7 @@ void Coco() { if (sym.attrPos != null) SemErr("grammar symbol must not have attributes"); } - tab.noSym = tab.NewSym(Node.t, "???", 0); // noSym gets highest number + tab.noSym = tab.NewSym(Node.t, "???", 0, 0); // noSym gets highest number tab.SetupAnys(); tab.RenumberPragmas(); if (tab.ddt[2]) tab.PrintNodes(); @@ -252,7 +266,13 @@ void Coco() { Console.WriteLine("checking"); tab.CompSymbolSets(); if (tab.ddt[7]) tab.XRef(); - if (tab.GrammarOk()) { + bool doGenCode = false; + if(ignoreGammarErrors) { + doGenCode = true; + tab.GrammarCheckAll(); + } + else doGenCode = tab.GrammarOk(); + if (doGenCode) { Console.Write("parser"); pgen.WriteParser(); if (genScanner) { @@ -266,22 +286,22 @@ void Coco() { } if (tab.ddt[6]) tab.PrintSymbolTable(); - Expect(18); + Expect(19 /* "." 
*/); } void SetDecl() { CharSet s; - Expect(1); + Expect(_ident); string name = t.val; CharClass c = tab.FindCharClass(name); if (c != null) SemErr("name declared twice"); - Expect(17); + Expect(18 /* "=" */); Set(out s); if (s.Elements() == 0) SemErr("character set must not be empty"); tab.NewCharClass(name, s); - Expect(18); + Expect(19 /* "." */); } void TokenDecl(int typ) { @@ -290,16 +310,16 @@ void TokenDecl(int typ) { sym = tab.FindSym(name); if (sym != null) SemErr("name declared twice"); else { - sym = tab.NewSym(typ, name, t.line); + sym = tab.NewSym(typ, name, t.line, t.col); sym.tokenKind = Symbol.fixedToken; } tokenString = null; - while (!(StartOf(5))) {SynErr(43); Get();} - if (la.kind == 17) { + while (!(StartOf(5))) {SynErr(44); Get();} + if (la.kind == 18 /* "=" */) { Get(); TokenExpr(out g); - Expect(18); + Expect(19 /* "." */); if (kind == str) SemErr("a literal must not be declared with a structure"); tab.Finish(g); if (tokenString == null || tokenString.Equals(noString)) @@ -315,8 +335,8 @@ void TokenDecl(int typ) { if (kind == id) genScanner = false; else dfa.MatchLiteral(sym.name, sym); - } else SynErr(44); - if (la.kind == 39) { + } else SynErr(45); + if (la.kind == 40 /* "(." 
*/) { SemText(out sym.semPos); if (typ != Node.pr) SemErr("semantic action not allowed here"); } @@ -326,7 +346,7 @@ void TokenExpr(out Graph g) { Graph g2; TokenTerm(out g); bool first = true; - while (WeakSeparator(28,7,8) ) { + while (WeakSeparator(29 /* "|" */,7,8) ) { TokenTerm(out g2); if (first) { tab.MakeFirstAlt(g); first = false; } tab.MakeAlternative(g, g2); @@ -337,8 +357,8 @@ void TokenExpr(out Graph g) { void Set(out CharSet s) { CharSet s2; SimSet(out s); - while (la.kind == 20 || la.kind == 21) { - if (la.kind == 20) { + while (la.kind == 21 /* "+" */ || la.kind == 22 /* "-" */) { + if (la.kind == 21 /* "+" */) { Get(); SimSet(out s2); s.Or(s2); @@ -351,7 +371,7 @@ void Set(out CharSet s) { } void AttrDecl(Symbol sym) { - if (la.kind == 24) { + if (la.kind == 25 /* "<" */) { Get(); int beg = la.pos; int col = la.col; int line = la.line; while (StartOf(9)) { @@ -362,10 +382,10 @@ void AttrDecl(Symbol sym) { SemErr("bad string in attributes"); } } - Expect(25); + Expect(26 /* ">" */); if (t.pos > beg) sym.attrPos = new Position(beg, t.pos, col, line); - } else if (la.kind == 26) { + } else if (la.kind == 27 /* "<." */) { Get(); int beg = la.pos; int col = la.col; int line = la.line; while (StartOf(11)) { @@ -376,19 +396,19 @@ void AttrDecl(Symbol sym) { SemErr("bad string in attributes"); } } - Expect(27); + Expect(28 /* ".>" */); if (t.pos > beg) sym.attrPos = new Position(beg, t.pos, col, line); - } else SynErr(45); + } else SynErr(46); } void SemText(out Position pos) { - Expect(39); + Expect(40 /* "(." 
*/); int beg = la.pos; int col = la.col; int line = la.line; while (StartOf(13)) { if (StartOf(14)) { Get(); - } else if (la.kind == 4) { + } else if (la.kind == _badString) { Get(); SemErr("bad string in semantic action"); } else { @@ -396,7 +416,7 @@ void SemText(out Position pos) { SemErr("missing end of previous semantic action"); } } - Expect(40); + Expect(41 /* ".)" */); pos = new Position(beg, t.pos, col, line); } @@ -404,7 +424,7 @@ void Expression(out Graph g) { Graph g2; Term(out g); bool first = true; - while (WeakSeparator(28,15,16) ) { + while (WeakSeparator(29 /* "|" */,15,16) ) { Term(out g2); if (first) { tab.MakeFirstAlt(g); first = false; } tab.MakeAlternative(g, g2); @@ -415,36 +435,34 @@ void Expression(out Graph g) { void SimSet(out CharSet s) { int n1, n2; s = new CharSet(); - if (la.kind == 1) { + if (la.kind == _ident) { Get(); CharClass c = tab.FindCharClass(t.val); if (c == null) SemErr("undefined name"); else s.Or(c.set); - } else if (la.kind == 3) { + } else if (la.kind == _string) { Get(); - string name = t.val; - name = tab.Unescape(name.Substring(1, name.Length-2)); + string name = tab.Unstring(t.val); foreach (char ch in name) if (dfa.ignoreCase) s.Set(char.ToLower(ch)); else s.Set(ch); - } else if (la.kind == 5) { + } else if (la.kind == _char) { Char(out n1); s.Set(n1); - if (la.kind == 22) { + if (la.kind == 23 /* ".." 
*/) { Get(); Char(out n2); for (int i = n1; i <= n2; i++) s.Set(i); } - } else if (la.kind == 23) { + } else if (la.kind == 24 /* "ANY" */) { Get(); s = new CharSet(); s.Fill(); - } else SynErr(46); + } else SynErr(47); } void Char(out int n) { - Expect(5); - string name = t.val; n = 0; - name = tab.Unescape(name.Substring(1, name.Length-2)); + Expect(_char); + string name = tab.Unstring(t.val); n = 0; if (name.Length == 1) n = name[0]; else SemErr("unacceptable character value"); if (dfa.ignoreCase && (char)n >= 'A' && (char)n <= 'Z') n += 32; @@ -453,11 +471,11 @@ void Char(out int n) { void Sym(out string name, out int kind) { name = "???"; kind = id; - if (la.kind == 1) { + if (la.kind == _ident) { Get(); kind = id; name = t.val; - } else if (la.kind == 3 || la.kind == 5) { - if (la.kind == 3) { + } else if (la.kind == _string || la.kind == _char) { + if (la.kind == _string) { Get(); name = t.val; } else { @@ -468,14 +486,14 @@ void Sym(out string name, out int kind) { if (dfa.ignoreCase) name = name.ToLower(); if (name.IndexOf(' ') >= 0) SemErr("literal tokens must not contain blanks"); - } else SynErr(47); + } else SynErr(48); } void Term(out Graph g) { Graph g2; Node rslv = null; g = null; if (StartOf(17)) { - if (la.kind == 37) { - rslv = tab.NewNode(Node.rslv, null, la.line); + if (la.kind == 38 /* "IF" */) { + rslv = tab.NewNode(Node.rslv, null, la.line, la.col); Resolver(out rslv.pos); g = new Graph(rslv); } @@ -488,28 +506,28 @@ void Term(out Graph g) { tab.MakeSequence(g, g2); } } else if (StartOf(19)) { - g = new Graph(tab.NewNode(Node.eps, null, 0)); - } else SynErr(48); + g = new Graph(tab.NewNode(Node.eps, null, t.line, t.col)); + } else SynErr(49); if (g == null) // invalid start of Term - g = new Graph(tab.NewNode(Node.eps, null, 0)); + g = new Graph(tab.NewNode(Node.eps, null, t.line, t.col)); } void Resolver(out Position pos) { - Expect(37); - Expect(30); + Expect(38 /* "IF" */); + Expect(31 /* "(" */); int beg = la.pos; int col = la.col; int 
line = la.line; Condition(); pos = new Position(beg, t.pos, col, line); } void Factor(out Graph g) { - string name; int kind; Position pos; bool weak = false; + string name; int kind; Position pos; bool weak = false; g = null; switch (la.kind) { - case 1: case 3: case 5: case 29: { - if (la.kind == 29) { + case _ident: case _string: case _char: case 30 /* "WEAK" */: { + if (la.kind == 30 /* "WEAK" */) { Get(); weak = true; } @@ -520,9 +538,9 @@ void Factor(out Graph g) { bool undef = sym == null; if (undef) { if (kind == id) - sym = tab.NewSym(Node.nt, name, 0); // forward nt - else if (genScanner) { - sym = tab.NewSym(Node.t, name, t.line); + sym = tab.NewSym(Node.nt, name, 0, 0); // forward nt + else if (genScanner) { + sym = tab.NewSym(Node.t, name, t.line, t.col); dfa.MatchLiteral(sym.name, sym); } else { // undefined string in production SemErr("undefined string in production"); @@ -535,10 +553,10 @@ void Factor(out Graph g) { if (weak) if (typ == Node.t) typ = Node.wt; else SemErr("only terminals may be weak"); - Node p = tab.NewNode(typ, sym, t.line); + Node p = tab.NewNode(typ, sym, t.line, t.col); g = new Graph(p); - if (la.kind == 24 || la.kind == 26) { + if (la.kind == 25 /* "<" */ || la.kind == 27 /* "<." */) { Attribs(p); if (kind != id) SemErr("a literal must not have attributes"); } @@ -549,57 +567,57 @@ void Factor(out Graph g) { break; } - case 30: { + case 31 /* "(" */: { Get(); Expression(out g); - Expect(31); + Expect(32 /* ")" */); break; } - case 32: { + case 33 /* "[" */: { Get(); Expression(out g); - Expect(33); + Expect(34 /* "]" */); tab.MakeOption(g); break; } - case 34: { + case 35 /* "{" */: { Get(); Expression(out g); - Expect(35); + Expect(36 /* "}" */); tab.MakeIteration(g); break; } - case 39: { + case 40 /* "(." 
*/: { SemText(out pos); - Node p = tab.NewNode(Node.sem, null, 0); + Node p = tab.NewNode(Node.sem, null, t.line, t.col); p.pos = pos; g = new Graph(p); break; } - case 23: { + case 24 /* "ANY" */: { Get(); - Node p = tab.NewNode(Node.any, null, 0); // p.set is set in tab.SetupAnys + Node p = tab.NewNode(Node.any, null, t.line, t.col); // p.set is set in tab.SetupAnys g = new Graph(p); break; } - case 36: { + case 37 /* "SYNC" */: { Get(); - Node p = tab.NewNode(Node.sync, null, 0); + Node p = tab.NewNode(Node.sync, null, t.line, t.col); g = new Graph(p); break; } - default: SynErr(49); break; + default: SynErr(50); break; } if (g == null) // invalid start of Factor - g = new Graph(tab.NewNode(Node.eps, null, 0)); + g = new Graph(tab.NewNode(Node.eps, null, t.line, t.col)); } void Attribs(Node p) { - if (la.kind == 24) { + if (la.kind == 25 /* "<" */) { Get(); int beg = la.pos; int col = la.col; int line = la.line; while (StartOf(9)) { @@ -610,9 +628,9 @@ void Attribs(Node p) { SemErr("bad string in attributes"); } } - Expect(25); + Expect(26 /* ">" */); if (t.pos > beg) p.pos = new Position(beg, t.pos, col, line); - } else if (la.kind == 26) { + } else if (la.kind == 27 /* "<." 
*/) { Get(); int beg = la.pos; int col = la.col; int line = la.line; while (StartOf(11)) { @@ -623,21 +641,21 @@ void Attribs(Node p) { SemErr("bad string in attributes"); } } - Expect(27); + Expect(28 /* ".>" */); if (t.pos > beg) p.pos = new Position(beg, t.pos, col, line); - } else SynErr(50); + } else SynErr(51); } void Condition() { while (StartOf(20)) { - if (la.kind == 30) { + if (la.kind == 31 /* "(" */) { Get(); Condition(); } else { Get(); } } - Expect(31); + Expect(32 /* ")" */); } void TokenTerm(out Graph g) { @@ -647,28 +665,28 @@ void TokenTerm(out Graph g) { TokenFactor(out g2); tab.MakeSequence(g, g2); } - if (la.kind == 38) { + if (la.kind == 39 /* "CONTEXT" */) { Get(); - Expect(30); + Expect(31 /* "(" */); TokenExpr(out g2); tab.SetContextTrans(g2.l); dfa.hasCtxMoves = true; tab.MakeSequence(g, g2); - Expect(31); + Expect(32 /* ")" */); } } void TokenFactor(out Graph g) { string name; int kind; g = null; - if (la.kind == 1 || la.kind == 3 || la.kind == 5) { + if (la.kind == _ident || la.kind == _string || la.kind == _char) { Sym(out name, out kind); if (kind == id) { CharClass c = tab.FindCharClass(name); if (c == null) { - SemErr("undefined name"); + SemErr("undefined name: " + name); c = tab.NewCharClass(name, new CharSet()); } - Node p = tab.NewNode(Node.clas, null, 0); p.val = c.n; + Node p = tab.NewNode(Node.clas, null, t.line, t.col); p.val = c.n; g = new Graph(p); tokenString = noString; } else { // str @@ -677,58 +695,58 @@ void TokenFactor(out Graph g) { else tokenString = noString; } - } else if (la.kind == 30) { + } else if (la.kind == 31 /* "(" */) { Get(); TokenExpr(out g); - Expect(31); - } else if (la.kind == 32) { + Expect(32 /* ")" */); + } else if (la.kind == 33 /* "[" */) { Get(); TokenExpr(out g); - Expect(33); + Expect(34 /* "]" */); tab.MakeOption(g); tokenString = noString; - } else if (la.kind == 34) { + } else if (la.kind == 35 /* "{" */) { Get(); TokenExpr(out g); - Expect(35); + Expect(36 /* "}" */); 
tab.MakeIteration(g); tokenString = noString; - } else SynErr(51); + } else SynErr(52); if (g == null) // invalid start of TokenFactor - g = new Graph(tab.NewNode(Node.eps, null, 0)); + g = new Graph(tab.NewNode(Node.eps, null, t.line, t.col)); } public void Parse() { la = new Token(); - la.val = ""; + la.val = ""; Get(); Coco(); Expect(0); } - + static readonly bool[,] set = { - {_T,_T,_x,_T, _x,_T,_x,_x, _x,_x,_T,_T, _x,_x,_x,_T, _T,_T,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_T, _x,_x,_x}, - {_x,_T,_T,_T, _T,_T,_x,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x}, - {_x,_T,_T,_T, _T,_T,_T,_x, _x,_x,_x,_x, _T,_T,_T,_x, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x}, - {_T,_T,_x,_T, _x,_T,_x,_x, _x,_x,_T,_T, _x,_x,_x,_T, _T,_T,_T,_x, _x,_x,_x,_T, _x,_x,_x,_x, _T,_T,_T,_x, _T,_x,_T,_x, _T,_T,_x,_T, _x,_x,_x}, - {_T,_T,_x,_T, _x,_T,_x,_x, _x,_x,_T,_T, _x,_x,_x,_T, _T,_T,_x,_T, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_T, _x,_x,_x}, - {_T,_T,_x,_T, _x,_T,_x,_x, _x,_x,_T,_T, _x,_x,_x,_T, _T,_T,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_T, _x,_x,_x}, - {_x,_T,_x,_T, _x,_T,_x,_x, _x,_x,_T,_T, _x,_x,_x,_T, _T,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_T, _x,_x,_x}, - {_x,_T,_x,_T, _x,_T,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_T,_x, _T,_x,_T,_x, _x,_x,_x,_x, _x,_x,_x}, - {_x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_T, _x,_T,_T,_T, _T,_x,_T,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_T, _x,_T,_x,_T, _x,_x,_x,_x, _x,_x,_x}, - {_x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_x,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x}, - {_x,_T,_T,_T, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_x,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x}, - {_x,_T,_T,_T, _T,_T,_T,_T, 
_T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_x, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x}, - {_x,_T,_T,_T, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_x, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x}, - {_x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _x,_T,_x}, - {_x,_T,_T,_T, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_x, _x,_T,_x}, - {_x,_T,_x,_T, _x,_T,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_T,_x, _x,_x,_x,_T, _x,_x,_x,_x, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x,_T, _x,_x,_x}, - {_x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_T,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_T, _x,_T,_x,_T, _x,_x,_x,_x, _x,_x,_x}, - {_x,_T,_x,_T, _x,_T,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_T, _x,_x,_x,_x, _x,_T,_T,_x, _T,_x,_T,_x, _T,_T,_x,_T, _x,_x,_x}, - {_x,_T,_x,_T, _x,_T,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_T, _x,_x,_x,_x, _x,_T,_T,_x, _T,_x,_T,_x, _T,_x,_x,_T, _x,_x,_x}, - {_x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_T,_x, _x,_x,_x,_x, _x,_x,_x,_x, _T,_x,_x,_T, _x,_T,_x,_T, _x,_x,_x,_x, _x,_x,_x}, - {_x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_x, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x} + {_T,_T,_x,_T, _x,_T,_x,_x, _x,_x,_x,_T, _T,_x,_x,_x, _T,_T,_T,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _T,_x,_x,_x}, + {_x,_T,_T,_T, _T,_T,_x,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_x}, + {_x,_T,_T,_T, _T,_T,_T,_x, _x,_x,_x,_x, _x,_T,_T,_T, _x,_x,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_x}, + {_T,_T,_x,_T, _x,_T,_x,_x, _x,_x,_x,_T, _T,_x,_x,_x, _T,_T,_T,_T, _x,_x,_x,_x, _T,_x,_x,_x, _x,_T,_T,_T, _x,_T,_x,_T, _x,_T,_T,_x, _T,_x,_x,_x}, + 
{_T,_T,_x,_T, _x,_T,_x,_x, _x,_x,_x,_T, _T,_x,_x,_x, _T,_T,_T,_x, _T,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _T,_x,_x,_x}, + {_T,_T,_x,_T, _x,_T,_x,_x, _x,_x,_x,_T, _T,_x,_x,_x, _T,_T,_T,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _T,_x,_x,_x}, + {_x,_T,_x,_T, _x,_T,_x,_x, _x,_x,_x,_T, _T,_x,_x,_x, _T,_T,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _T,_x,_x,_x}, + {_x,_T,_x,_T, _x,_T,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_T, _x,_T,_x,_T, _x,_x,_x,_x, _x,_x,_x,_x}, + {_x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _T,_x,_T,_T, _T,_T,_x,_T, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _T,_x,_T,_x, _T,_x,_x,_x, _x,_x,_x,_x}, + {_x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_x}, + {_x,_T,_T,_T, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_x}, + {_x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_x}, + {_x,_T,_T,_T, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_x}, + {_x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_x,_T,_x}, + {_x,_T,_T,_T, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _x,_x,_T,_x}, + {_x,_T,_x,_T, _x,_T,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_T, _x,_x,_x,_x, _T,_x,_x,_x, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_x, _T,_x,_x,_x}, + {_x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_T, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _T,_x,_T,_x, _T,_x,_x,_x, _x,_x,_x,_x}, + {_x,_T,_x,_T, _x,_T,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, 
_T,_x,_x,_x, _x,_x,_T,_T, _x,_T,_x,_T, _x,_T,_T,_x, _T,_x,_x,_x}, + {_x,_T,_x,_T, _x,_T,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _T,_x,_x,_x, _x,_x,_T,_T, _x,_T,_x,_T, _x,_T,_x,_x, _T,_x,_x,_x}, + {_x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_T, _x,_x,_x,_x, _x,_x,_x,_x, _x,_T,_x,_x, _T,_x,_T,_x, _T,_x,_x,_x, _x,_x,_x,_x}, + {_x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_x} }; } // end Parser @@ -750,50 +768,51 @@ public virtual void SynErr (int line, int col, int n) { case 5: s = "char expected"; break; case 6: s = "\"COMPILER\" expected"; break; case 7: s = "\"IGNORECASE\" expected"; break; - case 8: s = "\"CHARACTERS\" expected"; break; - case 9: s = "\"TOKENS\" expected"; break; - case 10: s = "\"PRAGMAS\" expected"; break; - case 11: s = "\"COMMENTS\" expected"; break; - case 12: s = "\"FROM\" expected"; break; - case 13: s = "\"TO\" expected"; break; - case 14: s = "\"NESTED\" expected"; break; - case 15: s = "\"IGNORE\" expected"; break; - case 16: s = "\"PRODUCTIONS\" expected"; break; - case 17: s = "\"=\" expected"; break; - case 18: s = "\".\" expected"; break; - case 19: s = "\"END\" expected"; break; - case 20: s = "\"+\" expected"; break; - case 21: s = "\"-\" expected"; break; - case 22: s = "\"..\" expected"; break; - case 23: s = "\"ANY\" expected"; break; - case 24: s = "\"<\" expected"; break; - case 25: s = "\">\" expected"; break; - case 26: s = "\"<.\" expected"; break; - case 27: s = "\".>\" expected"; break; - case 28: s = "\"|\" expected"; break; - case 29: s = "\"WEAK\" expected"; break; - case 30: s = "\"(\" expected"; break; - case 31: s = "\")\" expected"; break; - case 32: s = "\"[\" expected"; break; - case 33: s = "\"]\" expected"; break; - case 34: s = "\"{\" expected"; break; - case 35: s = "\"}\" expected"; break; - case 36: s = "\"SYNC\" expected"; break; - case 37: s = "\"IF\" expected"; break; - case 
38: s = "\"CONTEXT\" expected"; break; - case 39: s = "\"(.\" expected"; break; - case 40: s = "\".)\" expected"; break; - case 41: s = "??? expected"; break; - case 42: s = "this symbol not expected in Coco"; break; - case 43: s = "this symbol not expected in TokenDecl"; break; - case 44: s = "invalid TokenDecl"; break; - case 45: s = "invalid AttrDecl"; break; - case 46: s = "invalid SimSet"; break; - case 47: s = "invalid Sym"; break; - case 48: s = "invalid Term"; break; - case 49: s = "invalid Factor"; break; - case 50: s = "invalid Attribs"; break; - case 51: s = "invalid TokenFactor"; break; + case 8: s = "\"TERMINALS\" expected"; break; + case 9: s = "\"CHARACTERS\" expected"; break; + case 10: s = "\"TOKENS\" expected"; break; + case 11: s = "\"PRAGMAS\" expected"; break; + case 12: s = "\"COMMENTS\" expected"; break; + case 13: s = "\"FROM\" expected"; break; + case 14: s = "\"TO\" expected"; break; + case 15: s = "\"NESTED\" expected"; break; + case 16: s = "\"IGNORE\" expected"; break; + case 17: s = "\"PRODUCTIONS\" expected"; break; + case 18: s = "\"=\" expected"; break; + case 19: s = "\".\" expected"; break; + case 20: s = "\"END\" expected"; break; + case 21: s = "\"+\" expected"; break; + case 22: s = "\"-\" expected"; break; + case 23: s = "\"..\" expected"; break; + case 24: s = "\"ANY\" expected"; break; + case 25: s = "\"<\" expected"; break; + case 26: s = "\">\" expected"; break; + case 27: s = "\"<.\" expected"; break; + case 28: s = "\".>\" expected"; break; + case 29: s = "\"|\" expected"; break; + case 30: s = "\"WEAK\" expected"; break; + case 31: s = "\"(\" expected"; break; + case 32: s = "\")\" expected"; break; + case 33: s = "\"[\" expected"; break; + case 34: s = "\"]\" expected"; break; + case 35: s = "\"{\" expected"; break; + case 36: s = "\"}\" expected"; break; + case 37: s = "\"SYNC\" expected"; break; + case 38: s = "\"IF\" expected"; break; + case 39: s = "\"CONTEXT\" expected"; break; + case 40: s = "\"(.\" expected"; 
break; + case 41: s = "\".)\" expected"; break; + case 42: s = "??? expected"; break; + case 43: s = "this symbol not expected in Coco"; break; + case 44: s = "this symbol not expected in TokenDecl"; break; + case 45: s = "invalid TokenDecl"; break; + case 46: s = "invalid AttrDecl"; break; + case 47: s = "invalid SimSet"; break; + case 48: s = "invalid Sym"; break; + case 49: s = "invalid Term"; break; + case 50: s = "invalid Factor"; break; + case 51: s = "invalid Attribs"; break; + case 52: s = "invalid TokenFactor"; break; default: s = "error " + n; break; } @@ -805,16 +824,16 @@ public virtual void SemErr (int line, int col, string s) { errorStream.WriteLine(errMsgFormat, line, col, s); count++; } - + public virtual void SemErr (string s) { errorStream.WriteLine(s); count++; } - + public virtual void Warning (int line, int col, string s) { errorStream.WriteLine(errMsgFormat, line, col, s); } - + public virtual void Warning(string s) { errorStream.WriteLine(s); } diff --git a/src/Parser.frame b/src/Parser.frame index 6878147..e8932d3 100644 --- a/src/Parser.frame +++ b/src/Parser.frame @@ -4,24 +4,24 @@ Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz extended by M. Loeberbauer & A. Woess, Univ. of Linz with improvements by Pat Terry, Rhodes University -This program is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 2, or (at your option) any +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any later version. -This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. -If not otherwise stated, any source code generated by Coco/R (other than +If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. ----------------------------------------------------------------------*/ -->begin @@ -34,7 +34,7 @@ public class Parser { const bool _T = true; const bool _x = false; const int minErrDist = 2; - + public Scanner scanner; public Errors errors; @@ -58,7 +58,7 @@ public class Parser { if (errDist >= minErrDist) errors.SemErr(t.line, t.col, msg); errDist = 0; } - + void Get () { for (;;) { t = la; @@ -68,15 +68,15 @@ public class Parser { la = t; } } - + void Expect (int n) { if (la.kind==n) Get(); else { SynErr(n); } } - + bool StartOf (int s) { return set[s, la.kind]; } - + void ExpectWeak (int n, int follow) { if (la.kind == n) Get(); else { @@ -100,16 +100,16 @@ public class Parser { } } - + -->productions public void Parse() { la = new Token(); - la.val = ""; + la.val = ""; Get(); -->parseRoot } - + static readonly bool[,] set = { -->initialization }; @@ -135,16 +135,16 @@ public class Errors { errorStream.WriteLine(errMsgFormat, line, col, s); count++; } - + public virtual void SemErr (string s) { errorStream.WriteLine(s); count++; } - 
+ public virtual void Warning (int line, int col, string s) { errorStream.WriteLine(errMsgFormat, line, col, s); } - + public virtual void Warning(string s) { errorStream.WriteLine(s); } diff --git a/src/ParserGen.cs b/src/ParserGen.cs index cfcb814..62e03c5 100644 --- a/src/ParserGen.cs +++ b/src/ParserGen.cs @@ -5,24 +5,24 @@ extended by M. Loeberbauer & A. Woess, Univ. of Linz with improvements by Pat Terry, Rhodes University -This program is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 2, or (at your option) any +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any later version. -This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. 
-If not otherwise stated, any source code generated by Coco/R (other than +If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. -------------------------------------------------------------------------*/ using System; @@ -42,7 +42,7 @@ public class ParserGen { const int tErr = 0; // error codes const int altErr = 1; const int syncErr = 2; - + public Position usingPos; // "using" definitions from the attributed grammar int errorNr; // highest parser error number @@ -51,17 +51,17 @@ public class ParserGen { StreamWriter gen; // generated parser source file StringWriter err; // generated parser error messages ArrayList symSet = new ArrayList(); - + Tab tab; // other Coco objects TextWriter trace; Errors errors; Buffer buffer; - + void Indent (int n) { for (int i = 1; i <= n; i++) gen.Write('\t'); } - - + + bool Overlaps(BitArray s1, BitArray s2) { int len = s1.Count; for (int i = 0; i < len; ++i) { @@ -71,7 +71,15 @@ bool Overlaps(BitArray s1, BitArray s2) { } return false; } - + + void WriteSymbolOrCode(Symbol sym) { + if (!Char.IsLetter(sym.name[0])) { + gen.Write("{0} /* {1} */", sym.n, sym.name); + } else { + gen.Write("_{0}", sym.name); + } + } + // use a switch if more than 5 alternatives and none starts with a resolver, and no LL1 warning bool UseSwitch (Node p) { BitArray s1, s2; @@ -106,7 +114,7 @@ void CopySourcePart (Position pos, int indent) { gen.WriteLine(); Indent(indent); if (ch == CR) ch = buffer.Read(); // skip CR if (ch == LF) ch = buffer.Read(); // skip LF - for (i = 1; i <= pos.col && (ch == ' ' || ch == '\t'); i++) { + for (i = 1; i <= pos.col && (ch == ' ' || ch == '\t'); i++) { // skip blanks at beginning of line ch = buffer.Read(); } @@ -124,23 +132,23 @@ void GenErrorMsg (int errTyp, Symbol sym) { errorNr++; err.Write("\t\t\tcase " + errorNr + ": s = \""); switch (errTyp) { - case tErr: + case tErr: if (sym.name[0] == '"') err.Write(tab.Escape(sym.name) + " 
expected"); - else err.Write(sym.name + " expected"); + else err.Write(sym.name + " expected"); break; case altErr: err.Write("invalid " + sym.name); break; case syncErr: err.Write("this symbol not expected in " + sym.name); break; } err.WriteLine("\"; break;"); } - + int NewCondSet (BitArray s) { for (int i = 1; i < symSet.Count; i++) // skip symSet[0] (reserved for union of SYNC sets) if (Sets.Equals(s, (BitArray)symSet[i])) return i; symSet.Add(s.Clone()); return symSet.Count - 1; } - + void GenCond (BitArray s, Node p) { if (p.typ == Node.rslv) CopySourcePart(p.pos, 0); else { @@ -149,7 +157,8 @@ void GenCond (BitArray s, Node p) { else if (n <= maxTerm) foreach (Symbol sym in tab.terminals) { if (s[sym.n]) { - gen.Write("la.kind == {0}", sym.n); + gen.Write("la.kind == "); + WriteSymbolOrCode(sym); --n; if (n > 0) gen.Write(" || "); } @@ -158,12 +167,16 @@ void GenCond (BitArray s, Node p) { gen.Write("StartOf({0})", NewCondSet(s)); } } - + void PutCaseLabels (BitArray s) { foreach (Symbol sym in tab.terminals) - if (s[sym.n]) gen.Write("case {0}: ", sym.n); + if (s[sym.n]) { + gen.Write("case "); + WriteSymbolOrCode(sym); + gen.Write(": "); + } } - + void GenCode (Node p, int indent, BitArray isChecked) { Node p2; BitArray s1, s2; @@ -180,14 +193,20 @@ void GenCode (Node p, int indent, BitArray isChecked) { Indent(indent); // assert: if isChecked[p.sym.n] is true, then isChecked contains only p.sym.n if (isChecked[p.sym.n]) gen.WriteLine("Get();"); - else gen.WriteLine("Expect({0});", p.sym.n); + else { + gen.Write("Expect("); + WriteSymbolOrCode(p.sym); + gen.WriteLine(");"); + } break; } case Node.wt: { Indent(indent); s1 = tab.Expected(p.next, curSy); s1.Or(tab.allSyncSets); - gen.WriteLine("ExpectWeak({0}, {1});", p.sym.n, NewCondSet(s1)); + gen.Write("ExpectWeak("); + WriteSymbolOrCode(p.sym); + gen.WriteLine(", {0});", NewCondSet(s1)); break; } case Node.any: { @@ -227,13 +246,13 @@ void GenCode (Node p, int indent, BitArray isChecked) { while (p2 != 
null) { s1 = tab.Expected(p2.sub, curSy); Indent(indent); - if (useSwitch) { + if (useSwitch) { PutCaseLabels(s1); gen.WriteLine("{"); - } else if (p2 == p) { - gen.Write("if ("); GenCond(s1, p2.sub); gen.WriteLine(") {"); + } else if (p2 == p) { + gen.Write("if ("); GenCond(s1, p2.sub); gen.WriteLine(") {"); } else if (p2.down == null && equal) { gen.WriteLine("} else {"); - } else { - gen.Write("} else if ("); GenCond(s1, p2.sub); gen.WriteLine(") {"); + } else { + gen.Write("} else if ("); GenCond(s1, p2.sub); gen.WriteLine(") {"); } GenCode(p2.sub, indent + 1, s1); if (useSwitch) { @@ -263,7 +282,9 @@ void GenCode (Node p, int indent, BitArray isChecked) { if (p2.typ == Node.wt) { s1 = tab.Expected(p2.next, curSy); s2 = tab.Expected(p.next, curSy); - gen.Write("WeakSeparator({0},{1},{2}) ", p2.sym.n, NewCondSet(s1), NewCondSet(s2)); + gen.Write("WeakSeparator("); + WriteSymbolOrCode(p2.sym); + gen.Write(",{0},{1}) ", NewCondSet(s1), NewCondSet(s2)); s1 = new BitArray(tab.terminals.Count); // for inner structure if (p2.up || p2.next == null) p2 = null; else p2 = p2.next; } else { @@ -283,20 +304,20 @@ void GenCode (Node p, int indent, BitArray isChecked) { Indent(indent); gen.WriteLine("}"); break; } - if (p.typ != Node.eps && p.typ != Node.sem && p.typ != Node.sync) + if (p.typ != Node.eps && p.typ != Node.sem && p.typ != Node.sync) isChecked.SetAll(false); // = new BitArray(tab.terminals.Count); if (p.up) break; p = p.next; } } - + void GenTokens() { foreach (Symbol sym in tab.terminals) { if (Char.IsLetter(sym.name[0])) gen.WriteLine("\tpublic const int _{0} = {1};", sym.name, sym.n); } } - + void GenPragmas() { foreach (Symbol sym in tab.pragmas) { gen.WriteLine("\tpublic const int _{0} = {1};", sym.name, sym.n); @@ -305,9 +326,11 @@ void GenPragmas() { void GenCodePragmas() { foreach (Symbol sym in tab.pragmas) { - gen.WriteLine("\t\t\t\tif (la.kind == {0}) {{", sym.n); + gen.Write("\t\t\tif (la.kind == "); + WriteSymbolOrCode(sym); + gen.WriteLine(") {"); 
CopySourcePart(sym.semPos, 4); - gen.WriteLine("\t\t\t\t}"); + gen.WriteLine("\t\t\t}"); } } @@ -346,7 +369,7 @@ public void WriteParser () { gen = g.OpenGen("Parser.cs"); err = new StringWriter(); foreach (Symbol sym in tab.terminals) GenErrorMsg(tErr, sym); - + g.GenCopyright(); g.SkipFramePart("-->begin"); @@ -373,7 +396,7 @@ public void WriteParser () { gen.Close(); buffer.Pos = oldPos; } - + public void WriteStatistics () { trace.WriteLine(); trace.WriteLine("{0} terminals", tab.terminals.Count); diff --git a/src/Scanner.cs b/src/Scanner.cs index 0443bb9..33f0e1d 100644 --- a/src/Scanner.cs +++ b/src/Scanner.cs @@ -4,24 +4,24 @@ extended by M. Loeberbauer & A. Woess, Univ. of Linz with improvements by Pat Terry, Rhodes University -This program is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 2, or (at your option) any +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any later version. -This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. -If not otherwise stated, any source code generated by Coco/R (other than +If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. -----------------------------------------------------------------------*/ @@ -229,8 +229,8 @@ public override int Read() { public class Scanner { const char EOL = '\n'; const int eofSym = 0; /* pdt */ - const int maxT = 41; - const int noSym = 41; + const int maxT = 42; + const int noSym = 42; public Buffer buffer; // scanner buffer @@ -348,9 +348,8 @@ bool Comment0() { } else if (ch == Buffer.EOF) return false; else NextCh(); } - } else { - buffer.Pos = pos0; NextCh(); line = line0; col = col0; charPos = charPos0; } + buffer.Pos = pos0; NextCh(); line = line0; col = col0; charPos = charPos0; return false; } @@ -375,9 +374,8 @@ bool Comment1() { } else if (ch == Buffer.EOF) return false; else NextCh(); } - } else { - buffer.Pos = pos0; NextCh(); line = line0; col = col0; charPos = charPos0; } + buffer.Pos = pos0; NextCh(); line = line0; col = col0; charPos = charPos0; return false; } @@ -386,21 +384,22 @@ void CheckLiteral() { switch (t.val) { case "COMPILER": t.kind = 6; break; case "IGNORECASE": t.kind = 7; break; - case "CHARACTERS": t.kind = 8; break; - case "TOKENS": t.kind = 9; break; - case "PRAGMAS": t.kind = 10; break; - case "COMMENTS": t.kind = 11; break; - case "FROM": t.kind = 12; break; - case "TO": t.kind = 13; break; - case "NESTED": t.kind = 14; break; - case "IGNORE": t.kind = 15; break; - case "PRODUCTIONS": t.kind = 16; break; - case "END": t.kind = 19; break; - case "ANY": t.kind = 23; break; - case "WEAK": t.kind = 29; break; - case "SYNC": t.kind = 36; break; - case "IF": t.kind = 37; break; - case "CONTEXT": t.kind = 38; break; + case "TERMINALS": t.kind = 8; break; + case "CHARACTERS": t.kind = 9; break; + case "TOKENS": 
t.kind = 10; break; + case "PRAGMAS": t.kind = 11; break; + case "COMMENTS": t.kind = 12; break; + case "FROM": t.kind = 13; break; + case "TO": t.kind = 14; break; + case "NESTED": t.kind = 15; break; + case "IGNORE": t.kind = 16; break; + case "PRODUCTIONS": t.kind = 17; break; + case "END": t.kind = 20; break; + case "ANY": t.kind = 24; break; + case "WEAK": t.kind = 30; break; + case "SYNC": t.kind = 37; break; + case "IF": t.kind = 38; break; + case "CONTEXT": t.kind = 39; break; default: break; } } @@ -428,17 +427,17 @@ Token NextToken() { t.kind = recKind; break; } // NextCh already done case 1: - recEnd = pos; recKind = 1; + recEnd = pos; recKind = 1 /* ident */; if (ch >= '0' && ch <= '9' || ch >= 'A' && ch <= 'Z' || ch == '_' || ch >= 'a' && ch <= 'z') {AddCh(); goto case 1;} - else {t.kind = 1; t.val = new String(tval, 0, tlen); CheckLiteral(); return t;} + else {t.kind = 1 /* ident */; t.val = new String(tval, 0, tlen); CheckLiteral(); return t;} case 2: - recEnd = pos; recKind = 2; + recEnd = pos; recKind = 2 /* number */; if (ch >= '0' && ch <= '9') {AddCh(); goto case 2;} - else {t.kind = 2; break;} + else {t.kind = 2 /* number */; break;} case 3: - {t.kind = 3; break;} + {t.kind = 3 /* string */; break;} case 4: - {t.kind = 4; break;} + {t.kind = 4 /* badString */; break;} case 5: if (ch <= 9 || ch >= 11 && ch <= 12 || ch >= 14 && ch <= '&' || ch >= '(' && ch <= '[' || ch >= ']' && ch <= 65535) {AddCh(); goto case 6;} else if (ch == 92) {AddCh(); goto case 7;} @@ -454,15 +453,15 @@ Token NextToken() { else if (ch == 39) {AddCh(); goto case 9;} else {goto case 0;} case 9: - {t.kind = 5; break;} + {t.kind = 5 /* char */; break;} case 10: - recEnd = pos; recKind = 42; + recEnd = pos; recKind = 43 /* ddtSym */; if (ch >= '0' && ch <= '9' || ch >= 'A' && ch <= 'Z' || ch == '_' || ch >= 'a' && ch <= 'z') {AddCh(); goto case 10;} - else {t.kind = 42; break;} + else {t.kind = 43 /* ddtSym */; break;} case 11: - recEnd = pos; recKind = 43; + recEnd = pos; 
recKind = 44 /* optionSym */; if (ch >= '-' && ch <= '.' || ch >= '0' && ch <= ':' || ch >= 'A' && ch <= 'Z' || ch == '_' || ch >= 'a' && ch <= 'z') {AddCh(); goto case 11;} - else {t.kind = 43; break;} + else {t.kind = 44 /* optionSym */; break;} case 12: if (ch <= 9 || ch >= 11 && ch <= 12 || ch >= 14 && ch <= '!' || ch >= '#' && ch <= '[' || ch >= ']' && ch <= 65535) {AddCh(); goto case 12;} else if (ch == 10 || ch == 13) {AddCh(); goto case 4;} @@ -470,63 +469,63 @@ Token NextToken() { else if (ch == 92) {AddCh(); goto case 14;} else {goto case 0;} case 13: - recEnd = pos; recKind = 42; + recEnd = pos; recKind = 43 /* ddtSym */; if (ch >= '0' && ch <= '9') {AddCh(); goto case 10;} else if (ch >= 'A' && ch <= 'Z' || ch == '_' || ch >= 'a' && ch <= 'z') {AddCh(); goto case 15;} - else {t.kind = 42; break;} + else {t.kind = 43 /* ddtSym */; break;} case 14: if (ch >= ' ' && ch <= '~') {AddCh(); goto case 12;} else {goto case 0;} case 15: - recEnd = pos; recKind = 42; + recEnd = pos; recKind = 43 /* ddtSym */; if (ch >= '0' && ch <= '9') {AddCh(); goto case 10;} else if (ch >= 'A' && ch <= 'Z' || ch == '_' || ch >= 'a' && ch <= 'z') {AddCh(); goto case 15;} else if (ch == '=') {AddCh(); goto case 11;} - else {t.kind = 42; break;} + else {t.kind = 43 /* ddtSym */; break;} case 16: - {t.kind = 17; break;} + {t.kind = 18 /* "=" */; break;} case 17: - {t.kind = 20; break;} + {t.kind = 21 /* "+" */; break;} case 18: - {t.kind = 21; break;} + {t.kind = 22 /* "-" */; break;} case 19: - {t.kind = 22; break;} + {t.kind = 23 /* ".." */; break;} case 20: - {t.kind = 25; break;} + {t.kind = 26 /* ">" */; break;} case 21: - {t.kind = 26; break;} + {t.kind = 27 /* "<." 
*/; break;} case 22: - {t.kind = 27; break;} + {t.kind = 28 /* ".>" */; break;} case 23: - {t.kind = 28; break;} + {t.kind = 29 /* "|" */; break;} case 24: - {t.kind = 31; break;} + {t.kind = 32 /* ")" */; break;} case 25: - {t.kind = 32; break;} + {t.kind = 33 /* "[" */; break;} case 26: - {t.kind = 33; break;} + {t.kind = 34 /* "]" */; break;} case 27: - {t.kind = 34; break;} + {t.kind = 35 /* "{" */; break;} case 28: - {t.kind = 35; break;} + {t.kind = 36 /* "}" */; break;} case 29: - {t.kind = 39; break;} + {t.kind = 40 /* "(." */; break;} case 30: - {t.kind = 40; break;} + {t.kind = 41 /* ".)" */; break;} case 31: - recEnd = pos; recKind = 18; + recEnd = pos; recKind = 19 /* "." */; if (ch == '.') {AddCh(); goto case 19;} else if (ch == '>') {AddCh(); goto case 22;} else if (ch == ')') {AddCh(); goto case 30;} - else {t.kind = 18; break;} + else {t.kind = 19 /* "." */; break;} case 32: - recEnd = pos; recKind = 24; + recEnd = pos; recKind = 25 /* "<" */; if (ch == '.') {AddCh(); goto case 21;} - else {t.kind = 24; break;} + else {t.kind = 25 /* "<" */; break;} case 33: - recEnd = pos; recKind = 30; + recEnd = pos; recKind = 31 /* "(" */; if (ch == '.') {AddCh(); goto case 29;} - else {t.kind = 30; break;} + else {t.kind = 31 /* "(" */; break;} } t.val = new String(tval, 0, tlen); diff --git a/src/Tab.cs b/src/Tab.cs index d11403b..34abc45 100644 --- a/src/Tab.cs +++ b/src/Tab.cs @@ -5,24 +5,24 @@ extended by M. Loeberbauer & A. Woess, Univ. of Linz with improvements by Pat Terry, Rhodes University -This program is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 2, or (at your option) any +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any later version. 
-This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. -If not otherwise stated, any source code generated by Coco/R (other than +If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. -------------------------------------------------------------------------*/ using System; @@ -37,7 +37,7 @@ public class Position { // position of source code stretch (e.g. semantic actio public readonly int end; // end of stretch public readonly int col; // column number of start position public readonly int line; // line number of start position - + public Position(int beg, int end, int col, int line) { this.beg = beg; this.end = end; this.col = col; this.line = line; } @@ -47,15 +47,15 @@ public Position(int beg, int end, int col, int line) { //===================================================================== // Symbol //===================================================================== - + public class Symbol { - + // token kinds public const int fixedToken = 0; // e.g. 
'a' ('b' | 'c') (structure of literals) public const int classToken = 1; // e.g. digit {digit} (at least one char class) public const int litToken = 2; // e.g. "while" public const int classLitToken = 3; // e.g. letter {letter} but without literals that have the same structure - + public int n; // symbol number public int typ; // t, nt, pr, unknown, rslv /* ML 29_11_2002 slv added */ /* AW slv --> rslv */ public string name; // symbol name @@ -67,12 +67,13 @@ public class Symbol { public BitArray follow; // nt: terminal followers public BitArray nts; // nt: nonterminals whose followers have to be added to this sym public int line; // source text line number of item in this node + public int col; // source text line column number of item in this node public Position attrPos; // nt: position of attributes in source text (or null) public Position semPos; // pr: pos of semantic action in source text (or null) // nt: pos of local declarations in source text (or null) - public Symbol(int typ, string name, int line) { - this.typ = typ; this.name = name; this.line = line; + public Symbol(int typ, string name, int line, int col) { + this.typ = typ; this.name = name; this.line = line; this.col = col; } } @@ -89,7 +90,7 @@ public class Node { public const int clas = 4; // character class public const int chr = 5; // character public const int wt = 6; // weak terminal symbol - public const int any = 7; // + public const int any = 7; // public const int eps = 8; // empty public const int sync = 9; // synchronization symbol public const int sem = 10; // semantic action: (. .) 
@@ -97,10 +98,10 @@ public class Node { public const int iter = 12; // iteration: { } public const int opt = 13; // option: [ ] public const int rslv = 14; // resolver expr - + public const int normalTrans = 0; // transition codes public const int contextTrans = 1; - + public int n; // node number public int typ; // t, nt, wt, chr, clas, any, eps, sem, sync, alt, iter, opt, rslv public Node next; // to successor node @@ -113,44 +114,45 @@ public class Node { public int code; // chr, clas: transition code public BitArray set; // any, sync: the set represented by this node public Position pos; // nt, t, wt: pos of actual attributes - // sem: pos of semantic action in source text - // rslv: pos of resolver in source text - public int line; // source text line number of item in this node - public State state; // DFA state corresponding to this node + // sem: pos of semantic action in source text + // rslv: pos of resolver in source text + public int line; // source text line number of item in this node + public int col; // source text line column number of item in this node + public State state; // DFA state corresponding to this node // (only used in DFA.ConvertToStates) - public Node(int typ, Symbol sym, int line) { - this.typ = typ; this.sym = sym; this.line = line; + public Node(int typ, Symbol sym, int line, int col) { + this.typ = typ; this.sym = sym; this.line = line; this.col = col; } } //===================================================================== -// Graph +// Graph //===================================================================== -public class Graph { +public class Graph { public Node l; // left end of graph = head public Node r; // right end of graph = list of nodes to be linked to successor graph - + public Graph() { l = null; r = null; } - + public Graph(Node left, Node right) { l = left; r = right; } - + public Graph(Node p) { l = p; r = p; } } //===================================================================== -// Sets +// Sets 
//===================================================================== public class Sets { - + public static int Elements(BitArray s) { int max = s.Count; int n = 0; @@ -158,33 +160,33 @@ public static int Elements(BitArray s) { if (s[i]) n++; return n; } - + public static bool Equals(BitArray a, BitArray b) { int max = a.Count; for (int i=0; i= 127 || ch == '\'' || ch == '\\') return ch.ToString(); else return String.Format("'{0}'", (char)ch); } - + void WriteCharSet(CharSet s) { for (CharSet.Range r = s.head; r != null; r = r.next) if (r.from < r.to) { trace.Write(Ch(r.from) + ".." + Ch(r.to) + " "); } else { trace.Write(Ch(r.from) + " "); } } - + public void WriteCharClasses () { foreach (CharClass c in classes) { trace.Write("{0,-10}: ", c.name); @@ -554,7 +562,7 @@ public void WriteCharClasses () { } trace.WriteLine(); } - + //--------------------------------------------------------------------- // Symbol set computations @@ -592,11 +600,11 @@ BitArray First0(Node p, BitArray mark) { } return fs; } - + public BitArray First(Node p) { BitArray fs = First0(p, new BitArray(nodes.Count)); if (ddt[3]) { - trace.WriteLine(); + trace.WriteLine(); if (p != null) trace.WriteLine("First: node = {0}", p.n); else trace.WriteLine("First: node = null"); PrintSet(fs, 0); @@ -614,7 +622,7 @@ void CompFirstSets() { sym.firstReady = true; } } - + void CompFollow(Node p) { while (p != null && !visited[p.n]) { visited[p.n] = true; @@ -631,7 +639,7 @@ void CompFollow(Node p) { p = p.next; } } - + void Complete(Symbol sym) { if (!visited[sym.n]) { visited[sym.n] = true; @@ -644,7 +652,7 @@ void Complete(Symbol sym) { } } } - + void CompFollowSets() { foreach (Symbol sym in nonterminals) { sym.follow = new BitArray(terminals.Count); @@ -662,7 +670,7 @@ void CompFollowSets() { Complete(sym); } } - + Node LeadingAny(Node p) { if (p == null) return null; Node a = null; @@ -675,7 +683,7 @@ Node LeadingAny(Node p) { if (a == null && DelNode(p) && !p.up) a = LeadingAny(p.next); return a; 
} - + void FindAS(Node p) { // find ANY sets Node a; while (p != null) { @@ -713,11 +721,11 @@ void FindAS(Node p) { // find ANY sets p = p.next; } } - + void CompAnySets() { foreach (Symbol sym in nonterminals) FindAS(sym.graph); } - + public BitArray Expected(Node p, Symbol curSy) { BitArray s = First(p); if (DelGraph(p)) s.Or(curSy.follow); @@ -745,7 +753,7 @@ void CompSync(Node p) { p = p.next; } } - + void CompSyncSets() { allSyncSets = new BitArray(terminals.Count); allSyncSets[eofSy.n] = true; @@ -755,7 +763,7 @@ void CompSyncSets() { CompSync(curSy.graph); } } - + public void SetupAnys() { foreach (Node p in nodes) if (p.typ == Node.any) { @@ -763,7 +771,7 @@ public void SetupAnys() { p.set[eofSy.n] = false; } } - + public void CompDeletableSymbols() { bool changed; do { @@ -776,7 +784,7 @@ public void CompDeletableSymbols() { foreach (Symbol sym in nonterminals) if (sym.deletable) errors.Warning(" " + sym.name + " deletable"); } - + public void RenumberPragmas() { int n = terminals.Count; foreach (Symbol sym in pragmas) sym.n = n++; @@ -805,16 +813,16 @@ public void CompSymbolSets() { trace.WriteLine("-----------------"); foreach (Node p in nodes) if (p.typ == Node.any || p.typ == Node.sync) { - trace.Write("{0,4} {1,4}: ", p.n, nTyp[p.typ]); + trace.Write("Line: {0, 4} Node: {1,4} {2,4}: ", p.line, p.n, nTyp[p.typ]); PrintSet(p.set, 11); } } } - + //--------------------------------------------------------------------- // String handling //--------------------------------------------------------------------- - + char Hex2Char(string s) { int val = 0; for (int i = 0; i < s.Length; i++) { @@ -835,6 +843,11 @@ string Char2Hex(char ch) { return w.ToString(); } + public string Unstring(string s) { + if (s == null || s.Length < 2) return s; + return Unescape(s.Substring(1, s.Length - 2)); + } + public string Unescape (string s) { /* replaces escape sequences in s by their Unicode values. 
*/ StringBuilder buf = new StringBuilder(); @@ -869,6 +882,11 @@ public string Unescape (string s) { return buf.ToString(); } + public string Quoted (string s) { + if (s == null) return "null"; + return string.Concat("\"", Escape(s), "\""); + } + public string Escape (string s) { StringBuilder buf = new StringBuilder(); foreach (char ch in s) { @@ -887,25 +905,35 @@ public string Escape (string s) { } return buf.ToString(); } - + //--------------------------------------------------------------------- // Grammar checks //--------------------------------------------------------------------- - + public bool GrammarOk() { - bool ok = NtsComplete() - && AllNtReached() + bool ok = NtsComplete() + && AllNtReached() && NoCircularProductions() && AllNtToTerm(); - if (ok) { CheckResolvers(); CheckLL1(); } - return ok; + if (ok) { CheckResolvers(); CheckLL1(); } + return ok; + } + + public bool GrammarCheckAll() { + int errors = 0; + if(!NtsComplete()) ++errors; + if(!AllNtReached()) ++errors; + if(!NoCircularProductions()) ++errors; + if(!AllNtToTerm()) ++errors; + CheckResolvers(); CheckLL1(); + return errors == 0; } //--------------- check for circular productions ---------------------- - + class CNode { // node of list for finding circular productions public Symbol left, right; - + public CNode (Symbol l, Symbol r) { left = l; right = r; } @@ -923,7 +951,7 @@ void GetSingles(Node p, ArrayList singles, Node rule) { } if (!p.up && DelNode(p)) GetSingles(p.next, singles, rule); } - + public bool NoCircularProductions() { bool ok, changed, onLeftSide, onRightSide; ArrayList list = new ArrayList(); @@ -953,12 +981,12 @@ public bool NoCircularProductions() { } return ok; } - + //--------------- check for LL(1) errors ---------------------- - + void LL1Error(int cond, Symbol sym) { - string s = " LL1 warning in " + curSy.name + ": "; - if (sym != null) s += sym.name + " is "; + string s = " LL1 warning in " + curSy.name + ":" + curSy.line + ":" + curSy.col + ": "; + if (sym != 
null) s += sym.name + " is "; switch (cond) { case 1: s += "start of several alternatives"; break; case 2: s += "start & successor of deletable structure"; break; @@ -967,22 +995,89 @@ void LL1Error(int cond, Symbol sym) { } errors.Warning(s); } - - void CheckOverlap(BitArray s1, BitArray s2, int cond) { + + int CheckOverlap(BitArray s1, BitArray s2, int cond) { + int overlaped = 0; foreach (Symbol sym in terminals) { - if (s1[sym.n] && s2[sym.n]) LL1Error(cond, sym); + if (s1[sym.n] && s2[sym.n]) { LL1Error(cond, sym); ++overlaped; } + } + return overlaped; + } + + /* print the path for first set that contains token tok for the graph rooted at p */ + void PrintFirstPath(Node p, int tok, string indent = "\t", int depth = 0) + { + //if(p && p.sym) Console.WriteLine("{0}==> {1}:{2}:{3}: {4}", indent, p.sym.name, p.sym.line, p.sym.col, depth); + //else Console.WriteLine("{0}==> xxx:{0}", indent, depth); + while (p != null) + { + //if(p.sym) Console.WriteLine("{0}----> {1}:{2}:{3}: {4}", indent, p.sym.name, p.sym.line, p.sym.col, depth); + switch (p.typ) + { + case Node.nt: + { + if (p.sym.firstReady) + { + if (p.sym.first[tok]) + { + if (indent.Length == 1) Console.WriteLine("{0}=> {1}:{2}:{3}:", indent, p.sym.name, p.line, p.col); + Console.WriteLine("{0}-> {1}:{2}:{3}:", indent, p.sym.name, p.sym.line, p.sym.col); + if (p.sym.graph != null) PrintFirstPath(p.sym.graph, tok, indent + " ", depth + 1); + return; + } + } + break; + } + case Node.t: + case Node.wt: + { + if (p.sym.n == tok) Console.WriteLine("{0}= {1}:{2}:{3}:", indent, p.sym.name, p.line, p.col); + break; + } + case Node.any: + { + break; + } + case Node.alt: + { + PrintFirstPath(p.sub, tok, indent, depth + 1); + PrintFirstPath(p.down, tok, indent, depth + 1); + break; + } + case Node.iter: + case Node.opt: + { + PrintFirstPath(p.sub, tok, indent, depth + 1); + break; + } + } + if (!DelNode(p)) break; + p = p.next; } } - - void CheckAlts(Node p) { + + int CheckAlts(Node p) { BitArray s1, s2; + int rc = 
0; while (p != null) { if (p.typ == Node.alt) { Node q = p; s1 = new BitArray(terminals.Count); while (q != null) { // for all alternatives s2 = Expected0(q.sub, curSy); - CheckOverlap(s1, s2, 1); + int overlaped = CheckOverlap(s1, s2, 1); + if (overlaped > 0) + { + int overlapToken = 0; + /* Find the first overlap token */ + foreach (Symbol sym in terminals) + { + if (s1[sym.n] && s2[sym.n]) { overlapToken = sym.n; break; } + } + //Console.WriteLine("\t-> {0}:{1}: {2}", first_overlap.sub.sym.name, first_overlap.sub.sym.line, overlaped); + PrintFirstPath(p, overlapToken); + rc += overlaped; + } s1.Or(s2); CheckAlts(q.sub); q = q.down; @@ -992,7 +1087,19 @@ void CheckAlts(Node p) { else { s1 = Expected0(p.sub, curSy); s2 = Expected(p.next, curSy); - CheckOverlap(s1, s2, 2); + int overlaped = CheckOverlap(s1, s2, 2); + if (overlaped > 0) + { + int overlapToken = 0; + /* Find the first overlap token */ + foreach (Symbol sym in terminals) + { + if (s1[sym.n] && s2[sym.n]) { overlapToken = sym.n; break; } + } + //Console.WriteLine(format("\t=>:{0}: {1}", p.line, overlaped)); + PrintFirstPath(p, overlapToken); + rc += overlaped; + } } CheckAlts(p.sub); } else if (p.typ == Node.any) { @@ -1002,6 +1109,7 @@ void CheckAlts(Node p) { if (p.up) break; p = p.next; } + return rc; } public void CheckLL1() { @@ -1010,13 +1118,13 @@ public void CheckLL1() { CheckAlts(curSy.graph); } } - + //------------- check if resolvers are legal -------------------- - + void ResErr(Node p, string msg) { errors.Warning(p.line, p.pos.col, msg); } - + void CheckRes(Node p, bool rslvAllowed) { while (p != null) { switch (p.typ) { @@ -1041,7 +1149,7 @@ void CheckRes(Node p, bool rslvAllowed) { if (p.sub.typ == Node.rslv) { BitArray fs = First(p.sub.next); BitArray fsNext = Expected(p.next, curSy); - if (!Sets.Intersect(fs, fsNext)) + if (!Sets.Intersect(fs, fsNext)) ResErr(p.sub, "Warning: Misplaced resolver: no LL(1) conflict."); } CheckRes(p.sub, true); @@ -1056,7 +1164,7 @@ void CheckRes(Node p, 
bool rslvAllowed) { rslvAllowed = false; } } - + public void CheckResolvers() { foreach (Symbol sym in nonterminals) { curSy = sym; @@ -1065,7 +1173,7 @@ public void CheckResolvers() { } //------------- check if every nts has a production -------------------- - + public bool NtsComplete() { bool complete = true; foreach (Symbol sym in nonterminals) { @@ -1076,9 +1184,9 @@ public bool NtsComplete() { } return complete; } - + //-------------- check if every nts can be reached ----------------- - + void MarkReachedNts(Node p) { while (p != null) { if (p.typ == Node.nt && !visited[p.sym.n]) { // new nt reached @@ -1092,7 +1200,7 @@ void MarkReachedNts(Node p) { p = p.next; } } - + public bool AllNtReached() { bool ok = true; visited = new BitArray(nonterminals.Count); @@ -1106,20 +1214,20 @@ public bool AllNtReached() { } return ok; } - + //--------- check if every nts can be derived to terminals ------------ - + bool IsTerm(Node p, BitArray mark) { // true if graph can be derived to terminals while (p != null) { if (p.typ == Node.nt && !mark[p.sym.n]) return false; - if (p.typ == Node.alt && !IsTerm(p.sub, mark) + if (p.typ == Node.alt && !IsTerm(p.sub, mark) && (p.down == null || !IsTerm(p.down, mark))) return false; if (p.up) break; p = p.next; } return true; } - + public bool AllNtToTerm() { bool changed, ok = true; BitArray mark = new BitArray(nonterminals.Count); @@ -1138,11 +1246,11 @@ public bool AllNtToTerm() { } return ok; } - + //--------------------------------------------------------------------- // Cross reference list //--------------------------------------------------------------------- - + public void XRef() { SortedList xref = new SortedList(new SymbolComp()); // collect lines where symbols have been defined @@ -1178,7 +1286,7 @@ public void XRef() { } trace.WriteLine(); trace.WriteLine(); } - + public void SetDDT(string s) { s = s.ToUpper(); foreach (char ch in s) { diff --git a/src/build.sh b/src/build.sh new file mode 100644 index 0000000..e621774 
--- /dev/null +++ b/src/build.sh @@ -0,0 +1 @@ +csc /out:Coco.exe /t:exe Coco.cs Scanner.cs Tab.cs DFA.cs ParserGen.cs Parser.cs \ No newline at end of file From 248edf7d84ef3f415c9239969b6f2d02d33dc96d Mon Sep 17 00:00:00 2001 From: mingodad Date: Mon, 7 Jun 2021 21:08:41 +0200 Subject: [PATCH 04/21] Add info between comments to StatOf calls on generated parser --- src/Parser.cs | 61 ++++++++++++++++++++++++++++++++---------------- src/ParserGen.cs | 9 ++++++- 2 files changed, 49 insertions(+), 21 deletions(-) diff --git a/src/Parser.cs b/src/Parser.cs index 77370a8..894dc4d 100644 --- a/src/Parser.cs +++ b/src/Parser.cs @@ -35,6 +35,27 @@ namespace at.jku.ssw.Coco { public class Parser { + //non terminals + public const int _NT_Coco = 0; + public const int _NT_SetDecl = 1; + public const int _NT_TokenDecl = 2; + public const int _NT_TokenExpr = 3; + public const int _NT_Set = 4; + public const int _NT_AttrDecl = 5; + public const int _NT_SemText = 6; + public const int _NT_Expression = 7; + public const int _NT_SimSet = 8; + public const int _NT_Char = 9; + public const int _NT_Sym = 10; + public const int _NT_Term = 11; + public const int _NT_Resolver = 12; + public const int _NT_Factor = 13; + public const int _NT_Attribs = 14; + public const int _NT_Condition = 15; + public const int _NT_TokenTerm = 16; + public const int _NT_TokenFactor = 17; + public const int maxNT = 17; + //terminals public const int _EOF = 0; public const int _ident = 1; public const int _number = 2; @@ -139,10 +160,10 @@ bool WeakSeparator(int n, int syFol, int repFol) { void Coco() { Symbol sym; Graph g, g1, g2; CharSet s; int beg, line; - if (StartOf(1)) { + if (StartOf(1 /* any */)) { Get(); beg = t.pos; line = t.line; - while (StartOf(1)) { + while (StartOf(1 /* any */)) { Get(); } pgen.usingPos = new Position(beg, la.pos, 0, line); @@ -154,7 +175,7 @@ void Coco() { gramName = t.val; beg = la.pos; line = la.line; - while (StartOf(2)) { + while (StartOf(2 /* any */)) { Get(); } 
tab.semDeclPos = new Position(beg, la.pos, 0, line); @@ -315,7 +336,7 @@ void TokenDecl(int typ) { } tokenString = null; - while (!(StartOf(5))) {SynErr(44); Get();} + while (!(StartOf(5 /* sync */))) {SynErr(44); Get();} if (la.kind == 18 /* "=" */) { Get(); TokenExpr(out g); @@ -331,7 +352,7 @@ void TokenDecl(int typ) { dfa.MatchLiteral(tokenString, sym); } - } else if (StartOf(6)) { + } else if (StartOf(6 /* sem */)) { if (kind == id) genScanner = false; else dfa.MatchLiteral(sym.name, sym); @@ -374,8 +395,8 @@ void AttrDecl(Symbol sym) { if (la.kind == 25 /* "<" */) { Get(); int beg = la.pos; int col = la.col; int line = la.line; - while (StartOf(9)) { - if (StartOf(10)) { + while (StartOf(9 /* alt */)) { + if (StartOf(10 /* any */)) { Get(); } else { Get(); @@ -388,8 +409,8 @@ void AttrDecl(Symbol sym) { } else if (la.kind == 27 /* "<." */) { Get(); int beg = la.pos; int col = la.col; int line = la.line; - while (StartOf(11)) { - if (StartOf(12)) { + while (StartOf(11 /* alt */)) { + if (StartOf(12 /* any */)) { Get(); } else { Get(); @@ -405,8 +426,8 @@ void AttrDecl(Symbol sym) { void SemText(out Position pos) { Expect(40 /* "(." 
*/); int beg = la.pos; int col = la.col; int line = la.line; - while (StartOf(13)) { - if (StartOf(14)) { + while (StartOf(13 /* alt */)) { + if (StartOf(14 /* any */)) { Get(); } else if (la.kind == _badString) { Get(); @@ -491,7 +512,7 @@ void Sym(out string name, out int kind) { void Term(out Graph g) { Graph g2; Node rslv = null; g = null; - if (StartOf(17)) { + if (StartOf(17 /* opt */)) { if (la.kind == 38 /* "IF" */) { rslv = tab.NewNode(Node.rslv, null, la.line, la.col); Resolver(out rslv.pos); @@ -501,11 +522,11 @@ void Term(out Graph g) { if (rslv != null) tab.MakeSequence(g, g2); else g = g2; - while (StartOf(18)) { + while (StartOf(18 /* nt Factor */)) { Factor(out g2); tab.MakeSequence(g, g2); } - } else if (StartOf(19)) { + } else if (StartOf(19 /* sem */)) { g = new Graph(tab.NewNode(Node.eps, null, t.line, t.col)); } else SynErr(49); if (g == null) // invalid start of Term @@ -620,8 +641,8 @@ void Attribs(Node p) { if (la.kind == 25 /* "<" */) { Get(); int beg = la.pos; int col = la.col; int line = la.line; - while (StartOf(9)) { - if (StartOf(10)) { + while (StartOf(9 /* alt */)) { + if (StartOf(10 /* any */)) { Get(); } else { Get(); @@ -633,8 +654,8 @@ void Attribs(Node p) { } else if (la.kind == 27 /* "<." 
*/) { Get(); int beg = la.pos; int col = la.col; int line = la.line; - while (StartOf(11)) { - if (StartOf(12)) { + while (StartOf(11 /* alt */)) { + if (StartOf(12 /* any */)) { Get(); } else { Get(); @@ -647,7 +668,7 @@ void Attribs(Node p) { } void Condition() { - while (StartOf(20)) { + while (StartOf(20 /* alt */)) { if (la.kind == 31 /* "(" */) { Get(); Condition(); @@ -661,7 +682,7 @@ void Condition() { void TokenTerm(out Graph g) { Graph g2; TokenFactor(out g); - while (StartOf(7)) { + while (StartOf(7 /* nt TokenFactor */)) { TokenFactor(out g2); tab.MakeSequence(g, g2); } diff --git a/src/ParserGen.cs b/src/ParserGen.cs index 62e03c5..5fd19e1 100644 --- a/src/ParserGen.cs +++ b/src/ParserGen.cs @@ -164,7 +164,8 @@ void GenCond (BitArray s, Node p) { } } else - gen.Write("StartOf({0})", NewCondSet(s)); + gen.Write("StartOf({0} /* {1} {2} */)", NewCondSet(s), + tab.nTyp[p.typ], p.typ == Node.nt ? p.sym.name : ""); } } @@ -312,6 +313,12 @@ void GenCode (Node p, int indent, BitArray isChecked) { } void GenTokens() { + gen.WriteLine("\t//non terminals"); + foreach (Symbol sym in tab.nonterminals) { + gen.WriteLine("\tpublic const int _NT_{0} = {1};", sym.name, sym.n); + } + gen.WriteLine("\tpublic const int maxNT = {0};", tab.nonterminals.Count-1); + gen.WriteLine("\t//terminals"); foreach (Symbol sym in tab.terminals) { if (Char.IsLetter(sym.name[0])) gen.WriteLine("\tpublic const int _{0} = {1};", sym.name, sym.n); From 6c1109b1d723a6238b922b310421353807e4d691 Mon Sep 17 00:00:00 2001 From: mingodad Date: Tue, 8 Jun 2021 09:13:11 +0200 Subject: [PATCH 05/21] Replace recursion by iteration --- src/DFA.cs | 7 ++++--- src/Scanner.cs | 12 ++++++++---- src/Scanner.frame | 8 ++++++-- 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/src/DFA.cs b/src/DFA.cs index 66174df..60679df 100644 --- a/src/DFA.cs +++ b/src/DFA.cs @@ -1041,11 +1041,11 @@ public void WriteScanner() { } g.CopyFramePart("-->literals"); GenLiterals(); g.CopyFramePart("-->scan1"); - 
gen.Write("\t\t\t"); + gen.Write("\t\t\t\t"); if (tab.ignored.Elements() > 0) { PutRange(tab.ignored); } else { gen.Write("false"); } g.CopyFramePart("-->scan2"); if (firstComment != null) { - gen.Write("\t\tif ("); + gen.Write("\t\t\tif ("); com = firstComment; comIdx = 0; while (com != null) { gen.Write(ChCond(com.start[0])); @@ -1053,8 +1053,9 @@ public void WriteScanner() { if (com.next != null) gen.Write(" ||"); com = com.next; comIdx++; } - gen.Write(") return NextToken();"); + gen.Write(") continue;"); } + g.CopyFramePart("-->scan22"); if (hasCtxMoves) { gen.WriteLine(); gen.Write("\t\tint apx = 0;"); } /* pdt */ g.CopyFramePart("-->scan3"); for (State state = firstState.next; state != null; state = state.next) diff --git a/src/Scanner.cs b/src/Scanner.cs index 33f0e1d..7fa11c0 100644 --- a/src/Scanner.cs +++ b/src/Scanner.cs @@ -405,10 +405,14 @@ void CheckLiteral() { } Token NextToken() { - while (ch == ' ' || - ch >= 9 && ch <= 10 || ch == 13 - ) NextCh(); - if (ch == '/' && Comment0() ||ch == '/' && Comment1()) return NextToken(); + for(;;) { + while (ch == ' ' || + ch >= 9 && ch <= 10 || ch == 13 + ) NextCh(); + if (ch == '/' && Comment0() ||ch == '/' && Comment1()) continue; + break; + } + int recKind = noSym; int recEnd = pos; t = new Token(); diff --git a/src/Scanner.frame b/src/Scanner.frame index eb9b177..6780f2c 100644 --- a/src/Scanner.frame +++ b/src/Scanner.frame @@ -318,10 +318,14 @@ public class Scanner { } Token NextToken() { - while (ch == ' ' || + for(;;) { + while (ch == ' ' || -->scan1 - ) NextCh(); + ) NextCh(); -->scan2 + break; + } +-->scan22 int recKind = noSym; int recEnd = pos; t = new Token(); From ffe1d0edd0a0ac8566cdcb9a43cfa563c316f70b Mon Sep 17 00:00:00 2001 From: mingodad Date: Tue, 8 Jun 2021 09:24:28 +0200 Subject: [PATCH 06/21] Allow till 8 characters for comment delimiters --- src/DFA.cs | 44 ++++++++++++++++++++++++++++++++------------ src/Scanner.cs | 2 +- 2 files changed, 33 insertions(+), 13 deletions(-) diff --git 
a/src/DFA.cs b/src/DFA.cs index 60679df..82f38e5 100644 --- a/src/DFA.cs +++ b/src/DFA.cs @@ -851,8 +851,8 @@ string CommentStr(Node p) { } else parser.SemErr("comment delimiters may not be structured"); p = p.next; } - if (s.Length == 0 || s.Length > 2) { - parser.SemErr("comment delimiters must be 1 or 2 characters long"); + if (s.Length == 0 || s.Length > 8) { + parser.SemErr("comment delimiters must be between 1 to 8 characters long"); s = new StringBuilder("?"); } return s.ToString(); @@ -866,7 +866,13 @@ public void NewComment(Node from, Node to, bool nested) { //------------------------ scanner generation ---------------------- + void GenCommentIndented(int n, string s) { + for(int i= 1; i < n; ++i) gen.Write("\t"); + gen.Write(s); + } + void GenComBody(Comment com) { + int imax = com.start.Length-1; gen.WriteLine( "\t\t\tfor(;;) {"); gen.Write ( "\t\t\t\tif ({0}) ", ChCond(com.stop[0])); gen.WriteLine("{"); if (com.stop.Length == 1) { @@ -874,22 +880,31 @@ void GenComBody(Comment com) { gen.WriteLine("\t\t\t\t\tif (level == 0) { oldEols = line - line0; NextCh(); return true; }"); gen.WriteLine("\t\t\t\t\tNextCh();"); } else { - gen.WriteLine("\t\t\t\t\tNextCh();"); - gen.WriteLine("\t\t\t\t\tif ({0}) {{", ChCond(com.stop[1])); + for(int sidx = 1; sidx <= imax; ++sidx) { + gen.WriteLine("\t\t\t\t\tNextCh();"); + gen.WriteLine("\t\t\t\t\tif ({0}) {{", ChCond(com.stop[sidx])); + } gen.WriteLine("\t\t\t\t\t\tlevel--;"); - gen.WriteLine("\t\t\t\t\t\tif (level == 0) { oldEols = line - line0; NextCh(); return true; }"); + gen.WriteLine("\t\t\t\t\t\tif (level == 0) { /*oldEols = line - line0;*/ NextCh(); return true; }"); gen.WriteLine("\t\t\t\t\t\tNextCh();"); - gen.WriteLine("\t\t\t\t\t}"); + for(int sidx = imax; sidx > 0; --sidx) { + gen.WriteLine("\t\t\t\t\t}"); + } } if (com.nested) { gen.Write ("\t\t\t\t}"); gen.Write(" else if ({0}) ", ChCond(com.start[0])); gen.WriteLine("{"); if (com.start.Length == 1) gen.WriteLine("\t\t\t\t\tlevel++; NextCh();"); else { 
- gen.WriteLine("\t\t\t\t\tNextCh();"); - gen.Write ("\t\t\t\t\tif ({0}) ", ChCond(com.start[1])); gen.WriteLine("{"); + int imaxN = com.start.Length-1; + for(int sidx = 1; sidx <= imaxN; ++sidx) { + gen.WriteLine("\t\t\t\t\tNextCh();"); + gen.Write ("\t\t\t\t\tif ({0}) ", ChCond(com.start[sidx])); gen.WriteLine("{"); + } gen.WriteLine("\t\t\t\t\t\tlevel++; NextCh();"); - gen.WriteLine("\t\t\t\t\t}"); + for(int sidx = imaxN; sidx > 0; --sidx) { + gen.WriteLine("\t\t\t\t\t}"); + } } } gen.WriteLine( "\t\t\t\t} else if (ch == Buffer.EOF) return false;"); @@ -905,10 +920,15 @@ void GenComment(Comment com, int i) { if (com.start.Length == 1) { GenComBody(com); } else { - gen.Write ("\t\tif ({0}) ", ChCond(com.start[1])); gen.WriteLine("{"); - gen.WriteLine("\t\t\tNextCh();"); + int imax = com.start.Length-1; + for(int sidx = 1; sidx <= imax; ++sidx) { + gen.Write ("\t\tif ({0}) ", ChCond(com.start[sidx])); gen.WriteLine("{"); + gen.WriteLine("\t\t\tNextCh();"); + } GenComBody(com); - gen.WriteLine("\t\t}"); + for(int sidx = imax; sidx > 0; --sidx) { + gen.WriteLine("\t\t}"); + } gen.WriteLine("\t\tbuffer.Pos = pos0; NextCh(); line = line0; col = col0; charPos = charPos0;"); gen.WriteLine("\t\treturn false;"); } diff --git a/src/Scanner.cs b/src/Scanner.cs index 7fa11c0..b5880b1 100644 --- a/src/Scanner.cs +++ b/src/Scanner.cs @@ -363,7 +363,7 @@ bool Comment1() { NextCh(); if (ch == '/') { level--; - if (level == 0) { oldEols = line - line0; NextCh(); return true; } + if (level == 0) { /*oldEols = line - line0;*/ NextCh(); return true; } NextCh(); } } else if (ch == '/') { From 6b8bb326cebcf31f31f7136f6e60f505e57f790c Mon Sep 17 00:00:00 2001 From: mingodad Date: Tue, 8 Jun 2021 13:24:54 +0200 Subject: [PATCH 07/21] Implemented the generation of an EBNF grammar understood by https://www.bottlecaps.de/rr/ui to generate railroad diagrams --- src/Coco.atg | 3 ++ src/Coco.cs | 6 ++-- src/Parser.cs | 3 ++ src/ParserGen.cs | 84 
++++++++++++++++++++++++++++++++++++++++++++++++ src/Tab.cs | 1 + 5 files changed, 95 insertions(+), 2 deletions(-) diff --git a/src/Coco.atg b/src/Coco.atg index 6ea58d0..864000c 100644 --- a/src/Coco.atg +++ b/src/Coco.atg @@ -172,6 +172,9 @@ Coco (. Symbol sym; Graph g, g1, g2; CharSet s; int b tab.GrammarCheckAll(); } else doGenCode = tab.GrammarOk(); + if(tab.genRREBNF && doGenCode) { + pgen.WriteRREBNF(); + } if (doGenCode) { Console.Write("parser"); pgen.WriteParser(); diff --git a/src/Coco.cs b/src/Coco.cs index 438b8c3..5e71c01 100644 --- a/src/Coco.cs +++ b/src/Coco.cs @@ -51,7 +51,7 @@ public static int Main (string[] arg) { Console.WriteLine("Coco/R (Apr 19, 2011)"); string srcName = null, nsName = null, frameDir = null, ddtString = null, traceFileName = null, outDir = null; - bool emitLines = false, ignoreErrors = false, genAST = false; + bool emitLines = false, ignoreErrors = false, genAST = false, genRREBNF = false; int retVal = 1; for (int i = 0; i < arg.Length; i++) { if (arg[i] == "-namespace" && i < arg.Length - 1) nsName = arg[++i].Trim(); @@ -59,6 +59,7 @@ public static int Main (string[] arg) { else if (arg[i] == "-trace" && i < arg.Length - 1) ddtString = arg[++i].Trim(); else if (arg[i] == "-o" && i < arg.Length - 1) outDir = arg[++i].Trim(); else if (arg[i] == "-lines") emitLines = true; + else if (arg[i] == "-genRREBNF") genRREBNF = true; else if (arg[i] == "-genAST") genAST = true; else if (arg[i] == "-ignoreErrors") ignoreErrors = true; else srcName = arg[i]; @@ -84,6 +85,7 @@ public static int Main (string[] arg) { parser.tab.frameDir = frameDir; parser.tab.outDir = (outDir != null) ? 
outDir : srcDir; parser.tab.emitLines = emitLines; + parser.tab.genRREBNF = genRREBNF; if (ddtString != null) parser.tab.SetDDT(ddtString); parser.Parse(); @@ -107,7 +109,7 @@ public static int Main (string[] arg) { + " -trace {0}" + " -o {0}" + " -lines{0}" - + " -genAST" + + " -genRREBNF" + " -ignoreErrors ignore grammar errors for developing purposes" + "Valid characters in the trace string:{0}" + " A trace automaton{0}" diff --git a/src/Parser.cs b/src/Parser.cs index 894dc4d..0350c23 100644 --- a/src/Parser.cs +++ b/src/Parser.cs @@ -293,6 +293,9 @@ void Coco() { tab.GrammarCheckAll(); } else doGenCode = tab.GrammarOk(); + if(tab.genRREBNF && doGenCode) { + pgen.WriteRREBNF(); + } if (doGenCode) { Console.Write("parser"); pgen.WriteParser(); diff --git a/src/ParserGen.cs b/src/ParserGen.cs index 5fd19e1..434e04a 100644 --- a/src/ParserGen.cs +++ b/src/ParserGen.cs @@ -404,6 +404,90 @@ public void WriteParser () { buffer.Pos = oldPos; } + public int GenCodeRREBNF (Node p) { + int rc = 0; + Node p2; + while (p != null) { + switch (p.typ) { + case Node.nt: + case Node.t: { + gen.Write(p.sym.name); + gen.Write(" "); + ++rc; + break; + } + case Node.wt: { + break; + } + case Node.any: { + break; + } + case Node.eps: break; // nothing + case Node.rslv: break; // nothing + case Node.sem: { + break; + } + case Node.sync: { + break; + } + case Node.alt: { + gen.Write("( "); + p2 = p; + while (p2 != null) { + rc += GenCodeRREBNF(p2.sub); + p2 = p2.down; + if(p2 != null) gen.Write("| "); + } + gen.Write(") "); + break; + } + case Node.iter: { + gen.Write("( "); + rc += GenCodeRREBNF(p.sub); + gen.Write(")* "); + break; + } + case Node.opt: + gen.Write("( "); + rc += GenCodeRREBNF(p.sub); + gen.Write(")? 
"); + break; + } + if (p.up) break; + p = p.next; + } + return rc; + } + + public void WriteRREBNF () { + Generator g = new Generator(tab); + gen = g.OpenGen("Parser.ebnf"); + + gen.Write("//\n// EBNF generated by CocoR parser generator to be viewed with https://www.bottlecaps.de/rr/ui\n//\n"); + gen.Write("\n//\n// productions\n//\n\n"); + foreach (Symbol sym in tab.nonterminals) { + gen.Write("{0} ::= ", sym.name); + if(GenCodeRREBNF(sym.graph) == 0) { + gen.Write("\"??()??\""); + } + gen.Write("\n"); + } + gen.Write("\n//\n// tokens\n//\n\n"); + foreach (Symbol sym in tab.terminals) { + if (Char.IsLetter(sym.name[0])) { // real name value is stored in Tab.literals + foreach (DictionaryEntry e in tab.literals) { + if ((Symbol)e.Value == sym) { + gen.Write("{0} ::= {1}\n", sym.name, (string)e.Key); + break; + } + } + } else { + //gen.Write("{0} /* {1} */", sym.n, sym.name); + } + } + gen.Close(); + } + public void WriteStatistics () { trace.WriteLine(); trace.WriteLine("{0} terminals", tab.terminals.Count); diff --git a/src/Tab.cs b/src/Tab.cs index 34abc45..636d8be 100644 --- a/src/Tab.cs +++ b/src/Tab.cs @@ -205,6 +205,7 @@ public class Tab { public Position semDeclPos; // position of global semantic declarations public CharSet ignored; // characters ignored by the scanner public bool genAST = false; // generate parser tree generation code + public bool genRREBNF = false; //generate EBNF for railroad diagram public bool ignoreErrors = false; // ignore grammar errors for developing purposes public bool[] ddt = new bool[10]; // debug and test switches public Symbol gramSy; // root nonterminal; filled by ATG From b87a449e0ce0d807457dcce9085998f2d7e09901 Mon Sep 17 00:00:00 2001 From: mingodad Date: Wed, 9 Jun 2021 14:06:27 +0200 Subject: [PATCH 08/21] Allow semantic action that will run on the Scanner for token declarations --- src/Coco.atg | 2 +- src/DFA.cs | 9 +++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/Coco.atg b/src/Coco.atg 
index 864000c..482ea08 100644 --- a/src/Coco.atg +++ b/src/Coco.atg @@ -273,7 +273,7 @@ TokenDecl (. string name; int kind; Symbol sym; Graph g; . else dfa.MatchLiteral(sym.name, sym); .) ) - [ SemText (. if (typ != Node.pr) SemErr("semantic action not allowed here"); .) + [ SemText (. if (typ == Node.t) errors.Warning("Warning semantic action on token declarations require a custom Scanner"); .) //(. if (typ != Node.pr) SemErr("semantic action not allowed here"); .) ] . diff --git a/src/DFA.cs b/src/DFA.cs index 82f38e5..d7a5cd2 100644 --- a/src/DFA.cs +++ b/src/DFA.cs @@ -304,7 +304,7 @@ public void Fill() { //----------------------------------------------------------------------------- // Generator //----------------------------------------------------------------------------- -class Generator { +public class Generator { private const int EOF = -1; private FileStream fram; @@ -962,7 +962,7 @@ void GenLiterals () { gen.WriteLine("\t\t\tdefault: break;"); gen.Write("\t\t}"); } - + void WriteState(State state) { Symbol endOf = state.endOf; gen.WriteLine("\t\t\tcase {0}:", state.nr); @@ -1000,6 +1000,11 @@ void WriteState(State state) { if (endOf.tokenKind == Symbol.classLitToken) { gen.WriteLine("t.val = new String(tval, 0, tlen); CheckLiteral(); return t;}"); } else { + if(endOf.semPos != null && endOf.typ == Node.t) { + gen.Write(" {"); + parser.pgen.CopySourcePart(parser, gen, endOf.semPos, 0); + gen.Write("};"); + } gen.WriteLine("break;}"); } } From cf714777ad0c68ba16d2a26a8126ddb81a173443 Mon Sep 17 00:00:00 2001 From: mingodad Date: Wed, 9 Jun 2021 14:08:06 +0200 Subject: [PATCH 09/21] Implement basic parser tree generation functionality --- src/Coco.cs | 2 + src/Parser.cs | 120 ++++++++++++++++++++++++++++++++++++++++++++++- src/Parser.frame | 81 ++++++++++++++++++++++++++++++++ src/ParserGen.cs | 41 +++++++++++++++- 4 files changed, 242 insertions(+), 2 deletions(-) diff --git a/src/Coco.cs b/src/Coco.cs index 5e71c01..5f0d929 100644 --- a/src/Coco.cs +++ 
b/src/Coco.cs @@ -86,6 +86,7 @@ public static int Main (string[] arg) { parser.tab.outDir = (outDir != null) ? outDir : srcDir; parser.tab.emitLines = emitLines; parser.tab.genRREBNF = genRREBNF; + parser.tab.genAST = genAST; if (ddtString != null) parser.tab.SetDDT(ddtString); parser.Parse(); @@ -110,6 +111,7 @@ public static int Main (string[] arg) { + " -o {0}" + " -lines{0}" + " -genRREBNF" + + " -genAST" + " -ignoreErrors ignore grammar errors for developing purposes" + "Valid characters in the trace string:{0}" + " A trace automaton{0}" diff --git a/src/Parser.cs b/src/Parser.cs index 0350c23..75f921b 100644 --- a/src/Parser.cs +++ b/src/Parser.cs @@ -29,11 +29,66 @@ used as a plugin in non-free software. using System; +using System.Collections; namespace at.jku.ssw.Coco { +#if PARSER_WITH_AST +public class SynTree { + public SynTree(Token t ) { + tok = t; + children = new ArrayList(); + } + + public Token tok; + public ArrayList children; + + static void printIndent(int n) { + for(int i=0; i < n; ++i) Console.Write(" "); + } + + public void dump(int indent=0, bool isLast=false) { + int last_idx = children.Count; + if(tok.col > 0) { + printIndent(indent); + Console.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}", ((isLast || (last_idx == 0)) ? "= " : " "), tok.line, tok.col, tok.kind, tok.val); + } + else { + printIndent(indent); + Console.WriteLine("{0}\t{1}\t{2}\t{3}", children.Count, tok.line, tok.kind, tok.val); + } + if(last_idx > 0) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree)children[idx]).dump(indent+4, idx == last_idx); + } + } + + public void dump2(int maxT, int indent=0, bool isLast=false) { + int last_idx = children.Count; + if(tok.col > 0) { + printIndent(indent); + Console.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}", ((isLast || (last_idx == 0)) ? 
"= " : " "), tok.line, tok.col, tok.kind, tok.val); + } + else { + if(last_idx == 1) { + if(((SynTree)children[0]).tok.kind < maxT) { + printIndent(indent); + Console.WriteLine("{0}\t{1}\t{2}\t{3}", children.Count, tok.line, tok.kind, tok.val); + } + } + else { + printIndent(indent); + Console.WriteLine("{0}\t{1}\t{2}\t{3}", children.Count, tok.line, tok.kind, tok.val); + } + } + if(last_idx > 0) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree)children[idx]).dump2(maxT, indent+4, idx == last_idx); + } + } +}; +#endif + public class Parser { //non terminals public const int _NT_Coco = 0; @@ -62,6 +117,43 @@ public class Parser { public const int _string = 3; public const int _badString = 4; public const int _char = 5; +// public const int _("COMPILER") = 6; +// public const int _("IGNORECASE") = 7; +// public const int _("TERMINALS") = 8; +// public const int _("CHARACTERS") = 9; +// public const int _("TOKENS") = 10; +// public const int _("PRAGMAS") = 11; +// public const int _("COMMENTS") = 12; +// public const int _("FROM") = 13; +// public const int _("TO") = 14; +// public const int _("NESTED") = 15; +// public const int _("IGNORE") = 16; +// public const int _("PRODUCTIONS") = 17; +// public const int _("=") = 18; +// public const int _(".") = 19; +// public const int _("END") = 20; +// public const int _("+") = 21; +// public const int _("-") = 22; +// public const int _("..") = 23; +// public const int _("ANY") = 24; +// public const int _("<") = 25; +// public const int _(">") = 26; +// public const int _("<.") = 27; +// public const int _(".>") = 28; +// public const int _("|") = 29; +// public const int _("WEAK") = 30; +// public const int _("(") = 31; +// public const int _(")") = 32; +// public const int _("[") = 33; +// public const int _("]") = 34; +// public const int _("{") = 35; +// public const int _("}") = 36; +// public const int _("SYNC") = 37; +// public const int _("IF") = 38; +// public const int _("CONTEXT") = 39; +// public const int 
_("(.") = 40; +// public const int _(".)") = 41; +// public const int _(???) = 42; public const int maxT = 42; public const int _ddtSym = 43; public const int _optionSym = 44; @@ -362,7 +454,7 @@ void TokenDecl(int typ) { } else SynErr(45); if (la.kind == 40 /* "(." */) { SemText(out sym.semPos); - if (typ != Node.pr) SemErr("semantic action not allowed here"); + if (typ == Node.t) errors.Warning("Warning semantic action on token declarations require a custom Scanner"); } } @@ -773,6 +865,32 @@ public void Parse() { {_x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_x} }; + +#if PARSER_WITH_AST + public SynTree ast_root; + Stack ast_stack; + + void AstAddTerminal() { + SynTree st = new SynTree( t ); + ((SynTree)(ast_stack.Peek())).children.Add(st); + } + + bool AstAddNonTerminal(int kind, string nt_name, int line) { + Token ntTok = new Token(); + ntTok.kind = kind; + ntTok.line = line; + ntTok.val = nt_name; + SynTree st = new SynTree( ntTok ); + ((SynTree)(ast_stack.Peek())).children.Add(st); + ast_stack.Push(st); + return true; + } + + void AstPopNonTerminal() { + ast_stack.Pop(); + } +#endif + } // end Parser diff --git a/src/Parser.frame b/src/Parser.frame index e8932d3..0424b64 100644 --- a/src/Parser.frame +++ b/src/Parser.frame @@ -26,9 +26,64 @@ Coco/R itself) does not fall under the GNU General Public License. 
----------------------------------------------------------------------*/ -->begin using System; +using System.Collections; -->namespace +#if PARSER_WITH_AST +public class SynTree { + public SynTree(Token t ) { + tok = t; + children = new ArrayList(); + } + + public Token tok; + public ArrayList children; + + static void printIndent(int n) { + for(int i=0; i < n; ++i) Console.Write(" "); + } + + public void dump(int indent=0, bool isLast=false) { + int last_idx = children.Count; + if(tok.col > 0) { + printIndent(indent); + Console.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}", ((isLast || (last_idx == 0)) ? "= " : " "), tok.line, tok.col, tok.kind, tok.val); + } + else { + printIndent(indent); + Console.WriteLine("{0}\t{1}\t{2}\t{3}", children.Count, tok.line, tok.kind, tok.val); + } + if(last_idx > 0) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree)children[idx]).dump(indent+4, idx == last_idx); + } + } + + public void dump2(int maxT, int indent=0, bool isLast=false) { + int last_idx = children.Count; + if(tok.col > 0) { + printIndent(indent); + Console.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}", ((isLast || (last_idx == 0)) ? 
"= " : " "), tok.line, tok.col, tok.kind, tok.val); + } + else { + if(last_idx == 1) { + if(((SynTree)children[0]).tok.kind < maxT) { + printIndent(indent); + Console.WriteLine("{0}\t{1}\t{2}\t{3}", children.Count, tok.line, tok.kind, tok.val); + } + } + else { + printIndent(indent); + Console.WriteLine("{0}\t{1}\t{2}\t{3}", children.Count, tok.line, tok.kind, tok.val); + } + } + if(last_idx > 0) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree)children[idx]).dump2(maxT, indent+4, idx == last_idx); + } + } +}; +#endif + public class Parser { -->constants const bool _T = true; @@ -113,6 +168,32 @@ public class Parser { static readonly bool[,] set = { -->initialization }; + +#if PARSER_WITH_AST + public SynTree ast_root; + Stack ast_stack; + + void AstAddTerminal() { + SynTree st = new SynTree( t ); + ((SynTree)(ast_stack.Peek())).children.Add(st); + } + + bool AstAddNonTerminal(int kind, string nt_name, int line) { + Token ntTok = new Token(); + ntTok.kind = kind; + ntTok.line = line; + ntTok.val = nt_name; + SynTree st = new SynTree( ntTok ); + ((SynTree)(ast_stack.Peek())).children.Add(st); + ast_stack.Push(st); + return true; + } + + void AstPopNonTerminal() { + ast_stack.Pop(); + } +#endif + } // end Parser diff --git a/src/ParserGen.cs b/src/ParserGen.cs index 434e04a..5955064 100644 --- a/src/ParserGen.cs +++ b/src/ParserGen.cs @@ -128,6 +128,17 @@ void CopySourcePart (Position pos, int indent) { } } + /* TODO better interface for CopySourcePart */ + public void CopySourcePart (Parser parser, StreamWriter gen, Position pos, int indent) { + // Copy text described by pos from atg to gen + int oldPos = parser.pgen.buffer.Pos; // Pos is modified by CopySourcePart + StreamWriter prevGen = parser.pgen.gen; + parser.pgen.gen = gen; + parser.pgen.CopySourcePart(pos, 0); + parser.pgen.gen = prevGen; + parser.pgen.buffer.Pos = oldPos; + } + void GenErrorMsg (int errTyp, Symbol sym) { errorNr++; err.Write("\t\t\tcase " + errorNr + ": s = \""); @@ -193,11 +204,23 @@ 
void GenCode (Node p, int indent, BitArray isChecked) { case Node.t: { Indent(indent); // assert: if isChecked[p.sym.n] is true, then isChecked contains only p.sym.n - if (isChecked[p.sym.n]) gen.WriteLine("Get();"); + if (isChecked[p.sym.n]) { + gen.WriteLine("Get();"); + if(tab.genAST) { + gen.WriteLine("#if PARSER_WITH_AST"); + gen.WriteLine("\tAstAddTerminal();"); + gen.WriteLine("#endif"); + } + } else { gen.Write("Expect("); WriteSymbolOrCode(p.sym); gen.WriteLine(");"); + if(tab.genAST) { + gen.WriteLine("#if PARSER_WITH_AST"); + gen.WriteLine("\tAstAddTerminal();"); + gen.WriteLine("#endif"); + } } break; } @@ -322,6 +345,8 @@ void GenTokens() { foreach (Symbol sym in tab.terminals) { if (Char.IsLetter(sym.name[0])) gen.WriteLine("\tpublic const int _{0} = {1};", sym.name, sym.n); + else + gen.WriteLine("//\tpublic const int _({0}) = {1};", sym.name, sym.n); } } @@ -342,14 +367,28 @@ void GenCodePragmas() { } void GenProductions() { + int idx = 0; foreach (Symbol sym in tab.nonterminals) { curSy = sym; gen.Write("\tvoid {0}(", sym.name); CopySourcePart(sym.attrPos, 0); gen.WriteLine(") {"); CopySourcePart(sym.semPos, 2); + if(tab.genAST) { + gen.WriteLine("#if PARSER_WITH_AST"); + if(idx == 0) gen.WriteLine("\tToken rt = new Token(); rt.kind = _NT_{0}; rt.val = \"{0}\";ast_root = new SynTree( rt ); ast_stack = new Stack(); ast_stack.Push(ast_root);", sym.name); + else gen.WriteLine("\tbool ntAdded = AstAddNonTerminal(_NT_{0}, \"{0}\", la.line);", sym.name); + gen.WriteLine("#endif"); + } GenCode(sym.graph, 2, new BitArray(tab.terminals.Count)); + if(tab.genAST) { + gen.WriteLine("#if PARSER_WITH_AST"); + if(idx == 0) gen.WriteLine("\tAstPopNonTerminal();"); + else gen.WriteLine("\tif(ntAdded) AstPopNonTerminal();"); + gen.WriteLine("#endif"); + } gen.WriteLine("\t}"); gen.WriteLine(); + ++idx; } } From 98c227c1d69ad1c287d9c055e4c1399737445372 Mon Sep 17 00:00:00 2001 From: mingodad Date: Wed, 9 Jun 2021 16:27:17 +0200 Subject: [PATCH 10/21] Add 'ANY' when 
generating RREBNF --- src/ParserGen.cs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/ParserGen.cs b/src/ParserGen.cs index 5955064..a575e55 100644 --- a/src/ParserGen.cs +++ b/src/ParserGen.cs @@ -459,6 +459,7 @@ public int GenCodeRREBNF (Node p) { break; } case Node.any: { + gen.Write("ANY "); break; } case Node.eps: break; // nothing From bf901e7ea795f9ec0d9a758f70ce81c4befb0143 Mon Sep 17 00:00:00 2001 From: mingodad Date: Thu, 10 Jun 2021 09:00:26 +0200 Subject: [PATCH 11/21] Reorganize the code removing duplication --- src/ParserGen.cs | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/src/ParserGen.cs b/src/ParserGen.cs index a575e55..437d903 100644 --- a/src/ParserGen.cs +++ b/src/ParserGen.cs @@ -206,21 +206,16 @@ void GenCode (Node p, int indent, BitArray isChecked) { // assert: if isChecked[p.sym.n] is true, then isChecked contains only p.sym.n if (isChecked[p.sym.n]) { gen.WriteLine("Get();"); - if(tab.genAST) { - gen.WriteLine("#if PARSER_WITH_AST"); - gen.WriteLine("\tAstAddTerminal();"); - gen.WriteLine("#endif"); - } } else { gen.Write("Expect("); WriteSymbolOrCode(p.sym); gen.WriteLine(");"); - if(tab.genAST) { - gen.WriteLine("#if PARSER_WITH_AST"); - gen.WriteLine("\tAstAddTerminal();"); - gen.WriteLine("#endif"); - } + } + if(tab.genAST) { + gen.WriteLine("#if PARSER_WITH_AST"); + gen.WriteLine("\tAstAddTerminal();"); + gen.WriteLine("#endif"); } break; } From 65735a1676fb4b5b03bf661c6cef653ab0c59ddd Mon Sep 17 00:00:00 2001 From: Domingo Alvarez Duarte Date: Thu, 10 Jun 2021 16:46:19 +0200 Subject: [PATCH 12/21] Add an overview of my main changes --- README.md | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/README.md b/README.md index 8071639..5b11480 100644 --- a/README.md +++ b/README.md @@ -3,3 +3,23 @@ Coco/R is a compiler generator, which takes an attributed grammar of a source language and generates a scanner and a parser for this language. 
The scanner works as a deterministic finite automaton. The parser uses recursive descent. LL(1) conflicts can be resolved by a multi-symbol lookahead or by semantic checks. Thus the class of accepted grammars is LL(k) for an arbitrary k. http://ssw.jku.at/coco/ + +And this are my main modifications to the original: + +- Enhance left recursion detection + +- Allow semantic actions on `token declaration` similar to `pragmas` but the code executes on the Scanner + +- Allow till 8 characters as comment delimiters + +- Add option `-genRREBNF` to generate an EBNF grammar to crate railroad diagrams at https://www.bottlecaps.de/rr/ui + +- Add option `-geAST` to generate code to generate `parser syntax tree` based on https://github.com/rochus-keller/EbnfStudio + +- Add option `-ignoreGammarErrors` to make easier to develop grammars, like commenting one non terminal and still generating the parser and scanner even with sevral non reachable non terminals + +- Add a `TERMINAS` section to generate user define tokens not managed by the Scanner (from cocoxml) + +- Refactor the code to allow compile with and without wchar_t depending on the definition of `PARSER_WITH_AST` compiler macro + +- Generate between comments the correspondent representation of several magic numbers (mainly Tokens) From 58ba844e43e10124a8fe55b1bac7602dff923806 Mon Sep 17 00:00:00 2001 From: Domingo Alvarez Duarte Date: Thu, 10 Jun 2021 19:19:18 +0200 Subject: [PATCH 13/21] Remove line only relevant to C++ --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 5b11480..3bb8f2f 100644 --- a/README.md +++ b/README.md @@ -20,6 +20,6 @@ And this are my main modifications to the original: - Add a `TERMINAS` section to generate user define tokens not managed by the Scanner (from cocoxml) -- Refactor the code to allow compile with and without wchar_t depending on the definition of `PARSER_WITH_AST` compiler macro - - Generate between comments the 
correspondent representation of several magic numbers (mainly Tokens) + +See also https://github.com/mingodad/CocoR-CPP and https://github.com/mingodad/CocoR-CSharp From 2ee25620c132b9bb2da7467fb36fea7d2161377d Mon Sep 17 00:00:00 2001 From: Domingo Alvarez Duarte Date: Thu, 10 Jun 2021 19:21:27 +0200 Subject: [PATCH 14/21] Fix typo --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 3bb8f2f..7bf6215 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ And this are my main modifications to the original: - Add option `-ignoreGammarErrors` to make easier to develop grammars, like commenting one non terminal and still generating the parser and scanner even with sevral non reachable non terminals -- Add a `TERMINAS` section to generate user define tokens not managed by the Scanner (from cocoxml) +- Add a `TERMINALS` section to generate user define tokens not managed by the Scanner (from cocoxml) - Generate between comments the correspondent representation of several magic numbers (mainly Tokens) From ba047f93f264cc104fbabe46301894dc4dcbbcd7 Mon Sep 17 00:00:00 2001 From: mingodad Date: Fri, 11 Jun 2021 15:04:09 +0200 Subject: [PATCH 15/21] My last fix for left recursion detection didn't work for every depth; this now seems to work in all cases --- src/Tab.cs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/Tab.cs b/src/Tab.cs index 636d8be..cb6c759 100644 --- a/src/Tab.cs +++ b/src/Tab.cs @@ -924,7 +924,7 @@ public bool GrammarCheckAll() { int errors = 0; if(!NtsComplete()) ++errors; if(!AllNtReached()) ++errors; - if(!NoCircularProductions()) ++errors; + if(!NoCircularProductions()) System.Environment.Exit(1); if(!AllNtToTerm()) ++errors; CheckResolvers(); CheckLL1(); return errors == 0; @@ -940,17 +940,17 @@ public CNode (Symbol l, Symbol r) { } } - void GetSingles(Node p, ArrayList singles, Node rule) { + void GetSingles(Node p, ArrayList singles) { if (p == null) 
return; // end of graph if (p.typ == Node.nt) { - if (p.up || DelGraph(p.next) || p.sym.graph == rule) singles.Add(p.sym); + singles.Add(p.sym); } else if (p.typ == Node.alt || p.typ == Node.iter || p.typ == Node.opt) { if (p.up || DelGraph(p.next)) { - GetSingles(p.sub, singles, rule); - if (p.typ == Node.alt) GetSingles(p.down, singles, rule); + GetSingles(p.sub, singles); + if (p.typ == Node.alt) GetSingles(p.down, singles); } } - if (!p.up && DelNode(p)) GetSingles(p.next, singles, rule); + if (!p.up && DelNode(p)) GetSingles(p.next, singles); } public bool NoCircularProductions() { @@ -958,7 +958,7 @@ public bool NoCircularProductions() { ArrayList list = new ArrayList(); foreach (Symbol sym in nonterminals) { ArrayList singles = new ArrayList(); - GetSingles(sym.graph, singles, sym.graph); // get nonterminals s such that sym-->s + GetSingles(sym.graph, singles); // get nonterminals s such that sym-->s foreach (Symbol s in singles) list.Add(new CNode(sym, s)); } do { @@ -978,7 +978,7 @@ public bool NoCircularProductions() { ok = true; foreach (CNode n in list) { ok = false; - errors.SemErr(" " + n.left.name + " --> " + n.right.name); + errors.SemErr(" " + n.left.name + ":" + n.left.line + " --> " + n.right.name + ":" + n.right.line); } return ok; } @@ -1059,7 +1059,7 @@ void PrintFirstPath(Node p, int tok, string indent = "\t", int depth = 0) int CheckAlts(Node p) { BitArray s1, s2; - int rc = 0; + int rc = 0; while (p != null) { if (p.typ == Node.alt) { Node q = p; From 0f1e9a4e0c50eeeb391caacbf364a37be0513356 Mon Sep 17 00:00:00 2001 From: mingodad Date: Sat, 12 Jun 2021 12:59:34 +0200 Subject: [PATCH 16/21] Fix SynTree.dump2 that is supposed to show a pruned tree --- src/Parser.cs | 8 +++++--- src/Parser.frame | 8 +++++--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/Parser.cs b/src/Parser.cs index 75f921b..a97c538 100644 --- a/src/Parser.cs +++ b/src/Parser.cs @@ -64,18 +64,20 @@ public void dump(int indent=0, bool isLast=false) { } } 
- public void dump2(int maxT, int indent=0, bool isLast=false) { + public void dump2(int indent=0, bool isLast=false) { int last_idx = children.Count; + int indentPlus = 4; if(tok.col > 0) { printIndent(indent); Console.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}", ((isLast || (last_idx == 0)) ? "= " : " "), tok.line, tok.col, tok.kind, tok.val); } else { if(last_idx == 1) { - if(((SynTree)children[0]).tok.kind < maxT) { + if(((SynTree)children[0]).cildren.Count == 0) { printIndent(indent); Console.WriteLine("{0}\t{1}\t{2}\t{3}", children.Count, tok.line, tok.kind, tok.val); } + else indentPlus = 0; } else { printIndent(indent); @@ -83,7 +85,7 @@ public void dump2(int maxT, int indent=0, bool isLast=false) { } } if(last_idx > 0) { - for(int idx=0; idx < last_idx; ++idx) ((SynTree)children[idx]).dump2(maxT, indent+4, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) ((SynTree)children[idx]).dump2(indent+indentPlus, idx == last_idx); } } }; diff --git a/src/Parser.frame b/src/Parser.frame index 0424b64..48ef3b0 100644 --- a/src/Parser.frame +++ b/src/Parser.frame @@ -59,18 +59,20 @@ public class SynTree { } } - public void dump2(int maxT, int indent=0, bool isLast=false) { + public void dump2(int indent=0, bool isLast=false) { int last_idx = children.Count; + int indentPlus = 4; if(tok.col > 0) { printIndent(indent); Console.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}", ((isLast || (last_idx == 0)) ? 
"= " : " "), tok.line, tok.col, tok.kind, tok.val); } else { if(last_idx == 1) { - if(((SynTree)children[0]).tok.kind < maxT) { + if(((SynTree)children[0]).cildren.Count == 0) { printIndent(indent); Console.WriteLine("{0}\t{1}\t{2}\t{3}", children.Count, tok.line, tok.kind, tok.val); } + else indentPlus = 0; } else { printIndent(indent); @@ -78,7 +80,7 @@ public class SynTree { } } if(last_idx > 0) { - for(int idx=0; idx < last_idx; ++idx) ((SynTree)children[idx]).dump2(maxT, indent+4, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) ((SynTree)children[idx]).dump2(indent+indentPlus, idx == last_idx); } } }; From 786d88a19803391e8f51cdcb917e53588871db1c Mon Sep 17 00:00:00 2001 From: mingodad Date: Mon, 14 Jun 2021 14:10:36 +0200 Subject: [PATCH 17/21] Rename SynTree::dump to SynTree::dump_all and SynTree::dump2 to SynTree::dump_pruned --- src/Parser.cs | 8 ++++---- src/Parser.frame | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/Parser.cs b/src/Parser.cs index a97c538..f082211 100644 --- a/src/Parser.cs +++ b/src/Parser.cs @@ -49,7 +49,7 @@ static void printIndent(int n) { for(int i=0; i < n; ++i) Console.Write(" "); } - public void dump(int indent=0, bool isLast=false) { + public void dump_all(int indent=0, bool isLast=false) { int last_idx = children.Count; if(tok.col > 0) { printIndent(indent); @@ -60,11 +60,11 @@ public void dump(int indent=0, bool isLast=false) { Console.WriteLine("{0}\t{1}\t{2}\t{3}", children.Count, tok.line, tok.kind, tok.val); } if(last_idx > 0) { - for(int idx=0; idx < last_idx; ++idx) ((SynTree)children[idx]).dump(indent+4, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) ((SynTree)children[idx]).dump_all(indent+4, idx == last_idx); } } - public void dump2(int indent=0, bool isLast=false) { + public void dump_pruned(int indent=0, bool isLast=false) { int last_idx = children.Count; int indentPlus = 4; if(tok.col > 0) { @@ -85,7 +85,7 @@ public void dump2(int indent=0, bool isLast=false) { } 
} if(last_idx > 0) { - for(int idx=0; idx < last_idx; ++idx) ((SynTree)children[idx]).dump2(indent+indentPlus, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) ((SynTree)children[idx]).dump_pruned(indent+indentPlus, idx == last_idx); } } }; diff --git a/src/Parser.frame b/src/Parser.frame index 48ef3b0..cb75984 100644 --- a/src/Parser.frame +++ b/src/Parser.frame @@ -44,7 +44,7 @@ public class SynTree { for(int i=0; i < n; ++i) Console.Write(" "); } - public void dump(int indent=0, bool isLast=false) { + public void dump_all(int indent=0, bool isLast=false) { int last_idx = children.Count; if(tok.col > 0) { printIndent(indent); @@ -55,11 +55,11 @@ public class SynTree { Console.WriteLine("{0}\t{1}\t{2}\t{3}", children.Count, tok.line, tok.kind, tok.val); } if(last_idx > 0) { - for(int idx=0; idx < last_idx; ++idx) ((SynTree)children[idx]).dump(indent+4, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) ((SynTree)children[idx]).dump_all(indent+4, idx == last_idx); } } - public void dump2(int indent=0, bool isLast=false) { + public void dump_pruned(int indent=0, bool isLast=false) { int last_idx = children.Count; int indentPlus = 4; if(tok.col > 0) { @@ -80,7 +80,7 @@ public class SynTree { } } if(last_idx > 0) { - for(int idx=0; idx < last_idx; ++idx) ((SynTree)children[idx]).dump2(indent+indentPlus, idx == last_idx); + for(int idx=0; idx < last_idx; ++idx) ((SynTree)children[idx]).dump_pruned(indent+indentPlus, idx == last_idx); } } }; From 9a54fc3cc813e4dc2e66f36ddbf1edf4f4c1e009 Mon Sep 17 00:00:00 2001 From: mingodad Date: Thu, 17 Jun 2021 08:57:10 +0200 Subject: [PATCH 18/21] Fix for the command line option -ignoreErrors --- src/Coco.atg | 3 +-- src/Coco.cs | 7 ++++--- src/Parser.cs | 3 +-- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/Coco.atg b/src/Coco.atg index 482ea08..9167643 100644 --- a/src/Coco.atg +++ b/src/Coco.atg @@ -44,7 +44,6 @@ COMPILER Coco public ParserGen pgen; bool genScanner; - bool ignoreGammarErrors = 
false; string tokenString; // used in declarations of literal tokens string noString = "-none-"; // used in declarations of literal tokens string gramName; // grammar name @@ -167,7 +166,7 @@ Coco (. Symbol sym; Graph g, g1, g2; CharSet s; int b tab.CompSymbolSets(); if (tab.ddt[7]) tab.XRef(); bool doGenCode = false; - if(ignoreGammarErrors) { + if(tab.ignoreErrors) { doGenCode = true; tab.GrammarCheckAll(); } diff --git a/src/Coco.cs b/src/Coco.cs index 5f0d929..8f79651 100644 --- a/src/Coco.cs +++ b/src/Coco.cs @@ -87,6 +87,7 @@ public static int Main (string[] arg) { parser.tab.emitLines = emitLines; parser.tab.genRREBNF = genRREBNF; parser.tab.genAST = genAST; + parser.tab.ignoreErrors = ignoreErrors; if (ddtString != null) parser.tab.SetDDT(ddtString); parser.Parse(); @@ -110,9 +111,9 @@ public static int Main (string[] arg) { + " -trace {0}" + " -o {0}" + " -lines{0}" - + " -genRREBNF" - + " -genAST" - + " -ignoreErrors ignore grammar errors for developing purposes" + + " -genRREBNF{0}" + + " -genAST{0}" + + " -ignoreErrors ignore grammar errors for developing purposes{0}" + "Valid characters in the trace string:{0}" + " A trace automaton{0}" + " F list first/follow sets{0}" diff --git a/src/Parser.cs b/src/Parser.cs index f082211..59f38f0 100644 --- a/src/Parser.cs +++ b/src/Parser.cs @@ -180,7 +180,6 @@ public class Parser { public ParserGen pgen; bool genScanner; - bool ignoreGammarErrors = false; string tokenString; // used in declarations of literal tokens string noString = "-none-"; // used in declarations of literal tokens string gramName; // grammar name @@ -382,7 +381,7 @@ void Coco() { tab.CompSymbolSets(); if (tab.ddt[7]) tab.XRef(); bool doGenCode = false; - if(ignoreGammarErrors) { + if(tab.ignoreErrors) { doGenCode = true; tab.GrammarCheckAll(); } From 3b705b400c3853d5cd74136ed29a9706d0c9875e Mon Sep 17 00:00:00 2001 From: mingodad Date: Thu, 1 Jul 2021 12:10:17 +0200 Subject: [PATCH 19/21] Fix for endless loop with some ill grammars --- 
src/Tab.cs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Tab.cs b/src/Tab.cs index cb6c759..a2deef4 100644 --- a/src/Tab.cs +++ b/src/Tab.cs @@ -1048,7 +1048,8 @@ void PrintFirstPath(Node p, int tok, string indent = "\t", int depth = 0) case Node.iter: case Node.opt: { - PrintFirstPath(p.sub, tok, indent, depth + 1); + if (!DelNode(p.sub)) //prevent endless loop with some ill grammars + PrintFirstPath(p.sub, tok, indent, depth + 1); break; } } From 171fe32a8239630382da4122726da3a285c6bd11 Mon Sep 17 00:00:00 2001 From: mingodad Date: Sat, 14 Aug 2021 14:25:21 +0200 Subject: [PATCH 20/21] Add the suffix "_NT" to non terminal generated functions --- src/ParserGen.cs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/ParserGen.cs b/src/ParserGen.cs index 437d903..bc5b9e7 100644 --- a/src/ParserGen.cs +++ b/src/ParserGen.cs @@ -196,7 +196,7 @@ void GenCode (Node p, int indent, BitArray isChecked) { switch (p.typ) { case Node.nt: { Indent(indent); - gen.Write(p.sym.name + "("); + gen.Write(p.sym.name + "_NT("); CopySourcePart(p.pos, 0); gen.WriteLine(");"); break; @@ -341,7 +341,7 @@ void GenTokens() { if (Char.IsLetter(sym.name[0])) gen.WriteLine("\tpublic const int _{0} = {1};", sym.name, sym.n); else - gen.WriteLine("//\tpublic const int _({0}) = {1};", sym.name, sym.n); + gen.WriteLine("//\tpublic const int _({0}) = {1};", sym.name, sym.n); } } @@ -365,7 +365,7 @@ void GenProductions() { int idx = 0; foreach (Symbol sym in tab.nonterminals) { curSy = sym; - gen.Write("\tvoid {0}(", sym.name); + gen.Write("\tvoid {0}_NT(", sym.name); CopySourcePart(sym.attrPos, 0); gen.WriteLine(") {"); CopySourcePart(sym.semPos, 2); @@ -428,7 +428,7 @@ public void WriteParser () { g.CopyFramePart("-->declarations"); CopySourcePart(tab.semDeclPos, 0); g.CopyFramePart("-->pragmas"); GenCodePragmas(); g.CopyFramePart("-->productions"); GenProductions(); - g.CopyFramePart("-->parseRoot"); gen.WriteLine("\t\t{0}();", tab.gramSy.name); 
if (tab.checkEOF) gen.WriteLine("\t\tExpect(0);"); + g.CopyFramePart("-->parseRoot"); gen.WriteLine("\t\t{0}_NT();", tab.gramSy.name); if (tab.checkEOF) gen.WriteLine("\t\tExpect(0);"); g.CopyFramePart("-->initialization"); InitSets(); g.CopyFramePart("-->errors"); gen.Write(err.ToString()); g.CopyFramePart(null); From af4b887fa7903edbdbb9d67278dd5855ce717fac Mon Sep 17 00:00:00 2001 From: mingodad Date: Sat, 14 Aug 2021 15:09:53 +0200 Subject: [PATCH 21/21] Add token inheritance from https://github.com/Lercher/CocoR --- src/Coco.atg | 12 +- src/Parser.cs | 637 +++++++++++++++++++++++++---------------------- src/Parser.frame | 24 +- src/ParserGen.cs | 22 +- src/Scanner.cs | 141 ++++++----- src/Tab.cs | 1 + 6 files changed, 456 insertions(+), 381 deletions(-) diff --git a/src/Coco.atg b/src/Coco.atg index 9167643..074587e 100644 --- a/src/Coco.atg +++ b/src/Coco.atg @@ -246,7 +246,9 @@ Char /*------------------------------------------------------------------------------------*/ -TokenDecl (. string name; int kind; Symbol sym; Graph g; .) +TokenDecl (. string name; int kind; Symbol sym; Graph g; + string inheritsName; int inheritsKind; Symbol inheritsSym; + .) = Sym (. sym = tab.FindSym(name); if (sym != null) SemErr("name declared twice"); @@ -256,6 +258,14 @@ TokenDecl (. string name; int kind; Symbol sym; Graph g; . } tokenString = null; .) + [ ':' Sym + (. inheritsSym = tab.FindSym(inheritsName); + if (inheritsSym == null) SemErr(string.Format("token '{0}' can't inherit from '{1}', name not declared", sym.name, inheritsName)); + else if (inheritsSym == sym) SemErr(string.Format("token '{0}' must not inherit from self", sym.name)); + else if (inheritsSym.typ != typ) SemErr(string.Format("token '{0}' can't inherit from '{1}'", sym.name, inheritsSym.name)); + else sym.inherits = inheritsSym; + .) + ] SYNC ( '=' TokenExpr '.' (. 
if (kind == str) SemErr("a literal must not be declared with a structure"); tab.Finish(g); diff --git a/src/Parser.cs b/src/Parser.cs index 59f38f0..479c706 100644 --- a/src/Parser.cs +++ b/src/Parser.cs @@ -138,27 +138,28 @@ public class Parser { // public const int _("-") = 22; // public const int _("..") = 23; // public const int _("ANY") = 24; -// public const int _("<") = 25; -// public const int _(">") = 26; -// public const int _("<.") = 27; -// public const int _(".>") = 28; -// public const int _("|") = 29; -// public const int _("WEAK") = 30; -// public const int _("(") = 31; -// public const int _(")") = 32; -// public const int _("[") = 33; -// public const int _("]") = 34; -// public const int _("{") = 35; -// public const int _("}") = 36; -// public const int _("SYNC") = 37; -// public const int _("IF") = 38; -// public const int _("CONTEXT") = 39; -// public const int _("(.") = 40; -// public const int _(".)") = 41; -// public const int _(???) = 42; - public const int maxT = 42; - public const int _ddtSym = 43; - public const int _optionSym = 44; +// public const int _(":") = 25; +// public const int _("<") = 26; +// public const int _(">") = 27; +// public const int _("<.") = 28; +// public const int _(".>") = 29; +// public const int _("|") = 30; +// public const int _("WEAK") = 31; +// public const int _("(") = 32; +// public const int _(")") = 33; +// public const int _("[") = 34; +// public const int _("]") = 35; +// public const int _("{") = 36; +// public const int _("}") = 37; +// public const int _("SYNC") = 38; +// public const int _("IF") = 39; +// public const int _("CONTEXT") = 40; +// public const int _("(.") = 41; +// public const int _(".)") = 42; +// public const int _(???) 
= 43; + public const int maxT = 43; + public const int _ddtSym = 44; + public const int _optionSym = 45; const bool _T = true; const bool _x = false; @@ -209,18 +210,27 @@ void Get () { la = scanner.Scan(); if (la.kind <= maxT) { ++errDist; break; } if (la.kind == _ddtSym) { - tab.SetDDT(la.val); + tab.SetDDT(la.val); } if (la.kind == _optionSym) { - tab.SetOption(la.val); + tab.SetOption(la.val); } la = t; } } + bool isKind(Token t, int n) { + int k = t.kind; + while(k >= 0) { + if (k == n) return true; + k = tBase[k]; + } + return false; + } + void Expect (int n) { - if (la.kind==n) Get(); else { SynErr(n); } + if (isKind(la, n)) Get(); else { SynErr(n); } } bool StartOf (int s) { @@ -228,7 +238,7 @@ bool StartOf (int s) { } void ExpectWeak (int n, int follow) { - if (la.kind == n) Get(); + if (isKind(la, n)) Get(); else { SynErr(n); while (!StartOf(follow)) Get(); @@ -238,7 +248,7 @@ void ExpectWeak (int n, int follow) { bool WeakSeparator(int n, int syFol, int repFol) { int kind = la.kind; - if (kind == n) {Get(); return true;} + if (isKind(la, n)) {Get(); return true;} else if (StartOf(repFol)) {return false;} else { SynErr(n); @@ -251,34 +261,34 @@ bool WeakSeparator(int n, int syFol, int repFol) { } - void Coco() { - Symbol sym; Graph g, g1, g2; CharSet s; int beg, line; + void Coco_NT() { + Symbol sym; Graph g, g1, g2; CharSet s; int beg, line; if (StartOf(1 /* any */)) { Get(); - beg = t.pos; line = t.line; + beg = t.pos; line = t.line; while (StartOf(1 /* any */)) { Get(); } - pgen.usingPos = new Position(beg, la.pos, 0, line); + pgen.usingPos = new Position(beg, la.pos, 0, line); } Expect(6 /* "COMPILER" */); genScanner = true; - tab.ignored = new CharSet(); + tab.ignored = new CharSet(); Expect(_ident); gramName = t.val; beg = la.pos; line = la.line; - + while (StartOf(2 /* any */)) { Get(); } - tab.semDeclPos = new Position(beg, la.pos, 0, line); - if (la.kind == 7 /* "IGNORECASE" */) { + tab.semDeclPos = new Position(beg, la.pos, 0, line); + if 
(isKind(la, 7 /* "IGNORECASE" */)) { Get(); - dfa.ignoreCase = true; + dfa.ignoreCase = true; } - if (la.kind == 8 /* "TERMINALS" */) { + if (isKind(la, 8 /* "TERMINALS" */)) { Get(); - while (la.kind == _ident) { + while (isKind(la, _ident)) { Get(); sym = tab.FindSym(t.val); if (sym != null) SemErr("name declared twice"); @@ -288,48 +298,48 @@ void Coco() { } } } - if (la.kind == 9 /* "CHARACTERS" */) { + if (isKind(la, 9 /* "CHARACTERS" */)) { Get(); - while (la.kind == _ident) { - SetDecl(); + while (isKind(la, _ident)) { + SetDecl_NT(); } } - if (la.kind == 10 /* "TOKENS" */) { + if (isKind(la, 10 /* "TOKENS" */)) { Get(); - while (la.kind == _ident || la.kind == _string || la.kind == _char) { - TokenDecl(Node.t); + while (isKind(la, _ident) || isKind(la, _string) || isKind(la, _char)) { + TokenDecl_NT(Node.t); } } - if (la.kind == 11 /* "PRAGMAS" */) { + if (isKind(la, 11 /* "PRAGMAS" */)) { Get(); - while (la.kind == _ident || la.kind == _string || la.kind == _char) { - TokenDecl(Node.pr); + while (isKind(la, _ident) || isKind(la, _string) || isKind(la, _char)) { + TokenDecl_NT(Node.pr); } } - while (la.kind == 12 /* "COMMENTS" */) { + while (isKind(la, 12 /* "COMMENTS" */)) { Get(); - bool nested = false; + bool nested = false; Expect(13 /* "FROM" */); - TokenExpr(out g1); + TokenExpr_NT(out g1); Expect(14 /* "TO" */); - TokenExpr(out g2); - if (la.kind == 15 /* "NESTED" */) { + TokenExpr_NT(out g2); + if (isKind(la, 15 /* "NESTED" */)) { Get(); - nested = true; + nested = true; } - dfa.NewComment(g1.l, g2.l, nested); + dfa.NewComment(g1.l, g2.l, nested); } - while (la.kind == 16 /* "IGNORE" */) { + while (isKind(la, 16 /* "IGNORE" */)) { Get(); - Set(out s); - tab.ignored.Or(s); + Set_NT(out s); + tab.ignored.Or(s); } - while (!(la.kind == _EOF || la.kind == 17 /* "PRODUCTIONS" */)) {SynErr(43); Get();} + while (!(isKind(la, _EOF) || isKind(la, 17 /* "PRODUCTIONS" */))) {SynErr(44); Get();} Expect(17 /* "PRODUCTIONS" */); if (genScanner) 
dfa.MakeDeterministic(); tab.DeleteNodes(); - - while (la.kind == _ident) { + + while (isKind(la, _ident)) { Get(); sym = tab.FindSym(t.val); bool undef = sym == null; @@ -342,22 +352,22 @@ void Coco() { } bool noAttrs = sym.attrPos == null; sym.attrPos = null; - - if (la.kind == 25 /* "<" */ || la.kind == 27 /* "<." */) { - AttrDecl(sym); + + if (isKind(la, 26 /* "<" */) || isKind(la, 28 /* "<." */)) { + AttrDecl_NT(sym); } if (!undef) if (noAttrs != (sym.attrPos == null)) SemErr("attribute mismatch between declaration and use of this symbol"); - - if (la.kind == 40 /* "(." */) { - SemText(out sym.semPos); + + if (isKind(la, 41 /* "(." */)) { + SemText_NT(out sym.semPos); } ExpectWeak(18 /* "=" */, 3); - Expression(out g); + Expression_NT(out g); sym.graph = g.l; tab.Finish(g); - + ExpectWeak(19 /* "." */, 4); } Expect(20 /* "END" */); @@ -402,28 +412,30 @@ void Coco() { } } if (tab.ddt[6]) tab.PrintSymbolTable(); - + Expect(19 /* "." */); } - void SetDecl() { - CharSet s; + void SetDecl_NT() { + CharSet s; Expect(_ident); string name = t.val; CharClass c = tab.FindCharClass(name); if (c != null) SemErr("name declared twice"); - + Expect(18 /* "=" */); - Set(out s); + Set_NT(out s); if (s.Elements() == 0) SemErr("character set must not be empty"); tab.NewCharClass(name, s); - + Expect(19 /* "." 
*/); } - void TokenDecl(int typ) { - string name; int kind; Symbol sym; Graph g; - Sym(out name, out kind); + void TokenDecl_NT(int typ) { + string name; int kind; Symbol sym; Graph g; + string inheritsName; int inheritsKind; Symbol inheritsSym; + + Sym_NT(out name, out kind); sym = tab.FindSym(name); if (sym != null) SemErr("name declared twice"); else { @@ -431,11 +443,21 @@ void TokenDecl(int typ) { sym.tokenKind = Symbol.fixedToken; } tokenString = null; - - while (!(StartOf(5 /* sync */))) {SynErr(44); Get();} - if (la.kind == 18 /* "=" */) { + + if (isKind(la, 25 /* ":" */)) { Get(); - TokenExpr(out g); + Sym_NT(out inheritsName, out inheritsKind); + inheritsSym = tab.FindSym(inheritsName); + if (inheritsSym == null) SemErr(string.Format("token '{0}' can't inherit from '{1}', name not declared", sym.name, inheritsName)); + else if (inheritsSym == sym) SemErr(string.Format("token '{0}' must not inherit from self", sym.name)); + else if (inheritsSym.typ != typ) SemErr(string.Format("token '{0}' can't inherit from '{1}'", sym.name, inheritsSym.name)); + else sym.inherits = inheritsSym; + + } + while (!(StartOf(5 /* sync */))) {SynErr(45); Get();} + if (isKind(la, 18 /* "=" */)) { + Get(); + TokenExpr_NT(out g); Expect(19 /* "." */); if (kind == str) SemErr("a literal must not be declared with a structure"); tab.Finish(g); @@ -447,208 +469,208 @@ void TokenDecl(int typ) { tab.literals[tokenString] = sym; dfa.MatchLiteral(tokenString, sym); } - + } else if (StartOf(6 /* sem */)) { if (kind == id) genScanner = false; else dfa.MatchLiteral(sym.name, sym); - - } else SynErr(45); - if (la.kind == 40 /* "(." */) { - SemText(out sym.semPos); - if (typ == Node.t) errors.Warning("Warning semantic action on token declarations require a custom Scanner"); + + } else SynErr(46); + if (isKind(la, 41 /* "(." 
*/)) { + SemText_NT(out sym.semPos); + if (typ == Node.t) errors.Warning("Warning semantic action on token declarations require a custom Scanner"); } } - void TokenExpr(out Graph g) { - Graph g2; - TokenTerm(out g); - bool first = true; - while (WeakSeparator(29 /* "|" */,7,8) ) { - TokenTerm(out g2); + void TokenExpr_NT(out Graph g) { + Graph g2; + TokenTerm_NT(out g); + bool first = true; + while (WeakSeparator(30 /* "|" */,7,8) ) { + TokenTerm_NT(out g2); if (first) { tab.MakeFirstAlt(g); first = false; } tab.MakeAlternative(g, g2); - + } } - void Set(out CharSet s) { - CharSet s2; - SimSet(out s); - while (la.kind == 21 /* "+" */ || la.kind == 22 /* "-" */) { - if (la.kind == 21 /* "+" */) { + void Set_NT(out CharSet s) { + CharSet s2; + SimSet_NT(out s); + while (isKind(la, 21 /* "+" */) || isKind(la, 22 /* "-" */)) { + if (isKind(la, 21 /* "+" */)) { Get(); - SimSet(out s2); - s.Or(s2); + SimSet_NT(out s2); + s.Or(s2); } else { Get(); - SimSet(out s2); - s.Subtract(s2); + SimSet_NT(out s2); + s.Subtract(s2); } } } - void AttrDecl(Symbol sym) { - if (la.kind == 25 /* "<" */) { + void AttrDecl_NT(Symbol sym) { + if (isKind(la, 26 /* "<" */)) { Get(); - int beg = la.pos; int col = la.col; int line = la.line; + int beg = la.pos; int col = la.col; int line = la.line; while (StartOf(9 /* alt */)) { if (StartOf(10 /* any */)) { Get(); } else { Get(); - SemErr("bad string in attributes"); + SemErr("bad string in attributes"); } } - Expect(26 /* ">" */); + Expect(27 /* ">" */); if (t.pos > beg) - sym.attrPos = new Position(beg, t.pos, col, line); - } else if (la.kind == 27 /* "<." */) { + sym.attrPos = new Position(beg, t.pos, col, line); + } else if (isKind(la, 28 /* "<." 
*/)) { Get(); - int beg = la.pos; int col = la.col; int line = la.line; + int beg = la.pos; int col = la.col; int line = la.line; while (StartOf(11 /* alt */)) { if (StartOf(12 /* any */)) { Get(); } else { Get(); - SemErr("bad string in attributes"); + SemErr("bad string in attributes"); } } - Expect(28 /* ".>" */); + Expect(29 /* ".>" */); if (t.pos > beg) - sym.attrPos = new Position(beg, t.pos, col, line); - } else SynErr(46); + sym.attrPos = new Position(beg, t.pos, col, line); + } else SynErr(47); } - void SemText(out Position pos) { - Expect(40 /* "(." */); - int beg = la.pos; int col = la.col; int line = la.line; + void SemText_NT(out Position pos) { + Expect(41 /* "(." */); + int beg = la.pos; int col = la.col; int line = la.line; while (StartOf(13 /* alt */)) { if (StartOf(14 /* any */)) { Get(); - } else if (la.kind == _badString) { + } else if (isKind(la, _badString)) { Get(); - SemErr("bad string in semantic action"); + SemErr("bad string in semantic action"); } else { Get(); - SemErr("missing end of previous semantic action"); + SemErr("missing end of previous semantic action"); } } - Expect(41 /* ".)" */); - pos = new Position(beg, t.pos, col, line); + Expect(42 /* ".)" */); + pos = new Position(beg, t.pos, col, line); } - void Expression(out Graph g) { - Graph g2; - Term(out g); - bool first = true; - while (WeakSeparator(29 /* "|" */,15,16) ) { - Term(out g2); + void Expression_NT(out Graph g) { + Graph g2; + Term_NT(out g); + bool first = true; + while (WeakSeparator(30 /* "|" */,15,16) ) { + Term_NT(out g2); if (first) { tab.MakeFirstAlt(g); first = false; } tab.MakeAlternative(g, g2); - + } } - void SimSet(out CharSet s) { - int n1, n2; - s = new CharSet(); - if (la.kind == _ident) { + void SimSet_NT(out CharSet s) { + int n1, n2; + s = new CharSet(); + if (isKind(la, _ident)) { Get(); CharClass c = tab.FindCharClass(t.val); if (c == null) SemErr("undefined name"); else s.Or(c.set); - - } else if (la.kind == _string) { + + } else if (isKind(la, 
_string)) { Get(); string name = tab.Unstring(t.val); foreach (char ch in name) if (dfa.ignoreCase) s.Set(char.ToLower(ch)); - else s.Set(ch); - } else if (la.kind == _char) { - Char(out n1); - s.Set(n1); - if (la.kind == 23 /* ".." */) { + else s.Set(ch); + } else if (isKind(la, _char)) { + Char_NT(out n1); + s.Set(n1); + if (isKind(la, 23 /* ".." */)) { Get(); - Char(out n2); - for (int i = n1; i <= n2; i++) s.Set(i); + Char_NT(out n2); + for (int i = n1; i <= n2; i++) s.Set(i); } - } else if (la.kind == 24 /* "ANY" */) { + } else if (isKind(la, 24 /* "ANY" */)) { Get(); - s = new CharSet(); s.Fill(); - } else SynErr(47); + s = new CharSet(); s.Fill(); + } else SynErr(48); } - void Char(out int n) { + void Char_NT(out int n) { Expect(_char); string name = tab.Unstring(t.val); n = 0; if (name.Length == 1) n = name[0]; else SemErr("unacceptable character value"); if (dfa.ignoreCase && (char)n >= 'A' && (char)n <= 'Z') n += 32; - + } - void Sym(out string name, out int kind) { - name = "???"; kind = id; - if (la.kind == _ident) { + void Sym_NT(out string name, out int kind) { + name = "???"; kind = id; + if (isKind(la, _ident)) { Get(); - kind = id; name = t.val; - } else if (la.kind == _string || la.kind == _char) { - if (la.kind == _string) { + kind = id; name = t.val; + } else if (isKind(la, _string) || isKind(la, _char)) { + if (isKind(la, _string)) { Get(); - name = t.val; + name = t.val; } else { Get(); - name = "\"" + t.val.Substring(1, t.val.Length-2) + "\""; + name = "\"" + t.val.Substring(1, t.val.Length-2) + "\""; } kind = str; if (dfa.ignoreCase) name = name.ToLower(); if (name.IndexOf(' ') >= 0) - SemErr("literal tokens must not contain blanks"); - } else SynErr(48); + SemErr("literal tokens must not contain blanks"); + } else SynErr(49); } - void Term(out Graph g) { - Graph g2; Node rslv = null; g = null; + void Term_NT(out Graph g) { + Graph g2; Node rslv = null; g = null; if (StartOf(17 /* opt */)) { - if (la.kind == 38 /* "IF" */) { - rslv = 
tab.NewNode(Node.rslv, null, la.line, la.col); - Resolver(out rslv.pos); - g = new Graph(rslv); + if (isKind(la, 39 /* "IF" */)) { + rslv = tab.NewNode(Node.rslv, null, la.line, la.col); + Resolver_NT(out rslv.pos); + g = new Graph(rslv); } - Factor(out g2); + Factor_NT(out g2); if (rslv != null) tab.MakeSequence(g, g2); else g = g2; - + while (StartOf(18 /* nt Factor */)) { - Factor(out g2); - tab.MakeSequence(g, g2); + Factor_NT(out g2); + tab.MakeSequence(g, g2); } } else if (StartOf(19 /* sem */)) { - g = new Graph(tab.NewNode(Node.eps, null, t.line, t.col)); - } else SynErr(49); + g = new Graph(tab.NewNode(Node.eps, null, t.line, t.col)); + } else SynErr(50); if (g == null) // invalid start of Term g = new Graph(tab.NewNode(Node.eps, null, t.line, t.col)); - + } - void Resolver(out Position pos) { - Expect(38 /* "IF" */); - Expect(31 /* "(" */); - int beg = la.pos; int col = la.col; int line = la.line; - Condition(); - pos = new Position(beg, t.pos, col, line); + void Resolver_NT(out Position pos) { + Expect(39 /* "IF" */); + Expect(32 /* "(" */); + int beg = la.pos; int col = la.col; int line = la.line; + Condition_NT(); + pos = new Position(beg, t.pos, col, line); } - void Factor(out Graph g) { + void Factor_NT(out Graph g) { string name; int kind; Position pos; bool weak = false; g = null; - + switch (la.kind) { - case _ident: case _string: case _char: case 30 /* "WEAK" */: { - if (la.kind == 30 /* "WEAK" */) { + case _ident: case _string: case _char: case 31 /* "WEAK" */: { + if (isKind(la, 31 /* "WEAK" */)) { Get(); - weak = true; + weak = true; } - Sym(out name, out kind); + Sym_NT(out name, out kind); Symbol sym = tab.FindSym(name); if (sym == null && kind == str) sym = tab.literals[name] as Symbol; @@ -672,131 +694,131 @@ void Factor(out Graph g) { else SemErr("only terminals may be weak"); Node p = tab.NewNode(typ, sym, t.line, t.col); g = new Graph(p); - - if (la.kind == 25 /* "<" */ || la.kind == 27 /* "<." 
*/) { - Attribs(p); - if (kind != id) SemErr("a literal must not have attributes"); + + if (isKind(la, 26 /* "<" */) || isKind(la, 28 /* "<." */)) { + Attribs_NT(p); + if (kind != id) SemErr("a literal must not have attributes"); } if (undef) sym.attrPos = p.pos; // dummy else if ((p.pos == null) != (sym.attrPos == null)) SemErr("attribute mismatch between declaration and use of this symbol"); - + break; } - case 31 /* "(" */: { + case 32 /* "(" */: { Get(); - Expression(out g); - Expect(32 /* ")" */); + Expression_NT(out g); + Expect(33 /* ")" */); break; } - case 33 /* "[" */: { + case 34 /* "[" */: { Get(); - Expression(out g); - Expect(34 /* "]" */); - tab.MakeOption(g); + Expression_NT(out g); + Expect(35 /* "]" */); + tab.MakeOption(g); break; } - case 35 /* "{" */: { + case 36 /* "{" */: { Get(); - Expression(out g); - Expect(36 /* "}" */); - tab.MakeIteration(g); + Expression_NT(out g); + Expect(37 /* "}" */); + tab.MakeIteration(g); break; } - case 40 /* "(." */: { - SemText(out pos); + case 41 /* "(." 
*/: { + SemText_NT(out pos); Node p = tab.NewNode(Node.sem, null, t.line, t.col); p.pos = pos; g = new Graph(p); - + break; } case 24 /* "ANY" */: { Get(); Node p = tab.NewNode(Node.any, null, t.line, t.col); // p.set is set in tab.SetupAnys g = new Graph(p); - + break; } - case 37 /* "SYNC" */: { + case 38 /* "SYNC" */: { Get(); Node p = tab.NewNode(Node.sync, null, t.line, t.col); g = new Graph(p); - + break; } - default: SynErr(50); break; + default: SynErr(51); break; } if (g == null) // invalid start of Factor g = new Graph(tab.NewNode(Node.eps, null, t.line, t.col)); - + } - void Attribs(Node p) { - if (la.kind == 25 /* "<" */) { + void Attribs_NT(Node p) { + if (isKind(la, 26 /* "<" */)) { Get(); - int beg = la.pos; int col = la.col; int line = la.line; + int beg = la.pos; int col = la.col; int line = la.line; while (StartOf(9 /* alt */)) { if (StartOf(10 /* any */)) { Get(); } else { Get(); - SemErr("bad string in attributes"); + SemErr("bad string in attributes"); } } - Expect(26 /* ">" */); - if (t.pos > beg) p.pos = new Position(beg, t.pos, col, line); - } else if (la.kind == 27 /* "<." */) { + Expect(27 /* ">" */); + if (t.pos > beg) p.pos = new Position(beg, t.pos, col, line); + } else if (isKind(la, 28 /* "<." 
*/)) { Get(); - int beg = la.pos; int col = la.col; int line = la.line; + int beg = la.pos; int col = la.col; int line = la.line; while (StartOf(11 /* alt */)) { if (StartOf(12 /* any */)) { Get(); } else { Get(); - SemErr("bad string in attributes"); + SemErr("bad string in attributes"); } } - Expect(28 /* ".>" */); - if (t.pos > beg) p.pos = new Position(beg, t.pos, col, line); - } else SynErr(51); + Expect(29 /* ".>" */); + if (t.pos > beg) p.pos = new Position(beg, t.pos, col, line); + } else SynErr(52); } - void Condition() { + void Condition_NT() { while (StartOf(20 /* alt */)) { - if (la.kind == 31 /* "(" */) { + if (isKind(la, 32 /* "(" */)) { Get(); - Condition(); + Condition_NT(); } else { Get(); } } - Expect(32 /* ")" */); + Expect(33 /* ")" */); } - void TokenTerm(out Graph g) { - Graph g2; - TokenFactor(out g); + void TokenTerm_NT(out Graph g) { + Graph g2; + TokenFactor_NT(out g); while (StartOf(7 /* nt TokenFactor */)) { - TokenFactor(out g2); - tab.MakeSequence(g, g2); + TokenFactor_NT(out g2); + tab.MakeSequence(g, g2); } - if (la.kind == 39 /* "CONTEXT" */) { + if (isKind(la, 40 /* "CONTEXT" */)) { Get(); - Expect(31 /* "(" */); - TokenExpr(out g2); + Expect(32 /* "(" */); + TokenExpr_NT(out g2); tab.SetContextTrans(g2.l); dfa.hasCtxMoves = true; - tab.MakeSequence(g, g2); - Expect(32 /* ")" */); + tab.MakeSequence(g, g2); + Expect(33 /* ")" */); } } - void TokenFactor(out Graph g) { - string name; int kind; - g = null; - if (la.kind == _ident || la.kind == _string || la.kind == _char) { - Sym(out name, out kind); + void TokenFactor_NT(out Graph g) { + string name; int kind; + g = null; + if (isKind(la, _ident) || isKind(la, _string) || isKind(la, _char)) { + Sym_NT(out name, out kind); if (kind == id) { CharClass c = tab.FindCharClass(name); if (c == null) { @@ -811,24 +833,24 @@ void TokenFactor(out Graph g) { if (tokenString == null) tokenString = name; else tokenString = noString; } - - } else if (la.kind == 31 /* "(" */) { + + } else if 
(isKind(la, 32 /* "(" */)) { Get(); - TokenExpr(out g); - Expect(32 /* ")" */); - } else if (la.kind == 33 /* "[" */) { + TokenExpr_NT(out g); + Expect(33 /* ")" */); + } else if (isKind(la, 34 /* "[" */)) { Get(); - TokenExpr(out g); - Expect(34 /* "]" */); - tab.MakeOption(g); tokenString = noString; - } else if (la.kind == 35 /* "{" */) { + TokenExpr_NT(out g); + Expect(35 /* "]" */); + tab.MakeOption(g); tokenString = noString; + } else if (isKind(la, 36 /* "{" */)) { Get(); - TokenExpr(out g); - Expect(36 /* "}" */); - tab.MakeIteration(g); tokenString = noString; - } else SynErr(52); + TokenExpr_NT(out g); + Expect(37 /* "}" */); + tab.MakeIteration(g); tokenString = noString; + } else SynErr(53); if (g == null) // invalid start of TokenFactor - g = new Graph(tab.NewNode(Node.eps, null, t.line, t.col)); + g = new Graph(tab.NewNode(Node.eps, null, t.line, t.col)); } @@ -837,40 +859,48 @@ public void Parse() { la = new Token(); la.val = ""; Get(); - Coco(); + Coco_NT(); Expect(0); } + // a token's base type + public static readonly int[] tBase = { + + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1, + }; + static readonly bool[,] set = { - {_T,_T,_x,_T, _x,_T,_x,_x, _x,_x,_x,_T, _T,_x,_x,_x, _T,_T,_T,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _T,_x,_x,_x}, - {_x,_T,_T,_T, _T,_T,_x,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_x}, - {_x,_T,_T,_T, _T,_T,_T,_x, _x,_x,_x,_x, _x,_T,_T,_T, _x,_x,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_x}, - {_T,_T,_x,_T, _x,_T,_x,_x, _x,_x,_x,_T, _T,_x,_x,_x, _T,_T,_T,_T, _x,_x,_x,_x, _T,_x,_x,_x, _x,_T,_T,_T, _x,_T,_x,_T, _x,_T,_T,_x, _T,_x,_x,_x}, - {_T,_T,_x,_T, _x,_T,_x,_x, _x,_x,_x,_T, _T,_x,_x,_x, _T,_T,_T,_x, _T,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _T,_x,_x,_x}, - 
{_T,_T,_x,_T, _x,_T,_x,_x, _x,_x,_x,_T, _T,_x,_x,_x, _T,_T,_T,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _T,_x,_x,_x}, - {_x,_T,_x,_T, _x,_T,_x,_x, _x,_x,_x,_T, _T,_x,_x,_x, _T,_T,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _T,_x,_x,_x}, - {_x,_T,_x,_T, _x,_T,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_T, _x,_T,_x,_T, _x,_x,_x,_x, _x,_x,_x,_x}, - {_x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _T,_x,_T,_T, _T,_T,_x,_T, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _T,_x,_T,_x, _T,_x,_x,_x, _x,_x,_x,_x}, - {_x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_x}, - {_x,_T,_T,_T, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_x}, - {_x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_x}, - {_x,_T,_T,_T, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_x}, - {_x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_x,_T,_x}, - {_x,_T,_T,_T, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _x,_x,_T,_x}, - {_x,_T,_x,_T, _x,_T,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_T, _x,_x,_x,_x, _T,_x,_x,_x, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_x, _T,_x,_x,_x}, - {_x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_T, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _T,_x,_T,_x, _T,_x,_x,_x, _x,_x,_x,_x}, - {_x,_T,_x,_T, _x,_T,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _T,_x,_x,_x, _x,_x,_T,_T, _x,_T,_x,_T, _x,_T,_T,_x, _T,_x,_x,_x}, - {_x,_T,_x,_T, _x,_T,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, 
_T,_x,_x,_x, _x,_x,_T,_T, _x,_T,_x,_T, _x,_T,_x,_x, _T,_x,_x,_x}, - {_x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_T, _x,_x,_x,_x, _x,_x,_x,_x, _x,_T,_x,_x, _T,_x,_T,_x, _T,_x,_x,_x, _x,_x,_x,_x}, - {_x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_x} + {_T,_T,_x,_T, _x,_T,_x,_x, _x,_x,_x,_T, _T,_x,_x,_x, _T,_T,_T,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_T,_x,_x, _x}, + {_x,_T,_T,_T, _T,_T,_x,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _x}, + {_x,_T,_T,_T, _T,_T,_T,_x, _x,_x,_x,_x, _x,_T,_T,_T, _x,_x,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _x}, + {_T,_T,_x,_T, _x,_T,_x,_x, _x,_x,_x,_T, _T,_x,_x,_x, _T,_T,_T,_T, _x,_x,_x,_x, _T,_x,_x,_x, _x,_x,_T,_T, _T,_x,_T,_x, _T,_x,_T,_T, _x,_T,_x,_x, _x}, + {_T,_T,_x,_T, _x,_T,_x,_x, _x,_x,_x,_T, _T,_x,_x,_x, _T,_T,_T,_x, _T,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_T,_x,_x, _x}, + {_T,_T,_x,_T, _x,_T,_x,_x, _x,_x,_x,_T, _T,_x,_x,_x, _T,_T,_T,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_T,_x,_x, _x}, + {_x,_T,_x,_T, _x,_T,_x,_x, _x,_x,_x,_T, _T,_x,_x,_x, _T,_T,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_T,_x,_x, _x}, + {_x,_T,_x,_T, _x,_T,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _T,_x,_T,_x, _T,_x,_x,_x, _x,_x,_x,_x, _x}, + {_x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _T,_x,_T,_T, _T,_T,_x,_T, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_T,_x,_T, _x,_T,_x,_x, _x,_x,_x,_x, _x}, + {_x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_x, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _x}, + {_x,_T,_T,_T, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_x, _T,_T,_T,_T, _T,_T,_T,_T, 
_T,_T,_T,_T, _T,_T,_T,_T, _x}, + {_x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_x,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _x}, + {_x,_T,_T,_T, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_x,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _x}, + {_x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x,_T, _x}, + {_x,_T,_T,_T, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_x,_x,_T, _x}, + {_x,_T,_x,_T, _x,_T,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_T, _x,_x,_x,_x, _T,_x,_x,_x, _x,_x,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _x,_T,_x,_x, _x}, + {_x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_T, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_T,_x,_T, _x,_T,_x,_x, _x,_x,_x,_x, _x}, + {_x,_T,_x,_T, _x,_T,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _T,_x,_x,_x, _x,_x,_x,_T, _T,_x,_T,_x, _T,_x,_T,_T, _x,_T,_x,_x, _x}, + {_x,_T,_x,_T, _x,_T,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _T,_x,_x,_x, _x,_x,_x,_T, _T,_x,_T,_x, _T,_x,_T,_x, _x,_T,_x,_x, _x}, + {_x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_T, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_T,_x, _x,_T,_x,_T, _x,_T,_x,_x, _x,_x,_x,_x, _x}, + {_x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_x,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _x} }; #if PARSER_WITH_AST public SynTree ast_root; Stack ast_stack; - + void AstAddTerminal() { SynTree st = new SynTree( t ); ((SynTree)(ast_stack.Peek())).children.Add(st); @@ -891,7 +921,7 @@ void AstPopNonTerminal() { ast_stack.Pop(); } #endif - + } // end Parser @@ -928,34 +958,35 @@ public virtual void SynErr (int line, int col, int n) { case 22: s = "\"-\" expected"; break; case 23: s = "\"..\" expected"; break; case 24: s = "\"ANY\" expected"; break; 
- case 25: s = "\"<\" expected"; break; - case 26: s = "\">\" expected"; break; - case 27: s = "\"<.\" expected"; break; - case 28: s = "\".>\" expected"; break; - case 29: s = "\"|\" expected"; break; - case 30: s = "\"WEAK\" expected"; break; - case 31: s = "\"(\" expected"; break; - case 32: s = "\")\" expected"; break; - case 33: s = "\"[\" expected"; break; - case 34: s = "\"]\" expected"; break; - case 35: s = "\"{\" expected"; break; - case 36: s = "\"}\" expected"; break; - case 37: s = "\"SYNC\" expected"; break; - case 38: s = "\"IF\" expected"; break; - case 39: s = "\"CONTEXT\" expected"; break; - case 40: s = "\"(.\" expected"; break; - case 41: s = "\".)\" expected"; break; - case 42: s = "??? expected"; break; - case 43: s = "this symbol not expected in Coco"; break; - case 44: s = "this symbol not expected in TokenDecl"; break; - case 45: s = "invalid TokenDecl"; break; - case 46: s = "invalid AttrDecl"; break; - case 47: s = "invalid SimSet"; break; - case 48: s = "invalid Sym"; break; - case 49: s = "invalid Term"; break; - case 50: s = "invalid Factor"; break; - case 51: s = "invalid Attribs"; break; - case 52: s = "invalid TokenFactor"; break; + case 25: s = "\":\" expected"; break; + case 26: s = "\"<\" expected"; break; + case 27: s = "\">\" expected"; break; + case 28: s = "\"<.\" expected"; break; + case 29: s = "\".>\" expected"; break; + case 30: s = "\"|\" expected"; break; + case 31: s = "\"WEAK\" expected"; break; + case 32: s = "\"(\" expected"; break; + case 33: s = "\")\" expected"; break; + case 34: s = "\"[\" expected"; break; + case 35: s = "\"]\" expected"; break; + case 36: s = "\"{\" expected"; break; + case 37: s = "\"}\" expected"; break; + case 38: s = "\"SYNC\" expected"; break; + case 39: s = "\"IF\" expected"; break; + case 40: s = "\"CONTEXT\" expected"; break; + case 41: s = "\"(.\" expected"; break; + case 42: s = "\".)\" expected"; break; + case 43: s = "??? 
expected"; break; + case 44: s = "this symbol not expected in Coco"; break; + case 45: s = "this symbol not expected in TokenDecl"; break; + case 46: s = "invalid TokenDecl"; break; + case 47: s = "invalid AttrDecl"; break; + case 48: s = "invalid SimSet"; break; + case 49: s = "invalid Sym"; break; + case 50: s = "invalid Term"; break; + case 51: s = "invalid Factor"; break; + case 52: s = "invalid Attribs"; break; + case 53: s = "invalid TokenFactor"; break; default: s = "error " + n; break; } diff --git a/src/Parser.frame b/src/Parser.frame index cb75984..75c626c 100644 --- a/src/Parser.frame +++ b/src/Parser.frame @@ -126,8 +126,17 @@ public class Parser { } } + bool isKind(Token t, int n) { + int k = t.kind; + while(k >= 0) { + if (k == n) return true; + k = tBase[k]; + } + return false; + } + void Expect (int n) { - if (la.kind==n) Get(); else { SynErr(n); } + if (isKind(la, n)) Get(); else { SynErr(n); } } bool StartOf (int s) { @@ -135,7 +144,7 @@ public class Parser { } void ExpectWeak (int n, int follow) { - if (la.kind == n) Get(); + if (isKind(la, n)) Get(); else { SynErr(n); while (!StartOf(follow)) Get(); @@ -145,7 +154,7 @@ public class Parser { bool WeakSeparator(int n, int syFol, int repFol) { int kind = la.kind; - if (kind == n) {Get(); return true;} + if (isKind(la, n)) {Get(); return true;} else if (StartOf(repFol)) {return false;} else { SynErr(n); @@ -167,6 +176,11 @@ public class Parser { -->parseRoot } + // a token's base type + public static readonly int[] tBase = { +-->tbase + }; + static readonly bool[,] set = { -->initialization }; @@ -174,7 +188,7 @@ public class Parser { #if PARSER_WITH_AST public SynTree ast_root; Stack ast_stack; - + void AstAddTerminal() { SynTree st = new SynTree( t ); ((SynTree)(ast_stack.Peek())).children.Add(st); @@ -195,7 +209,7 @@ public class Parser { ast_stack.Pop(); } #endif - + } // end Parser diff --git a/src/ParserGen.cs b/src/ParserGen.cs index bc5b9e7..9088d3f 100644 --- a/src/ParserGen.cs +++ 
b/src/ParserGen.cs @@ -168,8 +168,9 @@ void GenCond (BitArray s, Node p) { else if (n <= maxTerm) foreach (Symbol sym in tab.terminals) { if (s[sym.n]) { - gen.Write("la.kind == "); + gen.Write("isKind(la, "); WriteSymbolOrCode(sym); + gen.Write(")"); --n; if (n > 0) gen.Write(" || "); } @@ -330,6 +331,17 @@ void GenCode (Node p, int indent, BitArray isChecked) { } } + void GenTokenBase() { + int idx = 0; + foreach (Symbol sym in tab.terminals) { + if((idx++ % 20) == 0) gen.Write("\n\t\t"); + if (sym.inherits == null) + gen.Write("{0,2},", -1); // not inherited + else + gen.Write("{0,2},", sym.inherits.n); + } + } + void GenTokens() { gen.WriteLine("\t//non terminals"); foreach (Symbol sym in tab.nonterminals) { @@ -339,9 +351,12 @@ void GenTokens() { gen.WriteLine("\t//terminals"); foreach (Symbol sym in tab.terminals) { if (Char.IsLetter(sym.name[0])) - gen.WriteLine("\tpublic const int _{0} = {1};", sym.name, sym.n); + gen.Write("\tpublic const int _{0} = {1};", sym.name, sym.n); else - gen.WriteLine("//\tpublic const int _({0}) = {1};", sym.name, sym.n); + gen.Write("//\tpublic const int _({0}) = {1};", sym.name, sym.n); + if(sym.inherits != null) + gen.Write(" // INHERITS -> {0}", sym.inherits.name); + gen.WriteLine(); } } @@ -429,6 +444,7 @@ public void WriteParser () { g.CopyFramePart("-->pragmas"); GenCodePragmas(); g.CopyFramePart("-->productions"); GenProductions(); g.CopyFramePart("-->parseRoot"); gen.WriteLine("\t\t{0}_NT();", tab.gramSy.name); if (tab.checkEOF) gen.WriteLine("\t\tExpect(0);"); + g.CopyFramePart("-->tbase"); GenTokenBase(); // write all tokens base types g.CopyFramePart("-->initialization"); InitSets(); g.CopyFramePart("-->errors"); gen.Write(err.ToString()); g.CopyFramePart(null); diff --git a/src/Scanner.cs b/src/Scanner.cs index b5880b1..9dbb51d 100644 --- a/src/Scanner.cs +++ b/src/Scanner.cs @@ -61,10 +61,10 @@ public class Buffer { int bufPos; // current position in buffer Stream stream; // input stream (seekable) bool 
isUserStream; // was the stream opened by the user? - + public Buffer (Stream s, bool isUserStream) { stream = s; this.isUserStream = isUserStream; - + if (stream.CanSeek) { fileLen = (int) stream.Length; bufLen = Math.Min(fileLen, MAX_BUFFER_LENGTH); @@ -78,7 +78,7 @@ public Buffer (Stream s, bool isUserStream) { else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid if (bufLen == fileLen && stream.CanSeek) Close(); } - + protected Buffer(Buffer b) { // called in UTF8Buffer constructor buf = b.buf; bufStart = b.bufStart; @@ -92,14 +92,14 @@ protected Buffer(Buffer b) { // called in UTF8Buffer constructor } ~Buffer() { Close(); } - + protected void Close() { if (!isUserStream && stream != null) { stream.Close(); stream = null; } } - + public virtual int Read () { if (bufPos < bufLen) { return buf[bufPos++]; @@ -119,7 +119,7 @@ public int Peek () { Pos = curPos; return ch; } - + // beg .. begin, zero-based, inclusive, in byte // end .. end, zero-based, exclusive, in byte public string GetString (int beg, int end) { @@ -159,7 +159,7 @@ public int Pos { } } } - + // Read the next chunk of bytes from the stream, increases the buffer // if needed and updates the fields fileLen and bufLen. // Returns the number of bytes read. 
@@ -229,12 +229,12 @@ public override int Read() { public class Scanner { const char EOL = '\n'; const int eofSym = 0; /* pdt */ - const int maxT = 42; - const int noSym = 42; + const int maxT = 43; + const int noSym = 43; public Buffer buffer; // scanner buffer - + Token t; // current token int ch; // current input character int pos; // byte position of current character @@ -246,36 +246,37 @@ public class Scanner { Token tokens; // list of tokens already peeked (first token is a dummy) Token pt; // current peek token - + char[] tval = new char[128]; // text of current token int tlen; // length of current token - + static Scanner() { start = new Dictionary(128); for (int i = 65; i <= 90; ++i) start[i] = 1; for (int i = 95; i <= 95; ++i) start[i] = 1; for (int i = 97; i <= 122; ++i) start[i] = 1; for (int i = 48; i <= 57; ++i) start[i] = 2; - start[34] = 12; - start[39] = 5; - start[36] = 13; - start[61] = 16; - start[46] = 31; - start[43] = 17; - start[45] = 18; - start[60] = 32; - start[62] = 20; - start[124] = 23; - start[40] = 33; - start[41] = 24; - start[91] = 25; - start[93] = 26; - start[123] = 27; - start[125] = 28; + start[34] = 12; + start[39] = 5; + start[36] = 13; + start[61] = 16; + start[46] = 32; + start[43] = 17; + start[45] = 18; + start[58] = 20; + start[60] = 33; + start[62] = 21; + start[124] = 24; + start[40] = 34; + start[41] = 25; + start[91] = 26; + start[93] = 27; + start[123] = 28; + start[125] = 29; start[Buffer.EOF] = -1; } - + public Scanner (string fileName) { try { Stream stream = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.Read); @@ -285,12 +286,12 @@ public Scanner (string fileName) { throw new FatalError("Cannot open file " + fileName); } } - + public Scanner (Stream s) { buffer = new Buffer(s, true); Init(); } - + void Init() { pos = -1; line = 1; col = 0; charPos = -1; oldEols = 0; @@ -306,9 +307,9 @@ void Init() { } pt = tokens = new Token(); // first token is a dummy } - + void NextCh() { - if (oldEols > 
0) { ch = EOL; oldEols--; } + if (oldEols > 0) { ch = EOL; oldEols--; } else { pos = buffer.Pos; // buffer reads unicode chars, if UTF8 has been detected @@ -396,10 +397,10 @@ void CheckLiteral() { case "PRODUCTIONS": t.kind = 17; break; case "END": t.kind = 20; break; case "ANY": t.kind = 24; break; - case "WEAK": t.kind = 30; break; - case "SYNC": t.kind = 37; break; - case "IF": t.kind = 38; break; - case "CONTEXT": t.kind = 39; break; + case "WEAK": t.kind = 31; break; + case "SYNC": t.kind = 38; break; + case "IF": t.kind = 39; break; + case "CONTEXT": t.kind = 40; break; default: break; } } @@ -420,7 +421,7 @@ Token NextToken() { int state; state = start.ContainsKey(ch) ? start[ch] : 0; tlen = 0; AddCh(); - + switch (state) { case -1: { t.kind = eofSym; break; } // NextCh already done case 0: { @@ -459,13 +460,13 @@ Token NextToken() { case 9: {t.kind = 5 /* char */; break;} case 10: - recEnd = pos; recKind = 43 /* ddtSym */; + recEnd = pos; recKind = 44 /* ddtSym */; if (ch >= '0' && ch <= '9' || ch >= 'A' && ch <= 'Z' || ch == '_' || ch >= 'a' && ch <= 'z') {AddCh(); goto case 10;} - else {t.kind = 43 /* ddtSym */; break;} + else {t.kind = 44 /* ddtSym */; break;} case 11: - recEnd = pos; recKind = 44 /* optionSym */; + recEnd = pos; recKind = 45 /* optionSym */; if (ch >= '-' && ch <= '.' || ch >= '0' && ch <= ':' || ch >= 'A' && ch <= 'Z' || ch == '_' || ch >= 'a' && ch <= 'z') {AddCh(); goto case 11;} - else {t.kind = 44 /* optionSym */; break;} + else {t.kind = 45 /* optionSym */; break;} case 12: if (ch <= 9 || ch >= 11 && ch <= 12 || ch >= 14 && ch <= '!' 
|| ch >= '#' && ch <= '[' || ch >= ']' && ch <= 65535) {AddCh(); goto case 12;} else if (ch == 10 || ch == 13) {AddCh(); goto case 4;} @@ -473,19 +474,19 @@ Token NextToken() { else if (ch == 92) {AddCh(); goto case 14;} else {goto case 0;} case 13: - recEnd = pos; recKind = 43 /* ddtSym */; + recEnd = pos; recKind = 44 /* ddtSym */; if (ch >= '0' && ch <= '9') {AddCh(); goto case 10;} else if (ch >= 'A' && ch <= 'Z' || ch == '_' || ch >= 'a' && ch <= 'z') {AddCh(); goto case 15;} - else {t.kind = 43 /* ddtSym */; break;} + else {t.kind = 44 /* ddtSym */; break;} case 14: if (ch >= ' ' && ch <= '~') {AddCh(); goto case 12;} else {goto case 0;} case 15: - recEnd = pos; recKind = 43 /* ddtSym */; + recEnd = pos; recKind = 44 /* ddtSym */; if (ch >= '0' && ch <= '9') {AddCh(); goto case 10;} else if (ch >= 'A' && ch <= 'Z' || ch == '_' || ch >= 'a' && ch <= 'z') {AddCh(); goto case 15;} else if (ch == '=') {AddCh(); goto case 11;} - else {t.kind = 43 /* ddtSym */; break;} + else {t.kind = 44 /* ddtSym */; break;} case 16: {t.kind = 18 /* "=" */; break;} case 17: @@ -495,54 +496,56 @@ Token NextToken() { case 19: {t.kind = 23 /* ".." */; break;} case 20: - {t.kind = 26 /* ">" */; break;} + {t.kind = 25 /* ":" */; break;} case 21: - {t.kind = 27 /* "<." */; break;} + {t.kind = 27 /* ">" */; break;} case 22: - {t.kind = 28 /* ".>" */; break;} + {t.kind = 28 /* "<." */; break;} case 23: - {t.kind = 29 /* "|" */; break;} + {t.kind = 29 /* ".>" */; break;} case 24: - {t.kind = 32 /* ")" */; break;} + {t.kind = 30 /* "|" */; break;} case 25: - {t.kind = 33 /* "[" */; break;} + {t.kind = 33 /* ")" */; break;} case 26: - {t.kind = 34 /* "]" */; break;} + {t.kind = 34 /* "[" */; break;} case 27: - {t.kind = 35 /* "{" */; break;} + {t.kind = 35 /* "]" */; break;} case 28: - {t.kind = 36 /* "}" */; break;} + {t.kind = 36 /* "{" */; break;} case 29: - {t.kind = 40 /* "(." 
*/; break;} + {t.kind = 37 /* "}" */; break;} case 30: - {t.kind = 41 /* ".)" */; break;} + {t.kind = 41 /* "(." */; break;} case 31: + {t.kind = 42 /* ".)" */; break;} + case 32: recEnd = pos; recKind = 19 /* "." */; if (ch == '.') {AddCh(); goto case 19;} - else if (ch == '>') {AddCh(); goto case 22;} - else if (ch == ')') {AddCh(); goto case 30;} + else if (ch == '>') {AddCh(); goto case 23;} + else if (ch == ')') {AddCh(); goto case 31;} else {t.kind = 19 /* "." */; break;} - case 32: - recEnd = pos; recKind = 25 /* "<" */; - if (ch == '.') {AddCh(); goto case 21;} - else {t.kind = 25 /* "<" */; break;} case 33: - recEnd = pos; recKind = 31 /* "(" */; - if (ch == '.') {AddCh(); goto case 29;} - else {t.kind = 31 /* "(" */; break;} + recEnd = pos; recKind = 26 /* "<" */; + if (ch == '.') {AddCh(); goto case 22;} + else {t.kind = 26 /* "<" */; break;} + case 34: + recEnd = pos; recKind = 32 /* "(" */; + if (ch == '.') {AddCh(); goto case 30;} + else {t.kind = 32 /* "(" */; break;} } t.val = new String(tval, 0, tlen); return t; } - + private void SetScannerBehindT() { buffer.Pos = t.pos; NextCh(); line = t.line; col = t.col; charPos = t.charPos; for (int i = 0; i < tlen; i++) NextCh(); } - + // get the next token (possibly a token already seen during peeking) public Token Scan () { if (tokens.next == null) { @@ -561,7 +564,7 @@ public Token Peek () { } pt = pt.next; } while (pt.kind > maxT); // skip pragmas - + return pt; } diff --git a/src/Tab.cs b/src/Tab.cs index a2deef4..920411f 100644 --- a/src/Tab.cs +++ b/src/Tab.cs @@ -71,6 +71,7 @@ public class Symbol { public Position attrPos; // nt: position of attributes in source text (or null) public Position semPos; // pr: pos of semantic action in source text (or null) // nt: pos of local declarations in source text (or null) + public Symbol inherits; // optional, token from which this token derives public Symbol(int typ, string name, int line, int col) { this.typ = typ; this.name = name; this.line = line; this.col 
= col;