diff --git a/README.md b/README.md index 8071639..7bf6215 100644 --- a/README.md +++ b/README.md @@ -3,3 +3,23 @@ Coco/R is a compiler generator, which takes an attributed grammar of a source language and generates a scanner and a parser for this language. The scanner works as a deterministic finite automaton. The parser uses recursive descent. LL(1) conflicts can be resolved by a multi-symbol lookahead or by semantic checks. Thus the class of accepted grammars is LL(k) for an arbitrary k. http://ssw.jku.at/coco/ + +And these are my main modifications to the original: + +- Enhance left recursion detection + +- Allow semantic actions on `token declaration` similar to `pragmas` but the code executes on the Scanner + +- Allow up to 8 characters as comment delimiters + +- Add option `-genRREBNF` to generate an EBNF grammar to create railroad diagrams at https://www.bottlecaps.de/rr/ui + +- Add option `-genAST` to generate code to generate `parser syntax tree` based on https://github.com/rochus-keller/EbnfStudio + +- Add option `-ignoreErrors` to make it easier to develop grammars, like commenting one non terminal and still generating the parser and scanner even with several non reachable non terminals + +- Add a `TERMINALS` section to generate user defined tokens not managed by the Scanner (from cocoxml) + +- Generate between comments the corresponding representation of several magic numbers (mainly Tokens) + +See also https://github.com/mingodad/CocoR-CPP and https://github.com/mingodad/CocoR-CSharp diff --git a/src/Coco.atg b/src/Coco.atg index 1312f64..074587e 100644 --- a/src/Coco.atg +++ b/src/Coco.atg @@ -5,24 +5,24 @@ Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz extended by M. Loeberbauer & A. Woess, Univ. 
of Linz with improvements by Pat Terry, Rhodes University -This program is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 2, or (at your option) any +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any later version. -This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. -If not otherwise stated, any source code generated by Coco/R (other than +If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. 
-------------------------------------------------------------------------*/ /*------------------------------------------------------------------------- @@ -37,7 +37,7 @@ COMPILER Coco const int id = 0; const int str = 1; - + public TextWriter trace; // other Coco objects referenced in this ATG public Tab tab; public DFA dfa; @@ -46,6 +46,7 @@ COMPILER Coco bool genScanner; string tokenString; // used in declarations of literal tokens string noString = "-none-"; // used in declarations of literal tokens + string gramName; // grammar name /*-------------------------------------------------------------------------*/ @@ -85,25 +86,32 @@ IGNORE cr + lf + tab PRODUCTIONS -Coco (. Symbol sym; Graph g, g1, g2; string gramName; CharSet s; int beg, line; .) +Coco (. Symbol sym; Graph g, g1, g2; CharSet s; int beg, line; .) = [ // using statements ANY (. beg = t.pos; line = t.line; .) { ANY } (. pgen.usingPos = new Position(beg, la.pos, 0, line); .) ] - "COMPILER" (. genScanner = true; - tab.ignored = new CharSet(); .) + "COMPILER" (. genScanner = true; + tab.ignored = new CharSet(); .) ident (. gramName = t.val; beg = la.pos; line = la.line; .) { ANY } (. tab.semDeclPos = new Position(beg, la.pos, 0, line); .) [ "IGNORECASE" (. dfa.ignoreCase = true; .) ] /* pdt */ + [ "TERMINALS" { ident (.sym = tab.FindSym(t.val); + if (sym != null) SemErr("name declared twice"); + else { + sym = tab.NewSym(Node.t, t.val, t.line, t.col); + sym.tokenKind = Symbol.fixedToken; + }.) + } ] /*from cocoxml*/ [ "CHARACTERS" { SetDecl }] [ "TOKENS" { TokenDecl }] [ "PRAGMAS" { TokenDecl }] { "COMMENTS" (. bool nested = false; .) - "FROM" TokenExpr + "FROM" TokenExpr "TO" TokenExpr [ "NESTED" (. nested = true; .) ] (. dfa.NewComment(g1.l, g2.l, nested); .) @@ -111,13 +119,13 @@ Coco (. Symbol sym; Graph g, g1, g2; string gramName; { "IGNORE" Set (. tab.ignored.Or(s); .) } - SYNC + SYNC "PRODUCTIONS" (. if (genScanner) dfa.MakeDeterministic(); tab.DeleteNodes(); .) { ident (. 
sym = tab.FindSym(t.val); bool undef = sym == null; - if (undef) sym = tab.NewSym(Node.nt, t.val, t.line); + if (undef) sym = tab.NewSym(Node.nt, t.val, t.line, t.col); else { if (sym.typ == Node.nt) { if (sym.graph != null) SemErr("name declared twice"); @@ -149,7 +157,7 @@ Coco (. Symbol sym; Graph g, g1, g2; string gramName; if (sym.attrPos != null) SemErr("grammar symbol must not have attributes"); } - tab.noSym = tab.NewSym(Node.t, "???", 0); // noSym gets highest number + tab.noSym = tab.NewSym(Node.t, "???", 0, 0); // noSym gets highest number tab.SetupAnys(); tab.RenumberPragmas(); if (tab.ddt[2]) tab.PrintNodes(); @@ -157,7 +165,16 @@ Coco (. Symbol sym; Graph g, g1, g2; string gramName; Console.WriteLine("checking"); tab.CompSymbolSets(); if (tab.ddt[7]) tab.XRef(); - if (tab.GrammarOk()) { + bool doGenCode = false; + if(tab.ignoreErrors) { + doGenCode = true; + tab.GrammarCheckAll(); + } + else doGenCode = tab.GrammarOk(); + if(tab.genRREBNF && doGenCode) { + pgen.WriteRREBNF(); + } + if (doGenCode) { Console.Write("parser"); pgen.WriteParser(); if (genScanner) { @@ -205,8 +222,7 @@ SimSet (. int n1, n2; .) ( ident (. CharClass c = tab.FindCharClass(t.val); if (c == null) SemErr("undefined name"); else s.Or(c.set); .) -| string (. string name = t.val; - name = tab.Unescape(name.Substring(1, name.Length-2)); +| string (. string name = tab.Unstring(t.val); foreach (char ch in name) if (dfa.ignoreCase) s.Set(char.ToLower(ch)); else s.Set(ch); .) @@ -221,8 +237,7 @@ SimSet (. int n1, n2; .) Char = - char (. string name = t.val; n = 0; - name = tab.Unescape(name.Substring(1, name.Length-2)); + char (. string name = tab.Unstring(t.val); n = 0; if (name.Length == 1) n = name[0]; else SemErr("unacceptable character value"); if (dfa.ignoreCase && (char)n >= 'A' && (char)n <= 'Z') n += 32; @@ -231,16 +246,26 @@ Char /*------------------------------------------------------------------------------------*/ -TokenDecl (. string name; int kind; Symbol sym; Graph g; .) 
+TokenDecl (. string name; int kind; Symbol sym; Graph g; + string inheritsName; int inheritsKind; Symbol inheritsSym; + .) = Sym (. sym = tab.FindSym(name); if (sym != null) SemErr("name declared twice"); else { - sym = tab.NewSym(typ, name, t.line); + sym = tab.NewSym(typ, name, t.line, t.col); sym.tokenKind = Symbol.fixedToken; } tokenString = null; .) + [ ':' Sym + (. inheritsSym = tab.FindSym(inheritsName); + if (inheritsSym == null) SemErr(string.Format("token '{0}' can't inherit from '{1}', name not declared", sym.name, inheritsName)); + else if (inheritsSym == sym) SemErr(string.Format("token '{0}' must not inherit from self", sym.name)); + else if (inheritsSym.typ != typ) SemErr(string.Format("token '{0}' can't inherit from '{1}'", sym.name, inheritsSym.name)); + else sym.inherits = inheritsSym; + .) + ] SYNC ( '=' TokenExpr '.' (. if (kind == str) SemErr("a literal must not be declared with a structure"); tab.Finish(g); @@ -257,7 +282,7 @@ TokenDecl (. string name; int kind; Symbol sym; Graph g; . else dfa.MatchLiteral(sym.name, sym); .) ) - [ SemText (. if (typ != Node.pr) SemErr("semantic action not allowed here"); .) + [ SemText (. if (typ == Node.t) errors.Warning("Warning semantic action on token declarations require a custom Scanner"); .) //(. if (typ != Node.pr) SemErr("semantic action not allowed here"); .) ] . @@ -282,7 +307,7 @@ AttrDecl /*------------------------------------------------------------------------------------*/ Expression (. Graph g2; .) -= += Term (. bool first = true; .) { WEAK '|' @@ -296,7 +321,7 @@ Expression (. Graph g2; .) Term (. Graph g2; Node rslv = null; g = null; .) = -( [ (. rslv = tab.NewNode(Node.rslv, null, la.line); .) +( [ (. rslv = tab.NewNode(Node.rslv, null, la.line, la.col); .) Resolver (. g = new Graph(rslv); .) ] Factor (. if (rslv != null) tab.MakeSequence(g, g2); @@ -304,15 +329,15 @@ Term (. Graph g2; Node rslv = null; g = null; .) .) { Factor (. tab.MakeSequence(g, g2); .) } -| (. 
g = new Graph(tab.NewNode(Node.eps, null, 0)); .) +| (. g = new Graph(tab.NewNode(Node.eps, null, t.line, t.col)); .) ) (. if (g == null) // invalid start of Term - g = new Graph(tab.NewNode(Node.eps, null, 0)); + g = new Graph(tab.NewNode(Node.eps, null, t.line, t.col)); .) . /*------------------------------------------------------------------------------------*/ -Factor (. string name; int kind; Position pos; bool weak = false; +Factor (. string name; int kind; Position pos; bool weak = false; g = null; .) = @@ -324,9 +349,9 @@ Factor (. string name; int kind; Position pos; bool wea bool undef = sym == null; if (undef) { if (kind == id) - sym = tab.NewSym(Node.nt, name, 0); // forward nt - else if (genScanner) { - sym = tab.NewSym(Node.t, name, t.line); + sym = tab.NewSym(Node.nt, name, 0, 0); // forward nt + else if (genScanner) { + sym = tab.NewSym(Node.t, name, t.line, t.col); dfa.MatchLiteral(sym.name, sym); } else { // undefined string in production SemErr("undefined string in production"); @@ -339,7 +364,7 @@ Factor (. string name; int kind; Position pos; bool wea if (weak) if (typ == Node.t) typ = Node.wt; else SemErr("only terminals may be weak"); - Node p = tab.NewNode(typ, sym, t.line); + Node p = tab.NewNode(typ, sym, t.line, t.col); g = new Graph(p); .) [ Attribs

(. if (kind != id) SemErr("a literal must not have attributes"); .) @@ -351,24 +376,24 @@ Factor (. string name; int kind; Position pos; bool wea | '(' Expression ')' | '[' Expression ']' (. tab.MakeOption(g); .) | '{' Expression '}' (. tab.MakeIteration(g); .) -| SemText (. Node p = tab.NewNode(Node.sem, null, 0); +| SemText (. Node p = tab.NewNode(Node.sem, null, t.line, t.col); p.pos = pos; g = new Graph(p); .) -| "ANY" (. Node p = tab.NewNode(Node.any, null, 0); // p.set is set in tab.SetupAnys +| "ANY" (. Node p = tab.NewNode(Node.any, null, t.line, t.col); // p.set is set in tab.SetupAnys g = new Graph(p); .) -| "SYNC" (. Node p = tab.NewNode(Node.sync, null, 0); +| "SYNC" (. Node p = tab.NewNode(Node.sync, null, t.line, t.col); g = new Graph(p); .) ) (. if (g == null) // invalid start of Factor - g = new Graph(tab.NewNode(Node.eps, null, 0)); + g = new Graph(tab.NewNode(Node.eps, null, t.line, t.col)); .) . /*------------------------------------------------------------------------------------*/ -Resolver +Resolver = "IF" "(" (. int beg = la.pos; int col = la.col; int line = la.line; .) Condition (. pos = new Position(beg, t.pos, col, line); .) @@ -413,10 +438,10 @@ TokenFactor (. string name; int kind; .) ( Sym (. if (kind == id) { CharClass c = tab.FindCharClass(name); if (c == null) { - SemErr("undefined name"); + SemErr("undefined name: " + name); c = tab.NewCharClass(name, new CharSet()); } - Node p = tab.NewNode(Node.clas, null, 0); p.val = c.n; + Node p = tab.NewNode(Node.clas, null, t.line, t.col); p.val = c.n; g = new Graph(p); tokenString = noString; } else { // str @@ -429,7 +454,7 @@ TokenFactor (. string name; int kind; .) | '[' TokenExpr ']' (. tab.MakeOption(g); tokenString = noString; .) | '{' TokenExpr '}' (. tab.MakeIteration(g); tokenString = noString; .) ) (. if (g == null) // invalid start of TokenFactor - g = new Graph(tab.NewNode(Node.eps, null, 0)); .) + g = new Graph(tab.NewNode(Node.eps, null, t.line, t.col)); .) . 
/*------------------------------------------------------------------------------------*/ @@ -451,12 +476,12 @@ Sym Attribs = '<' (. int beg = la.pos; int col = la.col; int line = la.line; .) - { ANY + { ANY | badString (. SemErr("bad string in attributes"); .) } '>' (. if (t.pos > beg) p.pos = new Position(beg, t.pos, col, line); .) | "<." (. int beg = la.pos; int col = la.col; int line = la.line; .) - { ANY + { ANY | badString (. SemErr("bad string in attributes"); .) } ".>" (. if (t.pos > beg) p.pos = new Position(beg, t.pos, col, line); .) diff --git a/src/Coco.cs b/src/Coco.cs index 385c3c6..8f79651 100644 --- a/src/Coco.cs +++ b/src/Coco.cs @@ -4,24 +4,24 @@ extended by M. Loeberbauer & A. Woess, Univ. of Linz with improvements by Pat Terry, Rhodes University -This program is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 2, or (at your option) any +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any later version. -This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
-You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. -If not otherwise stated, any source code generated by Coco/R (other than +If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. -------------------------------------------------------------------------*/ /*------------------------------------------------------------------------- @@ -34,7 +34,7 @@ Trace output options 6 | S: prints the symbol table (terminals, nonterminals, pragmas) 7 | X: prints a cross reference list of all syntax symbols 8 | P: prints statistics about the Coco run - + Trace output can be switched on by the pragma $ { digit | letter } in the attributed grammar or as a command-line option @@ -46,12 +46,12 @@ in the attributed grammar or as a command-line option namespace at.jku.ssw.Coco { public class Coco { - + public static int Main (string[] arg) { Console.WriteLine("Coco/R (Apr 19, 2011)"); string srcName = null, nsName = null, frameDir = null, ddtString = null, traceFileName = null, outDir = null; - bool emitLines = false; + bool emitLines = false, ignoreErrors = false, genAST = false, genRREBNF = false; int retVal = 1; for (int i = 0; i < arg.Length; i++) { if (arg[i] == "-namespace" && i < arg.Length - 1) nsName = arg[++i].Trim(); @@ -59,18 +59,23 @@ public static int Main (string[] arg) { else if (arg[i] == "-trace" && i < arg.Length - 1) ddtString = arg[++i].Trim(); else if (arg[i] == "-o" && i < arg.Length - 1) outDir = arg[++i].Trim(); else if (arg[i] == "-lines") emitLines = true; + else if (arg[i] == 
"-genRREBNF") genRREBNF = true; + else if (arg[i] == "-genAST") genAST = true; + else if (arg[i] == "-ignoreErrors") ignoreErrors = true; else srcName = arg[i]; } if (arg.Length > 0 && srcName != null) { try { string srcDir = Path.GetDirectoryName(srcName); - + Scanner scanner = new Scanner(srcName); Parser parser = new Parser(scanner); traceFileName = Path.Combine(srcDir, "trace.txt"); parser.trace = new StreamWriter(new FileStream(traceFileName, FileMode.Create)); parser.tab = new Tab(parser); + if(genAST) parser.tab.genAST = true; + if(ignoreErrors) parser.tab.ignoreErrors = true; parser.dfa = new DFA(parser); parser.pgen = new ParserGen(parser); @@ -80,6 +85,9 @@ public static int Main (string[] arg) { parser.tab.frameDir = frameDir; parser.tab.outDir = (outDir != null) ? outDir : srcDir; parser.tab.emitLines = emitLines; + parser.tab.genRREBNF = genRREBNF; + parser.tab.genAST = genAST; + parser.tab.ignoreErrors = ignoreErrors; if (ddtString != null) parser.tab.SetDDT(ddtString); parser.Parse(); @@ -88,7 +96,7 @@ public static int Main (string[] arg) { FileInfo f = new FileInfo(traceFileName); if (f.Length == 0) f.Delete(); else Console.WriteLine("trace output is in " + traceFileName); - Console.WriteLine("{0} errors detected", parser.errors.count); + Console.WriteLine("{0} error(s) detected", parser.errors.count); if (parser.errors.count == 0) { retVal = 0; } } catch (IOException) { Console.WriteLine("-- could not open " + traceFileName); @@ -103,6 +111,9 @@ public static int Main (string[] arg) { + " -trace {0}" + " -o {0}" + " -lines{0}" + + " -genRREBNF{0}" + + " -genAST{0}" + + " -ignoreErrors ignore grammar errors for developing purposes{0}" + "Valid characters in the trace string:{0}" + " A trace automaton{0}" + " F list first/follow sets{0}" @@ -118,7 +129,7 @@ public static int Main (string[] arg) { } return retVal; } - + } // end Coco } // end namespace diff --git a/src/Copyright.frame b/src/Copyright.frame new file mode 100644 index 0000000..a792aa6 
--- /dev/null +++ b/src/Copyright.frame @@ -0,0 +1,26 @@ +/*---------------------------------------------------------------------- +Compiler Generator Coco/R, +Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz +extended by M. Loeberbauer & A. Woess, Univ. of Linz +with improvements by Pat Terry, Rhodes University + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +As an exception, it is allowed to write an extension of Coco/R that is +used as a plugin in non-free software. + +If not otherwise stated, any source code generated by Coco/R (other than +Coco/R itself) does not fall under the GNU General Public License. 
+-----------------------------------------------------------------------*/ diff --git a/src/DFA.cs b/src/DFA.cs index fa0fada..d7a5cd2 100644 --- a/src/DFA.cs +++ b/src/DFA.cs @@ -177,10 +177,30 @@ public class Range { public int from, to; public Range next; public Range(int from, int to) { this.from = from; this.to = to; } + + public override string ToString() { + if (from == to) + return from.ToString("X"); + if (from <= 256 && to <= 256) + return string.Format("{0:X2}-{1:X2}", from, to); + return string.Format("{0:X4}-{1:X4}", from, to); + } } public Range head; + public override string ToString() { + if (head == null) return "[]"; + StringBuilder sb = new StringBuilder(); + sb.Append('['); + for (Range cur = head; cur != null; cur = cur.next) { + if (cur != head) sb.Append('|'); + sb.Append(cur.ToString()); + } + sb.Append(']'); + return sb.ToString(); + } + public bool this[int i] { get { for (Range p = head; p != null; p = p.next) @@ -284,7 +304,7 @@ public void Fill() { //----------------------------------------------------------------------------- // Generator //----------------------------------------------------------------------------- -class Generator { +public class Generator { private const int EOF = -1; private FileStream fram; @@ -478,7 +498,7 @@ void DeleteRedundantStates() { for (State s1 = firstState.next; s1 != null; s1 = s1.next) // firstState cannot be final if (used[s1.nr] && s1.endOf != null && s1.firstAction == null && !s1.ctx) for (State s2 = s1.next; s2 != null; s2 = s2.next) - if (used[s2.nr] && s1.endOf == s2.endOf && s2.firstAction == null & !s2.ctx) { + if (used[s2.nr] && s1.endOf == s2.endOf && s2.firstAction == null && !s2.ctx) { used[s2.nr] = false; newState[s2.nr] = s1; } for (State state = firstState; state != null; state = state.next) @@ -831,8 +851,8 @@ string CommentStr(Node p) { } else parser.SemErr("comment delimiters may not be structured"); p = p.next; } - if (s.Length == 0 || s.Length > 2) { - parser.SemErr("comment 
delimiters must be 1 or 2 characters long"); + if (s.Length == 0 || s.Length > 8) { + parser.SemErr("comment delimiters must be between 1 to 8 characters long"); s = new StringBuilder("?"); } return s.ToString(); @@ -846,7 +866,13 @@ public void NewComment(Node from, Node to, bool nested) { //------------------------ scanner generation ---------------------- + void GenCommentIndented(int n, string s) { + for(int i= 1; i < n; ++i) gen.Write("\t"); + gen.Write(s); + } + void GenComBody(Comment com) { + int imax = com.stop.Length-1; /* last index of the STOP delimiter: imax bounds the com.stop[sidx] loop and the matching closing-brace loop below */ gen.WriteLine( "\t\t\tfor(;;) {"); gen.Write ( "\t\t\t\tif ({0}) ", ChCond(com.stop[0])); gen.WriteLine("{"); if (com.stop.Length == 1) { @@ -854,22 +880,31 @@ void GenComBody(Comment com) { gen.WriteLine("\t\t\t\t\tif (level == 0) { oldEols = line - line0; NextCh(); return true; }"); gen.WriteLine("\t\t\t\t\tNextCh();"); } else { - gen.WriteLine("\t\t\t\t\tNextCh();"); - gen.WriteLine("\t\t\t\t\tif ({0}) {{", ChCond(com.stop[1])); + for(int sidx = 1; sidx <= imax; ++sidx) { + gen.WriteLine("\t\t\t\t\tNextCh();"); + gen.WriteLine("\t\t\t\t\tif ({0}) {{", ChCond(com.stop[sidx])); + } gen.WriteLine("\t\t\t\t\t\tlevel--;"); - gen.WriteLine("\t\t\t\t\t\tif (level == 0) { oldEols = line - line0; NextCh(); return true; }"); + gen.WriteLine("\t\t\t\t\t\tif (level == 0) { /*oldEols = line - line0;*/ NextCh(); return true; }"); gen.WriteLine("\t\t\t\t\t\tNextCh();"); - gen.WriteLine("\t\t\t\t\t}"); + for(int sidx = imax; sidx > 0; --sidx) { + gen.WriteLine("\t\t\t\t\t}"); + } } if (com.nested) { gen.Write ("\t\t\t\t}"); gen.Write(" else if ({0}) ", ChCond(com.start[0])); gen.WriteLine("{"); if (com.start.Length == 1) gen.WriteLine("\t\t\t\t\tlevel++; NextCh();"); else { - gen.WriteLine("\t\t\t\t\tNextCh();"); - gen.Write ("\t\t\t\t\tif ({0}) ", ChCond(com.start[1])); gen.WriteLine("{"); + int imaxN = com.start.Length-1; + for(int sidx = 1; sidx <= imaxN; ++sidx) { + gen.WriteLine("\t\t\t\t\tNextCh();"); + gen.Write ("\t\t\t\t\tif ({0}) ", 
ChCond(com.start[sidx])); gen.WriteLine("{"); + } gen.WriteLine("\t\t\t\t\t\tlevel++; NextCh();"); - gen.WriteLine("\t\t\t\t\t}"); + for(int sidx = imaxN; sidx > 0; --sidx) { + gen.WriteLine("\t\t\t\t\t}"); + } } } gen.WriteLine( "\t\t\t\t} else if (ch == Buffer.EOF) return false;"); @@ -881,17 +916,20 @@ void GenComment(Comment com, int i) { gen.WriteLine(); gen.Write ("\tbool Comment{0}() ", i); gen.WriteLine("{"); gen.WriteLine("\t\tint level = 1, pos0 = pos, line0 = line, col0 = col, charPos0 = charPos;"); + gen.WriteLine("\t\tNextCh();"); if (com.start.Length == 1) { - gen.WriteLine("\t\tNextCh();"); GenComBody(com); } else { - gen.WriteLine("\t\tNextCh();"); - gen.Write ("\t\tif ({0}) ", ChCond(com.start[1])); gen.WriteLine("{"); - gen.WriteLine("\t\t\tNextCh();"); + int imax = com.start.Length-1; + for(int sidx = 1; sidx <= imax; ++sidx) { + gen.Write ("\t\tif ({0}) ", ChCond(com.start[sidx])); gen.WriteLine("{"); + gen.WriteLine("\t\t\tNextCh();"); + } GenComBody(com); - gen.WriteLine("\t\t} else {"); - gen.WriteLine("\t\t\tbuffer.Pos = pos0; NextCh(); line = line0; col = col0; charPos = charPos0;"); - gen.WriteLine("\t\t}"); + for(int sidx = imax; sidx > 0; --sidx) { + gen.WriteLine("\t\t}"); + } + gen.WriteLine("\t\tbuffer.Pos = pos0; NextCh(); line = line0; col = col0; charPos = charPos0;"); gen.WriteLine("\t\treturn false;"); } gen.WriteLine("\t}"); @@ -924,12 +962,12 @@ void GenLiterals () { gen.WriteLine("\t\t\tdefault: break;"); gen.Write("\t\t}"); } - + void WriteState(State state) { Symbol endOf = state.endOf; gen.WriteLine("\t\t\tcase {0}:", state.nr); if (endOf != null && state.firstAction != null) { - gen.WriteLine("\t\t\t\trecEnd = pos; recKind = {0};", endOf.n); + gen.WriteLine("\t\t\t\trecEnd = pos; recKind = {0} /* {1} */;", endOf.n, endOf.name); } bool ctxEnd = state.ctx; for (Action action = state.firstAction; action != null; action = action.next) { @@ -958,10 +996,15 @@ void WriteState(State state) { if (endOf == null) { 
gen.WriteLine("goto case 0;}"); } else { - gen.Write("t.kind = {0}; ", endOf.n); + gen.Write("t.kind = {0} /* {1} */; ", endOf.n, endOf.name); if (endOf.tokenKind == Symbol.classLitToken) { gen.WriteLine("t.val = new String(tval, 0, tlen); CheckLiteral(); return t;}"); } else { + if(endOf.semPos != null && endOf.typ == Node.t) { + gen.Write(" {"); + parser.pgen.CopySourcePart(parser, gen, endOf.semPos, 0); + gen.Write("};"); + } gen.WriteLine("break;}"); } } @@ -1023,11 +1066,11 @@ public void WriteScanner() { } g.CopyFramePart("-->literals"); GenLiterals(); g.CopyFramePart("-->scan1"); - gen.Write("\t\t\t"); + gen.Write("\t\t\t\t"); if (tab.ignored.Elements() > 0) { PutRange(tab.ignored); } else { gen.Write("false"); } g.CopyFramePart("-->scan2"); if (firstComment != null) { - gen.Write("\t\tif ("); + gen.Write("\t\t\tif ("); com = firstComment; comIdx = 0; while (com != null) { gen.Write(ChCond(com.start[0])); @@ -1035,8 +1078,9 @@ public void WriteScanner() { if (com.next != null) gen.Write(" ||"); com = com.next; comIdx++; } - gen.Write(") return NextToken();"); + gen.Write(") continue;"); } + g.CopyFramePart("-->scan22"); if (hasCtxMoves) { gen.WriteLine(); gen.Write("\t\tint apx = 0;"); } /* pdt */ g.CopyFramePart("-->scan3"); for (State state = firstState.next; state != null; state = state.next) diff --git a/src/Parser.cs b/src/Parser.cs index bf5713f..479c706 100644 --- a/src/Parser.cs +++ b/src/Parser.cs @@ -4,24 +4,24 @@ extended by M. Loeberbauer & A. Woess, Univ. of Linz with improvements by Pat Terry, Rhodes University -This program is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 2, or (at your option) any +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any later version. 
-This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. -If not otherwise stated, any source code generated by Coco/R (other than +If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. -----------------------------------------------------------------------*/ using System.IO; @@ -29,26 +29,142 @@ used as a plugin in non-free software. using System; +using System.Collections; namespace at.jku.ssw.Coco { +#if PARSER_WITH_AST +public class SynTree { + public SynTree(Token t ) { + tok = t; + children = new ArrayList(); + } + + public Token tok; + public ArrayList children; + + static void printIndent(int n) { + for(int i=0; i < n; ++i) Console.Write(" "); + } + + public void dump_all(int indent=0, bool isLast=false) { + int last_idx = children.Count; + if(tok.col > 0) { + printIndent(indent); + Console.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}", ((isLast || (last_idx == 0)) ? 
"= " : " "), tok.line, tok.col, tok.kind, tok.val); + } + else { + printIndent(indent); + Console.WriteLine("{0}\t{1}\t{2}\t{3}", children.Count, tok.line, tok.kind, tok.val); + } + if(last_idx > 0) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree)children[idx]).dump_all(indent+4, idx == last_idx); /* NOTE(review): idx == last_idx is never true inside this loop; presumably last_idx-1 was intended - confirm */ + } + } + + public void dump_pruned(int indent=0, bool isLast=false) { + int last_idx = children.Count; + int indentPlus = 4; + if(tok.col > 0) { + printIndent(indent); + Console.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}", ((isLast || (last_idx == 0)) ? "= " : " "), tok.line, tok.col, tok.kind, tok.val); + } + else { + if(last_idx == 1) { + if(((SynTree)children[0]).children.Count == 0) { /* only child has no children of its own: still print this node */ + printIndent(indent); + Console.WriteLine("{0}\t{1}\t{2}\t{3}", children.Count, tok.line, tok.kind, tok.val); + } + else indentPlus = 0; + } + else { + printIndent(indent); + Console.WriteLine("{0}\t{1}\t{2}\t{3}", children.Count, tok.line, tok.kind, tok.val); + } + } + if(last_idx > 0) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree)children[idx]).dump_pruned(indent+indentPlus, idx == last_idx); + } + } +}; +#endif + public class Parser { + //non terminals + public const int _NT_Coco = 0; + public const int _NT_SetDecl = 1; + public const int _NT_TokenDecl = 2; + public const int _NT_TokenExpr = 3; + public const int _NT_Set = 4; + public const int _NT_AttrDecl = 5; + public const int _NT_SemText = 6; + public const int _NT_Expression = 7; + public const int _NT_SimSet = 8; + public const int _NT_Char = 9; + public const int _NT_Sym = 10; + public const int _NT_Term = 11; + public const int _NT_Resolver = 12; + public const int _NT_Factor = 13; + public const int _NT_Attribs = 14; + public const int _NT_Condition = 15; + public const int _NT_TokenTerm = 16; + public const int _NT_TokenFactor = 17; + public const int maxNT = 17; + //terminals public const int _EOF = 0; public const int _ident = 1; public const int _number = 2; public const int _string = 3; public const int _badString = 
4; public const int _char = 5; - public const int maxT = 41; - public const int _ddtSym = 42; - public const int _optionSym = 43; +// public const int _("COMPILER") = 6; +// public const int _("IGNORECASE") = 7; +// public const int _("TERMINALS") = 8; +// public const int _("CHARACTERS") = 9; +// public const int _("TOKENS") = 10; +// public const int _("PRAGMAS") = 11; +// public const int _("COMMENTS") = 12; +// public const int _("FROM") = 13; +// public const int _("TO") = 14; +// public const int _("NESTED") = 15; +// public const int _("IGNORE") = 16; +// public const int _("PRODUCTIONS") = 17; +// public const int _("=") = 18; +// public const int _(".") = 19; +// public const int _("END") = 20; +// public const int _("+") = 21; +// public const int _("-") = 22; +// public const int _("..") = 23; +// public const int _("ANY") = 24; +// public const int _(":") = 25; +// public const int _("<") = 26; +// public const int _(">") = 27; +// public const int _("<.") = 28; +// public const int _(".>") = 29; +// public const int _("|") = 30; +// public const int _("WEAK") = 31; +// public const int _("(") = 32; +// public const int _(")") = 33; +// public const int _("[") = 34; +// public const int _("]") = 35; +// public const int _("{") = 36; +// public const int _("}") = 37; +// public const int _("SYNC") = 38; +// public const int _("IF") = 39; +// public const int _("CONTEXT") = 40; +// public const int _("(.") = 41; +// public const int _(".)") = 42; +// public const int _(???) 
= 43; + public const int maxT = 43; + public const int _ddtSym = 44; + public const int _optionSym = 45; const bool _T = true; const bool _x = false; const int minErrDist = 2; - + public Scanner scanner; public Errors errors; @@ -58,7 +174,7 @@ public class Parser { const int id = 0; const int str = 1; - + public TextWriter trace; // other Coco objects referenced in this ATG public Tab tab; public DFA dfa; @@ -67,6 +183,7 @@ public class Parser { bool genScanner; string tokenString; // used in declarations of literal tokens string noString = "-none-"; // used in declarations of literal tokens + string gramName; // grammar name /*-------------------------------------------------------------------------*/ @@ -86,33 +203,42 @@ public void SemErr (string msg) { if (errDist >= minErrDist) errors.SemErr(t.line, t.col, msg); errDist = 0; } - + void Get () { for (;;) { t = la; la = scanner.Scan(); if (la.kind <= maxT) { ++errDist; break; } - if (la.kind == 42) { - tab.SetDDT(la.val); - } - if (la.kind == 43) { - tab.SetOption(la.val); - } + if (la.kind == _ddtSym) { + tab.SetDDT(la.val); + } + if (la.kind == _optionSym) { + tab.SetOption(la.val); + } la = t; } } - + + bool isKind(Token t, int n) { + int k = t.kind; + while(k >= 0) { + if (k == n) return true; + k = tBase[k]; + } + return false; + } + void Expect (int n) { - if (la.kind==n) Get(); else { SynErr(n); } + if (isKind(la, n)) Get(); else { SynErr(n); } } - + bool StartOf (int s) { return set[s, la.kind]; } - + void ExpectWeak (int n, int follow) { - if (la.kind == n) Get(); + if (isKind(la, n)) Get(); else { SynErr(n); while (!StartOf(follow)) Get(); @@ -122,7 +248,7 @@ void ExpectWeak (int n, int follow) { bool WeakSeparator(int n, int syFol, int repFol) { int kind = la.kind; - if (kind == n) {Get(); return true;} + if (isKind(la, n)) {Get(); return true;} else if (StartOf(repFol)) {return false;} else { SynErr(n); @@ -134,78 +260,90 @@ bool WeakSeparator(int n, int syFol, int repFol) { } } - - void Coco() { - 
Symbol sym; Graph g, g1, g2; string gramName; CharSet s; int beg, line; - if (StartOf(1)) { + + void Coco_NT() { + Symbol sym; Graph g, g1, g2; CharSet s; int beg, line; + if (StartOf(1 /* any */)) { Get(); - beg = t.pos; line = t.line; - while (StartOf(1)) { + beg = t.pos; line = t.line; + while (StartOf(1 /* any */)) { Get(); } - pgen.usingPos = new Position(beg, la.pos, 0, line); + pgen.usingPos = new Position(beg, la.pos, 0, line); } - Expect(6); - genScanner = true; - tab.ignored = new CharSet(); - Expect(1); + Expect(6 /* "COMPILER" */); + genScanner = true; + tab.ignored = new CharSet(); + Expect(_ident); gramName = t.val; beg = la.pos; line = la.line; - - while (StartOf(2)) { + + while (StartOf(2 /* any */)) { + Get(); + } + tab.semDeclPos = new Position(beg, la.pos, 0, line); + if (isKind(la, 7 /* "IGNORECASE" */)) { Get(); + dfa.ignoreCase = true; } - tab.semDeclPos = new Position(beg, la.pos, 0, line); - if (la.kind == 7) { + if (isKind(la, 8 /* "TERMINALS" */)) { Get(); - dfa.ignoreCase = true; + while (isKind(la, _ident)) { + Get(); + sym = tab.FindSym(t.val); + if (sym != null) SemErr("name declared twice"); + else { + sym = tab.NewSym(Node.t, t.val, t.line, t.col); + sym.tokenKind = Symbol.fixedToken; + } + } } - if (la.kind == 8) { + if (isKind(la, 9 /* "CHARACTERS" */)) { Get(); - while (la.kind == 1) { - SetDecl(); + while (isKind(la, _ident)) { + SetDecl_NT(); } } - if (la.kind == 9) { + if (isKind(la, 10 /* "TOKENS" */)) { Get(); - while (la.kind == 1 || la.kind == 3 || la.kind == 5) { - TokenDecl(Node.t); + while (isKind(la, _ident) || isKind(la, _string) || isKind(la, _char)) { + TokenDecl_NT(Node.t); } } - if (la.kind == 10) { + if (isKind(la, 11 /* "PRAGMAS" */)) { Get(); - while (la.kind == 1 || la.kind == 3 || la.kind == 5) { - TokenDecl(Node.pr); + while (isKind(la, _ident) || isKind(la, _string) || isKind(la, _char)) { + TokenDecl_NT(Node.pr); } } - while (la.kind == 11) { + while (isKind(la, 12 /* "COMMENTS" */)) { Get(); - bool nested 
= false; - Expect(12); - TokenExpr(out g1); - Expect(13); - TokenExpr(out g2); - if (la.kind == 14) { + bool nested = false; + Expect(13 /* "FROM" */); + TokenExpr_NT(out g1); + Expect(14 /* "TO" */); + TokenExpr_NT(out g2); + if (isKind(la, 15 /* "NESTED" */)) { Get(); - nested = true; + nested = true; } - dfa.NewComment(g1.l, g2.l, nested); + dfa.NewComment(g1.l, g2.l, nested); } - while (la.kind == 15) { + while (isKind(la, 16 /* "IGNORE" */)) { Get(); - Set(out s); - tab.ignored.Or(s); + Set_NT(out s); + tab.ignored.Or(s); } - while (!(la.kind == 0 || la.kind == 16)) {SynErr(42); Get();} - Expect(16); + while (!(isKind(la, _EOF) || isKind(la, 17 /* "PRODUCTIONS" */))) {SynErr(44); Get();} + Expect(17 /* "PRODUCTIONS" */); if (genScanner) dfa.MakeDeterministic(); tab.DeleteNodes(); - - while (la.kind == 1) { + + while (isKind(la, _ident)) { Get(); sym = tab.FindSym(t.val); bool undef = sym == null; - if (undef) sym = tab.NewSym(Node.nt, t.val, t.line); + if (undef) sym = tab.NewSym(Node.nt, t.val, t.line, t.col); else { if (sym.typ == Node.nt) { if (sym.graph != null) SemErr("name declared twice"); @@ -214,26 +352,26 @@ void Coco() { } bool noAttrs = sym.attrPos == null; sym.attrPos = null; - - if (la.kind == 24 || la.kind == 26) { - AttrDecl(sym); + + if (isKind(la, 26 /* "<" */) || isKind(la, 28 /* "<." */)) { + AttrDecl_NT(sym); } if (!undef) if (noAttrs != (sym.attrPos == null)) SemErr("attribute mismatch between declaration and use of this symbol"); - - if (la.kind == 39) { - SemText(out sym.semPos); + + if (isKind(la, 41 /* "(." */)) { + SemText_NT(out sym.semPos); } - ExpectWeak(17, 3); - Expression(out g); + ExpectWeak(18 /* "=" */, 3); + Expression_NT(out g); sym.graph = g.l; tab.Finish(g); - - ExpectWeak(18, 4); + + ExpectWeak(19 /* "." 
*/, 4); } - Expect(19); - Expect(1); + Expect(20 /* "END" */); + Expect(_ident); if (gramName != t.val) SemErr("name does not match grammar name"); tab.gramSy = tab.FindSym(gramName); @@ -244,7 +382,7 @@ void Coco() { if (sym.attrPos != null) SemErr("grammar symbol must not have attributes"); } - tab.noSym = tab.NewSym(Node.t, "???", 0); // noSym gets highest number + tab.noSym = tab.NewSym(Node.t, "???", 0, 0); // noSym gets highest number tab.SetupAnys(); tab.RenumberPragmas(); if (tab.ddt[2]) tab.PrintNodes(); @@ -252,7 +390,16 @@ void Coco() { Console.WriteLine("checking"); tab.CompSymbolSets(); if (tab.ddt[7]) tab.XRef(); - if (tab.GrammarOk()) { + bool doGenCode = false; + if(tab.ignoreErrors) { + doGenCode = true; + tab.GrammarCheckAll(); + } + else doGenCode = tab.GrammarOk(); + if(tab.genRREBNF && doGenCode) { + pgen.WriteRREBNF(); + } + if (doGenCode) { Console.Write("parser"); pgen.WriteParser(); if (genScanner) { @@ -265,41 +412,53 @@ void Coco() { } } if (tab.ddt[6]) tab.PrintSymbolTable(); - - Expect(18); + + Expect(19 /* "." */); } - void SetDecl() { - CharSet s; - Expect(1); + void SetDecl_NT() { + CharSet s; + Expect(_ident); string name = t.val; CharClass c = tab.FindCharClass(name); if (c != null) SemErr("name declared twice"); - - Expect(17); - Set(out s); + + Expect(18 /* "=" */); + Set_NT(out s); if (s.Elements() == 0) SemErr("character set must not be empty"); tab.NewCharClass(name, s); - - Expect(18); + + Expect(19 /* "." 
*/); } - void TokenDecl(int typ) { - string name; int kind; Symbol sym; Graph g; - Sym(out name, out kind); + void TokenDecl_NT(int typ) { + string name; int kind; Symbol sym; Graph g; + string inheritsName; int inheritsKind; Symbol inheritsSym; + + Sym_NT(out name, out kind); sym = tab.FindSym(name); if (sym != null) SemErr("name declared twice"); else { - sym = tab.NewSym(typ, name, t.line); + sym = tab.NewSym(typ, name, t.line, t.col); sym.tokenKind = Symbol.fixedToken; } tokenString = null; - - while (!(StartOf(5))) {SynErr(43); Get();} - if (la.kind == 17) { + + if (isKind(la, 25 /* ":" */)) { Get(); - TokenExpr(out g); - Expect(18); + Sym_NT(out inheritsName, out inheritsKind); + inheritsSym = tab.FindSym(inheritsName); + if (inheritsSym == null) SemErr(string.Format("token '{0}' can't inherit from '{1}', name not declared", sym.name, inheritsName)); + else if (inheritsSym == sym) SemErr(string.Format("token '{0}' must not inherit from self", sym.name)); + else if (inheritsSym.typ != typ) SemErr(string.Format("token '{0}' can't inherit from '{1}'", sym.name, inheritsSym.name)); + else sym.inherits = inheritsSym; + + } + while (!(StartOf(5 /* sync */))) {SynErr(45); Get();} + if (isKind(la, 18 /* "=" */)) { + Get(); + TokenExpr_NT(out g); + Expect(19 /* "." */); if (kind == str) SemErr("a literal must not be declared with a structure"); tab.Finish(g); if (tokenString == null || tokenString.Equals(noString)) @@ -310,219 +469,217 @@ void TokenDecl(int typ) { tab.literals[tokenString] = sym; dfa.MatchLiteral(tokenString, sym); } - - } else if (StartOf(6)) { + + } else if (StartOf(6 /* sem */)) { if (kind == id) genScanner = false; else dfa.MatchLiteral(sym.name, sym); - - } else SynErr(44); - if (la.kind == 39) { - SemText(out sym.semPos); - if (typ != Node.pr) SemErr("semantic action not allowed here"); + + } else SynErr(46); + if (isKind(la, 41 /* "(." 
*/)) { + SemText_NT(out sym.semPos); + if (typ == Node.t) errors.Warning("Warning semantic action on token declarations require a custom Scanner"); } } - void TokenExpr(out Graph g) { - Graph g2; - TokenTerm(out g); - bool first = true; - while (WeakSeparator(28,7,8) ) { - TokenTerm(out g2); + void TokenExpr_NT(out Graph g) { + Graph g2; + TokenTerm_NT(out g); + bool first = true; + while (WeakSeparator(30 /* "|" */,7,8) ) { + TokenTerm_NT(out g2); if (first) { tab.MakeFirstAlt(g); first = false; } tab.MakeAlternative(g, g2); - + } } - void Set(out CharSet s) { - CharSet s2; - SimSet(out s); - while (la.kind == 20 || la.kind == 21) { - if (la.kind == 20) { + void Set_NT(out CharSet s) { + CharSet s2; + SimSet_NT(out s); + while (isKind(la, 21 /* "+" */) || isKind(la, 22 /* "-" */)) { + if (isKind(la, 21 /* "+" */)) { Get(); - SimSet(out s2); - s.Or(s2); + SimSet_NT(out s2); + s.Or(s2); } else { Get(); - SimSet(out s2); - s.Subtract(s2); + SimSet_NT(out s2); + s.Subtract(s2); } } } - void AttrDecl(Symbol sym) { - if (la.kind == 24) { + void AttrDecl_NT(Symbol sym) { + if (isKind(la, 26 /* "<" */)) { Get(); - int beg = la.pos; int col = la.col; int line = la.line; - while (StartOf(9)) { - if (StartOf(10)) { + int beg = la.pos; int col = la.col; int line = la.line; + while (StartOf(9 /* alt */)) { + if (StartOf(10 /* any */)) { Get(); } else { Get(); - SemErr("bad string in attributes"); + SemErr("bad string in attributes"); } } - Expect(25); + Expect(27 /* ">" */); if (t.pos > beg) - sym.attrPos = new Position(beg, t.pos, col, line); - } else if (la.kind == 26) { + sym.attrPos = new Position(beg, t.pos, col, line); + } else if (isKind(la, 28 /* "<." 
*/)) { Get(); - int beg = la.pos; int col = la.col; int line = la.line; - while (StartOf(11)) { - if (StartOf(12)) { + int beg = la.pos; int col = la.col; int line = la.line; + while (StartOf(11 /* alt */)) { + if (StartOf(12 /* any */)) { Get(); } else { Get(); - SemErr("bad string in attributes"); + SemErr("bad string in attributes"); } } - Expect(27); + Expect(29 /* ".>" */); if (t.pos > beg) - sym.attrPos = new Position(beg, t.pos, col, line); - } else SynErr(45); + sym.attrPos = new Position(beg, t.pos, col, line); + } else SynErr(47); } - void SemText(out Position pos) { - Expect(39); - int beg = la.pos; int col = la.col; int line = la.line; - while (StartOf(13)) { - if (StartOf(14)) { + void SemText_NT(out Position pos) { + Expect(41 /* "(." */); + int beg = la.pos; int col = la.col; int line = la.line; + while (StartOf(13 /* alt */)) { + if (StartOf(14 /* any */)) { Get(); - } else if (la.kind == 4) { + } else if (isKind(la, _badString)) { Get(); - SemErr("bad string in semantic action"); + SemErr("bad string in semantic action"); } else { Get(); - SemErr("missing end of previous semantic action"); + SemErr("missing end of previous semantic action"); } } - Expect(40); - pos = new Position(beg, t.pos, col, line); + Expect(42 /* ".)" */); + pos = new Position(beg, t.pos, col, line); } - void Expression(out Graph g) { - Graph g2; - Term(out g); - bool first = true; - while (WeakSeparator(28,15,16) ) { - Term(out g2); + void Expression_NT(out Graph g) { + Graph g2; + Term_NT(out g); + bool first = true; + while (WeakSeparator(30 /* "|" */,15,16) ) { + Term_NT(out g2); if (first) { tab.MakeFirstAlt(g); first = false; } tab.MakeAlternative(g, g2); - + } } - void SimSet(out CharSet s) { - int n1, n2; - s = new CharSet(); - if (la.kind == 1) { + void SimSet_NT(out CharSet s) { + int n1, n2; + s = new CharSet(); + if (isKind(la, _ident)) { Get(); CharClass c = tab.FindCharClass(t.val); if (c == null) SemErr("undefined name"); else s.Or(c.set); - - } else if (la.kind 
== 3) { + + } else if (isKind(la, _string)) { Get(); - string name = t.val; - name = tab.Unescape(name.Substring(1, name.Length-2)); + string name = tab.Unstring(t.val); foreach (char ch in name) if (dfa.ignoreCase) s.Set(char.ToLower(ch)); - else s.Set(ch); - } else if (la.kind == 5) { - Char(out n1); - s.Set(n1); - if (la.kind == 22) { + else s.Set(ch); + } else if (isKind(la, _char)) { + Char_NT(out n1); + s.Set(n1); + if (isKind(la, 23 /* ".." */)) { Get(); - Char(out n2); - for (int i = n1; i <= n2; i++) s.Set(i); + Char_NT(out n2); + for (int i = n1; i <= n2; i++) s.Set(i); } - } else if (la.kind == 23) { + } else if (isKind(la, 24 /* "ANY" */)) { Get(); - s = new CharSet(); s.Fill(); - } else SynErr(46); + s = new CharSet(); s.Fill(); + } else SynErr(48); } - void Char(out int n) { - Expect(5); - string name = t.val; n = 0; - name = tab.Unescape(name.Substring(1, name.Length-2)); + void Char_NT(out int n) { + Expect(_char); + string name = tab.Unstring(t.val); n = 0; if (name.Length == 1) n = name[0]; else SemErr("unacceptable character value"); if (dfa.ignoreCase && (char)n >= 'A' && (char)n <= 'Z') n += 32; - + } - void Sym(out string name, out int kind) { - name = "???"; kind = id; - if (la.kind == 1) { + void Sym_NT(out string name, out int kind) { + name = "???"; kind = id; + if (isKind(la, _ident)) { Get(); - kind = id; name = t.val; - } else if (la.kind == 3 || la.kind == 5) { - if (la.kind == 3) { + kind = id; name = t.val; + } else if (isKind(la, _string) || isKind(la, _char)) { + if (isKind(la, _string)) { Get(); - name = t.val; + name = t.val; } else { Get(); - name = "\"" + t.val.Substring(1, t.val.Length-2) + "\""; + name = "\"" + t.val.Substring(1, t.val.Length-2) + "\""; } kind = str; if (dfa.ignoreCase) name = name.ToLower(); if (name.IndexOf(' ') >= 0) - SemErr("literal tokens must not contain blanks"); - } else SynErr(47); + SemErr("literal tokens must not contain blanks"); + } else SynErr(49); } - void Term(out Graph g) { - Graph g2; Node 
rslv = null; g = null; - if (StartOf(17)) { - if (la.kind == 37) { - rslv = tab.NewNode(Node.rslv, null, la.line); - Resolver(out rslv.pos); - g = new Graph(rslv); + void Term_NT(out Graph g) { + Graph g2; Node rslv = null; g = null; + if (StartOf(17 /* opt */)) { + if (isKind(la, 39 /* "IF" */)) { + rslv = tab.NewNode(Node.rslv, null, la.line, la.col); + Resolver_NT(out rslv.pos); + g = new Graph(rslv); } - Factor(out g2); + Factor_NT(out g2); if (rslv != null) tab.MakeSequence(g, g2); else g = g2; - - while (StartOf(18)) { - Factor(out g2); - tab.MakeSequence(g, g2); + + while (StartOf(18 /* nt Factor */)) { + Factor_NT(out g2); + tab.MakeSequence(g, g2); } - } else if (StartOf(19)) { - g = new Graph(tab.NewNode(Node.eps, null, 0)); - } else SynErr(48); + } else if (StartOf(19 /* sem */)) { + g = new Graph(tab.NewNode(Node.eps, null, t.line, t.col)); + } else SynErr(50); if (g == null) // invalid start of Term - g = new Graph(tab.NewNode(Node.eps, null, 0)); - + g = new Graph(tab.NewNode(Node.eps, null, t.line, t.col)); + } - void Resolver(out Position pos) { - Expect(37); - Expect(30); - int beg = la.pos; int col = la.col; int line = la.line; - Condition(); - pos = new Position(beg, t.pos, col, line); + void Resolver_NT(out Position pos) { + Expect(39 /* "IF" */); + Expect(32 /* "(" */); + int beg = la.pos; int col = la.col; int line = la.line; + Condition_NT(); + pos = new Position(beg, t.pos, col, line); } - void Factor(out Graph g) { - string name; int kind; Position pos; bool weak = false; + void Factor_NT(out Graph g) { + string name; int kind; Position pos; bool weak = false; g = null; - + switch (la.kind) { - case 1: case 3: case 5: case 29: { - if (la.kind == 29) { + case _ident: case _string: case _char: case 31 /* "WEAK" */: { + if (isKind(la, 31 /* "WEAK" */)) { Get(); - weak = true; + weak = true; } - Sym(out name, out kind); + Sym_NT(out name, out kind); Symbol sym = tab.FindSym(name); if (sym == null && kind == str) sym = tab.literals[name] as 
Symbol; bool undef = sym == null; if (undef) { if (kind == id) - sym = tab.NewSym(Node.nt, name, 0); // forward nt - else if (genScanner) { - sym = tab.NewSym(Node.t, name, t.line); + sym = tab.NewSym(Node.nt, name, 0, 0); // forward nt + else if (genScanner) { + sym = tab.NewSym(Node.t, name, t.line, t.col); dfa.MatchLiteral(sym.name, sym); } else { // undefined string in production SemErr("undefined string in production"); @@ -535,140 +692,140 @@ void Factor(out Graph g) { if (weak) if (typ == Node.t) typ = Node.wt; else SemErr("only terminals may be weak"); - Node p = tab.NewNode(typ, sym, t.line); + Node p = tab.NewNode(typ, sym, t.line, t.col); g = new Graph(p); - - if (la.kind == 24 || la.kind == 26) { - Attribs(p); - if (kind != id) SemErr("a literal must not have attributes"); + + if (isKind(la, 26 /* "<" */) || isKind(la, 28 /* "<." */)) { + Attribs_NT(p); + if (kind != id) SemErr("a literal must not have attributes"); } if (undef) sym.attrPos = p.pos; // dummy else if ((p.pos == null) != (sym.attrPos == null)) SemErr("attribute mismatch between declaration and use of this symbol"); - + break; } - case 30: { + case 32 /* "(" */: { Get(); - Expression(out g); - Expect(31); + Expression_NT(out g); + Expect(33 /* ")" */); break; } - case 32: { + case 34 /* "[" */: { Get(); - Expression(out g); - Expect(33); - tab.MakeOption(g); + Expression_NT(out g); + Expect(35 /* "]" */); + tab.MakeOption(g); break; } - case 34: { + case 36 /* "{" */: { Get(); - Expression(out g); - Expect(35); - tab.MakeIteration(g); + Expression_NT(out g); + Expect(37 /* "}" */); + tab.MakeIteration(g); break; } - case 39: { - SemText(out pos); - Node p = tab.NewNode(Node.sem, null, 0); + case 41 /* "(." 
*/: { + SemText_NT(out pos); + Node p = tab.NewNode(Node.sem, null, t.line, t.col); p.pos = pos; g = new Graph(p); - + break; } - case 23: { + case 24 /* "ANY" */: { Get(); - Node p = tab.NewNode(Node.any, null, 0); // p.set is set in tab.SetupAnys + Node p = tab.NewNode(Node.any, null, t.line, t.col); // p.set is set in tab.SetupAnys g = new Graph(p); - + break; } - case 36: { + case 38 /* "SYNC" */: { Get(); - Node p = tab.NewNode(Node.sync, null, 0); + Node p = tab.NewNode(Node.sync, null, t.line, t.col); g = new Graph(p); - + break; } - default: SynErr(49); break; + default: SynErr(51); break; } if (g == null) // invalid start of Factor - g = new Graph(tab.NewNode(Node.eps, null, 0)); - + g = new Graph(tab.NewNode(Node.eps, null, t.line, t.col)); + } - void Attribs(Node p) { - if (la.kind == 24) { + void Attribs_NT(Node p) { + if (isKind(la, 26 /* "<" */)) { Get(); - int beg = la.pos; int col = la.col; int line = la.line; - while (StartOf(9)) { - if (StartOf(10)) { + int beg = la.pos; int col = la.col; int line = la.line; + while (StartOf(9 /* alt */)) { + if (StartOf(10 /* any */)) { Get(); } else { Get(); - SemErr("bad string in attributes"); + SemErr("bad string in attributes"); } } - Expect(25); - if (t.pos > beg) p.pos = new Position(beg, t.pos, col, line); - } else if (la.kind == 26) { + Expect(27 /* ">" */); + if (t.pos > beg) p.pos = new Position(beg, t.pos, col, line); + } else if (isKind(la, 28 /* "<." 
*/)) { Get(); - int beg = la.pos; int col = la.col; int line = la.line; - while (StartOf(11)) { - if (StartOf(12)) { + int beg = la.pos; int col = la.col; int line = la.line; + while (StartOf(11 /* alt */)) { + if (StartOf(12 /* any */)) { Get(); } else { Get(); - SemErr("bad string in attributes"); + SemErr("bad string in attributes"); } } - Expect(27); - if (t.pos > beg) p.pos = new Position(beg, t.pos, col, line); - } else SynErr(50); + Expect(29 /* ".>" */); + if (t.pos > beg) p.pos = new Position(beg, t.pos, col, line); + } else SynErr(52); } - void Condition() { - while (StartOf(20)) { - if (la.kind == 30) { + void Condition_NT() { + while (StartOf(20 /* alt */)) { + if (isKind(la, 32 /* "(" */)) { Get(); - Condition(); + Condition_NT(); } else { Get(); } } - Expect(31); + Expect(33 /* ")" */); } - void TokenTerm(out Graph g) { - Graph g2; - TokenFactor(out g); - while (StartOf(7)) { - TokenFactor(out g2); - tab.MakeSequence(g, g2); + void TokenTerm_NT(out Graph g) { + Graph g2; + TokenFactor_NT(out g); + while (StartOf(7 /* nt TokenFactor */)) { + TokenFactor_NT(out g2); + tab.MakeSequence(g, g2); } - if (la.kind == 38) { + if (isKind(la, 40 /* "CONTEXT" */)) { Get(); - Expect(30); - TokenExpr(out g2); + Expect(32 /* "(" */); + TokenExpr_NT(out g2); tab.SetContextTrans(g2.l); dfa.hasCtxMoves = true; - tab.MakeSequence(g, g2); - Expect(31); + tab.MakeSequence(g, g2); + Expect(33 /* ")" */); } } - void TokenFactor(out Graph g) { - string name; int kind; - g = null; - if (la.kind == 1 || la.kind == 3 || la.kind == 5) { - Sym(out name, out kind); + void TokenFactor_NT(out Graph g) { + string name; int kind; + g = null; + if (isKind(la, _ident) || isKind(la, _string) || isKind(la, _char)) { + Sym_NT(out name, out kind); if (kind == id) { CharClass c = tab.FindCharClass(name); if (c == null) { - SemErr("undefined name"); + SemErr("undefined name: " + name); c = tab.NewCharClass(name, new CharSet()); } - Node p = tab.NewNode(Node.clas, null, 0); p.val = c.n; + Node 
p = tab.NewNode(Node.clas, null, t.line, t.col); p.val = c.n; g = new Graph(p); tokenString = noString; } else { // str @@ -676,61 +833,95 @@ void TokenFactor(out Graph g) { if (tokenString == null) tokenString = name; else tokenString = noString; } - - } else if (la.kind == 30) { + + } else if (isKind(la, 32 /* "(" */)) { Get(); - TokenExpr(out g); - Expect(31); - } else if (la.kind == 32) { + TokenExpr_NT(out g); + Expect(33 /* ")" */); + } else if (isKind(la, 34 /* "[" */)) { Get(); - TokenExpr(out g); - Expect(33); - tab.MakeOption(g); tokenString = noString; - } else if (la.kind == 34) { + TokenExpr_NT(out g); + Expect(35 /* "]" */); + tab.MakeOption(g); tokenString = noString; + } else if (isKind(la, 36 /* "{" */)) { Get(); - TokenExpr(out g); - Expect(35); - tab.MakeIteration(g); tokenString = noString; - } else SynErr(51); + TokenExpr_NT(out g); + Expect(37 /* "}" */); + tab.MakeIteration(g); tokenString = noString; + } else SynErr(53); if (g == null) // invalid start of TokenFactor - g = new Graph(tab.NewNode(Node.eps, null, 0)); + g = new Graph(tab.NewNode(Node.eps, null, t.line, t.col)); } public void Parse() { la = new Token(); - la.val = ""; + la.val = ""; Get(); - Coco(); + Coco_NT(); Expect(0); } - + + // a token's base type + public static readonly int[] tBase = { + + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1,-1, + }; + static readonly bool[,] set = { - {_T,_T,_x,_T, _x,_T,_x,_x, _x,_x,_T,_T, _x,_x,_x,_T, _T,_T,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_T, _x,_x,_x}, - {_x,_T,_T,_T, _T,_T,_x,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x}, - {_x,_T,_T,_T, _T,_T,_T,_x, _x,_x,_x,_x, _T,_T,_T,_x, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x}, - {_T,_T,_x,_T, _x,_T,_x,_x, _x,_x,_T,_T, _x,_x,_x,_T, _T,_T,_T,_x, _x,_x,_x,_T, 
_x,_x,_x,_x, _T,_T,_T,_x, _T,_x,_T,_x, _T,_T,_x,_T, _x,_x,_x}, - {_T,_T,_x,_T, _x,_T,_x,_x, _x,_x,_T,_T, _x,_x,_x,_T, _T,_T,_x,_T, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_T, _x,_x,_x}, - {_T,_T,_x,_T, _x,_T,_x,_x, _x,_x,_T,_T, _x,_x,_x,_T, _T,_T,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_T, _x,_x,_x}, - {_x,_T,_x,_T, _x,_T,_x,_x, _x,_x,_T,_T, _x,_x,_x,_T, _T,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_T, _x,_x,_x}, - {_x,_T,_x,_T, _x,_T,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_T,_x, _T,_x,_T,_x, _x,_x,_x,_x, _x,_x,_x}, - {_x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_T, _x,_T,_T,_T, _T,_x,_T,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_T, _x,_T,_x,_T, _x,_x,_x,_x, _x,_x,_x}, - {_x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_x,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x}, - {_x,_T,_T,_T, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_x,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x}, - {_x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_x, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x}, - {_x,_T,_T,_T, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_x, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x}, - {_x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _x,_T,_x}, - {_x,_T,_T,_T, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_x, _x,_T,_x}, - {_x,_T,_x,_T, _x,_T,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_T,_x, _x,_x,_x,_T, _x,_x,_x,_x, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x,_T, _x,_x,_x}, - {_x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_T,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_T, _x,_T,_x,_T, _x,_x,_x,_x, _x,_x,_x}, - {_x,_T,_x,_T, _x,_T,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, 
_x,_x,_x,_x, _x,_x,_x,_T, _x,_x,_x,_x, _x,_T,_T,_x, _T,_x,_T,_x, _T,_T,_x,_T, _x,_x,_x}, - {_x,_T,_x,_T, _x,_T,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_T, _x,_x,_x,_x, _x,_T,_T,_x, _T,_x,_T,_x, _T,_x,_x,_T, _x,_x,_x}, - {_x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_T,_x, _x,_x,_x,_x, _x,_x,_x,_x, _T,_x,_x,_T, _x,_T,_x,_T, _x,_x,_x,_x, _x,_x,_x}, - {_x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_x, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x} + {_T,_T,_x,_T, _x,_T,_x,_x, _x,_x,_x,_T, _T,_x,_x,_x, _T,_T,_T,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_T,_x,_x, _x}, + {_x,_T,_T,_T, _T,_T,_x,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _x}, + {_x,_T,_T,_T, _T,_T,_T,_x, _x,_x,_x,_x, _x,_T,_T,_T, _x,_x,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _x}, + {_T,_T,_x,_T, _x,_T,_x,_x, _x,_x,_x,_T, _T,_x,_x,_x, _T,_T,_T,_T, _x,_x,_x,_x, _T,_x,_x,_x, _x,_x,_T,_T, _T,_x,_T,_x, _T,_x,_T,_T, _x,_T,_x,_x, _x}, + {_T,_T,_x,_T, _x,_T,_x,_x, _x,_x,_x,_T, _T,_x,_x,_x, _T,_T,_T,_x, _T,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_T,_x,_x, _x}, + {_T,_T,_x,_T, _x,_T,_x,_x, _x,_x,_x,_T, _T,_x,_x,_x, _T,_T,_T,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_T,_x,_x, _x}, + {_x,_T,_x,_T, _x,_T,_x,_x, _x,_x,_x,_T, _T,_x,_x,_x, _T,_T,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_T,_x,_x, _x}, + {_x,_T,_x,_T, _x,_T,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _T,_x,_T,_x, _T,_x,_x,_x, _x,_x,_x,_x, _x}, + {_x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _T,_x,_T,_T, _T,_T,_x,_T, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_T,_x,_T, _x,_T,_x,_x, _x,_x,_x,_x, _x}, + {_x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_x, _T,_T,_T,_T, _T,_T,_T,_T, 
_T,_T,_T,_T, _T,_T,_T,_T, _x}, + {_x,_T,_T,_T, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_x, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _x}, + {_x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_x,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _x}, + {_x,_T,_T,_T, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_x,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _x}, + {_x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_x,_T, _x}, + {_x,_T,_T,_T, _x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_x,_x,_T, _x}, + {_x,_T,_x,_T, _x,_T,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_T, _x,_x,_x,_x, _T,_x,_x,_x, _x,_x,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _x,_T,_x,_x, _x}, + {_x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_T, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_T,_x,_T, _x,_T,_x,_x, _x,_x,_x,_x, _x}, + {_x,_T,_x,_T, _x,_T,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _T,_x,_x,_x, _x,_x,_x,_T, _T,_x,_T,_x, _T,_x,_T,_T, _x,_T,_x,_x, _x}, + {_x,_T,_x,_T, _x,_T,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _T,_x,_x,_x, _x,_x,_x,_T, _T,_x,_T,_x, _T,_x,_T,_x, _x,_T,_x,_x, _x}, + {_x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_x,_T, _x,_x,_x,_x, _x,_x,_x,_x, _x,_x,_T,_x, _x,_T,_x,_T, _x,_T,_x,_x, _x,_x,_x,_x, _x}, + {_x,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _T,_x,_T,_T, _T,_T,_T,_T, _T,_T,_T,_T, _x} }; + +#if PARSER_WITH_AST + public SynTree ast_root; + Stack ast_stack; + + void AstAddTerminal() { + SynTree st = new SynTree( t ); + ((SynTree)(ast_stack.Peek())).children.Add(st); + } + + bool AstAddNonTerminal(int kind, string nt_name, int line) { + Token ntTok = new Token(); + ntTok.kind = kind; + 
ntTok.line = line; + ntTok.val = nt_name; + SynTree st = new SynTree( ntTok ); + ((SynTree)(ast_stack.Peek())).children.Add(st); + ast_stack.Push(st); + return true; + } + + void AstPopNonTerminal() { + ast_stack.Pop(); + } +#endif + } // end Parser @@ -750,50 +941,52 @@ public virtual void SynErr (int line, int col, int n) { case 5: s = "char expected"; break; case 6: s = "\"COMPILER\" expected"; break; case 7: s = "\"IGNORECASE\" expected"; break; - case 8: s = "\"CHARACTERS\" expected"; break; - case 9: s = "\"TOKENS\" expected"; break; - case 10: s = "\"PRAGMAS\" expected"; break; - case 11: s = "\"COMMENTS\" expected"; break; - case 12: s = "\"FROM\" expected"; break; - case 13: s = "\"TO\" expected"; break; - case 14: s = "\"NESTED\" expected"; break; - case 15: s = "\"IGNORE\" expected"; break; - case 16: s = "\"PRODUCTIONS\" expected"; break; - case 17: s = "\"=\" expected"; break; - case 18: s = "\".\" expected"; break; - case 19: s = "\"END\" expected"; break; - case 20: s = "\"+\" expected"; break; - case 21: s = "\"-\" expected"; break; - case 22: s = "\"..\" expected"; break; - case 23: s = "\"ANY\" expected"; break; - case 24: s = "\"<\" expected"; break; - case 25: s = "\">\" expected"; break; - case 26: s = "\"<.\" expected"; break; - case 27: s = "\".>\" expected"; break; - case 28: s = "\"|\" expected"; break; - case 29: s = "\"WEAK\" expected"; break; - case 30: s = "\"(\" expected"; break; - case 31: s = "\")\" expected"; break; - case 32: s = "\"[\" expected"; break; - case 33: s = "\"]\" expected"; break; - case 34: s = "\"{\" expected"; break; - case 35: s = "\"}\" expected"; break; - case 36: s = "\"SYNC\" expected"; break; - case 37: s = "\"IF\" expected"; break; - case 38: s = "\"CONTEXT\" expected"; break; - case 39: s = "\"(.\" expected"; break; - case 40: s = "\".)\" expected"; break; - case 41: s = "??? 
expected"; break; - case 42: s = "this symbol not expected in Coco"; break; - case 43: s = "this symbol not expected in TokenDecl"; break; - case 44: s = "invalid TokenDecl"; break; - case 45: s = "invalid AttrDecl"; break; - case 46: s = "invalid SimSet"; break; - case 47: s = "invalid Sym"; break; - case 48: s = "invalid Term"; break; - case 49: s = "invalid Factor"; break; - case 50: s = "invalid Attribs"; break; - case 51: s = "invalid TokenFactor"; break; + case 8: s = "\"TERMINALS\" expected"; break; + case 9: s = "\"CHARACTERS\" expected"; break; + case 10: s = "\"TOKENS\" expected"; break; + case 11: s = "\"PRAGMAS\" expected"; break; + case 12: s = "\"COMMENTS\" expected"; break; + case 13: s = "\"FROM\" expected"; break; + case 14: s = "\"TO\" expected"; break; + case 15: s = "\"NESTED\" expected"; break; + case 16: s = "\"IGNORE\" expected"; break; + case 17: s = "\"PRODUCTIONS\" expected"; break; + case 18: s = "\"=\" expected"; break; + case 19: s = "\".\" expected"; break; + case 20: s = "\"END\" expected"; break; + case 21: s = "\"+\" expected"; break; + case 22: s = "\"-\" expected"; break; + case 23: s = "\"..\" expected"; break; + case 24: s = "\"ANY\" expected"; break; + case 25: s = "\":\" expected"; break; + case 26: s = "\"<\" expected"; break; + case 27: s = "\">\" expected"; break; + case 28: s = "\"<.\" expected"; break; + case 29: s = "\".>\" expected"; break; + case 30: s = "\"|\" expected"; break; + case 31: s = "\"WEAK\" expected"; break; + case 32: s = "\"(\" expected"; break; + case 33: s = "\")\" expected"; break; + case 34: s = "\"[\" expected"; break; + case 35: s = "\"]\" expected"; break; + case 36: s = "\"{\" expected"; break; + case 37: s = "\"}\" expected"; break; + case 38: s = "\"SYNC\" expected"; break; + case 39: s = "\"IF\" expected"; break; + case 40: s = "\"CONTEXT\" expected"; break; + case 41: s = "\"(.\" expected"; break; + case 42: s = "\".)\" expected"; break; + case 43: s = "??? 
expected"; break; + case 44: s = "this symbol not expected in Coco"; break; + case 45: s = "this symbol not expected in TokenDecl"; break; + case 46: s = "invalid TokenDecl"; break; + case 47: s = "invalid AttrDecl"; break; + case 48: s = "invalid SimSet"; break; + case 49: s = "invalid Sym"; break; + case 50: s = "invalid Term"; break; + case 51: s = "invalid Factor"; break; + case 52: s = "invalid Attribs"; break; + case 53: s = "invalid TokenFactor"; break; default: s = "error " + n; break; } @@ -805,16 +998,16 @@ public virtual void SemErr (int line, int col, string s) { errorStream.WriteLine(errMsgFormat, line, col, s); count++; } - + public virtual void SemErr (string s) { errorStream.WriteLine(s); count++; } - + public virtual void Warning (int line, int col, string s) { errorStream.WriteLine(errMsgFormat, line, col, s); } - + public virtual void Warning(string s) { errorStream.WriteLine(s); } diff --git a/src/Parser.frame b/src/Parser.frame index 6878147..75c626c 100644 --- a/src/Parser.frame +++ b/src/Parser.frame @@ -4,37 +4,94 @@ Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz extended by M. Loeberbauer & A. Woess, Univ. of Linz with improvements by Pat Terry, Rhodes University -This program is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 2, or (at your option) any +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any later version. -This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. -If not otherwise stated, any source code generated by Coco/R (other than +If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. ----------------------------------------------------------------------*/ -->begin using System; +using System.Collections; -->namespace +#if PARSER_WITH_AST +public class SynTree { + public SynTree(Token t ) { + tok = t; + children = new ArrayList(); + } + + public Token tok; + public ArrayList children; + + static void printIndent(int n) { + for(int i=0; i < n; ++i) Console.Write(" "); + } + + public void dump_all(int indent=0, bool isLast=false) { + int last_idx = children.Count; + if(tok.col > 0) { + printIndent(indent); + Console.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}", ((isLast || (last_idx == 0)) ?
"= " : " "), tok.line, tok.col, tok.kind, tok.val); + } + else { + printIndent(indent); + Console.WriteLine("{0}\t{1}\t{2}\t{3}", children.Count, tok.line, tok.kind, tok.val); + } + if(last_idx > 0) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree)children[idx]).dump_all(indent+4, idx == last_idx - 1); + } + } + + public void dump_pruned(int indent=0, bool isLast=false) { + int last_idx = children.Count; + int indentPlus = 4; + if(tok.col > 0) { + printIndent(indent); + Console.WriteLine("{0}\t{1}\t{2}\t{3}\t{4}", ((isLast || (last_idx == 0)) ? "= " : " "), tok.line, tok.col, tok.kind, tok.val); + } + else { + if(last_idx == 1) { + if(((SynTree)children[0]).children.Count == 0) { + printIndent(indent); + Console.WriteLine("{0}\t{1}\t{2}\t{3}", children.Count, tok.line, tok.kind, tok.val); + } + else indentPlus = 0; + } + else { + printIndent(indent); + Console.WriteLine("{0}\t{1}\t{2}\t{3}", children.Count, tok.line, tok.kind, tok.val); + } + } + if(last_idx > 0) { + for(int idx=0; idx < last_idx; ++idx) ((SynTree)children[idx]).dump_pruned(indent+indentPlus, idx == last_idx - 1); + } + } +}; +#endif + public class Parser { -->constants const bool _T = true; const bool _x = false; const int minErrDist = 2; - + public Scanner scanner; public Errors errors; @@ -58,7 +115,7 @@ public class Parser { if (errDist >= minErrDist) errors.SemErr(t.line, t.col, msg); errDist = 0; } - + void Get () { for (;;) { t = la; @@ -68,17 +125,26 @@ public class Parser { la = t; } } - + + bool isKind(Token t, int n) { + int k = t.kind; + while(k >= 0) { + if (k == n) return true; + k = tBase[k]; + } + return false; + } + void Expect (int n) { - if (la.kind==n) Get(); else { SynErr(n); } + if (isKind(la, n)) Get(); else { SynErr(n); } } - + bool StartOf (int s) { return set[s, la.kind]; } - + void ExpectWeak (int n, int follow) { - if (la.kind == n) Get(); + if (isKind(la, n)) Get(); else { SynErr(n); while (!StartOf(follow)) Get(); @@ -88,7 +154,7 @@ public class Parser { bool
WeakSeparator(int n, int syFol, int repFol) { int kind = la.kind; - if (kind == n) {Get(); return true;} + if (isKind(la, n)) {Get(); return true;} else if (StartOf(repFol)) {return false;} else { SynErr(n); @@ -100,19 +166,50 @@ public class Parser { } } - + -->productions public void Parse() { la = new Token(); - la.val = ""; + la.val = ""; Get(); -->parseRoot } - + + // a token's base type + public static readonly int[] tBase = { +-->tbase + }; + static readonly bool[,] set = { -->initialization }; + +#if PARSER_WITH_AST + public SynTree ast_root; + Stack ast_stack; + + void AstAddTerminal() { + SynTree st = new SynTree( t ); + ((SynTree)(ast_stack.Peek())).children.Add(st); + } + + bool AstAddNonTerminal(int kind, string nt_name, int line) { + Token ntTok = new Token(); + ntTok.kind = kind; + ntTok.line = line; + ntTok.val = nt_name; + SynTree st = new SynTree( ntTok ); + ((SynTree)(ast_stack.Peek())).children.Add(st); + ast_stack.Push(st); + return true; + } + + void AstPopNonTerminal() { + ast_stack.Pop(); + } +#endif + } // end Parser @@ -135,16 +232,16 @@ public class Errors { errorStream.WriteLine(errMsgFormat, line, col, s); count++; } - + public virtual void SemErr (string s) { errorStream.WriteLine(s); count++; } - + public virtual void Warning (int line, int col, string s) { errorStream.WriteLine(errMsgFormat, line, col, s); } - + public virtual void Warning(string s) { errorStream.WriteLine(s); } diff --git a/src/ParserGen.cs b/src/ParserGen.cs index cfcb814..9088d3f 100644 --- a/src/ParserGen.cs +++ b/src/ParserGen.cs @@ -5,24 +5,24 @@ extended by M. Loeberbauer & A. Woess, Univ. 
of Linz with improvements by Pat Terry, Rhodes University -This program is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 2, or (at your option) any +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any later version. -This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. -If not otherwise stated, any source code generated by Coco/R (other than +If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. 
-------------------------------------------------------------------------*/ using System; @@ -42,7 +42,7 @@ public class ParserGen { const int tErr = 0; // error codes const int altErr = 1; const int syncErr = 2; - + public Position usingPos; // "using" definitions from the attributed grammar int errorNr; // highest parser error number @@ -51,17 +51,17 @@ public class ParserGen { StreamWriter gen; // generated parser source file StringWriter err; // generated parser error messages ArrayList symSet = new ArrayList(); - + Tab tab; // other Coco objects TextWriter trace; Errors errors; Buffer buffer; - + void Indent (int n) { for (int i = 1; i <= n; i++) gen.Write('\t'); } - - + + bool Overlaps(BitArray s1, BitArray s2) { int len = s1.Count; for (int i = 0; i < len; ++i) { @@ -71,7 +71,15 @@ bool Overlaps(BitArray s1, BitArray s2) { } return false; } - + + void WriteSymbolOrCode(Symbol sym) { + if (!Char.IsLetter(sym.name[0])) { + gen.Write("{0} /* {1} */", sym.n, sym.name); + } else { + gen.Write("_{0}", sym.name); + } + } + // use a switch if more than 5 alternatives and none starts with a resolver, and no LL1 warning bool UseSwitch (Node p) { BitArray s1, s2; @@ -106,7 +114,7 @@ void CopySourcePart (Position pos, int indent) { gen.WriteLine(); Indent(indent); if (ch == CR) ch = buffer.Read(); // skip CR if (ch == LF) ch = buffer.Read(); // skip LF - for (i = 1; i <= pos.col && (ch == ' ' || ch == '\t'); i++) { + for (i = 1; i <= pos.col && (ch == ' ' || ch == '\t'); i++) { // skip blanks at beginning of line ch = buffer.Read(); } @@ -120,27 +128,38 @@ void CopySourcePart (Position pos, int indent) { } } + /* TODO better interface for CopySourcePart */ + public void CopySourcePart (Parser parser, StreamWriter gen, Position pos, int indent) { + // Copy text described by pos from atg to gen + int oldPos = parser.pgen.buffer.Pos; // Pos is modified by CopySourcePart + StreamWriter prevGen = parser.pgen.gen; + parser.pgen.gen = gen; + parser.pgen.CopySourcePart(pos, 0); 
+ parser.pgen.gen = prevGen; + parser.pgen.buffer.Pos = oldPos; + } + void GenErrorMsg (int errTyp, Symbol sym) { errorNr++; err.Write("\t\t\tcase " + errorNr + ": s = \""); switch (errTyp) { - case tErr: + case tErr: if (sym.name[0] == '"') err.Write(tab.Escape(sym.name) + " expected"); - else err.Write(sym.name + " expected"); + else err.Write(sym.name + " expected"); break; case altErr: err.Write("invalid " + sym.name); break; case syncErr: err.Write("this symbol not expected in " + sym.name); break; } err.WriteLine("\"; break;"); } - + int NewCondSet (BitArray s) { for (int i = 1; i < symSet.Count; i++) // skip symSet[0] (reserved for union of SYNC sets) if (Sets.Equals(s, (BitArray)symSet[i])) return i; symSet.Add(s.Clone()); return symSet.Count - 1; } - + void GenCond (BitArray s, Node p) { if (p.typ == Node.rslv) CopySourcePart(p.pos, 0); else { @@ -149,21 +168,28 @@ void GenCond (BitArray s, Node p) { else if (n <= maxTerm) foreach (Symbol sym in tab.terminals) { if (s[sym.n]) { - gen.Write("la.kind == {0}", sym.n); + gen.Write("isKind(la, "); + WriteSymbolOrCode(sym); + gen.Write(")"); --n; if (n > 0) gen.Write(" || "); } } else - gen.Write("StartOf({0})", NewCondSet(s)); + gen.Write("StartOf({0} /* {1} {2} */)", NewCondSet(s), + tab.nTyp[p.typ], p.typ == Node.nt ? 
p.sym.name : ""); } } - + void PutCaseLabels (BitArray s) { foreach (Symbol sym in tab.terminals) - if (s[sym.n]) gen.Write("case {0}: ", sym.n); + if (s[sym.n]) { + gen.Write("case "); + WriteSymbolOrCode(sym); + gen.Write(": "); + } } - + void GenCode (Node p, int indent, BitArray isChecked) { Node p2; BitArray s1, s2; @@ -171,7 +197,7 @@ void GenCode (Node p, int indent, BitArray isChecked) { switch (p.typ) { case Node.nt: { Indent(indent); - gen.Write(p.sym.name + "("); + gen.Write(p.sym.name + "_NT("); CopySourcePart(p.pos, 0); gen.WriteLine(");"); break; @@ -179,15 +205,28 @@ void GenCode (Node p, int indent, BitArray isChecked) { case Node.t: { Indent(indent); // assert: if isChecked[p.sym.n] is true, then isChecked contains only p.sym.n - if (isChecked[p.sym.n]) gen.WriteLine("Get();"); - else gen.WriteLine("Expect({0});", p.sym.n); + if (isChecked[p.sym.n]) { + gen.WriteLine("Get();"); + } + else { + gen.Write("Expect("); + WriteSymbolOrCode(p.sym); + gen.WriteLine(");"); + } + if(tab.genAST) { + gen.WriteLine("#if PARSER_WITH_AST"); + gen.WriteLine("\tAstAddTerminal();"); + gen.WriteLine("#endif"); + } break; } case Node.wt: { Indent(indent); s1 = tab.Expected(p.next, curSy); s1.Or(tab.allSyncSets); - gen.WriteLine("ExpectWeak({0}, {1});", p.sym.n, NewCondSet(s1)); + gen.Write("ExpectWeak("); + WriteSymbolOrCode(p.sym); + gen.WriteLine(", {0});", NewCondSet(s1)); break; } case Node.any: { @@ -227,13 +266,13 @@ void GenCode (Node p, int indent, BitArray isChecked) { while (p2 != null) { s1 = tab.Expected(p2.sub, curSy); Indent(indent); - if (useSwitch) { + if (useSwitch) { PutCaseLabels(s1); gen.WriteLine("{"); - } else if (p2 == p) { - gen.Write("if ("); GenCond(s1, p2.sub); gen.WriteLine(") {"); + } else if (p2 == p) { + gen.Write("if ("); GenCond(s1, p2.sub); gen.WriteLine(") {"); } else if (p2.down == null && equal) { gen.WriteLine("} else {"); - } else { - gen.Write("} else if ("); GenCond(s1, p2.sub); gen.WriteLine(") {"); + } else { + gen.Write("} 
else if ("); GenCond(s1, p2.sub); gen.WriteLine(") {"); } GenCode(p2.sub, indent + 1, s1); if (useSwitch) { @@ -263,7 +302,9 @@ void GenCode (Node p, int indent, BitArray isChecked) { if (p2.typ == Node.wt) { s1 = tab.Expected(p2.next, curSy); s2 = tab.Expected(p.next, curSy); - gen.Write("WeakSeparator({0},{1},{2}) ", p2.sym.n, NewCondSet(s1), NewCondSet(s2)); + gen.Write("WeakSeparator("); + WriteSymbolOrCode(p2.sym); + gen.Write(",{0},{1}) ", NewCondSet(s1), NewCondSet(s2)); s1 = new BitArray(tab.terminals.Count); // for inner structure if (p2.up || p2.next == null) p2 = null; else p2 = p2.next; } else { @@ -283,20 +324,42 @@ void GenCode (Node p, int indent, BitArray isChecked) { Indent(indent); gen.WriteLine("}"); break; } - if (p.typ != Node.eps && p.typ != Node.sem && p.typ != Node.sync) + if (p.typ != Node.eps && p.typ != Node.sem && p.typ != Node.sync) isChecked.SetAll(false); // = new BitArray(tab.terminals.Count); if (p.up) break; p = p.next; } } - + + void GenTokenBase() { + int idx = 0; + foreach (Symbol sym in tab.terminals) { + if((idx++ % 20) == 0) gen.Write("\n\t\t"); + if (sym.inherits == null) + gen.Write("{0,2},", -1); // not inherited + else + gen.Write("{0,2},", sym.inherits.n); + } + } + void GenTokens() { + gen.WriteLine("\t//non terminals"); + foreach (Symbol sym in tab.nonterminals) { + gen.WriteLine("\tpublic const int _NT_{0} = {1};", sym.name, sym.n); + } + gen.WriteLine("\tpublic const int maxNT = {0};", tab.nonterminals.Count-1); + gen.WriteLine("\t//terminals"); foreach (Symbol sym in tab.terminals) { if (Char.IsLetter(sym.name[0])) - gen.WriteLine("\tpublic const int _{0} = {1};", sym.name, sym.n); + gen.Write("\tpublic const int _{0} = {1};", sym.name, sym.n); + else + gen.Write("//\tpublic const int _({0}) = {1};", sym.name, sym.n); + if(sym.inherits != null) + gen.Write(" // INHERITS -> {0}", sym.inherits.name); + gen.WriteLine(); } } - + void GenPragmas() { foreach (Symbol sym in tab.pragmas) { gen.WriteLine("\tpublic const int 
_{0} = {1};", sym.name, sym.n); @@ -305,21 +368,37 @@ void GenPragmas() { void GenCodePragmas() { foreach (Symbol sym in tab.pragmas) { - gen.WriteLine("\t\t\t\tif (la.kind == {0}) {{", sym.n); + gen.Write("\t\t\tif (la.kind == "); + WriteSymbolOrCode(sym); + gen.WriteLine(") {"); CopySourcePart(sym.semPos, 4); - gen.WriteLine("\t\t\t\t}"); + gen.WriteLine("\t\t\t}"); } } void GenProductions() { + int idx = 0; foreach (Symbol sym in tab.nonterminals) { curSy = sym; - gen.Write("\tvoid {0}(", sym.name); + gen.Write("\tvoid {0}_NT(", sym.name); CopySourcePart(sym.attrPos, 0); gen.WriteLine(") {"); CopySourcePart(sym.semPos, 2); + if(tab.genAST) { + gen.WriteLine("#if PARSER_WITH_AST"); + if(idx == 0) gen.WriteLine("\tToken rt = new Token(); rt.kind = _NT_{0}; rt.val = \"{0}\";ast_root = new SynTree( rt ); ast_stack = new Stack(); ast_stack.Push(ast_root);", sym.name); + else gen.WriteLine("\tbool ntAdded = AstAddNonTerminal(_NT_{0}, \"{0}\", la.line);", sym.name); + gen.WriteLine("#endif"); + } GenCode(sym.graph, 2, new BitArray(tab.terminals.Count)); + if(tab.genAST) { + gen.WriteLine("#if PARSER_WITH_AST"); + if(idx == 0) gen.WriteLine("\tAstPopNonTerminal();"); + else gen.WriteLine("\tif(ntAdded) AstPopNonTerminal();"); + gen.WriteLine("#endif"); + } gen.WriteLine("\t}"); gen.WriteLine(); + ++idx; } } @@ -346,7 +425,7 @@ public void WriteParser () { gen = g.OpenGen("Parser.cs"); err = new StringWriter(); foreach (Symbol sym in tab.terminals) GenErrorMsg(tErr, sym); - + g.GenCopyright(); g.SkipFramePart("-->begin"); @@ -364,7 +443,8 @@ public void WriteParser () { g.CopyFramePart("-->declarations"); CopySourcePart(tab.semDeclPos, 0); g.CopyFramePart("-->pragmas"); GenCodePragmas(); g.CopyFramePart("-->productions"); GenProductions(); - g.CopyFramePart("-->parseRoot"); gen.WriteLine("\t\t{0}();", tab.gramSy.name); if (tab.checkEOF) gen.WriteLine("\t\tExpect(0);"); + g.CopyFramePart("-->parseRoot"); gen.WriteLine("\t\t{0}_NT();", tab.gramSy.name); if (tab.checkEOF) 
gen.WriteLine("\t\tExpect(0);"); + g.CopyFramePart("-->tbase"); GenTokenBase(); // write all tokens base types g.CopyFramePart("-->initialization"); InitSets(); g.CopyFramePart("-->errors"); gen.Write(err.ToString()); g.CopyFramePart(null); @@ -373,7 +453,92 @@ public void WriteParser () { gen.Close(); buffer.Pos = oldPos; } - + + public int GenCodeRREBNF (Node p) { + int rc = 0; + Node p2; + while (p != null) { + switch (p.typ) { + case Node.nt: + case Node.t: { + gen.Write(p.sym.name); + gen.Write(" "); + ++rc; + break; + } + case Node.wt: { + break; + } + case Node.any: { + gen.Write("ANY "); + break; + } + case Node.eps: break; // nothing + case Node.rslv: break; // nothing + case Node.sem: { + break; + } + case Node.sync: { + break; + } + case Node.alt: { + gen.Write("( "); + p2 = p; + while (p2 != null) { + rc += GenCodeRREBNF(p2.sub); + p2 = p2.down; + if(p2 != null) gen.Write("| "); + } + gen.Write(") "); + break; + } + case Node.iter: { + gen.Write("( "); + rc += GenCodeRREBNF(p.sub); + gen.Write(")* "); + break; + } + case Node.opt: + gen.Write("( "); + rc += GenCodeRREBNF(p.sub); + gen.Write(")? 
"); + break; + } + if (p.up) break; + p = p.next; + } + return rc; + } + + public void WriteRREBNF () { + Generator g = new Generator(tab); + gen = g.OpenGen("Parser.ebnf"); + + gen.Write("//\n// EBNF generated by CocoR parser generator to be viewed with https://www.bottlecaps.de/rr/ui\n//\n"); + gen.Write("\n//\n// productions\n//\n\n"); + foreach (Symbol sym in tab.nonterminals) { + gen.Write("{0} ::= ", sym.name); + if(GenCodeRREBNF(sym.graph) == 0) { + gen.Write("\"??()??\""); + } + gen.Write("\n"); + } + gen.Write("\n//\n// tokens\n//\n\n"); + foreach (Symbol sym in tab.terminals) { + if (Char.IsLetter(sym.name[0])) { // real name value is stored in Tab.literals + foreach (DictionaryEntry e in tab.literals) { + if ((Symbol)e.Value == sym) { + gen.Write("{0} ::= {1}\n", sym.name, (string)e.Key); + break; + } + } + } else { + //gen.Write("{0} /* {1} */", sym.n, sym.name); + } + } + gen.Close(); + } + public void WriteStatistics () { trace.WriteLine(); trace.WriteLine("{0} terminals", tab.terminals.Count); diff --git a/src/Scanner.cs b/src/Scanner.cs index 0443bb9..9dbb51d 100644 --- a/src/Scanner.cs +++ b/src/Scanner.cs @@ -4,24 +4,24 @@ extended by M. Loeberbauer & A. Woess, Univ. of Linz with improvements by Pat Terry, Rhodes University -This program is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 2, or (at your option) any +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any later version. -This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. -If not otherwise stated, any source code generated by Coco/R (other than +If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. -----------------------------------------------------------------------*/ @@ -61,10 +61,10 @@ public class Buffer { int bufPos; // current position in buffer Stream stream; // input stream (seekable) bool isUserStream; // was the stream opened by the user? 
- + public Buffer (Stream s, bool isUserStream) { stream = s; this.isUserStream = isUserStream; - + if (stream.CanSeek) { fileLen = (int) stream.Length; bufLen = Math.Min(fileLen, MAX_BUFFER_LENGTH); @@ -78,7 +78,7 @@ public Buffer (Stream s, bool isUserStream) { else bufPos = 0; // index 0 is already after the file, thus Pos = 0 is invalid if (bufLen == fileLen && stream.CanSeek) Close(); } - + protected Buffer(Buffer b) { // called in UTF8Buffer constructor buf = b.buf; bufStart = b.bufStart; @@ -92,14 +92,14 @@ protected Buffer(Buffer b) { // called in UTF8Buffer constructor } ~Buffer() { Close(); } - + protected void Close() { if (!isUserStream && stream != null) { stream.Close(); stream = null; } } - + public virtual int Read () { if (bufPos < bufLen) { return buf[bufPos++]; @@ -119,7 +119,7 @@ public int Peek () { Pos = curPos; return ch; } - + // beg .. begin, zero-based, inclusive, in byte // end .. end, zero-based, exclusive, in byte public string GetString (int beg, int end) { @@ -159,7 +159,7 @@ public int Pos { } } } - + // Read the next chunk of bytes from the stream, increases the buffer // if needed and updates the fields fileLen and bufLen. // Returns the number of bytes read. 
@@ -229,12 +229,12 @@ public override int Read() { public class Scanner { const char EOL = '\n'; const int eofSym = 0; /* pdt */ - const int maxT = 41; - const int noSym = 41; + const int maxT = 43; + const int noSym = 43; public Buffer buffer; // scanner buffer - + Token t; // current token int ch; // current input character int pos; // byte position of current character @@ -246,36 +246,37 @@ public class Scanner { Token tokens; // list of tokens already peeked (first token is a dummy) Token pt; // current peek token - + char[] tval = new char[128]; // text of current token int tlen; // length of current token - + static Scanner() { start = new Dictionary(128); for (int i = 65; i <= 90; ++i) start[i] = 1; for (int i = 95; i <= 95; ++i) start[i] = 1; for (int i = 97; i <= 122; ++i) start[i] = 1; for (int i = 48; i <= 57; ++i) start[i] = 2; - start[34] = 12; - start[39] = 5; - start[36] = 13; - start[61] = 16; - start[46] = 31; - start[43] = 17; - start[45] = 18; - start[60] = 32; - start[62] = 20; - start[124] = 23; - start[40] = 33; - start[41] = 24; - start[91] = 25; - start[93] = 26; - start[123] = 27; - start[125] = 28; + start[34] = 12; + start[39] = 5; + start[36] = 13; + start[61] = 16; + start[46] = 32; + start[43] = 17; + start[45] = 18; + start[58] = 20; + start[60] = 33; + start[62] = 21; + start[124] = 24; + start[40] = 34; + start[41] = 25; + start[91] = 26; + start[93] = 27; + start[123] = 28; + start[125] = 29; start[Buffer.EOF] = -1; } - + public Scanner (string fileName) { try { Stream stream = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.Read); @@ -285,12 +286,12 @@ public Scanner (string fileName) { throw new FatalError("Cannot open file " + fileName); } } - + public Scanner (Stream s) { buffer = new Buffer(s, true); Init(); } - + void Init() { pos = -1; line = 1; col = 0; charPos = -1; oldEols = 0; @@ -306,9 +307,9 @@ void Init() { } pt = tokens = new Token(); // first token is a dummy } - + void NextCh() { - if (oldEols > 
0) { ch = EOL; oldEols--; } + if (oldEols > 0) { ch = EOL; oldEols--; } else { pos = buffer.Pos; // buffer reads unicode chars, if UTF8 has been detected @@ -348,9 +349,8 @@ bool Comment0() { } else if (ch == Buffer.EOF) return false; else NextCh(); } - } else { - buffer.Pos = pos0; NextCh(); line = line0; col = col0; charPos = charPos0; } + buffer.Pos = pos0; NextCh(); line = line0; col = col0; charPos = charPos0; return false; } @@ -364,7 +364,7 @@ bool Comment1() { NextCh(); if (ch == '/') { level--; - if (level == 0) { oldEols = line - line0; NextCh(); return true; } + if (level == 0) { /*oldEols = line - line0;*/ NextCh(); return true; } NextCh(); } } else if (ch == '/') { @@ -375,9 +375,8 @@ bool Comment1() { } else if (ch == Buffer.EOF) return false; else NextCh(); } - } else { - buffer.Pos = pos0; NextCh(); line = line0; col = col0; charPos = charPos0; } + buffer.Pos = pos0; NextCh(); line = line0; col = col0; charPos = charPos0; return false; } @@ -386,30 +385,35 @@ void CheckLiteral() { switch (t.val) { case "COMPILER": t.kind = 6; break; case "IGNORECASE": t.kind = 7; break; - case "CHARACTERS": t.kind = 8; break; - case "TOKENS": t.kind = 9; break; - case "PRAGMAS": t.kind = 10; break; - case "COMMENTS": t.kind = 11; break; - case "FROM": t.kind = 12; break; - case "TO": t.kind = 13; break; - case "NESTED": t.kind = 14; break; - case "IGNORE": t.kind = 15; break; - case "PRODUCTIONS": t.kind = 16; break; - case "END": t.kind = 19; break; - case "ANY": t.kind = 23; break; - case "WEAK": t.kind = 29; break; - case "SYNC": t.kind = 36; break; - case "IF": t.kind = 37; break; - case "CONTEXT": t.kind = 38; break; + case "TERMINALS": t.kind = 8; break; + case "CHARACTERS": t.kind = 9; break; + case "TOKENS": t.kind = 10; break; + case "PRAGMAS": t.kind = 11; break; + case "COMMENTS": t.kind = 12; break; + case "FROM": t.kind = 13; break; + case "TO": t.kind = 14; break; + case "NESTED": t.kind = 15; break; + case "IGNORE": t.kind = 16; break; + case 
"PRODUCTIONS": t.kind = 17; break; + case "END": t.kind = 20; break; + case "ANY": t.kind = 24; break; + case "WEAK": t.kind = 31; break; + case "SYNC": t.kind = 38; break; + case "IF": t.kind = 39; break; + case "CONTEXT": t.kind = 40; break; default: break; } } Token NextToken() { - while (ch == ' ' || - ch >= 9 && ch <= 10 || ch == 13 - ) NextCh(); - if (ch == '/' && Comment0() ||ch == '/' && Comment1()) return NextToken(); + for(;;) { + while (ch == ' ' || + ch >= 9 && ch <= 10 || ch == 13 + ) NextCh(); + if (ch == '/' && Comment0() ||ch == '/' && Comment1()) continue; + break; + } + int recKind = noSym; int recEnd = pos; t = new Token(); @@ -417,7 +421,7 @@ Token NextToken() { int state; state = start.ContainsKey(ch) ? start[ch] : 0; tlen = 0; AddCh(); - + switch (state) { case -1: { t.kind = eofSym; break; } // NextCh already done case 0: { @@ -428,17 +432,17 @@ Token NextToken() { t.kind = recKind; break; } // NextCh already done case 1: - recEnd = pos; recKind = 1; + recEnd = pos; recKind = 1 /* ident */; if (ch >= '0' && ch <= '9' || ch >= 'A' && ch <= 'Z' || ch == '_' || ch >= 'a' && ch <= 'z') {AddCh(); goto case 1;} - else {t.kind = 1; t.val = new String(tval, 0, tlen); CheckLiteral(); return t;} + else {t.kind = 1 /* ident */; t.val = new String(tval, 0, tlen); CheckLiteral(); return t;} case 2: - recEnd = pos; recKind = 2; + recEnd = pos; recKind = 2 /* number */; if (ch >= '0' && ch <= '9') {AddCh(); goto case 2;} - else {t.kind = 2; break;} + else {t.kind = 2 /* number */; break;} case 3: - {t.kind = 3; break;} + {t.kind = 3 /* string */; break;} case 4: - {t.kind = 4; break;} + {t.kind = 4 /* badString */; break;} case 5: if (ch <= 9 || ch >= 11 && ch <= 12 || ch >= 14 && ch <= '&' || ch >= '(' && ch <= '[' || ch >= ']' && ch <= 65535) {AddCh(); goto case 6;} else if (ch == 92) {AddCh(); goto case 7;} @@ -454,15 +458,15 @@ Token NextToken() { else if (ch == 39) {AddCh(); goto case 9;} else {goto case 0;} case 9: - {t.kind = 5; break;} + {t.kind = 5 
/* char */; break;} case 10: - recEnd = pos; recKind = 42; + recEnd = pos; recKind = 44 /* ddtSym */; if (ch >= '0' && ch <= '9' || ch >= 'A' && ch <= 'Z' || ch == '_' || ch >= 'a' && ch <= 'z') {AddCh(); goto case 10;} - else {t.kind = 42; break;} + else {t.kind = 44 /* ddtSym */; break;} case 11: - recEnd = pos; recKind = 43; + recEnd = pos; recKind = 45 /* optionSym */; if (ch >= '-' && ch <= '.' || ch >= '0' && ch <= ':' || ch >= 'A' && ch <= 'Z' || ch == '_' || ch >= 'a' && ch <= 'z') {AddCh(); goto case 11;} - else {t.kind = 43; break;} + else {t.kind = 45 /* optionSym */; break;} case 12: if (ch <= 9 || ch >= 11 && ch <= 12 || ch >= 14 && ch <= '!' || ch >= '#' && ch <= '[' || ch >= ']' && ch <= 65535) {AddCh(); goto case 12;} else if (ch == 10 || ch == 13) {AddCh(); goto case 4;} @@ -470,76 +474,78 @@ Token NextToken() { else if (ch == 92) {AddCh(); goto case 14;} else {goto case 0;} case 13: - recEnd = pos; recKind = 42; + recEnd = pos; recKind = 44 /* ddtSym */; if (ch >= '0' && ch <= '9') {AddCh(); goto case 10;} else if (ch >= 'A' && ch <= 'Z' || ch == '_' || ch >= 'a' && ch <= 'z') {AddCh(); goto case 15;} - else {t.kind = 42; break;} + else {t.kind = 44 /* ddtSym */; break;} case 14: if (ch >= ' ' && ch <= '~') {AddCh(); goto case 12;} else {goto case 0;} case 15: - recEnd = pos; recKind = 42; + recEnd = pos; recKind = 44 /* ddtSym */; if (ch >= '0' && ch <= '9') {AddCh(); goto case 10;} else if (ch >= 'A' && ch <= 'Z' || ch == '_' || ch >= 'a' && ch <= 'z') {AddCh(); goto case 15;} else if (ch == '=') {AddCh(); goto case 11;} - else {t.kind = 42; break;} + else {t.kind = 44 /* ddtSym */; break;} case 16: - {t.kind = 17; break;} + {t.kind = 18 /* "=" */; break;} case 17: - {t.kind = 20; break;} + {t.kind = 21 /* "+" */; break;} case 18: - {t.kind = 21; break;} + {t.kind = 22 /* "-" */; break;} case 19: - {t.kind = 22; break;} + {t.kind = 23 /* ".." 
*/; break;} case 20: - {t.kind = 25; break;} + {t.kind = 25 /* ":" */; break;} case 21: - {t.kind = 26; break;} + {t.kind = 27 /* ">" */; break;} case 22: - {t.kind = 27; break;} + {t.kind = 28 /* "<." */; break;} case 23: - {t.kind = 28; break;} + {t.kind = 29 /* ".>" */; break;} case 24: - {t.kind = 31; break;} + {t.kind = 30 /* "|" */; break;} case 25: - {t.kind = 32; break;} + {t.kind = 33 /* ")" */; break;} case 26: - {t.kind = 33; break;} + {t.kind = 34 /* "[" */; break;} case 27: - {t.kind = 34; break;} + {t.kind = 35 /* "]" */; break;} case 28: - {t.kind = 35; break;} + {t.kind = 36 /* "{" */; break;} case 29: - {t.kind = 39; break;} + {t.kind = 37 /* "}" */; break;} case 30: - {t.kind = 40; break;} + {t.kind = 41 /* "(." */; break;} case 31: - recEnd = pos; recKind = 18; - if (ch == '.') {AddCh(); goto case 19;} - else if (ch == '>') {AddCh(); goto case 22;} - else if (ch == ')') {AddCh(); goto case 30;} - else {t.kind = 18; break;} + {t.kind = 42 /* ".)" */; break;} case 32: - recEnd = pos; recKind = 24; - if (ch == '.') {AddCh(); goto case 21;} - else {t.kind = 24; break;} + recEnd = pos; recKind = 19 /* "." */; + if (ch == '.') {AddCh(); goto case 19;} + else if (ch == '>') {AddCh(); goto case 23;} + else if (ch == ')') {AddCh(); goto case 31;} + else {t.kind = 19 /* "." 
*/; break;} case 33: - recEnd = pos; recKind = 30; - if (ch == '.') {AddCh(); goto case 29;} - else {t.kind = 30; break;} + recEnd = pos; recKind = 26 /* "<" */; + if (ch == '.') {AddCh(); goto case 22;} + else {t.kind = 26 /* "<" */; break;} + case 34: + recEnd = pos; recKind = 32 /* "(" */; + if (ch == '.') {AddCh(); goto case 30;} + else {t.kind = 32 /* "(" */; break;} } t.val = new String(tval, 0, tlen); return t; } - + private void SetScannerBehindT() { buffer.Pos = t.pos; NextCh(); line = t.line; col = t.col; charPos = t.charPos; for (int i = 0; i < tlen; i++) NextCh(); } - + // get the next token (possibly a token already seen during peeking) public Token Scan () { if (tokens.next == null) { @@ -558,7 +564,7 @@ public Token Peek () { } pt = pt.next; } while (pt.kind > maxT); // skip pragmas - + return pt; } diff --git a/src/Scanner.frame b/src/Scanner.frame index eb9b177..6780f2c 100644 --- a/src/Scanner.frame +++ b/src/Scanner.frame @@ -318,10 +318,14 @@ public class Scanner { } Token NextToken() { - while (ch == ' ' || + for(;;) { + while (ch == ' ' || -->scan1 - ) NextCh(); + ) NextCh(); -->scan2 + break; + } +-->scan22 int recKind = noSym; int recEnd = pos; t = new Token(); diff --git a/src/Tab.cs b/src/Tab.cs index 220417f..920411f 100644 --- a/src/Tab.cs +++ b/src/Tab.cs @@ -5,24 +5,24 @@ extended by M. Loeberbauer & A. Woess, Univ. of Linz with improvements by Pat Terry, Rhodes University -This program is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 2, or (at your option) any +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any later version. 
-This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. As an exception, it is allowed to write an extension of Coco/R that is used as a plugin in non-free software. -If not otherwise stated, any source code generated by Coco/R (other than +If not otherwise stated, any source code generated by Coco/R (other than Coco/R itself) does not fall under the GNU General Public License. -------------------------------------------------------------------------*/ using System; @@ -37,7 +37,7 @@ public class Position { // position of source code stretch (e.g. semantic actio public readonly int end; // end of stretch public readonly int col; // column number of start position public readonly int line; // line number of start position - + public Position(int beg, int end, int col, int line) { this.beg = beg; this.end = end; this.col = col; this.line = line; } @@ -47,15 +47,15 @@ public Position(int beg, int end, int col, int line) { //===================================================================== // Symbol //===================================================================== - + public class Symbol { - + // token kinds public const int fixedToken = 0; // e.g. 
'a' ('b' | 'c') (structure of literals) public const int classToken = 1; // e.g. digit {digit} (at least one char class) public const int litToken = 2; // e.g. "while" public const int classLitToken = 3; // e.g. letter {letter} but without literals that have the same structure - + public int n; // symbol number public int typ; // t, nt, pr, unknown, rslv /* ML 29_11_2002 slv added */ /* AW slv --> rslv */ public string name; // symbol name @@ -67,12 +67,14 @@ public class Symbol { public BitArray follow; // nt: terminal followers public BitArray nts; // nt: nonterminals whose followers have to be added to this sym public int line; // source text line number of item in this node + public int col; // source text line column number of item in this node public Position attrPos; // nt: position of attributes in source text (or null) public Position semPos; // pr: pos of semantic action in source text (or null) // nt: pos of local declarations in source text (or null) + public Symbol inherits; // optional, token from which this token derives - public Symbol(int typ, string name, int line) { - this.typ = typ; this.name = name; this.line = line; + public Symbol(int typ, string name, int line, int col) { + this.typ = typ; this.name = name; this.line = line; this.col = col; } } @@ -89,7 +91,7 @@ public class Node { public const int clas = 4; // character class public const int chr = 5; // character public const int wt = 6; // weak terminal symbol - public const int any = 7; // + public const int any = 7; // public const int eps = 8; // empty public const int sync = 9; // synchronization symbol public const int sem = 10; // semantic action: (. .) 
@@ -97,10 +99,10 @@ public class Node { public const int iter = 12; // iteration: { } public const int opt = 13; // option: [ ] public const int rslv = 14; // resolver expr - + public const int normalTrans = 0; // transition codes public const int contextTrans = 1; - + public int n; // node number public int typ; // t, nt, wt, chr, clas, any, eps, sem, sync, alt, iter, opt, rslv public Node next; // to successor node @@ -113,44 +115,45 @@ public class Node { public int code; // chr, clas: transition code public BitArray set; // any, sync: the set represented by this node public Position pos; // nt, t, wt: pos of actual attributes - // sem: pos of semantic action in source text - // rslv: pos of resolver in source text - public int line; // source text line number of item in this node - public State state; // DFA state corresponding to this node + // sem: pos of semantic action in source text + // rslv: pos of resolver in source text + public int line; // source text line number of item in this node + public int col; // source text line column number of item in this node + public State state; // DFA state corresponding to this node // (only used in DFA.ConvertToStates) - public Node(int typ, Symbol sym, int line) { - this.typ = typ; this.sym = sym; this.line = line; + public Node(int typ, Symbol sym, int line, int col) { + this.typ = typ; this.sym = sym; this.line = line; this.col = col; } } //===================================================================== -// Graph +// Graph //===================================================================== -public class Graph { +public class Graph { public Node l; // left end of graph = head public Node r; // right end of graph = list of nodes to be linked to successor graph - + public Graph() { l = null; r = null; } - + public Graph(Node left, Node right) { l = left; r = right; } - + public Graph(Node p) { l = p; r = p; } } //===================================================================== -// Sets +// Sets 
//===================================================================== public class Sets { - + public static int Elements(BitArray s) { int max = s.Count; int n = 0; @@ -158,33 +161,33 @@ public static int Elements(BitArray s) { if (s[i]) n++; return n; } - + public static bool Equals(BitArray a, BitArray b) { int max = a.Count; for (int i=0; i= 127 || ch == '\'' || ch == '\\') return ch.ToString(); else return String.Format("'{0}'", (char)ch); } - + void WriteCharSet(CharSet s) { for (CharSet.Range r = s.head; r != null; r = r.next) if (r.from < r.to) { trace.Write(Ch(r.from) + ".." + Ch(r.to) + " "); } else { trace.Write(Ch(r.from) + " "); } } - + public void WriteCharClasses () { foreach (CharClass c in classes) { trace.Write("{0,-10}: ", c.name); @@ -554,7 +564,7 @@ public void WriteCharClasses () { } trace.WriteLine(); } - + //--------------------------------------------------------------------- // Symbol set computations @@ -592,11 +602,11 @@ BitArray First0(Node p, BitArray mark) { } return fs; } - + public BitArray First(Node p) { BitArray fs = First0(p, new BitArray(nodes.Count)); if (ddt[3]) { - trace.WriteLine(); + trace.WriteLine(); if (p != null) trace.WriteLine("First: node = {0}", p.n); else trace.WriteLine("First: node = null"); PrintSet(fs, 0); @@ -614,7 +624,7 @@ void CompFirstSets() { sym.firstReady = true; } } - + void CompFollow(Node p) { while (p != null && !visited[p.n]) { visited[p.n] = true; @@ -631,7 +641,7 @@ void CompFollow(Node p) { p = p.next; } } - + void Complete(Symbol sym) { if (!visited[sym.n]) { visited[sym.n] = true; @@ -644,7 +654,7 @@ void Complete(Symbol sym) { } } } - + void CompFollowSets() { foreach (Symbol sym in nonterminals) { sym.follow = new BitArray(terminals.Count); @@ -662,7 +672,7 @@ void CompFollowSets() { Complete(sym); } } - + Node LeadingAny(Node p) { if (p == null) return null; Node a = null; @@ -675,7 +685,7 @@ Node LeadingAny(Node p) { if (a == null && DelNode(p) && !p.up) a = LeadingAny(p.next); return a; 
} - + void FindAS(Node p) { // find ANY sets Node a; while (p != null) { @@ -713,11 +723,11 @@ void FindAS(Node p) { // find ANY sets p = p.next; } } - + void CompAnySets() { foreach (Symbol sym in nonterminals) FindAS(sym.graph); } - + public BitArray Expected(Node p, Symbol curSy) { BitArray s = First(p); if (DelGraph(p)) s.Or(curSy.follow); @@ -745,7 +755,7 @@ void CompSync(Node p) { p = p.next; } } - + void CompSyncSets() { allSyncSets = new BitArray(terminals.Count); allSyncSets[eofSy.n] = true; @@ -755,7 +765,7 @@ void CompSyncSets() { CompSync(curSy.graph); } } - + public void SetupAnys() { foreach (Node p in nodes) if (p.typ == Node.any) { @@ -763,7 +773,7 @@ public void SetupAnys() { p.set[eofSy.n] = false; } } - + public void CompDeletableSymbols() { bool changed; do { @@ -776,7 +786,7 @@ public void CompDeletableSymbols() { foreach (Symbol sym in nonterminals) if (sym.deletable) errors.Warning(" " + sym.name + " deletable"); } - + public void RenumberPragmas() { int n = terminals.Count; foreach (Symbol sym in pragmas) sym.n = n++; @@ -805,16 +815,16 @@ public void CompSymbolSets() { trace.WriteLine("-----------------"); foreach (Node p in nodes) if (p.typ == Node.any || p.typ == Node.sync) { - trace.Write("{0,4} {1,4}: ", p.n, nTyp[p.typ]); + trace.Write("Line: {0, 4} Node: {1,4} {2,4}: ", p.line, p.n, nTyp[p.typ]); PrintSet(p.set, 11); } } } - + //--------------------------------------------------------------------- // String handling //--------------------------------------------------------------------- - + char Hex2Char(string s) { int val = 0; for (int i = 0; i < s.Length; i++) { @@ -835,6 +845,11 @@ string Char2Hex(char ch) { return w.ToString(); } + public string Unstring(string s) { + if (s == null || s.Length < 2) return s; + return Unescape(s.Substring(1, s.Length - 2)); + } + public string Unescape (string s) { /* replaces escape sequences in s by their Unicode values. 
*/ StringBuilder buf = new StringBuilder(); @@ -869,6 +884,11 @@ public string Unescape (string s) { return buf.ToString(); } + public string Quoted (string s) { + if (s == null) return "null"; + return string.Concat("\"", Escape(s), "\""); + } + public string Escape (string s) { StringBuilder buf = new StringBuilder(); foreach (char ch in s) { @@ -887,25 +907,35 @@ public string Escape (string s) { } return buf.ToString(); } - + //--------------------------------------------------------------------- // Grammar checks //--------------------------------------------------------------------- - + public bool GrammarOk() { - bool ok = NtsComplete() - && AllNtReached() + bool ok = NtsComplete() + && AllNtReached() && NoCircularProductions() && AllNtToTerm(); - if (ok) { CheckResolvers(); CheckLL1(); } - return ok; + if (ok) { CheckResolvers(); CheckLL1(); } + return ok; + } + + public bool GrammarCheckAll() { + int errors = 0; + if(!NtsComplete()) ++errors; + if(!AllNtReached()) ++errors; + if(!NoCircularProductions()) System.Environment.Exit(1); + if(!AllNtToTerm()) ++errors; + CheckResolvers(); CheckLL1(); + return errors == 0; } //--------------- check for circular productions ---------------------- - + class CNode { // node of list for finding circular productions public Symbol left, right; - + public CNode (Symbol l, Symbol r) { left = l; right = r; } @@ -914,7 +944,7 @@ public CNode (Symbol l, Symbol r) { void GetSingles(Node p, ArrayList singles) { if (p == null) return; // end of graph if (p.typ == Node.nt) { - if (p.up || DelGraph(p.next)) singles.Add(p.sym); + singles.Add(p.sym); } else if (p.typ == Node.alt || p.typ == Node.iter || p.typ == Node.opt) { if (p.up || DelGraph(p.next)) { GetSingles(p.sub, singles); @@ -923,7 +953,7 @@ void GetSingles(Node p, ArrayList singles) { } if (!p.up && DelNode(p)) GetSingles(p.next, singles); } - + public bool NoCircularProductions() { bool ok, changed, onLeftSide, onRightSide; ArrayList list = new ArrayList(); @@ -949,16 
+979,16 @@ public bool NoCircularProductions() { ok = true; foreach (CNode n in list) { ok = false; - errors.SemErr(" " + n.left.name + " --> " + n.right.name); + errors.SemErr(" " + n.left.name + ":" + n.left.line + " --> " + n.right.name + ":" + n.right.line); } return ok; } - + //--------------- check for LL(1) errors ---------------------- - + void LL1Error(int cond, Symbol sym) { - string s = " LL1 warning in " + curSy.name + ": "; - if (sym != null) s += sym.name + " is "; + string s = " LL1 warning in " + curSy.name + ":" + curSy.line + ":" + curSy.col + ": "; + if (sym != null) s += sym.name + " is "; switch (cond) { case 1: s += "start of several alternatives"; break; case 2: s += "start & successor of deletable structure"; break; @@ -967,22 +997,90 @@ void LL1Error(int cond, Symbol sym) { } errors.Warning(s); } - - void CheckOverlap(BitArray s1, BitArray s2, int cond) { + + int CheckOverlap(BitArray s1, BitArray s2, int cond) { + int overlaped = 0; foreach (Symbol sym in terminals) { - if (s1[sym.n] && s2[sym.n]) LL1Error(cond, sym); + if (s1[sym.n] && s2[sym.n]) { LL1Error(cond, sym); ++overlaped; } + } + return overlaped; + } + + /* print the path for first set that contains token tok for the graph rooted at p */ + void PrintFirstPath(Node p, int tok, string indent = "\t", int depth = 0) + { + //if(p && p.sym) Console.WriteLine("{0}==> {1}:{2}:{3}: {4}", indent, p.sym.name, p.sym.line, p.sym.col, depth); + //else Console.WriteLine("{0}==> xxx:{0}", indent, depth); + while (p != null) + { + //if(p.sym) Console.WriteLine("{0}----> {1}:{2}:{3}: {4}", indent, p.sym.name, p.sym.line, p.sym.col, depth); + switch (p.typ) + { + case Node.nt: + { + if (p.sym.firstReady) + { + if (p.sym.first[tok]) + { + if (indent.Length == 1) Console.WriteLine("{0}=> {1}:{2}:{3}:", indent, p.sym.name, p.line, p.col); + Console.WriteLine("{0}-> {1}:{2}:{3}:", indent, p.sym.name, p.sym.line, p.sym.col); + if (p.sym.graph != null) PrintFirstPath(p.sym.graph, tok, indent + " ", 
depth + 1); + return; + } + } + break; + } + case Node.t: + case Node.wt: + { + if (p.sym.n == tok) Console.WriteLine("{0}= {1}:{2}:{3}:", indent, p.sym.name, p.line, p.col); + break; + } + case Node.any: + { + break; + } + case Node.alt: + { + PrintFirstPath(p.sub, tok, indent, depth + 1); + PrintFirstPath(p.down, tok, indent, depth + 1); + break; + } + case Node.iter: + case Node.opt: + { + if (!DelNode(p.sub)) //prevent endless loop with some ill grammars + PrintFirstPath(p.sub, tok, indent, depth + 1); + break; + } + } + if (!DelNode(p)) break; + p = p.next; } } - - void CheckAlts(Node p) { + + int CheckAlts(Node p) { BitArray s1, s2; + int rc = 0; while (p != null) { if (p.typ == Node.alt) { Node q = p; s1 = new BitArray(terminals.Count); while (q != null) { // for all alternatives s2 = Expected0(q.sub, curSy); - CheckOverlap(s1, s2, 1); + int overlaped = CheckOverlap(s1, s2, 1); + if (overlaped > 0) + { + int overlapToken = 0; + /* Find the first overlap token */ + foreach (Symbol sym in terminals) + { + if (s1[sym.n] && s2[sym.n]) { overlapToken = sym.n; break; } + } + //Console.WriteLine("\t-> {0}:{1}: {2}", first_overlap.sub.sym.name, first_overlap.sub.sym.line, overlaped); + PrintFirstPath(p, overlapToken); + rc += overlaped; + } s1.Or(s2); CheckAlts(q.sub); q = q.down; @@ -992,7 +1090,19 @@ void CheckAlts(Node p) { else { s1 = Expected0(p.sub, curSy); s2 = Expected(p.next, curSy); - CheckOverlap(s1, s2, 2); + int overlaped = CheckOverlap(s1, s2, 2); + if (overlaped > 0) + { + int overlapToken = 0; + /* Find the first overlap token */ + foreach (Symbol sym in terminals) + { + if (s1[sym.n] && s2[sym.n]) { overlapToken = sym.n; break; } + } + //Console.WriteLine(format("\t=>:{0}: {1}", p.line, overlaped)); + PrintFirstPath(p, overlapToken); + rc += overlaped; + } } CheckAlts(p.sub); } else if (p.typ == Node.any) { @@ -1002,6 +1112,7 @@ void CheckAlts(Node p) { if (p.up) break; p = p.next; } + return rc; } public void CheckLL1() { @@ -1010,13 +1121,13 @@ 
public void CheckLL1() { CheckAlts(curSy.graph); } } - + //------------- check if resolvers are legal -------------------- - + void ResErr(Node p, string msg) { errors.Warning(p.line, p.pos.col, msg); } - + void CheckRes(Node p, bool rslvAllowed) { while (p != null) { switch (p.typ) { @@ -1041,7 +1152,7 @@ void CheckRes(Node p, bool rslvAllowed) { if (p.sub.typ == Node.rslv) { BitArray fs = First(p.sub.next); BitArray fsNext = Expected(p.next, curSy); - if (!Sets.Intersect(fs, fsNext)) + if (!Sets.Intersect(fs, fsNext)) ResErr(p.sub, "Warning: Misplaced resolver: no LL(1) conflict."); } CheckRes(p.sub, true); @@ -1056,7 +1167,7 @@ void CheckRes(Node p, bool rslvAllowed) { rslvAllowed = false; } } - + public void CheckResolvers() { foreach (Symbol sym in nonterminals) { curSy = sym; @@ -1065,7 +1176,7 @@ public void CheckResolvers() { } //------------- check if every nts has a production -------------------- - + public bool NtsComplete() { bool complete = true; foreach (Symbol sym in nonterminals) { @@ -1076,9 +1187,9 @@ public bool NtsComplete() { } return complete; } - + //-------------- check if every nts can be reached ----------------- - + void MarkReachedNts(Node p) { while (p != null) { if (p.typ == Node.nt && !visited[p.sym.n]) { // new nt reached @@ -1092,7 +1203,7 @@ void MarkReachedNts(Node p) { p = p.next; } } - + public bool AllNtReached() { bool ok = true; visited = new BitArray(nonterminals.Count); @@ -1106,20 +1217,20 @@ public bool AllNtReached() { } return ok; } - + //--------- check if every nts can be derived to terminals ------------ - + bool IsTerm(Node p, BitArray mark) { // true if graph can be derived to terminals while (p != null) { if (p.typ == Node.nt && !mark[p.sym.n]) return false; - if (p.typ == Node.alt && !IsTerm(p.sub, mark) + if (p.typ == Node.alt && !IsTerm(p.sub, mark) && (p.down == null || !IsTerm(p.down, mark))) return false; if (p.up) break; p = p.next; } return true; } - + public bool AllNtToTerm() { bool changed, ok = true; 
BitArray mark = new BitArray(nonterminals.Count); @@ -1138,11 +1249,11 @@ public bool AllNtToTerm() { } return ok; } - + //--------------------------------------------------------------------- // Cross reference list //--------------------------------------------------------------------- - + public void XRef() { SortedList xref = new SortedList(new SymbolComp()); // collect lines where symbols have been defined @@ -1178,7 +1289,7 @@ public void XRef() { } trace.WriteLine(); trace.WriteLine(); } - + public void SetDDT(string s) { s = s.ToUpper(); foreach (char ch in s) { diff --git a/src/build.sh b/src/build.sh new file mode 100644 index 0000000..e621774 --- /dev/null +++ b/src/build.sh @@ -0,0 +1 @@ +csc /out:Coco.exe /t:exe Coco.cs Scanner.cs Tab.cs DFA.cs ParserGen.cs Parser.cs \ No newline at end of file