diff --git a/lexer.go b/lexer.go
new file mode 100644
index 000000000..0de6277e4
--- /dev/null
+++ b/lexer.go
@@ -0,0 +1,159 @@
+// generated by Textmapper; DO NOT EDIT
+
+// Note: the generator originally emitted the import block before an empty
+// package clause, which "go fmt" rejected; the package name "test" below is
+// assumed from the grammar name.
+package test
+
+import (
+	"strings"
+	"token"
+	"unicode/utf8"
+)
+
+// Lexer uses a generated DFA to scan through a utf-8 encoded input string. If
+// the string starts with a BOM character, it gets skipped.
+type Lexer struct {
+	source string
+
+	ch          rune // current character, -1 means EOI
+	offset      int  // character offset
+	scanOffset  int  // scanning offset
+	tokenOffset int  // last token byte offset
+	line        int  // current line number (1-based)
+	tokenLine   int  // last token line
+	value       interface{}
+}
+
+var bomSeq = "\xef\xbb\xbf"
+
+// Init prepares the lexer l to tokenize source by performing the full reset
+// of the internal state.
+func (l *Lexer) Init(source string) {
+	l.source = source
+
+	l.ch = 0
+	l.offset = 0
+	l.scanOffset = 0
+	l.tokenOffset = 0
+	l.line = 1
+	l.tokenLine = 1
+
+	if strings.HasPrefix(source, bomSeq) {
+		l.offset += len(bomSeq)
+	}
+
+	l.rewind(l.offset)
+}
+
+// Next finds and returns the next token in l.source. The source end is
+// indicated by Token.EOI.
+//
+// The token text can be retrieved later by calling the Text() method.
+func (l *Lexer) Next() token.Type {
+	l.tokenLine = l.line
+	l.tokenOffset = l.offset
+
+	state := 0
+	for state >= 0 {
+		var ch int
+		if uint(l.ch) < tmRuneClassLen {
+			ch = int(tmRuneClass[l.ch])
+		} else if l.ch < 0 {
+			state = int(tmLexerAction[state*tmNumClasses])
+			continue
+		} else {
+			ch = 1
+		}
+		state = int(tmLexerAction[state*tmNumClasses+ch])
+		if state > tmFirstRule {
+			if l.ch == '\n' {
+				l.line++
+			}
+
+			// Scan the next character.
+			// Note: the following code is inlined to avoid performance implications.
+			l.offset = l.scanOffset
+			if l.offset < len(l.source) {
+				r, w := rune(l.source[l.offset]), 1
+				if r >= 0x80 {
+					// not ASCII
+					r, w = utf8.DecodeRuneInString(l.source[l.offset:])
+				}
+				l.scanOffset += w
+				l.ch = r
+			} else {
+				l.ch = -1 // EOI
+			}
+		}
+	}
+
+	tok := token.Type(tmFirstRule - state)
+	switch tok {
+	case token.INVALID_TOKEN:
+		if l.offset == l.tokenOffset {
+			if l.ch == -1 {
+				tok = token.EOI
+			}
+			l.rewind(l.scanOffset)
+		}
+	}
+	return tok
+}
+
+// Pos returns the start and end positions of the last token returned by Next().
+func (l *Lexer) Pos() (start, end int) {
+	start = l.tokenOffset
+	end = l.offset
+	return
+}
+
+// Line returns the line number of the last token returned by Next() (1-based).
+func (l *Lexer) Line() int {
+	return l.tokenLine
+}
+
+// Text returns the substring of the input corresponding to the last token.
+func (l *Lexer) Text() string {
+	return l.source[l.tokenOffset:l.offset]
+}
+
+// Value returns the value associated with the last returned token.
+func (l *Lexer) Value() interface{} {
+	return l.value
+}
+
+// Copy forks the lexer in its current state.
+func (l *Lexer) Copy() Lexer {
+	ret := *l
+	return ret
+}
+
+// rewind can be used in lexer actions to accept a portion of a scanned token, or to include
+// more text into it.
+func (l *Lexer) rewind(offset int) {
+	if offset < l.offset {
+		l.line -= strings.Count(l.source[offset:l.offset], "\n")
+	} else {
+		if offset > len(l.source) {
+			offset = len(l.source)
+		}
+		l.line += strings.Count(l.source[l.offset:offset], "\n")
+	}
+
+	// Scan the next character.
+	l.scanOffset = offset
+	l.offset = offset
+	if l.offset < len(l.source) {
+		r, w := rune(l.source[l.offset]), 1
+		if r >= 0x80 {
+			// not ASCII
+			r, w = utf8.DecodeRuneInString(l.source[l.offset:])
+		}
+		l.scanOffset += w
+		l.ch = r
+	} else {
+		l.ch = -1 // EOI
+	}
+}
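For reference, a minimal sketch of driving the generated lexer (hypothetical, not part of this change; it assumes the corrected "package test" clause above plus a fmt import in addition to the ones lexer.go already has):

func ExampleLexer() {
	var l Lexer
	l.Init("abc")
	// Print each token with its text and byte span until end of input.
	for tok := l.Next(); tok != token.EOI; tok = l.Next() {
		start, end := l.Pos()
		fmt.Printf("%v %q [%d:%d) line %d\n", tok, l.Text(), start, end, l.Line())
	}
}

With the tables below, "abc" yields the single-character tokens a, b and c.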
diff --git a/lexer_tables.go b/lexer_tables.go
new file mode 100644
index 000000000..1b77d32c9
--- /dev/null
+++ b/lexer_tables.go
@@ -0,0 +1,20 @@
+// generated by Textmapper; DO NOT EDIT
+
+// Note: the generator originally emitted an empty package clause; "test" is
+// assumed, matching lexer.go.
+package test
+
+const tmNumClasses = 5
+
+var tmRuneClass = []uint8{
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 4,
+}
+
+const tmRuneClassLen = 100
+const tmFirstRule = -1
+
+var tmLexerAction = []int8{
+	-2, -2, 3, 2, 1, -5, -5, -5, -5, -5, -4, -4, -4, -4, -4, -3, -3, -3, -3, -3,
+}
\ No newline at end of file
diff --git a/ls/server.go b/ls/server.go
index 07ce94688..9bc365764 100644
--- a/ls/server.go
+++ b/ls/server.go
@@ -59,7 +59,9 @@ func (s *Server) Initialize(ctx context.Context, params *lsp.InitializeParams) (
 			OpenClose: true,
 			Change:    lsp.TextDocumentSyncKindFull,
 		},
-		DefinitionProvider: true,
+		DefinitionProvider:     true,
+		DocumentSymbolProvider: true,
+		HoverProvider:          true,
 	},
 }
 return ret, nil
@@ -134,6 +136,57 @@ func (s *Server) typecheck(ctx context.Context, uri lsp.DocumentURI, version uin
 func keepGoing(err tm.SyntaxError) bool { return true }
 
+func (s *Server) DocumentSymbol(ctx context.Context, params *lsp.DocumentSymbolParams) (result []any, err error) {
+	filename := params.TextDocument.URI.Filename()
+	doc := s.docs[filename]
+	if doc == nil {
+		return nil, fmt.Errorf("%s is not opened", filename)
+	}
+
+	tree, err := ast.Parse(ctx, filename, doc.content, keepGoing)
+	if err != nil || tree == nil {
+		s.logger.Info("failed to parse for document symbols", zap.String("filename", filename), zap.Error(err))
+		return []any{}, nil
+	}
+
+	symbols := s.collectDocumentSymbols(tree, params.TextDocument.URI)
+	s.logger.Info("document symbols", zap.String("filename", filename), zap.Int("count", len(symbols)))
+
+	// Convert to []any for LSP compatibility.
+	result = make([]any, len(symbols))
+	for i, sym := range symbols {
+		result[i] = sym
+	}
+	return result, nil
+}
+
+func (s *Server) Hover(ctx context.Context, params *lsp.HoverParams) (*lsp.Hover, error) {
+	filename := params.TextDocument.URI.Filename()
+	doc := s.docs[filename]
+	if doc == nil {
+		return nil, fmt.Errorf("%s is not opened", filename)
+	}
+
+	cursor, err := resolvePosition(doc.content, params.Position)
+	if err != nil {
+		return nil, err
+	}
+
+	tree, err := ast.Parse(ctx, filename, doc.content, keepGoing)
+	if err != nil || tree == nil {
+		s.logger.Info("failed to parse for hover", zap.String("filename", filename), zap.Error(err))
+		return nil, nil
+	}
+
+	hover := s.getHoverInfo(tree, cursor, doc.content)
+	if hover == nil {
+		return nil, nil
+	}
+
+	s.logger.Info("hover", zap.String("filename", filename), zap.Int("cursor", cursor))
+	return hover, nil
+}
+
 func (s *Server) Definition(ctx context.Context, params *lsp.DefinitionParams) (result []lsp.Location, err error) {
 	filename := params.TextDocument.URI.Filename()
 	doc := s.docs[filename]
@@ -268,3 +321,300 @@ func resolvePosition(content string, pos lsp.Position) (int, error) {
 	}
 	return ret, nil
 }
+
+// collectDocumentSymbols extracts terminal and nonterminal definitions from
+// the grammar for the document outline.
+func (s *Server) collectDocumentSymbols(tree *ast.Tree, uri lsp.DocumentURI) []lsp.DocumentSymbol {
+	var symbols []lsp.DocumentSymbol
+
+	// Walk the AST to find terminals and nonterminals.
+	var visitor func(n *ast.Node)
+	visitor = func(n *ast.Node) {
+		switch n.Type() {
+		case tm.Lexeme:
+			// This is a terminal (lexeme definition). Both Range and
+			// SelectionRange deliberately cover just the defining identifier.
+			if name := n.Child(selector.Identifier); name.IsValid() {
+				line, col := name.LineColumn()
+				symbols = append(symbols, lsp.DocumentSymbol{
+					Name:   name.Text(),
+					Kind:   lsp.SymbolKindConstant,
+					Detail: "terminal",
+					Range: lsp.Range{
+						Start: lsp.Position{Line: uint32(line - 1), Character: uint32(col - 1)},
+						End:   lsp.Position{Line: uint32(line - 1), Character: uint32(col - 1 + len(name.Text()))},
+					},
+					SelectionRange: lsp.Range{
+						Start: lsp.Position{Line: uint32(line - 1), Character: uint32(col - 1)},
+						End:   lsp.Position{Line: uint32(line - 1), Character: uint32(col - 1 + len(name.Text()))},
+					},
+				})
+			}
+
+		case tm.Nonterm:
+			// This is a nonterminal (rule definition).
+			if name := n.Child(selector.Identifier); name.IsValid() {
+				line, col := name.LineColumn()
+
+				// Use the rule text as the detail, truncating long rule
+				// bodies for readability.
+				detail := "nonterminal"
+				if ruleBody := n.Child(selector.Rule0); ruleBody.IsValid() {
+					ruleText := strings.TrimSpace(ruleBody.Text())
+					if len(ruleText) > 100 {
+						ruleText = ruleText[:97] + "..."
+					}
+					detail = fmt.Sprintf("nonterminal: %s", ruleText)
+				}
+
+				symbols = append(symbols, lsp.DocumentSymbol{
+					Name:   name.Text(),
+					Kind:   lsp.SymbolKindFunction,
+					Detail: detail,
+					Range: lsp.Range{
+						Start: lsp.Position{Line: uint32(line - 1), Character: uint32(col - 1)},
+						End:   lsp.Position{Line: uint32(line - 1), Character: uint32(col - 1 + len(name.Text()))},
+					},
+					SelectionRange: lsp.Range{
+						Start: lsp.Position{Line: uint32(line - 1), Character: uint32(col - 1)},
+						End:   lsp.Position{Line: uint32(line - 1), Character: uint32(col - 1 + len(name.Text()))},
+					},
+				})
+			}
+		}
+
+		// Recurse into children.
+		for ch := n.Child(selector.Any); ch.IsValid(); ch = ch.Next(selector.Any) {
+			visitor(ch)
+		}
+	}
+
+	visitor(tree.Root())
+	return symbols
+}
+
+// getHoverInfo provides hover information for the symbol at the given cursor position.
+func (s *Server) getHoverInfo(tree *ast.Tree, cursor int, fileContent string) *lsp.Hover {
+	// Find the innermost AST node at the cursor position.
+	var targetNode *ast.Node
+	var visitor func(n *ast.Node)
+	visitor = func(n *ast.Node) {
+		if n.Offset() <= cursor && cursor < n.Endoffset() {
+			targetNode = n
+		}
+		for ch := n.Child(selector.Any); ch.IsValid(); ch = ch.Next(selector.Any) {
+			visitor(ch)
+		}
+	}
+	visitor(tree.Root())
+
+	if targetNode == nil {
+		return nil
+	}
+
+	// Determine whether the cursor is on a symbol reference or definition.
+	// Since Parent() is not public, findParentIdentifier re-walks the tree
+	// instead of walking up from the target node.
+	var symbolName string
+	symbolNode := s.findParentIdentifier(tree, targetNode)
+	if symbolNode != nil && symbolNode.Type() == tm.Identifier {
+		symbolName = symbolNode.Text()
+	}
+
+	if symbolName == "" {
+		return nil
+	}
+
+	// Find the definition of this symbol.
+	symbolDef := s.findSymbolDefinition(tree, symbolName)
+	if symbolDef == nil {
+		return nil
+	}
+
+	// Create hover content.
+	var hoverContent []string
+
+	// Add the original definition.
+	defText := s.getDefinitionText(symbolDef, tree.Text())
+	if defText != "" {
+		hoverContent = append(hoverContent, "**Definition:**")
+		hoverContent = append(hoverContent, "```textmapper")
+		hoverContent = append(hoverContent, defText)
+		hoverContent = append(hoverContent, "```")
+	}
+
+	// Add the expanded/desugared version for nonterminals.
+	if symbolDef.symbolType == "nonterminal" {
+		expanded := s.getExpandedDefinition(symbolDef, tree.Text())
+		if expanded != "" && expanded != defText {
+			hoverContent = append(hoverContent, "")
+			hoverContent = append(hoverContent, "**Expanded:**")
+			hoverContent = append(hoverContent, "```textmapper")
+			hoverContent = append(hoverContent, expanded)
+			hoverContent = append(hoverContent, "```")
+		}
+	}
+
+	if len(hoverContent) == 0 {
+		return nil
+	}
+
+	line, col := symbolNode.LineColumn()
+	return &lsp.Hover{
+		Contents: lsp.MarkupContent{
+			Kind:  lsp.Markdown,
+			Value: strings.Join(hoverContent, "\n"),
+		},
+		Range: &lsp.Range{
+			Start: lsp.Position{Line: uint32(line - 1), Character: uint32(col - 1)},
+			End:   lsp.Position{Line: uint32(line - 1), Character: uint32(col - 1 + len(symbolName))},
+		},
+	}
+}
+
+type symbolDefinition struct {
+	name       string
+	symbolType string // "terminal" or "nonterminal"
+	node       *ast.Node
+	ruleNode   *ast.Node // for nonterminals, the rule body
+}
+
+// findParentIdentifier finds an identifier node that contains the target node.
+func (s *Server) findParentIdentifier(tree *ast.Tree, targetNode *ast.Node) *ast.Node {
+	// If the target node itself is an identifier, return it.
+	if targetNode.Type() == tm.Identifier {
+		return targetNode
+	}
+
+	// Otherwise, look for identifier nodes that span the target node.
+	var identifierNode *ast.Node
+	var visitor func(n *ast.Node)
+	visitor = func(n *ast.Node) {
+		if n.Type() == tm.Identifier {
+			if n.Offset() <= targetNode.Offset() && targetNode.Endoffset() <= n.Endoffset() {
+				identifierNode = n
+			}
+		}
+		for ch := n.Child(selector.Any); ch.IsValid(); ch = ch.Next(selector.Any) {
+			visitor(ch)
+		}
+	}
+
+	visitor(tree.Root())
+	return identifierNode
+}
+
+// findSymbolDefinition locates the definition of a symbol by name.
+func (s *Server) findSymbolDefinition(tree *ast.Tree, symbolName string) *symbolDefinition {
+	var result *symbolDefinition
+
+	var visitor func(n *ast.Node)
+	visitor = func(n *ast.Node) {
+		switch n.Type() {
+		case tm.Lexeme:
+			if name := n.Child(selector.Identifier); name.IsValid() && name.Text() == symbolName {
+				result = &symbolDefinition{
+					name:       symbolName,
+					symbolType: "terminal",
+					node:       n,
+				}
+				return
+			}
+
+		case tm.Nonterm:
+			if name := n.Child(selector.Identifier); name.IsValid() && name.Text() == symbolName {
+				result = &symbolDefinition{
+					name:       symbolName,
+					symbolType: "nonterminal",
+					node:       n,
+					ruleNode:   n.Child(selector.Rule0),
+				}
+				return
+			}
+		}
+
+		// Continue searching until a definition is found.
+		if result == nil {
+			for ch := n.Child(selector.Any); ch.IsValid(); ch = ch.Next(selector.Any) {
+				visitor(ch)
+			}
+		}
+	}
+
+	visitor(tree.Root())
+	return result
+}
+
+// getDefinitionText extracts the definition text without semantic actions.
+func (s *Server) getDefinitionText(def *symbolDefinition, content string) string {
+	if def.node == nil {
+		return ""
+	}
+
+	// Get the text of the definition.
+	text := def.node.Text()
+
+	// For nonterminals, remove semantic actions.
+	if def.symbolType == "nonterminal" {
+		text = s.removeSemanticActions(text)
+	}
+
+	return text
+}
+
+// getExpandedDefinition returns the expanded/desugared version of a nonterminal.
+func (s *Server) getExpandedDefinition(def *symbolDefinition, content string) string {
+	if def.symbolType != "nonterminal" || def.ruleNode == nil {
+		return ""
+	}
+
+	// For now, return the definition text with semantic actions removed and
+	// basic syntax sugar expanded. This is a simplified implementation - a
+	// full implementation would expand operators like ?, *, +, etc.
+	text := def.ruleNode.Text()
+	expanded := s.expandSyntaxSugar(s.removeSemanticActions(text))
+	return fmt.Sprintf("%s: %s", def.name, expanded)
+}
+
+// removeSemanticActions removes { ... } blocks from rule text.
+func (s *Server) removeSemanticActions(text string) string {
+	var result strings.Builder
+	braceDepth := 0
+	inAction := false
+
+	for _, r := range text {
+		if r == '{' {
+			if braceDepth == 0 {
+				inAction = true
+			}
+			braceDepth++
+		} else if r == '}' {
+			braceDepth--
+			if braceDepth == 0 {
+				inAction = false
+			}
+		} else if !inAction {
+			result.WriteRune(r)
+		}
+	}
+
+	// Clean up extra whitespace.
+	return strings.Join(strings.Fields(result.String()), " ")
+}
+
+// expandSyntaxSugar provides basic expansion of syntax sugar (simplified).
+func (s *Server) expandSyntaxSugar(text string) string {
+	// This is a simplified expansion - a full implementation would parse the
+	// grammar and properly expand operators.
+
+	// Replace some common patterns.
+	text = strings.ReplaceAll(text, "?", "| %empty")
+
+	// Expanding * and + would need more complex parsing to handle properly.
+	// For now, just add a comment about the expansion.
+	if strings.Contains(text, "*") || strings.Contains(text, "+") {
+		text = text + " /* list expansion omitted for brevity */"
+	}
+
+	return text
+}
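Since the two helpers above are pure string transforms, their behavior is easy to pin down with an example test. A hypothetical sketch, not part of this change; a zero-value Server suffices because neither method touches server state:

func ExampleServer_removeSemanticActions() {
	s := &Server{}
	// Semantic actions in braces are stripped and whitespace is collapsed.
	fmt.Println(s.removeSemanticActions("expr '+' term { $$ = add($1, $3); } | term"))
	// List operators are only flagged, not expanded, in this first cut.
	fmt.Println(s.expandSyntaxSugar("stmt ';'*"))
	// Output:
	// expr '+' term | term
	// stmt ';'* /* list expansion omitted for brevity */
}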
diff --git a/parser.go b/parser.go
new file mode 100644
index 000000000..68ff88c31
--- /dev/null
+++ b/parser.go
@@ -0,0 +1,188 @@
+// generated by Textmapper; DO NOT EDIT
+
+// Note: the generator originally emitted the import block before an empty
+// package clause; "test" is assumed, matching lexer.go.
+package test
+
+import (
+	"fmt"
+	"token"
+)
+
+// Parser is a table-driven LALR parser for test.
+type Parser struct {
+	next symbol
+}
+
+type SyntaxError struct {
+	Line      int
+	Offset    int
+	Endoffset int
+}
+
+func (e SyntaxError) Error() string {
+	return fmt.Sprintf("syntax error at line %v", e.Line)
+}
+
+type symbol struct {
+	symbol    int32
+	offset    int
+	endoffset int
+}
+
+type stackEntry struct {
+	sym   symbol
+	state int8
+}
+
+func (p *Parser) Init() {
+}
+
+const (
+	startStackSize = 256
+	noToken        = int32(token.UNAVAILABLE)
+	eoiToken       = int32(token.EOI)
+	debugSyntax    = false
+)
+
+func (p *Parser) Parse(lexer *Lexer) error {
+	return p.parse(0, 8, lexer)
+}
+
+func (p *Parser) parse(start, end int8, lexer *Lexer) error {
+	state := start
+
+	var alloc [startStackSize]stackEntry
+	stack := append(alloc[:0], stackEntry{state: state})
+	p.fetchNext(lexer, stack)
+
+	for state != end {
+		action := tmAction[state]
+		if action < -2 {
+			// Lookahead is needed.
+			if p.next.symbol == noToken {
+				p.fetchNext(lexer, stack)
+			}
+			action = lalr(action, p.next.symbol)
+		}
+
+		if action >= 0 {
+			// Reduce.
+			rule := action
+			ln := int(tmRuleLen[rule])
+
+			var entry stackEntry
+			entry.sym.symbol = tmRuleSymbol[rule]
+			rhs := stack[len(stack)-ln:]
+			if ln == 0 {
+				if p.next.symbol == noToken {
+					p.fetchNext(lexer, stack)
+				}
+				entry.sym.offset, entry.sym.endoffset = p.next.offset, p.next.offset
+			} else {
+				entry.sym.offset = rhs[0].sym.offset
+				entry.sym.endoffset = rhs[ln-1].sym.endoffset
+			}
+			if err := p.applyRule(rule, &entry, stack, lexer); err != nil {
+				return err
+			}
+			stack = stack[:len(stack)-len(rhs)]
+			if debugSyntax {
+				fmt.Printf("reduced to: %v\n", symbolName(entry.sym.symbol))
+			}
+			state = gotoState(stack[len(stack)-1].state, entry.sym.symbol)
+			entry.state = state
+			stack = append(stack, entry)
+
+		} else if action == -1 {
+			// Shift.
+			if p.next.symbol == noToken {
+				p.fetchNext(lexer, stack)
+			}
+			state = gotoState(state, p.next.symbol)
+			if state >= 0 {
+				stack = append(stack, stackEntry{
+					sym:   p.next,
+					state: state,
+				})
+				if debugSyntax {
+					fmt.Printf("shift: %v (%s)\n", symbolName(p.next.symbol), lexer.Text())
+				}
+				if p.next.symbol != eoiToken {
+					p.next.symbol = noToken
+				}
+			}
+		}
+
+		if action == -2 || state == -1 {
+			break
+		}
+	}
+
+	if state != end {
+		if p.next.symbol == noToken {
+			p.fetchNext(lexer, stack)
+		}
+		err := SyntaxError{
+			Line:      lexer.Line(),
+			Offset:    p.next.offset,
+			Endoffset: p.next.endoffset,
+		}
+		return err
+	}
+
+	return nil
+}
+
+func lalr(action, next int32) int32 {
+	a := -action - 3
+	for ; tmLalr[a] >= 0; a += 2 {
+		if tmLalr[a] == next {
+			break
+		}
+	}
+	return tmLalr[a+1]
+}
+
+func gotoState(state int8, symbol int32) int8 {
+	min := tmGoto[symbol]
+	max := tmGoto[symbol+1]
+
+	if max-min < 32 {
+		for i := min; i < max; i += 2 {
+			if tmFromTo[i] == state {
+				return tmFromTo[i+1]
+			}
+		}
+	} else {
+		for min < max {
+			e := (min + max) >> 1 &^ int32(1)
+			i := tmFromTo[e]
+			if i == state {
+				return tmFromTo[e+1]
+			} else if i < state {
+				min = e + 2
+			} else {
+				max = e
+			}
+		}
+	}
+	return -1
+}
+
+func (p *Parser) fetchNext(lexer *Lexer, stack []stackEntry) {
+restart:
+	tok := lexer.Next()
+	switch tok {
+	case token.INVALID_TOKEN:
+		goto restart
+	}
+	p.next.symbol = int32(tok)
+	p.next.offset, p.next.endoffset = lexer.Pos()
+}
+
+func (p *Parser) applyRule(rule int32, lhs *stackEntry, stack []stackEntry, lexer *Lexer) (err error) {
+	return
+}
diff --git a/parser_tables.go b/parser_tables.go
new file mode 100644
index 000000000..f4c985e27
--- /dev/null
+++ b/parser_tables.go
@@ -0,0 +1,60 @@
+// generated by Textmapper; DO NOT EDIT
+
+// Note: the generator originally emitted "var" and the import block around an
+// empty package clause; "test" is assumed, matching parser.go.
+package test
+
+import (
+	"fmt"
+	"token"
+)
+
+var tmNonterminals = [...]string{
+	"Xyz_list",
+	"input",
+	"Xyz",
+}
+
+func symbolName(sym int32) string {
+	if sym == noToken {
+		return ""
+	}
+	if sym < int32(token.NumTokens) {
+		return token.Type(sym).String()
+	}
+	if i := int(sym) - int(token.NumTokens); i < len(tmNonterminals) {
+		return tmNonterminals[i]
+	}
+	return fmt.Sprintf("nonterminal(%d)", sym)
+}
+
+var tmAction = []int32{
+	-1, -1, 3, 4, -3, 1, 0, -1, -2,
+}
+
+var tmLalr = []int32{
+	3, -1, 4, -1, 0, 2, -1, -2,
+}
+
+var tmGoto = []int32{
+	0, 2, 2, 4, 8, 12, 14, 16, 20,
+}
+
+var tmFromTo = []int8{
+	7, 8, 0, 1, 1, 2, 4, 2, 1, 3, 4, 3, 1, 4, 0, 7, 1, 5, 4, 6,
+}
+
+var tmRuleLen = []int8{
+	2, 1, 2, 1, 1,
+}
+
+var tmRuleSymbol = []int32{
+	5, 5, 6, 7, 7,
+}
+
+var tmRuleType = [...]NodeType{
+	0, // Xyz_list : Xyz_list Xyz
+	0, // Xyz_list : Xyz
+	0, // input : 'a' Xyz_list
+	0, // Xyz : 'b'
+	0, // Xyz : 'c'
+}
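Taken together, the generated pair can be driven end to end. A hypothetical smoke-test helper (not part of this change; it assumes the corrected package clauses above): per the tables, the grammar is input : 'a' Xyz_list with Xyz : 'b' | 'c', so "abc" parses while "ba" returns a SyntaxError.

func parseString(src string) error {
	var l Lexer
	l.Init(src)
	var p Parser
	p.Init()
	// Runs the LALR automaton from start state 0 to accepting state 8.
	return p.Parse(&l)
}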
diff --git a/textmapper b/textmapper
new file mode 100755
index 000000000..93b37c288
Binary files /dev/null and b/textmapper differ
diff --git a/token/token.go b/token/token.go
new file mode 100644
index 000000000..a4d4db35c
--- /dev/null
+++ b/token/token.go
@@ -0,0 +1,37 @@
+// generated by Textmapper; DO NOT EDIT
+
+package token
+
+import (
+	"fmt"
+)
+
+// Type is an enum of all terminal symbols of the test language.
+type Type int32
+
+// Token values.
+const (
+	UNAVAILABLE Type = iota - 1
+	EOI
+	INVALID_TOKEN
+	CHAR_A // a
+	CHAR_B // b
+	CHAR_C // c
+
+	NumTokens
+)
+
+var tokenStr = [...]string{
+	"EOI",
+	"INVALID_TOKEN",
+	"a",
+	"b",
+	"c",
+}
+
+func (tok Type) String() string {
+	if tok >= 0 && int(tok) < len(tokenStr) {
+		return tokenStr[tok]
+	}
+	return fmt.Sprintf("token(%d)", tok)
+}
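Finally, a small illustration of the token enum's behavior (hypothetical snippet, not part of this change; fmt import assumed): non-negative values map onto tokenStr by index, and anything out of range, including UNAVAILABLE (-1), falls back to the numeric form.

func ExampleType_String() {
	fmt.Println(token.CHAR_A)      // a
	fmt.Println(token.EOI)         // EOI
	fmt.Println(token.UNAVAILABLE) // token(-1)
	fmt.Println(token.Type(99))    // token(99)
}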