Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 22 additions & 12 deletions crates/gnomon-parser/src/lexer.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,25 @@
use std::ops::Range;

use logos::Logos;

use crate::syntax_kind::SyntaxKind;

/// A single token produced by the lexer.
///
/// Tokens store a byte range into the original source string rather than
/// owning a copy of the text. This avoids one heap allocation per token.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Token {
pub kind: SyntaxKind,
pub text: String,
pub span: Range<usize>,
}

impl Token {
    /// Return the slice of `source` that this token covers.
    ///
    /// The token only stores a byte range, so the caller must supply the
    /// original source string the token was lexed from.
    #[inline]
    pub fn text<'a>(&self, source: &'a str) -> &'a str {
        // Index with start..end directly; Range<usize> itself is not Copy,
        // so this avoids cloning it just to build the index.
        &source[self.span.start..self.span.end]
    }
}

/// Internal logos token enum. Maps 1:1 to the token variants of `SyntaxKind`
Expand Down Expand Up @@ -201,17 +214,19 @@ impl LogosToken {

/// Tokenize the input string into a sequence of tokens.
/// Unrecognized bytes produce `ERROR` tokens.
///
/// Each token stores a byte range referencing the original `input` string.
pub fn lex(input: &str) -> Vec<Token> {
let mut tokens = Vec::new();
let mut lexer = LogosToken::lexer(input);

while let Some(result) = lexer.next() {
let text = lexer.slice().to_string();
let span = lexer.span();
let kind = match result {
Ok(tok) => tok.to_syntax_kind(),
Err(()) => SyntaxKind::ERROR,
};
tokens.push(Token { kind, text });
tokens.push(Token { kind, span });
}

tokens
Expand All @@ -223,15 +238,10 @@ mod tests {

fn kinds(input: &str) -> Vec<(SyntaxKind, &str)> {
let tokens = lex(input);
// Re-lex from input to get &str slices for comparison
let mut result = Vec::new();
let mut pos = 0;
for tok in &tokens {
let end = pos + tok.text.len();
result.push((tok.kind, &input[pos..end]));
pos = end;
}
result
tokens
.iter()
.map(|tok| (tok.kind, tok.text(input)))
.collect()
}

// ── Ambiguity resolution ─────────────────────────────────────
Expand Down
2 changes: 1 addition & 1 deletion crates/gnomon-parser/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ impl Parse {
pub fn parse(source: &str) -> Parse {
let preprocessed = preprocess::preprocess(source);
let tokens = lexer::lex(&preprocessed);
let parser = parser::Parser::new(tokens);
let parser = parser::Parser::new(tokens, preprocessed);
let (green_node, errors) = parser.parse();
Parse { green_node, errors }
}
Expand Down
27 changes: 11 additions & 16 deletions crates/gnomon-parser/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,21 +16,21 @@ pub struct ParseError {

pub struct Parser {
tokens: Vec<Token>,
/// The original source string; tokens reference it via byte ranges.
source: String,
pos: usize,
builder: GreenNodeBuilder<'static>,
errors: Vec<ParseError>,
/// Cumulative byte offset up to (but not including) `tokens[pos]`.
offset: usize,
}

impl Parser {
pub fn new(tokens: Vec<Token>) -> Self {
pub fn new(tokens: Vec<Token>, source: String) -> Self {
Self {
tokens,
source,
pos: 0,
builder: GreenNodeBuilder::new(),
errors: Vec::new(),
offset: 0,
}
}

Expand Down Expand Up @@ -80,7 +80,7 @@ impl Parser {
continue;
}
if remaining == 0 {
return &self.tokens[pos].text;
return self.tokens[pos].text(&self.source);
}
remaining -= 1;
pos += 1;
Expand Down Expand Up @@ -108,8 +108,7 @@ impl Parser {
fn skip_trivia(&mut self) {
while self.pos < self.tokens.len() && self.tokens[self.pos].kind.is_trivia() {
let tok = &self.tokens[self.pos];
self.builder.token(tok.kind.into(), &tok.text);
self.offset += tok.text.len();
self.builder.token(tok.kind.into(), tok.text(&self.source));
self.pos += 1;
}
}
Expand All @@ -119,8 +118,7 @@ impl Parser {
self.skip_trivia();
if self.pos < self.tokens.len() {
let tok = &self.tokens[self.pos];
self.builder.token(tok.kind.into(), &tok.text);
self.offset += tok.text.len();
self.builder.token(tok.kind.into(), tok.text(&self.source));
self.pos += 1;
}
}
Expand All @@ -132,8 +130,7 @@ impl Parser {
self.skip_trivia();
if self.pos < self.tokens.len() {
let tok = &self.tokens[self.pos];
self.builder.token(kind.into(), &tok.text);
self.offset += tok.text.len();
self.builder.token(kind.into(), tok.text(&self.source));
self.pos += 1;
}
}
Expand Down Expand Up @@ -175,16 +172,14 @@ impl Parser {
fn current_range(&self) -> std::ops::Range<usize> {
// Skip trivia to find the actual next token range
let mut pos = self.pos;
let mut off = self.offset;
while pos < self.tokens.len() && self.tokens[pos].kind.is_trivia() {
off += self.tokens[pos].text.len();
pos += 1;
}
if pos < self.tokens.len() {
let len = self.tokens[pos].text.len();
off..off + len
self.tokens[pos].span.clone()
} else {
off..off
let end = self.source.len();
end..end
}
}

Expand Down
Loading