Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions crates/oq3_lexer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ use unicode_properties::UnicodeEmoji;
/// A single token produced by the lexer: a kind tag plus the byte length
/// of the text it covers. Offsets into the source are not stored here;
/// they are recovered by accumulating `len` over the token stream.
#[derive(Debug)]
pub struct Token {
    /// The syntactic category of this token.
    pub kind: TokenKind,

    /// The length in bytes of text associated with `kind`.
    pub len: u32,
}

Expand Down
5 changes: 5 additions & 0 deletions crates/oq3_parser/src/input.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ use crate::SyntaxKind;
type bits = u64;

// FIXME GJL `LexerToken` does not appear anywhere in the r-a project.
// `LexerToken` seems to refer to output of `oq3_lexer::tokenize` and `LexedStr`
// (also present in r-a). These *do* preserve whitespace and comments.
/// Input for the parser -- a sequence of tokens.
///
/// As of now, parser doesn't have access to the *text* of the tokens, and makes
Expand All @@ -18,7 +20,10 @@ type bits = u64;
/// Struct of arrays internally, but this shouldn't really matter.
/// Struct-of-arrays token input for the parser; the three vectors are
/// indexed in parallel, one entry per non-trivia token.
#[derive(Default)]
pub struct Input {
    /// Kind of each token. `SyntaxKind` has u16 variants.
    kind: Vec<SyntaxKind>,

    /// Bitset tracking which tokens were adjacent in the original text;
    /// accounts for whitespace/comments dropped on construction.
    joint: Vec<bits>,
    /// Contextual (soft-keyword) kind for each token, where applicable.
    contextual_kind: Vec<SyntaxKind>,
}
Expand Down
12 changes: 12 additions & 0 deletions crates/oq3_parser/src/lexed_str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,26 @@ use crate::{
};

/// Lexed view of a source string: the raw text plus, for each token,
/// its `SyntaxKind` and starting byte offset, and any lexing errors.
pub struct LexedStr<'a> {
    /// The input source text.
    text: &'a str,

    /// `SyntaxKind` of each token, translated from the lexer's `Token` stream.
    kind: Vec<SyntaxKind>,

    /// Byte offset for start of each text span tagged in `kind`.
    /// `start.len() == kind.len()`.
    start: Vec<u32>,

    /// Errors collected during lexing: a `Token` flagged as invalid
    /// produces a `LexError` as well as a `SyntaxKind`.
    error: Vec<LexError>,
}

/// A lexing error tied to a specific token.
// TODO: Might be good to replace `msg` with a small `enum`.
struct LexError {
    /// Human-readable message; one of a small set of error messages.
    msg: String,

    /// Index of the offending token into `LexedStr.kind`.
    token: u32,
}

Expand Down
2 changes: 2 additions & 0 deletions crates/oq3_parser/src/shortcuts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,14 @@ pub enum StrStep<'a> {
}

impl LexedStr<'_> {
// `was_joint` is used to fix index into text when omitting whitespace/comments
pub fn to_input(&self) -> crate::Input {
let mut res = crate::Input::default();
let mut was_joint = false;
for i in 0..self.len() {
let kind = self.kind(i);
if kind.is_trivia() {
// whitespace or comment
was_joint = false
} else {
if kind == SyntaxKind::IDENT {
Expand Down
4 changes: 4 additions & 0 deletions crates/oq3_syntax/src/parsing.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ pub fn parse_text(openqasm_code_text: &str) -> (GreenNode, Vec<SyntaxError>) {
/// Lex `openqasm_code_text`. If there are no lexing errors, parse the result
/// returning the AST as `Option<GreenNode>`, as well as errors.
/// If lexing errors do occur, do no parsing, but rather, return the lexing errors.
///
/// `LexedStr::new` calls `oq3_parser::tokenize(..)` to produce a stream of `Token`s.
/// `LexedStr::new` translates this stream into `Vec<SyntaxKind>` plus offset and error information.
/// Data from previous step is converted to `Input`.
pub fn parse_text_check_lex(openqasm_code_text: &str) -> (Option<GreenNode>, Vec<SyntaxError>) {
let lexed = oq3_parser::LexedStr::new(openqasm_code_text);
if !lexed.errors_is_empty() {
Expand Down
Loading