Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions crates/oq3_lexer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ use unicode_properties::UnicodeEmoji;
/// A single token produced by the lexer: a kind tag plus the byte length
/// of the text it covers. Offsets into the source are not stored here;
/// they are recovered by accumulating `len` over the token stream.
#[derive(Debug)]
pub struct Token {
    /// The syntactic category of this token.
    pub kind: TokenKind,

    /// The length in bytes of text associated with `kind`.
    pub len: u32,
}

Expand Down
5 changes: 5 additions & 0 deletions crates/oq3_parser/src/input.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ use crate::SyntaxKind;
type bits = u64;

// FIXME GJL `LexerToken` does not appear anywhere in the r-a project.
// `LexerToken` seems to refer to output of `oq3_lexer::tokenize` and `LexedStr`
// (also present in r-a). These *do* preserve whitespace and comments.
/// Input for the parser -- a sequence of tokens.
///
/// As of now, parser doesn't have access to the *text* of the tokens, and makes
Expand All @@ -18,7 +20,10 @@ type bits = u64;
/// Struct of arrays internally, but this shouldn't really matter.
/// Struct-of-arrays token input for the parser; the three vectors are
/// indexed in parallel, one entry per non-trivia token.
#[derive(Default)]
pub struct Input {
    /// Kind of each token. `SyntaxKind` has u16 variants.
    kind: Vec<SyntaxKind>,

    /// Bitset tracking which tokens were adjacent in the original text;
    /// accounts for whitespace/comments dropped on construction.
    joint: Vec<bits>,
    /// Contextual (soft-keyword) kind for each token, where applicable.
    contextual_kind: Vec<SyntaxKind>,
}
Expand Down
12 changes: 12 additions & 0 deletions crates/oq3_parser/src/lexed_str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,26 @@ use crate::{
};

/// Lexed view of a source string: the raw text plus, for each token,
/// its `SyntaxKind` and starting byte offset, and any lexing errors.
pub struct LexedStr<'a> {
    /// The input source text.
    text: &'a str,

    /// `SyntaxKind` of each token, translated from the lexer's `Token` stream.
    kind: Vec<SyntaxKind>,

    /// Byte offset for start of each text span tagged in `kind`.
    /// `start.len() == kind.len()`.
    start: Vec<u32>,

    /// Errors collected during lexing: a `Token` flagged as invalid
    /// produces a `LexError` as well as a `SyntaxKind`.
    error: Vec<LexError>,
}

/// A lexing error tied to a specific token.
// TODO: Might be good to replace `msg` with a small `enum`.
struct LexError {
    /// Human-readable message; one of a small set of error messages.
    msg: String,

    /// Index of the offending token into `LexedStr.kind`.
    token: u32,
}

Expand Down
2 changes: 2 additions & 0 deletions crates/oq3_parser/src/shortcuts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,14 @@ pub enum StrStep<'a> {
}

impl LexedStr<'_> {
// `was_joint` is used to fix index into text when omitting whitespace/comments
pub fn to_input(&self) -> crate::Input {
let mut res = crate::Input::default();
let mut was_joint = false;
for i in 0..self.len() {
let kind = self.kind(i);
if kind.is_trivia() {
// whitespace or comment
was_joint = false
} else {
if kind == SyntaxKind::IDENT {
Expand Down
4 changes: 4 additions & 0 deletions crates/oq3_syntax/src/parsing.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ pub fn parse_text(openqasm_code_text: &str) -> (GreenNode, Vec<SyntaxError>) {
/// Lex `openqasm_code_text`. If there are no lexing errors, parse the result
/// returning the AST as `Option<GreenNode>`, as well as errors.
/// If lexing errors do occur, do no parsing, but rather, return the lexing errors.
///
/// `LexedStr::new` calls `oq3_parser::tokenize(..)` to produce a stream of `Token`s.
/// `LexedStr::new` translates this stream into `Vec<SyntaxKind>` plus offset and error information.
/// Data from previous step is converted to `Input`.
pub fn parse_text_check_lex(openqasm_code_text: &str) -> (Option<GreenNode>, Vec<SyntaxError>) {
let lexed = oq3_parser::LexedStr::new(openqasm_code_text);
if !lexed.errors_is_empty() {
Expand Down
Loading