From 773b97eae9669e57e93ab615ca700b5ca0f5a1c6 Mon Sep 17 00:00:00 2001
From: John Lapeyre <1969884+jlapeyre@users.noreply.github.com>
Date: Thu, 27 Nov 2025 19:39:56 -0500
Subject: [PATCH] Add comments on parser structs

---
 crates/oq3_lexer/src/lib.rs        |  2 ++
 crates/oq3_parser/src/input.rs     |  5 +++++
 crates/oq3_parser/src/lexed_str.rs | 12 ++++++++++++
 crates/oq3_parser/src/shortcuts.rs |  2 ++
 crates/oq3_syntax/src/parsing.rs   |  4 ++++
 5 files changed, 25 insertions(+)

diff --git a/crates/oq3_lexer/src/lib.rs b/crates/oq3_lexer/src/lib.rs
index eda07a9..c895d3e 100644
--- a/crates/oq3_lexer/src/lib.rs
+++ b/crates/oq3_lexer/src/lib.rs
@@ -38,6 +38,8 @@ use unicode_properties::UnicodeEmoji;
 #[derive(Debug)]
 pub struct Token {
     pub kind: TokenKind,
+
+    /// The length in bytes of text associated with `kind`.
     pub len: u32,
 }
 
diff --git a/crates/oq3_parser/src/input.rs b/crates/oq3_parser/src/input.rs
index 6a1338d..d6664ad 100644
--- a/crates/oq3_parser/src/input.rs
+++ b/crates/oq3_parser/src/input.rs
@@ -9,6 +9,8 @@ use crate::SyntaxKind;
 type bits = u64;
 
 // FIXME GJL `LexerToken` does not appear anywhere in the r-a project.
+// `LexerToken` seems to refer to output of `oq3_lexer::tokenize` and `LexedStr`
+// (also present in r-a). These *do* preserve whitespace and comments.
 /// Input for the parser -- a sequence of tokens.
 ///
 /// As of now, parser doesn't have access to the *text* of the tokens, and makes
@@ -18,7 +20,10 @@ type bits = u64;
 /// Struct of arrays internally, but this shouldn't really matter.
 #[derive(Default)]
 pub struct Input {
+    /// SyntaxKind has u16 variants
     kind: Vec<SyntaxKind>,
+
+    /// Account for whitespace/comments dropped on construction
     joint: Vec<bits>,
     contextual_kind: Vec<SyntaxKind>,
 }
diff --git a/crates/oq3_parser/src/lexed_str.rs b/crates/oq3_parser/src/lexed_str.rs
index 7b467ce..fc125c4 100644
--- a/crates/oq3_parser/src/lexed_str.rs
+++ b/crates/oq3_parser/src/lexed_str.rs
@@ -20,14 +20,26 @@ use crate::{
 };
 
 pub struct LexedStr<'a> {
+    /// The input source text
     text: &'a str,
+
+    /// Stores translation of stream of `Token`s
     kind: Vec<SyntaxKind>,
+
+    /// Byte offset for start of each text span tagged in `kind`.
+    /// `start.len() == kind.len()`.
     start: Vec<u32>,
+
+    /// `Token` flagged as invalid produce a `LexError` as well as a `SyntaxKind`.
     error: Vec<LexError>,
 }
 
+// TODO: Might be good to replace `msg` with a small `enum`.
 struct LexError {
+    /// One of a small set of error messages.
     msg: String,
+
+    /// Index into `LexedStr.kind`
     token: u32,
 }
 
diff --git a/crates/oq3_parser/src/shortcuts.rs b/crates/oq3_parser/src/shortcuts.rs
index 668416b..c6490a3 100644
--- a/crates/oq3_parser/src/shortcuts.rs
+++ b/crates/oq3_parser/src/shortcuts.rs
@@ -28,12 +28,14 @@ pub enum StrStep<'a> {
 }
 
 impl LexedStr<'_> {
+    // `was_joint` is used to fix index into text when omitting whitespace/comments
     pub fn to_input(&self) -> crate::Input {
         let mut res = crate::Input::default();
         let mut was_joint = false;
         for i in 0..self.len() {
             let kind = self.kind(i);
             if kind.is_trivia() {
+                // whitespace or comment
                 was_joint = false
             } else {
                 if kind == SyntaxKind::IDENT {
diff --git a/crates/oq3_syntax/src/parsing.rs b/crates/oq3_syntax/src/parsing.rs
index 1b9f72d..3fd4942 100644
--- a/crates/oq3_syntax/src/parsing.rs
+++ b/crates/oq3_syntax/src/parsing.rs
@@ -20,6 +20,10 @@ pub fn parse_text(openqasm_code_text: &str) -> (GreenNode, Vec<SyntaxError>) {
 /// Lex `openqasm_code_text`. If there are no lexing errors, parse the result
 /// returning the AST as `Option<SyntaxNode>`, as well as errors.
 /// If lexing errors do occur, do no parsing, but rather, return the lexing errors.
+///
+/// `LexedStr::new` calls `oq3_parser::tokenize(..)` to produce a stream of `Token`s.
+/// `LexedStr::new` translates this stream into `Vec<SyntaxKind>` plus offset and error information.
+/// Data from previous step is converted to `Input`.
 pub fn parse_text_check_lex(openqasm_code_text: &str) -> (Option<SyntaxNode>, Vec<SyntaxError>) {
     let lexed = oq3_parser::LexedStr::new(openqasm_code_text);
     if !lexed.errors_is_empty() {