From b48d70aa06d61a9cb7e9a5747699f77dfb3bf419 Mon Sep 17 00:00:00 2001 From: John Lapeyre <1969884+jlapeyre@users.noreply.github.com> Date: Wed, 26 Nov 2025 16:04:29 -0500 Subject: [PATCH] Fix bug in pragma lexing * This fixes a bug in which `pragmaxyz` followed by any characters was lexed as a PRAGMA token. The same happened with `#pragmaxyz`. Now `pragmaxyz` is lexed as an ordinary identifier, and `#pragmaxyz` produces an invalid-identifier token. * Refactor code in oq3_lexer/src/lib.rs for increased clarity and maintainability. --- crates/oq3_lexer/src/lib.rs | 89 +++++++++++-------- crates/oq3_parser/src/grammar/expressions.rs | 3 +- .../oq3_semantics/src/syntax_to_semantics.rs | 18 ++++ crates/pipeline-tests/tests/runner.rs | 2 + ...lid_oq3p__directives__pragma.qasm-lex.snap | 22 +++++ ...id_oq3p__directives__pragma1.qasm-lex.snap | 25 ++++++ ...id_oq3p__directives__pragma2.qasm-lex.snap | 24 +++++ ..._oq3p__directives__pragma2.qasm-parse.snap | 23 +++++ ...id_oq3p__directives__pragma3.qasm-lex.snap | 20 +++++ ..._oq3p__directives__pragma3.qasm-parse.snap | 18 ++++ ...lid_oq3p__directives__pragma.qasm-lex.snap | 18 ++++ ...d_oq3p__directives__pragma.qasm-parse.snap | 17 ++++ ...id_oq3p__directives__pragma.qasm-sema.snap | 14 +++ .../invalid_oq3p/directives/pragma1.qasm | 6 ++ .../invalid_oq3p/directives/pragma2.qasm | 6 ++ .../invalid_oq3p/directives/pragma3.qasm | 6 ++ .../valid_oq3p/directives/pragma.qasm | 6 ++ 17 files changed, 279 insertions(+), 38 deletions(-) create mode 100644 crates/pipeline-tests/tests/snapshots/runner__tests__snippets__invalid_oq3p__directives__pragma.qasm-lex.snap create mode 100644 crates/pipeline-tests/tests/snapshots/runner__tests__snippets__invalid_oq3p__directives__pragma1.qasm-lex.snap create mode 100644 crates/pipeline-tests/tests/snapshots/runner__tests__snippets__invalid_oq3p__directives__pragma2.qasm-lex.snap create mode 100644 crates/pipeline-tests/tests/snapshots/runner__tests__snippets__invalid_oq3p__directives__pragma2.qasm-parse.snap create mode 100644 
crates/pipeline-tests/tests/snapshots/runner__tests__snippets__invalid_oq3p__directives__pragma3.qasm-lex.snap create mode 100644 crates/pipeline-tests/tests/snapshots/runner__tests__snippets__invalid_oq3p__directives__pragma3.qasm-parse.snap create mode 100644 crates/pipeline-tests/tests/snapshots/runner__tests__snippets__valid_oq3p__directives__pragma.qasm-lex.snap create mode 100644 crates/pipeline-tests/tests/snapshots/runner__tests__snippets__valid_oq3p__directives__pragma.qasm-parse.snap create mode 100644 crates/pipeline-tests/tests/snapshots/runner__tests__snippets__valid_oq3p__directives__pragma.qasm-sema.snap create mode 100644 crates/pipeline-tests/tests/snippets/invalid_oq3p/directives/pragma1.qasm create mode 100644 crates/pipeline-tests/tests/snippets/invalid_oq3p/directives/pragma2.qasm create mode 100644 crates/pipeline-tests/tests/snippets/invalid_oq3p/directives/pragma3.qasm create mode 100644 crates/pipeline-tests/tests/snippets/valid_oq3p/directives/pragma.qasm diff --git a/crates/oq3_lexer/src/lib.rs b/crates/oq3_lexer/src/lib.rs index d9d5628..dbb0914 100644 --- a/crates/oq3_lexer/src/lib.rs +++ b/crates/oq3_lexer/src/lib.rs @@ -299,9 +299,11 @@ impl Cursor<'_> { // Whitespace sequence. c if is_whitespace(c) => self.whitespace(), + 'p' => self.pragma_or_ident_or_unknown_prefix(), + // Identifier (this should be checked after other variant that can // start as identifier). - c if is_id_start(c) => self.ident_or_unknown_prefix(c), + c if is_id_start(c) => self.ident_or_unknown_prefix(), // Numeric literal. 
c @ '0'..='9' => { @@ -320,40 +322,25 @@ impl Cursor<'_> { } '#' => { + // only "pragma" and "dim" may begin with '#' if self.first() == 'p' { - self.bump(); - if self.first() == 'r' { - self.bump(); - if self.first() == 'a' { - self.bump(); - if self.first() == 'g' { - self.bump(); - if self.first() == 'm' { - self.bump(); - if self.first() == 'a' { - self.eat_while(|c| c != '\n'); - let res = Token::new(Pragma, self.pos_within_token()); - self.reset_pos_within_token(); - return res; - } - } - } - } + self.bump(); // have_pragma looks for 'r' + if self.have_pragma() { + // consumes pragma and returns true + Pragma + } else { + InvalidIdent } } else if self.first() == 'd' { - self.bump(); - if self.first() == 'i' { - self.bump(); - if self.first() == 'm' { - self.bump(); - let res = Token::new(Dim, self.pos_within_token()); - self.reset_pos_within_token(); - return res; - } + if self.have_dim() { + // have_dim looks for 'd' + Dim + } else { + InvalidIdent } + } else { + InvalidIdent } - // Only `#pragma` and `#dim` may begin with a pound character - InvalidIdent } '@' => { @@ -502,12 +489,24 @@ impl Cursor<'_> { Whitespace } - fn ident_or_unknown_prefix(&mut self, c: char) -> TokenKind { - debug_assert!(is_id_start(self.prev())); + /// This is called if we just bumped '#' and then see 'd'. + fn have_dim(&mut self) -> bool { + if self.first() == 'd' { + self.bump(); + if self.first() == 'i' { + self.bump(); + if self.first() == 'm' { + self.bump(); + return true; + } + } + } + false + } - // First see if we have "pragma". Everything till the end of the - // line is included in one token. - if c == 'p' && self.first() == 'r' { + /// Pragma syntax: `#?pragma` must be followed by whitespace; after that, any characters are allowed up to the next newline. 
+ fn have_pragma(&mut self) -> bool { + if self.first() == 'r' { self.bump(); if self.first() == 'a' { self.bump(); @@ -516,13 +515,29 @@ impl Cursor<'_> { if self.first() == 'm' { self.bump(); if self.first() == 'a' { - self.eat_while(|c| c != '\n'); - return Pragma; + self.bump(); + if is_whitespace(self.first()) { + self.eat_while(|c| c != '\n'); + return true; + } } } } } } + false + } + + fn pragma_or_ident_or_unknown_prefix(&mut self) -> TokenKind { + if self.have_pragma() { + Pragma + } else { + self.ident_or_unknown_prefix() + } + } + + fn ident_or_unknown_prefix(&mut self) -> TokenKind { + debug_assert!(is_id_start(self.prev())); // Start is already eaten, eat the rest of identifier. self.eat_while(is_id_continue); diff --git a/crates/oq3_parser/src/grammar/expressions.rs b/crates/oq3_parser/src/grammar/expressions.rs index 91b4f28..e3a5757 100644 --- a/crates/oq3_parser/src/grammar/expressions.rs +++ b/crates/oq3_parser/src/grammar/expressions.rs @@ -548,7 +548,8 @@ pub(crate) fn var_name(p: &mut Parser<'_>) { // The declared identifier, ie variable name p.bump_any(); } else { - p.error("Expecting parameter name."); + let kind = p.current(); + p.error(format!("Expecting parameter name. 
Found {kind:?}")); } m.complete(p, NAME); } diff --git a/crates/oq3_semantics/src/syntax_to_semantics.rs b/crates/oq3_semantics/src/syntax_to_semantics.rs index d9d20c5..53e8411 100644 --- a/crates/oq3_semantics/src/syntax_to_semantics.rs +++ b/crates/oq3_semantics/src/syntax_to_semantics.rs @@ -170,6 +170,7 @@ pub fn analyze_source(parsed_source: T) -> ParseResult { have_syntax_errors: true, }; } + // get_version_string(&parsed_source); let errors = SemanticErrorList::new(file_path.to_path_buf()); let (mut context, errors) = syntax_to_semantic(&parsed_source, context, errors); let _ = replace(&mut context.semantic_errors, errors); @@ -180,6 +181,23 @@ pub fn analyze_source(parsed_source: T) -> ParseResult { } } +// fn get_version_string(parsed_source: &T) -> Option { +// let mut statements = parsed_source.syntax_ast().unwrap().tree().statements(); +// let first_statement = statements.next(); +// dbg!(&first_statement); +// match first_statement { +// Some(synast::Stmt::VersionString(version_string)) => { +// let version = version_string.version().unwrap().version().unwrap(); +// let vparts = version.split_into_parts(); +// dbg!(version.is_simple()); +// dbg!(version.text()); +// dbg!(version); +// None +// } +// _ => None +// } +// } + pub fn syntax_to_semantic( parsed_source: &T, mut context: Context, diff --git a/crates/pipeline-tests/tests/runner.rs b/crates/pipeline-tests/tests/runner.rs index f21613f..002eaaa 100644 --- a/crates/pipeline-tests/tests/runner.rs +++ b/crates/pipeline-tests/tests/runner.rs @@ -154,6 +154,8 @@ mod suite { } // Lexer stage: token stream + oq3_parser::LexedStr diagnostics summary + // We must not call `tokenize` directly. Diagnostic errors are hidden in struct fields. + // `LexedStr` converts them to stored errors that can be retrieved. 
pub(crate) fn run_lex(src: &str) -> (bool, usize, String) { let lexed = oq3_parser::LexedStr::new(src); let diag_count = if lexed.errors_is_empty() { 0 } else { 1 }; diff --git a/crates/pipeline-tests/tests/snapshots/runner__tests__snippets__invalid_oq3p__directives__pragma.qasm-lex.snap b/crates/pipeline-tests/tests/snapshots/runner__tests__snippets__invalid_oq3p__directives__pragma.qasm-lex.snap new file mode 100644 index 0000000..21fd92d --- /dev/null +++ b/crates/pipeline-tests/tests/snapshots/runner__tests__snippets__invalid_oq3p__directives__pragma.qasm-lex.snap @@ -0,0 +1,22 @@ +--- +source: crates/pipeline-tests/tests/runner.rs +expression: lex_snap +--- +id: tests/snippets/invalid_oq3p/directives/pragma.qasm +expect-lex: Diag +--- source --- + +#pragmaa 1 2 3 +--- lexer --- +ok: false +errors: 1 +[0] Whitespace "\n" @0..1 +[1] InvalidIdent "#pragma" @1..8 +[2] Ident "a" @8..9 +[3] Whitespace " " @9..10 +[4] Literal { kind: Int { base: Decimal, empty_int: false }, suffix_start: 1 } "1" @10..11 +[5] Whitespace " " @11..12 +[6] Literal { kind: Int { base: Decimal, empty_int: false }, suffix_start: 1 } "2" @12..13 +[7] Whitespace " " @13..14 +[8] Literal { kind: Int { base: Decimal, empty_int: false }, suffix_start: 1 } "3" @14..15 +[9] Whitespace "\n" @15..16 diff --git a/crates/pipeline-tests/tests/snapshots/runner__tests__snippets__invalid_oq3p__directives__pragma1.qasm-lex.snap b/crates/pipeline-tests/tests/snapshots/runner__tests__snippets__invalid_oq3p__directives__pragma1.qasm-lex.snap new file mode 100644 index 0000000..e3959b8 --- /dev/null +++ b/crates/pipeline-tests/tests/snapshots/runner__tests__snippets__invalid_oq3p__directives__pragma1.qasm-lex.snap @@ -0,0 +1,25 @@ +--- +source: crates/pipeline-tests/tests/runner.rs +expression: lex_snap +--- +id: tests/snippets/invalid_oq3p/directives/pragma1.qasm +expect-lex: Diag +--- source --- + +// Invalid identifier token +#pragmaa 1 2 3 +--- lexer --- +ok: false +errors: 1 +[0] Whitespace "\n" @0..1 
+[1] LineComment "// Invalid identifier token" @1..28 +[2] Whitespace "\n" @28..29 +[3] InvalidIdent "#pragma" @29..36 +[4] Ident "a" @36..37 +[5] Whitespace " " @37..38 +[6] Literal { kind: Int { base: Decimal, empty_int: false }, suffix_start: 1 } "1" @38..39 +[7] Whitespace " " @39..40 +[8] Literal { kind: Int { base: Decimal, empty_int: false }, suffix_start: 1 } "2" @40..41 +[9] Whitespace " " @41..42 +[10] Literal { kind: Int { base: Decimal, empty_int: false }, suffix_start: 1 } "3" @42..43 +[11] Whitespace "\n" @43..44 diff --git a/crates/pipeline-tests/tests/snapshots/runner__tests__snippets__invalid_oq3p__directives__pragma2.qasm-lex.snap b/crates/pipeline-tests/tests/snapshots/runner__tests__snippets__invalid_oq3p__directives__pragma2.qasm-lex.snap new file mode 100644 index 0000000..453dad3 --- /dev/null +++ b/crates/pipeline-tests/tests/snapshots/runner__tests__snippets__invalid_oq3p__directives__pragma2.qasm-lex.snap @@ -0,0 +1,24 @@ +--- +source: crates/pipeline-tests/tests/runner.rs +expression: lex_snap +--- +id: tests/snippets/invalid_oq3p/directives/pragma2.qasm +expect-lex: Ok +--- source --- + +// pragmaa is an ordinary identifier +pragmaa 1 2 3 +--- lexer --- +ok: true +errors: 0 +[0] Whitespace "\n" @0..1 +[1] LineComment "// pragmaa is an ordinary identifier" @1..37 +[2] Whitespace "\n" @37..38 +[3] Ident "pragmaa" @38..45 +[4] Whitespace " " @45..46 +[5] Literal { kind: Int { base: Decimal, empty_int: false }, suffix_start: 1 } "1" @46..47 +[6] Whitespace " " @47..48 +[7] Literal { kind: Int { base: Decimal, empty_int: false }, suffix_start: 1 } "2" @48..49 +[8] Whitespace " " @49..50 +[9] Literal { kind: Int { base: Decimal, empty_int: false }, suffix_start: 1 } "3" @50..51 +[10] Whitespace "\n" @51..52 diff --git a/crates/pipeline-tests/tests/snapshots/runner__tests__snippets__invalid_oq3p__directives__pragma2.qasm-parse.snap b/crates/pipeline-tests/tests/snapshots/runner__tests__snippets__invalid_oq3p__directives__pragma2.qasm-parse.snap 
new file mode 100644 index 0000000..2c96a21 --- /dev/null +++ b/crates/pipeline-tests/tests/snapshots/runner__tests__snippets__invalid_oq3p__directives__pragma2.qasm-parse.snap @@ -0,0 +1,23 @@ +--- +source: crates/pipeline-tests/tests/runner.rs +expression: parse_snap +--- +id: tests/snippets/invalid_oq3p/directives/pragma2.qasm +expect-parse: Diag +--- parser --- +ok: false +panicked: false +errors: 4 +--- ast --- +SOURCE_FILE@0..52: +// pragmaa is an ordinary identifier +pragmaa 1 2 3 + +EXPR_STMT@38..45: pragmaa +IDENTIFIER@38..45: pragmaa +EXPR_STMT@46..47: 1 +LITERAL@46..47: 1 +EXPR_STMT@48..49: 2 +LITERAL@48..49: 2 +EXPR_STMT@50..51: 3 +LITERAL@50..51: 3 diff --git a/crates/pipeline-tests/tests/snapshots/runner__tests__snippets__invalid_oq3p__directives__pragma3.qasm-lex.snap b/crates/pipeline-tests/tests/snapshots/runner__tests__snippets__invalid_oq3p__directives__pragma3.qasm-lex.snap new file mode 100644 index 0000000..9783037 --- /dev/null +++ b/crates/pipeline-tests/tests/snapshots/runner__tests__snippets__invalid_oq3p__directives__pragma3.qasm-lex.snap @@ -0,0 +1,20 @@ +--- +source: crates/pipeline-tests/tests/runner.rs +expression: lex_snap +--- +id: tests/snippets/invalid_oq3p/directives/pragma3.qasm +expect-lex: Ok +--- source --- + +// "pragam" is an invalid identifier. +int pragma = 1; +--- lexer --- +ok: true +errors: 0 +[0] Whitespace "\n" @0..1 +[1] LineComment "// "pragam" is an invalid identifier." 
@1..38 +[2] Whitespace "\n" @38..39 +[3] Ident "int" @39..42 +[4] Whitespace " " @42..43 +[5] Pragma "pragma = 1;" @43..54 +[6] Whitespace "\n" @54..55 diff --git a/crates/pipeline-tests/tests/snapshots/runner__tests__snippets__invalid_oq3p__directives__pragma3.qasm-parse.snap b/crates/pipeline-tests/tests/snapshots/runner__tests__snippets__invalid_oq3p__directives__pragma3.qasm-parse.snap new file mode 100644 index 0000000..1ad4883 --- /dev/null +++ b/crates/pipeline-tests/tests/snapshots/runner__tests__snippets__invalid_oq3p__directives__pragma3.qasm-parse.snap @@ -0,0 +1,18 @@ +--- +source: crates/pipeline-tests/tests/runner.rs +expression: parse_snap +--- +id: tests/snippets/invalid_oq3p/directives/pragma3.qasm +expect-parse: Diag +--- parser --- +ok: false +panicked: false +errors: 2 +--- ast --- +SOURCE_FILE@0..55: +// "pragam" is an invalid identifier. +int pragma = 1; + +SCALAR_TYPE@39..42: int +NAME@43..43: +PRAGMA_STATEMENT@43..54: pragma = 1; diff --git a/crates/pipeline-tests/tests/snapshots/runner__tests__snippets__valid_oq3p__directives__pragma.qasm-lex.snap b/crates/pipeline-tests/tests/snapshots/runner__tests__snippets__valid_oq3p__directives__pragma.qasm-lex.snap new file mode 100644 index 0000000..2c23311 --- /dev/null +++ b/crates/pipeline-tests/tests/snapshots/runner__tests__snippets__valid_oq3p__directives__pragma.qasm-lex.snap @@ -0,0 +1,18 @@ +--- +source: crates/pipeline-tests/tests/runner.rs +expression: lex_snap +--- +id: tests/snippets/valid_oq3p/directives/pragma.qasm +expect-lex: Ok +--- source --- + +pragma (((9217#@%^^^*!@#$%^ +#pragma ))!~==}{ +--- lexer --- +ok: true +errors: 0 +[0] Whitespace "\n" @0..1 +[1] Pragma "pragma (((9217#@%^^^*!@#$%^" @1..28 +[2] Whitespace "\n" @28..29 +[3] Pragma "#pragma ))!~==}{" @29..45 +[4] Whitespace "\n" @45..46 diff --git a/crates/pipeline-tests/tests/snapshots/runner__tests__snippets__valid_oq3p__directives__pragma.qasm-parse.snap 
b/crates/pipeline-tests/tests/snapshots/runner__tests__snippets__valid_oq3p__directives__pragma.qasm-parse.snap new file mode 100644 index 0000000..b730442 --- /dev/null +++ b/crates/pipeline-tests/tests/snapshots/runner__tests__snippets__valid_oq3p__directives__pragma.qasm-parse.snap @@ -0,0 +1,17 @@ +--- +source: crates/pipeline-tests/tests/runner.rs +expression: parse_snap +--- +id: tests/snippets/valid_oq3p/directives/pragma.qasm +expect-parse: Ok +--- parser --- +ok: true +panicked: false +errors: 0 +--- ast --- +SOURCE_FILE@0..46: +pragma (((9217#@%^^^*!@#$%^ +#pragma ))!~==}{ + +PRAGMA_STATEMENT@1..28: pragma (((9217#@%^^^*!@#$%^ +PRAGMA_STATEMENT@29..45: #pragma ))!~==}{ diff --git a/crates/pipeline-tests/tests/snapshots/runner__tests__snippets__valid_oq3p__directives__pragma.qasm-sema.snap b/crates/pipeline-tests/tests/snapshots/runner__tests__snippets__valid_oq3p__directives__pragma.qasm-sema.snap new file mode 100644 index 0000000..f0dd2a1 --- /dev/null +++ b/crates/pipeline-tests/tests/snapshots/runner__tests__snippets__valid_oq3p__directives__pragma.qasm-sema.snap @@ -0,0 +1,14 @@ +--- +source: crates/pipeline-tests/tests/runner.rs +expression: sema_snap +--- +id: tests/snippets/valid_oq3p/directives/pragma.qasm +expect-sema: Ok +--- sema --- +ok: true +panicked: false +errors: 0 +--- asg --- +Pragma(Pragma { pragma_text: " (((9217#@%^^^*!@#$%^" }) + +Pragma(Pragma { pragma_text: " ))!~==}{" }) diff --git a/crates/pipeline-tests/tests/snippets/invalid_oq3p/directives/pragma1.qasm b/crates/pipeline-tests/tests/snippets/invalid_oq3p/directives/pragma1.qasm new file mode 100644 index 0000000..12d4906 --- /dev/null +++ b/crates/pipeline-tests/tests/snippets/invalid_oq3p/directives/pragma1.qasm @@ -0,0 +1,6 @@ +// lex: diag +// parse: skip +// sema: skip + +// Invalid identifier token +#pragmaa 1 2 3 diff --git a/crates/pipeline-tests/tests/snippets/invalid_oq3p/directives/pragma2.qasm 
b/crates/pipeline-tests/tests/snippets/invalid_oq3p/directives/pragma2.qasm new file mode 100644 index 0000000..56f6d5e --- /dev/null +++ b/crates/pipeline-tests/tests/snippets/invalid_oq3p/directives/pragma2.qasm @@ -0,0 +1,6 @@ +// lex: ok +// parse: diag +// sema: skip + +// pragmaa is an ordinary identifier +pragmaa 1 2 3 diff --git a/crates/pipeline-tests/tests/snippets/invalid_oq3p/directives/pragma3.qasm b/crates/pipeline-tests/tests/snippets/invalid_oq3p/directives/pragma3.qasm new file mode 100644 index 0000000..94e9c93 --- /dev/null +++ b/crates/pipeline-tests/tests/snippets/invalid_oq3p/directives/pragma3.qasm @@ -0,0 +1,6 @@ +// lex: ok +// parse: diag +// sema: skip + +// "pragam" is an invalid identifier. +int pragma = 1; diff --git a/crates/pipeline-tests/tests/snippets/valid_oq3p/directives/pragma.qasm b/crates/pipeline-tests/tests/snippets/valid_oq3p/directives/pragma.qasm new file mode 100644 index 0000000..509c676 --- /dev/null +++ b/crates/pipeline-tests/tests/snippets/valid_oq3p/directives/pragma.qasm @@ -0,0 +1,6 @@ +// lex: ok +// parse: ok +// sema: ok + +pragma (((9217#@%^^^*!@#$%^ +#pragma ))!~==}{