diff --git a/CHANGELOG.md b/CHANGELOG.md index 51f1a07..db7ada1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,57 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.1.6] - 2025-12-26 + +### Added + +- **AST Support in Generated Parsers** + - Generated CST parsers now include `AstNode` type and `parseAst()` method + - Allows parsing directly to AST (without trivia) from generated parsers + +- **Packrat Toggle in Generated Parsers** + - Added `setPackratEnabled(boolean)` method to generated parsers + - Allows disabling memoization at runtime to reduce memory usage for large inputs + +- **Unlimited Action Variable Support** + - Action code now supports unlimited `$N` positional variables (previously limited to `$1-$20`) + - Uses regex-based substitution for flexibility + +### Fixed + +- **Grammar Validation** + - Implemented `Grammar.validate()` to detect undefined rule references + - Recursively walks all expressions and reports first undefined reference with location + - Previously, grammars with typos in rule names would fail at parse time with cryptic errors + +- **Thread Safety in Whitespace Skipping** + - Moved `skippingWhitespace` flag from `PegEngine` (per-instance) to `ParsingContext` (per-parse) + - Fixes potential race conditions when reusing parser instances across threads + +- **Packrat Cache Key Collision Risk** + - Changed cache key from `hashCode()` to unique sequential IDs + - Eliminates theoretical collision bugs with different rule names having same hash + +### Changed + +- **Builder API Naming Standardized** + - `PegParser.Builder` methods renamed for consistency: `withPackrat()` → `packrat()`, `withTrivia()` → `trivia()`, `withErrorRecovery()` → `recovery()` + - Removed duplicate `ParserConfig.Builder` (unused) + +- **Documentation Cleanup** + - 
Removed unimplemented `%word` directive from documentation (it was documented but never implemented) + - Removed unused placeholder `skipWhitespace()` method from `ParsingContext` + +- **Code Simplification** + - Consolidated 3 duplicate expression parsing switch statements into unified `parseExpressionWithMode()` + - Extracted `buildParseError()` helper to eliminate duplicate error message construction + - Removed unused `SemanticValues.choice` field and getter + - Removed unused `SourceLocation.advanceColumn()`/`advanceLine()` methods + - ~120 lines of duplicate code eliminated + +- Test count: 268 → 271 +- Updated pragmatica-lite dependency: 0.8.4 → 0.9.0 + ## [0.1.5] - 2025-12-22 ### Fixed diff --git a/CLAUDE.md b/CLAUDE.md index 9a8e910..b90118f 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -18,7 +18,7 @@ Java implementation of PEG (Parsing Expression Grammar) parser inspired by [cpp- | Tree output | Both CST and AST | CST for formatting/linting, AST for compilers | | Whitespace/comments | Grouped as Trivia nodes | Convenient for tooling | | Error recovery | Configurable (basic/advanced) | Flexibility for different use cases | -| Runtime dependency | `pragmatica-lite:core` 0.8.4 | Result/Option/Promise types | +| Runtime dependency | `pragmatica-lite:core` 0.9.0 | Result/Option/Promise types | ## Compilation Modes @@ -39,10 +39,11 @@ src/main/java/org/pragmatica/peg/ │ └── GrammarParser.java # Recursive descent parser ├── parser/ │ ├── Parser.java # Parser interface -│ ├── ParserConfig.java # Configuration record + builder +│ ├── ParserConfig.java # Configuration record │ ├── ParsingContext.java # Mutable parsing state with packrat cache │ ├── ParseResult.java # Parse result types (sealed) │ ├── ParseResultWithDiagnostics.java # Result with error recovery diagnostics +│ ├── ParseMode.java # Parsing mode (standard, withActions, noWhitespace) │ └── PegEngine.java # PEG parsing engine with action execution ├── tree/ │ ├── SourceLocation.java # Position in source (line, column, 
offset) @@ -69,17 +70,18 @@ src/test/java/org/pragmatica/peg/ ├── GeneratedParserTriviaTest.java # 6 tests (generated parser trivia) ├── ErrorRecoveryTest.java # 8 tests (error recovery + diagnostics) ├── grammar/ -│ └── GrammarParserTest.java # 14 tests for grammar parser +│ └── GrammarParserTest.java # 17 tests for grammar parser ├── generator/ -│ └── ParserGeneratorTest.java # 16 tests for source generation (8 basic + 8 ErrorReporting) +│ └── ParserGeneratorTest.java # 18 tests for source generation └── examples/ ├── ErrorRecoveryExample.java # 12 tests - error recovery patterns ├── CalculatorExample.java # 6 tests - arithmetic with actions ├── JsonParserExample.java # 11 tests - JSON CST parsing ├── SExpressionExample.java # 11 tests - Lisp-like syntax ├── CsvParserExample.java # 8 tests - CSV data format - ├── SourceGenerationExample.java # 9 tests - standalone parser - └── Java25GrammarExample.java # 59 tests - Java 25 syntax + ├── SourceGenerationExample.java # 11 tests - standalone parser + ├── CutOperatorRegressionTest.java # 16 tests - cut operator regression tests + └── Java25GrammarExample.java # 60 tests - Java 25 syntax ``` ## Grammar Syntax (cpp-peglib compatible) @@ -118,7 +120,6 @@ $name # Back-reference # Directives %whitespace <- [ \t\r\n]* # Auto-skip whitespace -%word <- [a-zA-Z]+ # Word boundary detection # Inline actions (Java) Number <- < [0-9]+ > { return sv.toInt(); } @@ -129,7 +130,7 @@ Sum <- Number '+' Number { return (Integer)$1 + (Integer)$2; } ### Completed - [x] Project scaffolded with `jbct init` -- [x] pom.xml updated for Java 25, pragmatica-lite 0.8.4 +- [x] pom.xml updated for Java 25, pragmatica-lite 0.9.0 - [x] Core types implemented - [x] Grammar parser (bootstrap) implemented - [x] Parsing engine with packrat memoization @@ -139,7 +140,7 @@ Sum <- Number '+' Number { return (Integer)$1 + (Integer)$2; } - [x] Advanced error recovery with Rust-style diagnostics - [x] Generated parser ErrorReporting (BASIC/ADVANCED) for 
optional Rust-style diagnostics - [x] Cut operator (^/↑) - commits to current choice, prevents backtracking -- [x] 268 passing tests +- [x] 271 passing tests ### Remaining Work - [ ] Performance optimization @@ -172,8 +173,8 @@ Result result = calculator.parse("3 + 5"); // Returns 8 // Configuration var parser = PegParser.builder(grammar) - .withPackrat(true) - .withTrivia(true) + .packrat(true) + .trivia(true) .build() .unwrap(); @@ -248,7 +249,7 @@ Advanced error recovery with Rust-style diagnostic messages. ### API Usage ```java var parser = PegParser.builder(grammar) - .withErrorRecovery(RecoveryStrategy.ADVANCED) + .recovery(RecoveryStrategy.ADVANCED) .build() .unwrap(); @@ -280,13 +281,14 @@ error: unexpected input ### Recovery Points Parser recovers at: `,`, `;`, `}`, `)`, `]`, newline -## Test Coverage (268 tests) +## Test Coverage (271 tests) -### Grammar Parser Tests (14 tests) +### Grammar Parser Tests (17 tests) - Simple rules, actions, sequences, choices - Lookahead predicates, repetition operators - Token boundaries, whitespace directive - Case-insensitive matching, named captures +- Grammar validation (undefined rule references) ### Parsing Engine Tests (29 tests) - Literals, character classes, negated classes @@ -304,7 +306,7 @@ Parser recovers at: `,`, `;`, `}`, `)`, `]`, newline - List building - No action returns CST node -### Generator Tests (16 tests) +### Generator Tests (18 tests) - Simple literal generates valid Java - Whitespace handling - Action code inclusion @@ -314,14 +316,15 @@ Parser recovers at: `,`, `;`, `}`, `)`, `]`, newline - ErrorReporting.ADVANCED mode (Rust-style diagnostics) - parseWithDiagnostics() method generation -### Example Tests (116 tests) +### Example Tests (135 tests) - **ErrorRecovery** (12 tests): Recovery strategies, diagnostic formatting, CST error nodes - **Calculator** (6 tests): Number parsing, addition, multiplication, boolean/double types - **JSON** (11 tests): CST parsing of JSON values, objects, arrays, 
nested structures - **S-Expression** (11 tests): Lisp-like syntax, nested lists, atoms, symbols - **CSV** (8 tests): Field parsing, empty fields, spaces preserved -- **Source Generation** (9 tests): Standalone parser generation, all operators -- **Java25Grammar** (59 tests): Full Java 25 syntax including modules, var, patterns, text blocks +- **Source Generation** (11 tests): Standalone parser generation, all operators +- **CutOperatorRegression** (16 tests): Cut operator regression tests +- **Java25Grammar** (60 tests): Full Java 25 syntax including modules, var, patterns, text blocks ### Trivia Tests (19 tests) - **TriviaTest** (13 tests): Runtime trivia - leading, trailing, mixed, comments diff --git a/README.md b/README.md index d68792c..dc89047 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ A PEG (Parsing Expression Grammar) parser library for Java, inspired by [cpp-peg org.pragmatica-lite peglib - 0.1.5 + 0.1.6 ``` @@ -128,9 +128,6 @@ $name # Back-reference to captured 'name' ```peg # Auto-skip whitespace between tokens %whitespace <- [ \t\r\n]* - -# Word boundary detection -%word <- [a-zA-Z]+ ``` ### Inline Actions @@ -163,9 +160,9 @@ Note: `$1`, `$2`, etc. 
use 1-based indexing (like regex groups), while `sv.get() ```java var parser = PegParser.builder(grammar) - .withPackrat(true) // Enable memoization (default: true) - .withTrivia(true) // Collect whitespace/comments (default: true) - .withErrorRecovery(RecoveryStrategy.ADVANCED) // Error recovery mode + .packrat(true) // Enable memoization (default: true) + .trivia(true) // Collect whitespace/comments (default: true) + .recovery(RecoveryStrategy.ADVANCED) // Error recovery mode .build() .unwrap(); ``` @@ -176,7 +173,7 @@ Peglib provides advanced error recovery with Rust-style diagnostic messages: ```java var parser = PegParser.builder(grammar) - .withErrorRecovery(RecoveryStrategy.ADVANCED) + .recovery(RecoveryStrategy.ADVANCED) .build() .unwrap(); @@ -330,7 +327,7 @@ public sealed interface CstNode { ```bash mvn compile # Compile -mvn test # Run tests (268 tests) +mvn test # Run tests (271 tests) mvn verify # Full verification ``` diff --git a/docs/ERROR_RECOVERY.md b/docs/ERROR_RECOVERY.md index 7657169..fcfaba9 100644 --- a/docs/ERROR_RECOVERY.md +++ b/docs/ERROR_RECOVERY.md @@ -32,7 +32,7 @@ Peglib supports three recovery strategies: ```java var parser = PegParser.builder(grammar) - .withErrorRecovery(RecoveryStrategy.NONE) + .recovery(RecoveryStrategy.NONE) .build() .unwrap(); ``` @@ -45,7 +45,7 @@ var parser = PegParser.builder(grammar) ```java var parser = PegParser.builder(grammar) - .withErrorRecovery(RecoveryStrategy.BASIC) + .recovery(RecoveryStrategy.BASIC) .build() .unwrap(); ``` @@ -58,7 +58,7 @@ var parser = PegParser.builder(grammar) ```java var parser = PegParser.builder(grammar) - .withErrorRecovery(RecoveryStrategy.ADVANCED) + .recovery(RecoveryStrategy.ADVANCED) .build() .unwrap(); ``` @@ -74,7 +74,7 @@ var parser = PegParser.builder(grammar) ```java var parser = PegParser.builder(grammar) - .withErrorRecovery(RecoveryStrategy.ADVANCED) + .recovery(RecoveryStrategy.ADVANCED) .build() .unwrap(); @@ -406,13 +406,13 @@ List suggestFixes(Diagnostic 
d) { ```java // For validation (fast fail) -.withErrorRecovery(RecoveryStrategy.NONE) +.recovery(RecoveryStrategy.NONE) // For CLI tools (single error) -.withErrorRecovery(RecoveryStrategy.BASIC) +.recovery(RecoveryStrategy.BASIC) // For IDEs/editors (all errors) -.withErrorRecovery(RecoveryStrategy.ADVANCED) +.recovery(RecoveryStrategy.ADVANCED) ``` ### 2. Provide Helpful Messages @@ -483,7 +483,7 @@ var grammar = """ """; var parser = PegParser.builder(grammar) - .withErrorRecovery(RecoveryStrategy.ADVANCED) + .recovery(RecoveryStrategy.ADVANCED) .build() .unwrap(); @@ -525,7 +525,7 @@ var grammar = """ """; var parser = PegParser.builder(grammar) - .withErrorRecovery(RecoveryStrategy.ADVANCED) + .recovery(RecoveryStrategy.ADVANCED) .build() .unwrap(); @@ -553,7 +553,7 @@ var grammar = """ """; var parser = PegParser.builder(grammar) - .withErrorRecovery(RecoveryStrategy.ADVANCED) + .recovery(RecoveryStrategy.ADVANCED) .build() .unwrap(); diff --git a/pom.xml b/pom.xml index 038754d..f7da4eb 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ org.pragmatica-lite peglib - 0.1.5 + 0.1.6 jar Peglib @@ -38,7 +38,7 @@ UTF-8 25 - 0.8.4 + 0.9.0 diff --git a/src/main/java/org/pragmatica/peg/PegParser.java b/src/main/java/org/pragmatica/peg/PegParser.java index c07ca88..cf7fc6e 100644 --- a/src/main/java/org/pragmatica/peg/PegParser.java +++ b/src/main/java/org/pragmatica/peg/PegParser.java @@ -131,17 +131,17 @@ private Builder(String grammarText) { this.grammarText = grammarText; } - public Builder withPackrat(boolean enabled) { + public Builder packrat(boolean enabled) { this.packratEnabled = enabled; return this; } - public Builder withErrorRecovery(RecoveryStrategy strategy) { + public Builder recovery(RecoveryStrategy strategy) { this.recoveryStrategy = strategy; return this; } - public Builder withTrivia(boolean capture) { + public Builder trivia(boolean capture) { this.captureTrivia = capture; return this; } diff --git 
a/src/main/java/org/pragmatica/peg/action/ActionCompiler.java b/src/main/java/org/pragmatica/peg/action/ActionCompiler.java index 781d955..d18f2f1 100644 --- a/src/main/java/org/pragmatica/peg/action/ActionCompiler.java +++ b/src/main/java/org/pragmatica/peg/action/ActionCompiler.java @@ -16,6 +16,7 @@ import java.util.List; import java.util.Map; import java.util.concurrent.atomic.AtomicInteger; +import java.util.regex.Pattern; /** * Compiles inline Java actions from grammar rules. @@ -91,14 +92,17 @@ public Result compileActionCode(String ruleName, String actionCode, Sour return compileAndLoad(fullClassName, sourceCode, location); } + private static final Pattern POSITIONAL_VAR = Pattern.compile("\\$(\\d+)"); + private String transformActionCode(String code) { // Replace $0 with sv.token() var result = code.replace("$0", "sv.token()"); - // Replace $1, $2, ... with sv.get(0), sv.get(1), ... - for (int i = 1; i <= 20; i++) { - result = result.replace("$" + i, "sv.get(" + (i - 1) + ")"); - } + // Replace $N (N > 0) with sv.get(N-1) using regex for unlimited support + result = POSITIONAL_VAR.matcher(result).replaceAll(match -> { + int n = Integer.parseInt(match.group(1)); + return n == 0 ? "sv.token()" : "sv.get(" + (n - 1) + ")"; + }); return result; } diff --git a/src/main/java/org/pragmatica/peg/action/SemanticValues.java b/src/main/java/org/pragmatica/peg/action/SemanticValues.java index 19f2197..b04a605 100644 --- a/src/main/java/org/pragmatica/peg/action/SemanticValues.java +++ b/src/main/java/org/pragmatica/peg/action/SemanticValues.java @@ -7,28 +7,22 @@ /** * Semantic values passed to actions during parsing. - * Provides access to matched text, child values, and choice information. + * Provides access to matched text and child values. 
*/ public final class SemanticValues { private final String matchedText; private final SourceSpan span; private final List values; - private final int choice; - private SemanticValues(String matchedText, SourceSpan span, List values, int choice) { + private SemanticValues(String matchedText, SourceSpan span, List values) { this.matchedText = matchedText; this.span = span; this.values = values; - this.choice = choice; } public static SemanticValues of(String matchedText, SourceSpan span, List values) { - return new SemanticValues(matchedText, span, values, 0); - } - - public static SemanticValues of(String matchedText, SourceSpan span, List values, int choice) { - return new SemanticValues(matchedText, span, values, choice); + return new SemanticValues(matchedText, span, values); } /** @@ -127,14 +121,6 @@ public List values() { return values; } - /** - * Get which alternative matched (for choice expressions). - * 0-indexed. - */ - public int choice() { - return choice; - } - /** * Transform all child values to a specific type. */ @@ -147,6 +133,6 @@ public List transform() { @Override public String toString() { - return "SemanticValues{token='" + matchedText + "', values=" + values + ", choice=" + choice + "}"; + return "SemanticValues{token='" + matchedText + "', values=" + values + "}"; } } diff --git a/src/main/java/org/pragmatica/peg/generator/ParserGenerator.java b/src/main/java/org/pragmatica/peg/generator/ParserGenerator.java index 037a73a..553a721 100644 --- a/src/main/java/org/pragmatica/peg/generator/ParserGenerator.java +++ b/src/main/java/org/pragmatica/peg/generator/ParserGenerator.java @@ -193,13 +193,22 @@ private void generateParseContext(StringBuilder sb) { private int column; private Map cache; private Map captures; + private boolean packratEnabled = true; + + /** + * Enable or disable packrat memoization. + * Disabling may reduce memory usage for large inputs. 
+ */ + public void setPackratEnabled(boolean enabled) { + this.packratEnabled = enabled; + } private void init(String input) { this.input = input; this.pos = 0; this.line = 1; this.column = 1; - this.cache = new HashMap<>(); + this.cache = packratEnabled ? new HashMap<>() : null; this.captures = new HashMap<>(); } @@ -282,12 +291,14 @@ private void generateRuleMethod(StringBuilder sb, Rule rule, int ruleId) { sb.append(" \n"); sb.append(" // Check cache\n"); sb.append(" long key = cacheKey(").append(ruleId).append(", startPos);\n"); - sb.append(" var cached = cache.get(key);\n"); - sb.append(" if (cached != null) {\n"); - sb.append(" pos = cached.endPos;\n"); - sb.append(" line = cached.endLine;\n"); - sb.append(" column = cached.endColumn;\n"); - sb.append(" return cached;\n"); + sb.append(" if (cache != null) {\n"); + sb.append(" var cached = cache.get(key);\n"); + sb.append(" if (cached != null) {\n"); + sb.append(" pos = cached.endPos;\n"); + sb.append(" line = cached.endLine;\n"); + sb.append(" column = cached.endColumn;\n"); + sb.append(" return cached;\n"); + sb.append(" }\n"); sb.append(" }\n"); sb.append(" \n"); sb.append(" skipWhitespace();\n"); @@ -323,7 +334,7 @@ private void generateRuleMethod(StringBuilder sb, Rule rule, int ruleId) { } sb.append(" }\n"); sb.append(" \n"); - sb.append(" cache.put(key, result);\n"); + sb.append(" if (cache != null) cache.put(key, result);\n"); sb.append(" return result;\n"); sb.append(" }\n\n"); } @@ -738,12 +749,15 @@ private String escape(String s) { .replace("\t", "\\t"); } + private static final java.util.regex.Pattern POSITIONAL_VAR = java.util.regex.Pattern.compile("\\$(\\d+)"); + private String transformActionCode(String code) { - var result = code.replace("$0", "$0"); - for (int i = 1; i <= 20; i++) { - result = result.replace("$" + i, "values.get(" + (i - 1) + ")"); - } - return result; + // $0 stays as $0 (it's the matched text, handled separately) + // Replace $N (N > 0) with values.get(N-1) using regex for 
unlimited support + return POSITIONAL_VAR.matcher(code).replaceAll(match -> { + int n = Integer.parseInt(match.group(1)); + return n == 0 ? "\\$0" : "values.get(" + (n - 1) + ")"; + }); } private String wrapActionCode(String code) { @@ -934,6 +948,14 @@ record Error(SourceSpan span, String skippedText, String expected, sb.append(""" } + public sealed interface AstNode { + SourceSpan span(); + String rule(); + + record Terminal(SourceSpan span, String rule, String text) implements AstNode {} + record NonTerminal(SourceSpan span, String rule, List children) implements AstNode {} + } + public record ParseError(SourceLocation location, String reason) implements Cause { @Override public String message() { @@ -1205,6 +1227,15 @@ private void generateCstParseContext(StringBuilder sb) { private Map cache; private Map captures; private boolean inTokenBoundary; + private boolean packratEnabled = true; + + /** + * Enable or disable packrat memoization. + * Disabling may reduce memory usage for large inputs. + */ + public void setPackratEnabled(boolean enabled) { + this.packratEnabled = enabled; + } """); if (errorReporting == ErrorReporting.ADVANCED) { @@ -1222,7 +1253,7 @@ private void init(String input) { this.pos = 0; this.line = 1; this.column = 1; - this.cache = new HashMap<>(); + this.cache = packratEnabled ? new HashMap<>() : null; this.captures = new HashMap<>(); this.inTokenBoundary = false; """); @@ -1343,6 +1374,26 @@ public Result parse(String input) { return Result.success(rootNode); } + /** + * Parse input and return AST (Abstract Syntax Tree). + * The AST is a simplified tree without trivia (whitespace/comments). 
+ */ + public Result parseAst(String input) { + return parse(input).map(this::toAst); + } + + private AstNode toAst(CstNode cst) { + return switch (cst) { + case CstNode.Terminal t -> new AstNode.Terminal(t.span(), t.rule().name(), t.text()); + case CstNode.Token tok -> new AstNode.Terminal(tok.span(), tok.rule().name(), tok.text()); + case CstNode.NonTerminal nt -> new AstNode.NonTerminal( + nt.span(), nt.rule().name(), + nt.children().stream().map(this::toAst).toList() + ); + default -> new AstNode.Terminal(cst.span(), "error", ""); + }; + } + """.formatted(sanitizedName)); if (errorReporting == ErrorReporting.ADVANCED) { @@ -1416,10 +1467,12 @@ private void generateCstRuleMethod(StringBuilder sb, Rule rule, int ruleId) { sb.append(" \n"); sb.append(" // Check cache\n"); sb.append(" long key = cacheKey(").append(ruleId).append(", startLoc.offset());\n"); - sb.append(" var cached = cache.get(key);\n"); - sb.append(" if (cached != null) {\n"); - sb.append(" if (cached.isSuccess()) restoreLocation(cached.endLocation);\n"); - sb.append(" return cached;\n"); + sb.append(" if (cache != null) {\n"); + sb.append(" var cached = cache.get(key);\n"); + sb.append(" if (cached != null) {\n"); + sb.append(" if (cached.isSuccess()) restoreLocation(cached.endLocation);\n"); + sb.append(" return cached;\n"); + sb.append(" }\n"); sb.append(" }\n"); sb.append(" \n"); sb.append(" var children = new ArrayList();\n"); @@ -1454,7 +1507,7 @@ private void generateCstRuleMethod(StringBuilder sb, Rule rule, int ruleId) { } sb.append(" }\n"); sb.append(" \n"); - sb.append(" cache.put(key, finalResult);\n"); + sb.append(" if (cache != null) cache.put(key, finalResult);\n"); sb.append(" return finalResult;\n"); sb.append(" }\n\n"); } diff --git a/src/main/java/org/pragmatica/peg/grammar/Grammar.java b/src/main/java/org/pragmatica/peg/grammar/Grammar.java index 124dfb5..849aef5 100644 --- a/src/main/java/org/pragmatica/peg/grammar/Grammar.java +++ 
b/src/main/java/org/pragmatica/peg/grammar/Grammar.java @@ -52,7 +52,60 @@ public Map ruleMap() { */ public Result validate() { var ruleNames = rules.stream().map(Rule::name).collect(Collectors.toSet()); - // TODO: Walk expressions and check all References exist in ruleNames + + for (var rule : rules) { + var undefinedRef = findUndefinedReference(rule.expression(), ruleNames); + if (undefinedRef.isPresent()) { + var ref = undefinedRef.unwrap(); + return Result.failure(new ParseError.SemanticError( + ref.span().start(), + "Undefined rule reference: '" + ref.ruleName() + "'" + )); + } + } return Result.success(this); } + + /** + * Recursively find the first undefined rule reference in an expression. + */ + private Option findUndefinedReference(Expression expr, java.util.Set ruleNames) { + return switch (expr) { + case Expression.Reference ref -> ruleNames.contains(ref.ruleName()) + ? Option.none() + : Option.some(ref); + + case Expression.Sequence seq -> seq.elements().stream() + .map(e -> findUndefinedReference(e, ruleNames)) + .filter(Option::isPresent) + .findFirst() + .orElse(Option.none()); + + case Expression.Choice choice -> choice.alternatives().stream() + .map(e -> findUndefinedReference(e, ruleNames)) + .filter(Option::isPresent) + .findFirst() + .orElse(Option.none()); + + case Expression.ZeroOrMore zom -> findUndefinedReference(zom.expression(), ruleNames); + case Expression.OneOrMore oom -> findUndefinedReference(oom.expression(), ruleNames); + case Expression.Optional opt -> findUndefinedReference(opt.expression(), ruleNames); + case Expression.Repetition rep -> findUndefinedReference(rep.expression(), ruleNames); + case Expression.And and -> findUndefinedReference(and.expression(), ruleNames); + case Expression.Not not -> findUndefinedReference(not.expression(), ruleNames); + case Expression.TokenBoundary tb -> findUndefinedReference(tb.expression(), ruleNames); + case Expression.Ignore ign -> findUndefinedReference(ign.expression(), ruleNames); + case 
Expression.Capture cap -> findUndefinedReference(cap.expression(), ruleNames); + case Expression.CaptureScope cs -> findUndefinedReference(cs.expression(), ruleNames); + case Expression.Group grp -> findUndefinedReference(grp.expression(), ruleNames); + + // Terminals - no nested expressions + case Expression.Literal _ -> Option.none(); + case Expression.CharClass _ -> Option.none(); + case Expression.Any _ -> Option.none(); + case Expression.BackReference _ -> Option.none(); + case Expression.Dictionary _ -> Option.none(); + case Expression.Cut _ -> Option.none(); + }; + } } diff --git a/src/main/java/org/pragmatica/peg/parser/ParserConfig.java b/src/main/java/org/pragmatica/peg/parser/ParserConfig.java index 6d855c2..5b60e70 100644 --- a/src/main/java/org/pragmatica/peg/parser/ParserConfig.java +++ b/src/main/java/org/pragmatica/peg/parser/ParserConfig.java @@ -15,35 +15,4 @@ public record ParserConfig( RecoveryStrategy.BASIC, true ); - - public static Builder builder() { - return new Builder(); - } - - public static final class Builder { - private boolean packratEnabled = true; - private RecoveryStrategy recoveryStrategy = RecoveryStrategy.BASIC; - private boolean captureTrivia = true; - - private Builder() {} - - public Builder packrat(boolean enabled) { - this.packratEnabled = enabled; - return this; - } - - public Builder recovery(RecoveryStrategy strategy) { - this.recoveryStrategy = strategy; - return this; - } - - public Builder captureTrivia(boolean capture) { - this.captureTrivia = capture; - return this; - } - - public ParserConfig build() { - return new ParserConfig(packratEnabled, recoveryStrategy, captureTrivia); - } - } } diff --git a/src/main/java/org/pragmatica/peg/parser/ParsingContext.java b/src/main/java/org/pragmatica/peg/parser/ParsingContext.java index 4008440..22650ad 100644 --- a/src/main/java/org/pragmatica/peg/parser/ParsingContext.java +++ b/src/main/java/org/pragmatica/peg/parser/ParsingContext.java @@ -22,6 +22,7 @@ public final class 
ParsingContext { private final Grammar grammar; private final ParserConfig config; private final Map packratCache; + private final Map ruleIds; private final Map captures; private int pos; @@ -38,11 +39,15 @@ public final class ParsingContext { private boolean inRecovery; private int recoveryStartPos; + // Whitespace skipping guard (prevents recursive whitespace parsing) + private boolean skippingWhitespace; + private ParsingContext(String input, Grammar grammar, ParserConfig config) { this.input = input; this.grammar = grammar; this.config = config; this.packratCache = config.packratEnabled() ? new HashMap<>() : null; + this.ruleIds = config.packratEnabled() ? new HashMap<>() : null; this.captures = new HashMap<>(); this.diagnostics = new ArrayList<>(); this.pos = 0; @@ -271,16 +276,18 @@ public boolean inTokenBoundary() { return tokenBoundaryDepth > 0; } - // === Whitespace Handling === + // === Whitespace Skipping Guard === - public List skipWhitespace() { - var trivia = new ArrayList(); - if (grammar.whitespace().isEmpty()) { - return trivia; - } - // Whitespace skipping is handled by the engine using the %whitespace rule - // This is a placeholder - actual implementation in PegEngine - return trivia; + public boolean isSkippingWhitespace() { + return skippingWhitespace; + } + + public void enterWhitespaceSkip() { + skippingWhitespace = true; + } + + public void exitWhitespaceSkip() { + skippingWhitespace = false; } // === Captures (for back-references) === @@ -338,7 +345,8 @@ public void cacheAt(String ruleName, int position, ParseResult result) { } private long packratKey(String ruleName, int position) { - return ((long) ruleName.hashCode() << 32) | position; + int ruleId = ruleIds.computeIfAbsent(ruleName, k -> ruleIds.size()); + return ((long) ruleId << 32) | (position & 0xFFFFFFFFL); } // === Accessors === diff --git a/src/main/java/org/pragmatica/peg/parser/PegEngine.java b/src/main/java/org/pragmatica/peg/parser/PegEngine.java index d50385d..4419dd3 
100644 --- a/src/main/java/org/pragmatica/peg/parser/PegEngine.java +++ b/src/main/java/org/pragmatica/peg/parser/PegEngine.java @@ -74,33 +74,7 @@ public Result parseCst(String input, String startRule) { var result = parseRule(ctx, ruleOpt.unwrap()); if (result.isFailure()) { - // Extract failure info from either Failure or CutFailure - var failureLoc = switch (result) { - case ParseResult.Failure f -> f.location(); - case ParseResult.CutFailure cf -> cf.location(); - default -> ctx.location(); - }; - var failureExpected = switch (result) { - case ParseResult.Failure f -> f.expected(); - case ParseResult.CutFailure cf -> cf.expected(); - default -> "unknown"; - }; - // Use furthest location for better error position after backtracking - var furthestLoc = ctx.furthestLocation(); - var found = ctx.furthestPos() >= input.length() - ? "end of input" - : String.valueOf(input.charAt(ctx.furthestPos())); - // Prefer custom error message from failure, fall back to furthest expected - var expected = !failureExpected.startsWith("'") && !failureExpected.startsWith("[") - && !failureExpected.startsWith("rule ") && !failureExpected.equals("any character") - && !failureExpected.startsWith("not ") && !failureExpected.startsWith("one of") - ? 
failureExpected // Custom error message - : ctx.furthestExpected(); - return Result.failure(new ParseError.UnexpectedInput( - furthestLoc, - found, - expected - )); + return Result.failure(buildParseError(result, ctx, input)); } // Capture trailing trivia @@ -157,28 +131,7 @@ public Result parse(String input, String startRule) { var result = parseRuleWithActions(ctx, ruleOpt.unwrap()); if (result.isFailure()) { - // Extract failure info from either Failure or CutFailure - var failureExpected = switch (result) { - case ParseResult.Failure f -> f.expected(); - case ParseResult.CutFailure cf -> cf.expected(); - default -> "unknown"; - }; - // Use furthest location for better error position after backtracking - var furthestLoc = ctx.furthestLocation(); - var found = ctx.furthestPos() >= input.length() - ? "end of input" - : String.valueOf(input.charAt(ctx.furthestPos())); - // Prefer custom error message from failure, fall back to furthest expected - var expected = !failureExpected.startsWith("'") && !failureExpected.startsWith("[") - && !failureExpected.startsWith("rule ") && !failureExpected.equals("any character") - && !failureExpected.startsWith("not ") && !failureExpected.startsWith("one of") - ? 
failureExpected // Custom error message - : ctx.furthestExpected(); - return Result.failure(new ParseError.UnexpectedInput( - furthestLoc, - found, - expected - )); + return Result.failure(buildParseError(result, ctx, input)); } // Skip trailing whitespace before checking end @@ -337,7 +290,7 @@ private ParseResult parseRule(ParsingContext ctx, Rule rule) { // Skip leading whitespace var leadingTrivia = skipWhitespace(ctx); - var result = parseExpression(ctx, rule.expression(), rule.name()); + var result = parseExpressionWithMode(ctx, rule.expression(), rule.name(), ParseMode.standard()); // Cache the result at START position ctx.cacheAt(rule.name(), startPos, result); @@ -370,7 +323,7 @@ private ParseResult parseRuleWithActions(ParsingContext ctx, Rule rule) { var childValues = new ArrayList(); var tokenCapture = new String[1]; // Holder for token boundary capture - var result = parseExpressionWithActions(ctx, rule.expression(), rule.name(), childValues, tokenCapture); + var result = parseExpressionWithMode(ctx, rule.expression(), rule.name(), ParseMode.withActions(childValues, tokenCapture)); if (result.isFailure()) { ctx.restoreLocation(startLoc); @@ -408,35 +361,6 @@ private ParseResult parseRuleWithActions(ParsingContext ctx, Rule rule) { return ParseResult.Success.of(node, ctx.location()); } - /** - * Parse expression collecting semantic values from child rules. 
- */ - private ParseResult parseExpressionWithActions(ParsingContext ctx, Expression expr, - String ruleName, List values, String[] tokenCapture) { - return switch (expr) { - case Expression.Literal lit -> parseLiteral(ctx, lit); - case Expression.CharClass cc -> parseCharClass(ctx, cc); - case Expression.Any any -> parseAny(ctx, any); - case Expression.Reference ref -> parseReferenceWithActions(ctx, ref, values); - case Expression.Sequence seq -> parseSequenceWithMode(ctx, seq, ruleName, ParseMode.withActions(values, tokenCapture)); - case Expression.Choice choice -> parseChoiceWithMode(ctx, choice, ruleName, ParseMode.withActions(values, tokenCapture)); - case Expression.ZeroOrMore zom -> parseZeroOrMoreWithMode(ctx, zom, ruleName, ParseMode.withActions(values, tokenCapture)); - case Expression.OneOrMore oom -> parseOneOrMoreWithMode(ctx, oom, ruleName, ParseMode.withActions(values, tokenCapture)); - case Expression.Optional opt -> parseOptionalWithMode(ctx, opt, ruleName, ParseMode.withActions(values, tokenCapture)); - case Expression.Repetition rep -> parseRepetitionWithMode(ctx, rep, ruleName, ParseMode.withActions(values, tokenCapture)); - case Expression.And and -> parseAnd(ctx, and, ruleName); - case Expression.Not not -> parseNot(ctx, not, ruleName); - case Expression.TokenBoundary tb -> parseTokenBoundaryWithActions(ctx, tb, ruleName, values, tokenCapture); - case Expression.Ignore ign -> parseIgnore(ctx, ign, ruleName); - case Expression.Capture cap -> parseCaptureWithActions(ctx, cap, ruleName, values, tokenCapture); - case Expression.CaptureScope cs -> parseCaptureScopeWithActions(ctx, cs, ruleName, values, tokenCapture); - case Expression.Dictionary dict -> parseDictionary(ctx, dict); - case Expression.BackReference br -> parseBackReference(ctx, br); - case Expression.Cut cut -> parseCut(ctx, cut); - case Expression.Group grp -> parseExpressionWithActions(ctx, grp.expression(), ruleName, values, tokenCapture); - }; - } - private ParseResult 
parseReferenceWithActions(ParsingContext ctx, Expression.Reference ref, List values) { var ruleOpt = grammar.rule(ref.ruleName()); if (ruleOpt.isEmpty()) { @@ -459,7 +383,7 @@ private ParseResult parseTokenBoundaryWithActions(ParsingContext ctx, Expression try { // Token boundary inner expressions don't propagate token capture var innerTokenCapture = new String[1]; - var result = parseExpressionWithActions(ctx, tb.expression(), ruleName, values, innerTokenCapture); + var result = parseExpressionWithMode(ctx, tb.expression(), ruleName, ParseMode.withActions(values, innerTokenCapture)); if (result.isFailure()) { return result; } @@ -479,7 +403,7 @@ private ParseResult parseTokenBoundaryWithActions(ParsingContext ctx, Expression private ParseResult parseCaptureWithActions(ParsingContext ctx, Expression.Capture cap, String ruleName, List values, String[] tokenCapture) { var startPos = ctx.pos(); - var result = parseExpressionWithActions(ctx, cap.expression(), ruleName, values, tokenCapture); + var result = parseExpressionWithMode(ctx, cap.expression(), ruleName, ParseMode.withActions(values, tokenCapture)); if (result.isSuccess()) { var text = ctx.substring(startPos, ctx.pos()); @@ -495,38 +419,11 @@ private ParseResult parseCaptureWithActions(ParsingContext ctx, Expression.Captu private ParseResult parseCaptureScopeWithActions(ParsingContext ctx, Expression.CaptureScope cs, String ruleName, List values, String[] tokenCapture) { var savedCaptures = ctx.saveCaptures(); - var result = parseExpressionWithActions(ctx, cs.expression(), ruleName, values, tokenCapture); + var result = parseExpressionWithMode(ctx, cs.expression(), ruleName, ParseMode.withActions(values, tokenCapture)); ctx.restoreCaptures(savedCaptures); return result; } - // === Expression Parsing === - - private ParseResult parseExpression(ParsingContext ctx, Expression expr, String ruleName) { - return switch (expr) { - case Expression.Literal lit -> parseLiteral(ctx, lit); - case Expression.CharClass cc -> 
parseCharClass(ctx, cc); - case Expression.Any any -> parseAny(ctx, any); - case Expression.Reference ref -> parseReference(ctx, ref); - case Expression.Sequence seq -> parseSequenceWithMode(ctx, seq, ruleName, ParseMode.standard()); - case Expression.Choice choice -> parseChoiceWithMode(ctx, choice, ruleName, ParseMode.standard()); - case Expression.ZeroOrMore zom -> parseZeroOrMoreWithMode(ctx, zom, ruleName, ParseMode.standard()); - case Expression.OneOrMore oom -> parseOneOrMoreWithMode(ctx, oom, ruleName, ParseMode.standard()); - case Expression.Optional opt -> parseOptionalWithMode(ctx, opt, ruleName, ParseMode.standard()); - case Expression.Repetition rep -> parseRepetitionWithMode(ctx, rep, ruleName, ParseMode.standard()); - case Expression.And and -> parseAnd(ctx, and, ruleName); - case Expression.Not not -> parseNot(ctx, not, ruleName); - case Expression.TokenBoundary tb -> parseTokenBoundary(ctx, tb, ruleName); - case Expression.Ignore ign -> parseIgnore(ctx, ign, ruleName); - case Expression.Capture cap -> parseCapture(ctx, cap, ruleName); - case Expression.CaptureScope cs -> parseCaptureScope(ctx, cs, ruleName); - case Expression.Dictionary dict -> parseDictionary(ctx, dict); - case Expression.BackReference br -> parseBackReference(ctx, br); - case Expression.Cut cut -> parseCut(ctx, cut); - case Expression.Group grp -> parseExpression(ctx, grp.expression(), ruleName); - }; - } - // === Terminal Parsers === private ParseResult parseLiteral(ParsingContext ctx, Expression.Literal lit) { @@ -762,7 +659,7 @@ private boolean isPredicate(Expression expr) { private ParseResult parseAnd(ParsingContext ctx, Expression.And and, String ruleName) { var startLoc = ctx.location(); - var result = parseExpression(ctx, and.expression(), ruleName); + var result = parseExpressionWithMode(ctx, and.expression(), ruleName, ParseMode.standard()); ctx.restoreLocation(startLoc); // Always restore - predicates don't consume if (result.isSuccess()) { @@ -773,7 +670,7 @@ private 
ParseResult parseAnd(ParsingContext ctx, Expression.And and, String rule private ParseResult parseNot(ParsingContext ctx, Expression.Not not, String ruleName) { var startLoc = ctx.location(); - var result = parseExpression(ctx, not.expression(), ruleName); + var result = parseExpressionWithMode(ctx, not.expression(), ruleName, ParseMode.standard()); ctx.restoreLocation(startLoc); // Always restore - predicates don't consume if (result.isSuccess()) { @@ -791,7 +688,7 @@ private ParseResult parseTokenBoundary(ParsingContext ctx, Expression.TokenBound // Disable whitespace skipping inside token boundary ctx.enterTokenBoundary(); try { - var result = parseExpression(ctx, tb.expression(), ruleName); + var result = parseExpressionWithMode(ctx, tb.expression(), ruleName, ParseMode.standard()); if (result.isFailure()) { return result; } @@ -810,7 +707,7 @@ private ParseResult parseIgnore(ParsingContext ctx, Expression.Ignore ign, Strin var startLoc = ctx.location(); var startPos = ctx.pos(); - var result = parseExpression(ctx, ign.expression(), ruleName); + var result = parseExpressionWithMode(ctx, ign.expression(), ruleName, ParseMode.standard()); if (result.isFailure()) { return result; } @@ -821,7 +718,7 @@ private ParseResult parseIgnore(ParsingContext ctx, Expression.Ignore ign, Strin private ParseResult parseCapture(ParsingContext ctx, Expression.Capture cap, String ruleName) { var startPos = ctx.pos(); - var result = parseExpression(ctx, cap.expression(), ruleName); + var result = parseExpressionWithMode(ctx, cap.expression(), ruleName, ParseMode.standard()); if (result.isSuccess()) { var text = ctx.substring(startPos, ctx.pos()); @@ -836,7 +733,7 @@ private ParseResult parseCapture(ParsingContext ctx, Expression.Capture cap, Str */ private ParseResult parseCaptureScope(ParsingContext ctx, Expression.CaptureScope cs, String ruleName) { var savedCaptures = ctx.saveCaptures(); - var result = parseExpression(ctx, cs.expression(), ruleName); + var result = 
parseExpressionWithMode(ctx, cs.expression(), ruleName, ParseMode.standard()); ctx.restoreCaptures(savedCaptures); return result; } @@ -875,17 +772,14 @@ private ParseResult parseCut(ParsingContext ctx, Expression.Cut cut) { // === Helpers === - // Flag to prevent recursive whitespace skipping - private boolean skippingWhitespace = false; - private List skipWhitespace(ParsingContext ctx) { var trivia = new ArrayList(); // Don't skip whitespace inside token boundaries or during whitespace parsing - if (grammar.whitespace().isEmpty() || skippingWhitespace || ctx.inTokenBoundary()) { + if (grammar.whitespace().isEmpty() || ctx.isSkippingWhitespace() || ctx.inTokenBoundary()) { return trivia; } - skippingWhitespace = true; + ctx.enterWhitespaceSkip(); try { var wsExpr = grammar.whitespace().unwrap(); // Extract inner expression from ZeroOrMore/OneOrMore to match one element at a time @@ -894,7 +788,7 @@ private List skipWhitespace(ParsingContext ctx) { while (!ctx.isAtEnd()) { var startLoc = ctx.location(); var startPos = ctx.pos(); - var result = parseExpressionNoWs(ctx, innerExpr, "%whitespace"); + var result = parseExpressionWithMode(ctx, innerExpr, "%whitespace", ParseMode.noWhitespace()); if (result.isFailure() || ctx.pos() == startPos) { break; } @@ -906,7 +800,7 @@ private List skipWhitespace(ParsingContext ctx) { } } } finally { - skippingWhitespace = false; + ctx.exitWhitespaceSkip(); } return trivia; } @@ -924,6 +818,29 @@ private Expression extractInnerExpression(Expression expr) { }; } + /** + * Build parse error from failure result with accurate position information. 
+ */ + private ParseError buildParseError(ParseResult result, ParsingContext ctx, String input) { + var failureExpected = switch (result) { + case ParseResult.Failure f -> f.expected(); + case ParseResult.CutFailure cf -> cf.expected(); + default -> "unknown"; + }; + // Use furthest location for better error position after backtracking + var furthestLoc = ctx.furthestLocation(); + var found = ctx.furthestPos() >= input.length() + ? "end of input" + : String.valueOf(input.charAt(ctx.furthestPos())); + // Prefer custom error message from failure, fall back to furthest expected + var expected = !failureExpected.startsWith("'") && !failureExpected.startsWith("[") + && !failureExpected.startsWith("rule ") && !failureExpected.equals("any character") + && !failureExpected.startsWith("not ") && !failureExpected.startsWith("one of") + ? failureExpected // Custom error message + : ctx.furthestExpected(); + return new ParseError.UnexpectedInput(furthestLoc, found, expected); + } + /** * Classify trivia based on its content. 
*/ @@ -937,32 +854,6 @@ private Trivia classifyTrivia(SourceSpan span, String text) { } } - // Parse expression without whitespace skipping (for whitespace rule itself) - private ParseResult parseExpressionNoWs(ParsingContext ctx, Expression expr, String ruleName) { - return switch (expr) { - case Expression.Literal lit -> parseLiteral(ctx, lit); - case Expression.CharClass cc -> parseCharClass(ctx, cc); - case Expression.Any any -> parseAny(ctx, any); - case Expression.Reference ref -> parseReference(ctx, ref); - case Expression.Sequence seq -> parseSequenceWithMode(ctx, seq, ruleName, ParseMode.noWhitespace()); - case Expression.Choice choice -> parseChoiceWithMode(ctx, choice, ruleName, ParseMode.noWhitespace()); - case Expression.ZeroOrMore zom -> parseZeroOrMoreWithMode(ctx, zom, ruleName, ParseMode.noWhitespace()); - case Expression.OneOrMore oom -> parseOneOrMoreWithMode(ctx, oom, ruleName, ParseMode.noWhitespace()); - case Expression.Optional opt -> parseOptionalWithMode(ctx, opt, ruleName, ParseMode.noWhitespace()); - case Expression.Repetition rep -> parseRepetitionWithMode(ctx, rep, ruleName, ParseMode.noWhitespace()); - case Expression.And and -> parseAnd(ctx, and, ruleName); - case Expression.Not not -> parseNot(ctx, not, ruleName); - case Expression.TokenBoundary tb -> parseTokenBoundary(ctx, tb, ruleName); - case Expression.Ignore ign -> parseIgnore(ctx, ign, ruleName); - case Expression.Capture cap -> parseCapture(ctx, cap, ruleName); - case Expression.CaptureScope cs -> parseCaptureScope(ctx, cs, ruleName); - case Expression.Dictionary dict -> parseDictionary(ctx, dict); - case Expression.BackReference br -> parseBackReference(ctx, br); - case Expression.Cut cut -> parseCut(ctx, cut); - case Expression.Group grp -> parseExpressionNoWs(ctx, grp.expression(), ruleName); - }; - } - // === Unified Parsing Methods (with ParseMode) === /** @@ -1265,7 +1156,7 @@ private ParseResult parseRepetitionWithMode(ParsingContext ctx, Expression.Repet } /** - * 
Unified expression dispatcher - consolidates parseExpression, parseExpressionWithActions, parseExpressionNoWs. + * Parse expression with configurable mode (standard, withActions, noWhitespace). */ private ParseResult parseExpressionWithMode(ParsingContext ctx, Expression expr, String ruleName, ParseMode mode) { diff --git a/src/main/java/org/pragmatica/peg/tree/SourceLocation.java b/src/main/java/org/pragmatica/peg/tree/SourceLocation.java index 75a8e6e..11be39b 100644 --- a/src/main/java/org/pragmatica/peg/tree/SourceLocation.java +++ b/src/main/java/org/pragmatica/peg/tree/SourceLocation.java @@ -11,14 +11,6 @@ public static SourceLocation at(int line, int column, int offset) { return new SourceLocation(line, column, offset); } - public SourceLocation advanceColumn(int delta) { - return new SourceLocation(line, column + delta, offset + delta); - } - - public SourceLocation advanceLine() { - return new SourceLocation(line + 1, 1, offset + 1); - } - @Override public String toString() { return line + ":" + column; diff --git a/src/test/java/org/pragmatica/peg/ErrorRecoveryTest.java b/src/test/java/org/pragmatica/peg/ErrorRecoveryTest.java index 5d17e7b..8ab0fe9 100644 --- a/src/test/java/org/pragmatica/peg/ErrorRecoveryTest.java +++ b/src/test/java/org/pragmatica/peg/ErrorRecoveryTest.java @@ -44,7 +44,7 @@ void diagnosticFormatsRustStyle() { @Test void collectsMultipleErrors() { var parser = PegParser.builder(SIMPLE_GRAMMAR) - .withErrorRecovery(RecoveryStrategy.ADVANCED) + .recovery(RecoveryStrategy.ADVANCED) .build() .unwrap(); @@ -65,7 +65,7 @@ void collectsMultipleErrors() { @Test void fragmentRecoveryParsesValidParts() { var parser = PegParser.builder(SIMPLE_GRAMMAR) - .withErrorRecovery(RecoveryStrategy.ADVANCED) + .recovery(RecoveryStrategy.ADVANCED) .build() .unwrap(); @@ -86,7 +86,7 @@ void fragmentRecoveryParsesValidParts() { @Test void errorNodeContainsSkippedText() { var parser = PegParser.builder(SIMPLE_GRAMMAR) - 
.withErrorRecovery(RecoveryStrategy.ADVANCED) + .recovery(RecoveryStrategy.ADVANCED) .build() .unwrap(); @@ -106,7 +106,7 @@ void errorNodeContainsSkippedText() { @Test void basicStrategyStopsOnFirstError() { var parser = PegParser.builder(SIMPLE_GRAMMAR) - .withErrorRecovery(RecoveryStrategy.BASIC) + .recovery(RecoveryStrategy.BASIC) .build() .unwrap(); @@ -120,7 +120,7 @@ void basicStrategyStopsOnFirstError() { @Test void noneStrategyFailsImmediately() { var parser = PegParser.builder(SIMPLE_GRAMMAR) - .withErrorRecovery(RecoveryStrategy.NONE) + .recovery(RecoveryStrategy.NONE) .build() .unwrap(); @@ -133,7 +133,7 @@ void noneStrategyFailsImmediately() { @Test void diagnosticShowsExpectedTokens() { var parser = PegParser.builder(SIMPLE_GRAMMAR) - .withErrorRecovery(RecoveryStrategy.ADVANCED) + .recovery(RecoveryStrategy.ADVANCED) .build() .unwrap(); @@ -154,7 +154,7 @@ void diagnosticShowsExpectedTokens() { @Test void formattedDiagnosticsAreRustStyleWithSourceContext() { var parser = PegParser.builder(SIMPLE_GRAMMAR) - .withErrorRecovery(RecoveryStrategy.ADVANCED) + .recovery(RecoveryStrategy.ADVANCED) .build() .unwrap(); diff --git a/src/test/java/org/pragmatica/peg/PegParserTest.java b/src/test/java/org/pragmatica/peg/PegParserTest.java index ebacc91..c546e8d 100644 --- a/src/test/java/org/pragmatica/peg/PegParserTest.java +++ b/src/test/java/org/pragmatica/peg/PegParserTest.java @@ -267,7 +267,7 @@ void parseAst_convertsFromCst() { @Test void builder_allowsConfiguration() { var result = PegParser.builder("Root <- 'test'") - .withPackrat(true) + .packrat(true) .build(); assertTrue(result.isSuccess()); diff --git a/src/test/java/org/pragmatica/peg/examples/ErrorRecoveryExample.java b/src/test/java/org/pragmatica/peg/examples/ErrorRecoveryExample.java index eab9aef..20a4e88 100644 --- a/src/test/java/org/pragmatica/peg/examples/ErrorRecoveryExample.java +++ b/src/test/java/org/pragmatica/peg/examples/ErrorRecoveryExample.java @@ -43,7 +43,7 @@ class 
ErrorRecoveryExample { @Test void noneStrategy_failsImmediately() { var parser = PegParser.builder(LIST_GRAMMAR) - .withErrorRecovery(RecoveryStrategy.NONE) + .recovery(RecoveryStrategy.NONE) .build() .unwrap(); @@ -60,7 +60,7 @@ void noneStrategy_failsImmediately() { @Test void basicStrategy_reportsErrorAndStops() { var parser = PegParser.builder(LIST_GRAMMAR) - .withErrorRecovery(RecoveryStrategy.BASIC) + .recovery(RecoveryStrategy.BASIC) .build() .unwrap(); @@ -74,7 +74,7 @@ void basicStrategy_reportsErrorAndStops() { @Test void advancedStrategy_collectsAllErrors() { var parser = PegParser.builder(LIST_GRAMMAR) - .withErrorRecovery(RecoveryStrategy.ADVANCED) + .recovery(RecoveryStrategy.ADVANCED) .build() .unwrap(); @@ -135,7 +135,7 @@ void diagnostics_formatInRustStyle() { @Test void multipleDiagnostics_formatTogether() { var parser = PegParser.builder(LIST_GRAMMAR) - .withErrorRecovery(RecoveryStrategy.ADVANCED) + .recovery(RecoveryStrategy.ADVANCED) .build() .unwrap(); @@ -205,7 +205,7 @@ void customDiagnostics_warning() { @Test void errorNodes_containSkippedText() { var parser = PegParser.builder(LIST_GRAMMAR) - .withErrorRecovery(RecoveryStrategy.ADVANCED) + .recovery(RecoveryStrategy.ADVANCED) .build() .unwrap(); @@ -230,7 +230,7 @@ void errorNodes_containSkippedText() { @Test void errorStatistics() { var parser = PegParser.builder(LIST_GRAMMAR) - .withErrorRecovery(RecoveryStrategy.ADVANCED) + .recovery(RecoveryStrategy.ADVANCED) .build() .unwrap(); @@ -247,7 +247,7 @@ void errorStatistics() { @Test void iterateDiagnostics() { var parser = PegParser.builder(LIST_GRAMMAR) - .withErrorRecovery(RecoveryStrategy.ADVANCED) + .recovery(RecoveryStrategy.ADVANCED) .build() .unwrap(); @@ -274,7 +274,7 @@ void iterateDiagnostics() { @Test void convertToLspFormat() { var parser = PegParser.builder(LIST_GRAMMAR) - .withErrorRecovery(RecoveryStrategy.ADVANCED) + .recovery(RecoveryStrategy.ADVANCED) .build() .unwrap(); @@ -301,7 +301,7 @@ record LspDiagnostic(int line, 
int column, int severity, String message) {} @Test void extractErrorRanges() { var parser = PegParser.builder(LIST_GRAMMAR) - .withErrorRecovery(RecoveryStrategy.ADVANCED) + .recovery(RecoveryStrategy.ADVANCED) .build() .unwrap(); diff --git a/src/test/java/org/pragmatica/peg/grammar/GrammarParserTest.java b/src/test/java/org/pragmatica/peg/grammar/GrammarParserTest.java index 9b97fd3..3df9921 100644 --- a/src/test/java/org/pragmatica/peg/grammar/GrammarParserTest.java +++ b/src/test/java/org/pragmatica/peg/grammar/GrammarParserTest.java @@ -181,4 +181,47 @@ void parse_invalidSyntax_returnsFailure() { assertTrue(result.isFailure()); } + + @Test + void validate_undefinedReference_returnsFailure() { + var result = GrammarParser.parse("Start <- 'a' UndefinedRule 'b'"); + + assertTrue(result.isSuccess()); + var grammar = result.unwrap(); + + var validation = grammar.validate(); + assertTrue(validation.isFailure()); + var message = validation.fold(cause -> cause.message(), g -> ""); + assertTrue(message.contains("Undefined rule reference: 'UndefinedRule'"), message); + } + + @Test + void validate_validGrammar_succeeds() { + var result = GrammarParser.parse(""" + Start <- 'a' Middle 'b' + Middle <- [0-9]+ + """); + + assertTrue(result.isSuccess()); + var grammar = result.unwrap(); + + var validation = grammar.validate(); + assertTrue(validation.isSuccess()); + } + + @Test + void validate_nestedUndefinedReference_returnsFailure() { + var result = GrammarParser.parse(""" + Start <- (Foo / Bar)* + Foo <- 'foo' + """); + + assertTrue(result.isSuccess()); + var grammar = result.unwrap(); + + var validation = grammar.validate(); + assertTrue(validation.isFailure()); + var message = validation.fold(cause -> cause.message(), g -> ""); + assertTrue(message.contains("Undefined rule reference: 'Bar'"), message); + } }