From e197b92acc9c79918b3a39d63ebeb1e8cd3d2b89 Mon Sep 17 00:00:00 2001 From: Sergiy Yevtushenko Date: Fri, 9 Jan 2026 07:37:12 +0100 Subject: [PATCH 1/5] chore: prepare release 0.1.9 --- CHANGELOG.md | 6 ++++++ README.md | 2 +- pom.xml | 4 ++-- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 454bd4a..24c8331 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.1.9] - 2026-01-09 + +### Changed + +- Updated pragmatica-lite dependency: 0.9.0 → 0.9.10 + ## [0.1.8] - 2025-12-31 ### Changed diff --git a/README.md b/README.md index 9759f0e..188ac9d 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ A PEG (Parsing Expression Grammar) parser library for Java, inspired by [cpp-peg org.pragmatica-lite peglib - 0.1.8 + 0.1.9 ``` diff --git a/pom.xml b/pom.xml index 07ac5b2..bf11bc0 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ org.pragmatica-lite peglib - 0.1.8 + 0.1.9 jar Peglib @@ -38,7 +38,7 @@ UTF-8 25 - 0.9.0 + 0.9.10 From c23502b01dc3ae35506df3016829a11b9d180ab7 Mon Sep 17 00:00:00 2001 From: Sergiy Yevtushenko Date: Fri, 9 Jan 2026 13:33:02 +0100 Subject: [PATCH 2/5] fix: JBCT compliance improvements - SemanticValues: defensive copy in values(), add safe parsing methods - GrammarParser/ActionCompiler: replace fold() abuse with pattern matching - ActionCompiler: Option return type, fix resource leak, add security docs - PegEngine: extract lambda, optimize skipWhitespace allocation Co-Authored-By: Claude Opus 4.5 --- .../pragmatica/peg/action/ActionCompiler.java | 55 +++++++++++-------- .../pragmatica/peg/action/SemanticValues.java | 29 +++++++++- .../pragmatica/peg/grammar/GrammarParser.java | 12 ++-- .../org/pragmatica/peg/parser/PegEngine.java | 26 ++++----- 4 files changed, 78 insertions(+), 44 deletions(-) diff --git a/src/main/java/org/pragmatica/peg/action/ActionCompiler.java b/src/main/java/org/pragmatica/peg/action/ActionCompiler.java index 5f59367..d174e92 100644 --- a/src/main/java/org/pragmatica/peg/action/ActionCompiler.java +++ b/src/main/java/org/pragmatica/peg/action/ActionCompiler.java @@ -1,5 +1,6 @@ package org.pragmatica.peg.action; +import org.pragmatica.lang.Option; import org.pragmatica.lang.Result; import org.pragmatica.peg.error.ParseError; import org.pragmatica.peg.grammar.Grammar; @@ -21,6 +22,14 @@ /** * Compiles inline Java actions from grammar rules. * Uses the JDK Compiler API for runtime compilation. + * + *

Security Note: This class compiles and executes arbitrary Java code + * provided in grammar action blocks. Only use with trusted grammar sources. Never compile + * actions from untrusted user input as this enables arbitrary code execution. + * + *

For sandboxed execution of untrusted grammars, use source generation mode + * ({@link org.pragmatica.peg.generator.ParserGenerator}) and review generated code + * before compilation. */ public final class ActionCompiler { private static final AtomicInteger COUNTER = new AtomicInteger(0); @@ -50,8 +59,8 @@ public Result> compileGrammar(Grammar grammar) { for (var rule : grammar.rules()) { if (rule.hasAction()) { var result = compileAction(rule); - if (result.isFailure()) { - return result.fold(Result::failure, _ -> null); + if (result instanceof Result.Failure f) { + return Result.failure(f.cause()); } actions.put(rule.name(), result.unwrap()); } @@ -144,27 +153,26 @@ private Result compileAndLoad(String className, String sourceCode, Sourc var compiler = ToolProvider.getSystemJavaCompiler(); if (compiler == null) { return Result.failure(new ParseError.SemanticError( - location, "No Java compiler available. Run with JDK, not JRE.")); + location, "No Java compiler available. Run with JDK, not JRE.")); } - var fileManager = new InMemoryFileManager( - compiler.getStandardFileManager(null, null, null)); - var sourceFile = new StringJavaFileObject(className, sourceCode); - var diagnostics = new StringWriter(); - var task = compiler.getTask( - diagnostics, fileManager, null, List.of("--release", "25"), null, List.of(sourceFile)); - if (!task.call()) { - return Result.failure(new ParseError.SemanticError( - location, "Action compilation failed: " + diagnostics)); - } - try{ + try (var standardFileManager = compiler.getStandardFileManager(null, null, null)) { + var fileManager = new InMemoryFileManager(standardFileManager); + var sourceFile = new StringJavaFileObject(className, sourceCode); + var diagnostics = new StringWriter(); + var task = compiler.getTask( + diagnostics, fileManager, null, List.of("--release", "25"), null, List.of(sourceFile)); + if (!task.call()) { + return Result.failure(new ParseError.SemanticError( + location, "Action compilation failed: " + diagnostics)); + } var classLoader = new InMemoryClassLoader(fileManager, parentLoader); var actionClass = classLoader.loadClass(className); var action = (Action) actionClass.getDeclaredConstructor() - .newInstance(); + .newInstance(); return Result.success(action); } catch (Exception e) { return Result.failure(new ParseError.ActionError( - location, sourceCode, e)); + location, sourceCode, e)); } } @@ -228,11 +236,9 @@ public JavaFileObject getJavaFileForOutput(Location location, return fileObject; } - byte[] getClassBytes(String className) { - var file = classFiles.get(className); - return file != null - ? file.getBytes() - : null; + Option getClassBytes(String className) { + return Option.option(classFiles.get(className)) + .map(ByteArrayJavaFileObject::getBytes); } } @@ -245,11 +251,12 @@ private static class InMemoryClassLoader extends ClassLoader { } @Override - protected Class< ? > findClass(String name) throws ClassNotFoundException { - var bytes = fileManager.getClassBytes(name); - if (bytes == null) { + protected Class findClass(String name) throws ClassNotFoundException { + var bytesOpt = fileManager.getClassBytes(name); + if (bytesOpt.isEmpty()) { throw new ClassNotFoundException(name); } + var bytes = bytesOpt.unwrap(); return defineClass(name, bytes, 0, bytes.length); } } diff --git a/src/main/java/org/pragmatica/peg/action/SemanticValues.java b/src/main/java/org/pragmatica/peg/action/SemanticValues.java index 073240d..79316ad 100644 --- a/src/main/java/org/pragmatica/peg/action/SemanticValues.java +++ b/src/main/java/org/pragmatica/peg/action/SemanticValues.java @@ -1,6 +1,8 @@ package org.pragmatica.peg.action; import org.pragmatica.lang.Option; +import org.pragmatica.lang.Result; +import org.pragmatica.lang.parse.Number; import org.pragmatica.peg.tree.SourceSpan; import java.util.List; @@ -60,6 +62,30 @@ public double toDouble() { return Double.parseDouble(matchedText.trim()); } + /** + * Parse matched text as integer safely. + * Returns Result with parsed value or parsing error. + */ + public Result toIntSafe() { + return Number.parseInt(matchedText.trim()); + } + + /** + * Parse matched text as long safely. + * Returns Result with parsed value or parsing error. + */ + public Result toLongSafe() { + return Number.parseLong(matchedText.trim()); + } + + /** + * Parse matched text as double safely. + * Returns Result with parsed value or parsing error. + */ + public Result toDoubleSafe() { + return Number.parseDouble(matchedText.trim()); + } + /** * Get source span of the match. */ @@ -141,9 +167,10 @@ public Option getOpt(int index) { /** * Get all child values. + * Returns a defensive copy to preserve immutability. */ public List values() { - return values; + return List.copyOf(values); } /** diff --git a/src/main/java/org/pragmatica/peg/grammar/GrammarParser.java b/src/main/java/org/pragmatica/peg/grammar/GrammarParser.java index 29ce0ce..4d8f51f 100644 --- a/src/main/java/org/pragmatica/peg/grammar/GrammarParser.java +++ b/src/main/java/org/pragmatica/peg/grammar/GrammarParser.java @@ -49,8 +49,8 @@ private Result parseGrammar() { if (token instanceof GrammarToken.Directive directive) { advance(); var result = parseDirective(directive); - if (result.isFailure()) { - return result.fold(Result::failure, _ -> null); + if (result instanceof Result.Failure f) { + return Result.failure(f.cause()); } var expr = result.unwrap(); switch (directive.name()) { @@ -59,8 +59,8 @@ private Result parseGrammar() { } }else if (token instanceof GrammarToken.Identifier) { var result = parseRule(); - if (result.isFailure()) { - return result.fold(Result::failure, _ -> null); + if (result instanceof Result.Failure f) { + return Result.failure(f.cause()); } rules.add(result.unwrap()); }else if (token instanceof GrammarToken.Eof) { @@ -110,8 +110,8 @@ private Result parseRule() { "'<-'")); } var exprResult = parseExpression(); - if (exprResult.isFailure()) { - return exprResult.fold(Result::failure, _ -> null); + if (exprResult instanceof Result.Failure f) { + return Result.failure(f.cause()); } var expression = exprResult.unwrap(); // Check for action and/or error_message diff --git a/src/main/java/org/pragmatica/peg/parser/PegEngine.java b/src/main/java/org/pragmatica/peg/parser/PegEngine.java index a54f94c..6952d1b 100644 --- a/src/main/java/org/pragmatica/peg/parser/PegEngine.java +++ b/src/main/java/org/pragmatica/peg/parser/PegEngine.java @@ -162,20 +162,21 @@ public ParseResultWithDiagnostics parseCstWithDiagnostics(String input, String s if (config.recoveryStrategy() != RecoveryStrategy.ADVANCED) { var result = parseCst(input, startRule); return result.fold( - cause -> { - var parseError = (ParseError) cause; - var loc = parseError.location(); - var span = SourceSpan.at(loc); - var diag = Diagnostic.error("parse error", span) - .withLabel(parseError.message()); - return ParseResultWithDiagnostics.withErrors(Option.none(), List.of(diag), input); - }, - node -> ParseResultWithDiagnostics.success(node, input)); + cause -> toDiagnosticsResult((ParseError) cause, input), + node -> ParseResultWithDiagnostics.success(node, input)); } // Advanced recovery: try to parse fragments with error collection return parseWithRecovery(ctx, ruleOpt.unwrap(), input); } + private ParseResultWithDiagnostics toDiagnosticsResult(ParseError parseError, String input) { + var loc = parseError.location(); + var span = SourceSpan.at(loc); + var diag = Diagnostic.error("parse error", span) + .withLabel(parseError.message()); + return ParseResultWithDiagnostics.withErrors(Option.none(), List.of(diag), input); + } + /** * Parse with error recovery - continues after errors to collect multiple diagnostics. */ @@ -743,12 +744,11 @@ private ParseResult parseCut(ParsingContext ctx, Expression.Cut cut) { // === Helpers === private List skipWhitespace(ParsingContext ctx) { - var trivia = new ArrayList(); // Don't skip whitespace inside token boundaries or during whitespace parsing - if (grammar.whitespace() - .isEmpty() || ctx.isSkippingWhitespace() || ctx.inTokenBoundary()) { - return trivia; + if (grammar.whitespace().isEmpty() || ctx.isSkippingWhitespace() || ctx.inTokenBoundary()) { + return List.of(); } + var trivia = new ArrayList(); ctx.enterWhitespaceSkip(); try{ var wsExpr = grammar.whitespace() From 49e541481269143ebf5e3b0f4c6ddaa9b4a2492c Mon Sep 17 00:00:00 2001 From: Sergiy Yevtushenko Date: Fri, 9 Jan 2026 14:12:09 +0100 Subject: [PATCH 3/5] fix: comprehensive JBCT compliance fixes - ActionCompiler: instance-level counter instead of static - PegEngine: defensive copy for actions map, Option for null handling - ParseMode: convert from record to class (mutable context holder) - ParsingContext: StringBuilder for furthestExpected, Option for recoveryStartPos, LRU bounded packrat cache (10k entries) - GrammarLexer: input size limit (1M), StringBuilder capacity hints - ParserGenerator: buffer capacity hints, recursion depth limit Co-Authored-By: Claude Opus 4.5 --- .../pragmatica/peg/action/ActionCompiler.java | 4 +- .../peg/generator/ParserGenerator.java | 12 ++++- .../pragmatica/peg/grammar/GrammarLexer.java | 21 +++++--- .../org/pragmatica/peg/parser/ParseMode.java | 49 +++++++++++++---- .../pragmatica/peg/parser/ParsingContext.java | 43 ++++++++++----- .../org/pragmatica/peg/parser/PegEngine.java | 53 ++++++++----------- 6 files changed, 116 insertions(+), 66 deletions(-) diff --git a/src/main/java/org/pragmatica/peg/action/ActionCompiler.java b/src/main/java/org/pragmatica/peg/action/ActionCompiler.java index d174e92..c3085d1 100644 --- a/src/main/java/org/pragmatica/peg/action/ActionCompiler.java +++ b/src/main/java/org/pragmatica/peg/action/ActionCompiler.java @@ -32,10 +32,10 @@ * before compilation. */ public final class ActionCompiler { - private static final AtomicInteger COUNTER = new AtomicInteger(0); private static final String PACKAGE = "org.pragmatica.peg.action.generated"; private final ClassLoader parentLoader; + private final AtomicInteger counter = new AtomicInteger(0); private ActionCompiler(ClassLoader parentLoader) { this.parentLoader = parentLoader; @@ -91,7 +91,7 @@ public Result compileAction(Rule rule) { * Compile action code string. */ public Result compileActionCode(String ruleName, String actionCode, SourceLocation location) { - var className = "Action_" + sanitize(ruleName) + "_" + COUNTER.incrementAndGet(); + var className = "Action_" + sanitize(ruleName) + "_" + counter.incrementAndGet(); var fullClassName = PACKAGE + "." + className; // Transform action code: $0 -> sv.token(), $1 -> sv.get(0), etc. var transformedCode = transformActionCode(actionCode); diff --git a/src/main/java/org/pragmatica/peg/generator/ParserGenerator.java b/src/main/java/org/pragmatica/peg/generator/ParserGenerator.java index 4a19846..8c187e7 100644 --- a/src/main/java/org/pragmatica/peg/generator/ParserGenerator.java +++ b/src/main/java/org/pragmatica/peg/generator/ParserGenerator.java @@ -9,6 +9,8 @@ * The generated parser depends only on pragmatica-lite:core. */ public final class ParserGenerator { + private static final int INITIAL_BUFFER_SIZE = 32_000; + private static final int MAX_RECURSION_DEPTH = 100; // Shared generated code fragments private static final String MATCHES_WORD_METHOD = """ private boolean matchesWord(String word, boolean caseInsensitive) { @@ -113,7 +115,7 @@ public static ParserGenerator create(Grammar grammar, } public String generate() { - var sb = new StringBuilder(); + var sb = new StringBuilder(INITIAL_BUFFER_SIZE); generatePackage(sb); generateImports(sb); generateClassStart(sb); @@ -130,7 +132,7 @@ public String generate() { * The generated parser preserves all source information including whitespace and comments. */ public String generateCst() { - var sb = new StringBuilder(); + var sb = new StringBuilder(INITIAL_BUFFER_SIZE); generatePackage(sb); generateCstImports(sb); generateCstClassStart(sb); @@ -380,6 +382,9 @@ private void generateExpressionCode(StringBuilder sb, String resultVar, int indent, int[] counter) { + if (indent > MAX_RECURSION_DEPTH) { + throw new IllegalStateException("Grammar expression nesting exceeds maximum depth of " + MAX_RECURSION_DEPTH); + } var pad = " ".repeat(indent); int id = counter[0]++ ; // Get unique ID for this expression @@ -2111,6 +2116,9 @@ private void generateCstExpressionCode(StringBuilder sb, boolean addToChildren, int[] counter, boolean inWhitespaceRule) { + if (indent > MAX_RECURSION_DEPTH) { + throw new IllegalStateException("Grammar expression nesting exceeds maximum depth of " + MAX_RECURSION_DEPTH); + } var pad = " ".repeat(indent); var id = counter[0]++ ; // Get unique ID for this expression diff --git a/src/main/java/org/pragmatica/peg/grammar/GrammarLexer.java b/src/main/java/org/pragmatica/peg/grammar/GrammarLexer.java index b85da4f..791f40d 100644 --- a/src/main/java/org/pragmatica/peg/grammar/GrammarLexer.java +++ b/src/main/java/org/pragmatica/peg/grammar/GrammarLexer.java @@ -10,6 +10,9 @@ * Lexer for PEG grammar syntax. */ public final class GrammarLexer { + private static final int MAX_INPUT_SIZE = 1_000_000; + private static final int DEFAULT_TOKEN_CAPACITY = 32; + private final String input; private int pos; private int line; @@ -23,6 +26,10 @@ private GrammarLexer(String input) { } public static List tokenize(String input) { + if (input.length() > MAX_INPUT_SIZE) { + throw new IllegalArgumentException( + "Grammar input exceeds maximum size of " + MAX_INPUT_SIZE + " characters"); + } return new GrammarLexer(input).tokenizeAll(); } @@ -75,7 +82,7 @@ private GrammarToken nextToken() { } private GrammarToken scanIdentifier(SourceLocation start) { - var sb = new StringBuilder(); + var sb = new StringBuilder(DEFAULT_TOKEN_CAPACITY); while (!isAtEnd() && isIdentifierPart(peek())) { sb.append(advance()); } @@ -85,7 +92,7 @@ private GrammarToken scanIdentifier(SourceLocation start) { private GrammarToken scanDirective(SourceLocation start) { advance(); // skip % - var sb = new StringBuilder(); + var sb = new StringBuilder(DEFAULT_TOKEN_CAPACITY); while (!isAtEnd() && isIdentifierPart(peek())) { sb.append(advance()); } @@ -94,7 +101,7 @@ private GrammarToken scanDirective(SourceLocation start) { private GrammarToken scanStringLiteral(SourceLocation start) { char quote = advance(); - var sb = new StringBuilder(); + var sb = new StringBuilder(DEFAULT_TOKEN_CAPACITY); while (!isAtEnd() && peek() != quote) { if (peek() == '\\' && pos + 1 < input.length()) { advance(); @@ -125,7 +132,7 @@ private GrammarToken scanCharClass(SourceLocation start) { negated = true; advance(); } - var sb = new StringBuilder(); + var sb = new StringBuilder(DEFAULT_TOKEN_CAPACITY); while (!isAtEnd() && peek() != ']') { if (peek() == '\\' && pos + 1 < input.length()) { advance(); @@ -163,7 +170,7 @@ private GrammarToken scanCharClass(SourceLocation start) { private GrammarToken scanActionCode(SourceLocation start) { advance(); // skip { - var sb = new StringBuilder(); + var sb = new StringBuilder(DEFAULT_TOKEN_CAPACITY); int braceDepth = 1; while (!isAtEnd() && braceDepth > 0) { char c = peek(); @@ -192,7 +199,7 @@ private GrammarToken scanActionCode(SourceLocation start) { } private String scanJavaString() { - var sb = new StringBuilder(); + var sb = new StringBuilder(DEFAULT_TOKEN_CAPACITY); char quote = advance(); sb.append(quote); while (!isAtEnd() && peek() != quote) { @@ -208,7 +215,7 @@ private String scanJavaString() { } private GrammarToken scanNumber(SourceLocation start) { - var sb = new StringBuilder(); + var sb = new StringBuilder(DEFAULT_TOKEN_CAPACITY); while (!isAtEnd() && isDigit(peek())) { sb.append(advance()); } diff --git a/src/main/java/org/pragmatica/peg/parser/ParseMode.java b/src/main/java/org/pragmatica/peg/parser/ParseMode.java index fb969c9..ad28e8f 100644 --- a/src/main/java/org/pragmatica/peg/parser/ParseMode.java +++ b/src/main/java/org/pragmatica/peg/parser/ParseMode.java @@ -13,12 +13,24 @@ *
  • {@link #withActions(List, String[])} - Skip whitespace, collect semantic values
  • *
  • {@link #noWhitespace()} - Don't skip whitespace (for %whitespace rule itself)
  • * + * + *

    Note: This is a mutable context holder, not a value object. The contained + * List and array are mutated during parsing to collect semantic values. */ -public record ParseMode( - boolean skipWhitespace, - boolean collectActions, - Option> semanticValues, - Option tokenCapture) { +public final class ParseMode { + private final boolean skipWhitespace; + private final boolean collectActions; + private final Option> semanticValues; + private final Option tokenCapture; + + private ParseMode(boolean skipWhitespace, boolean collectActions, + Option> semanticValues, Option tokenCapture) { + this.skipWhitespace = skipWhitespace; + this.collectActions = collectActions; + this.semanticValues = semanticValues; + this.tokenCapture = tokenCapture; + } + /** * Standard CST parsing mode - skips whitespace, doesn't collect semantic values. */ @@ -50,6 +62,22 @@ public ParseMode childMode(List childValues, String[] childTokenCapture) return this; } + public boolean skipWhitespace() { + return skipWhitespace; + } + + public boolean collectActions() { + return collectActions; + } + + public Option> semanticValues() { + return semanticValues; + } + + public Option tokenCapture() { + return tokenCapture; + } + /** * Check if this mode should skip whitespace before parsing an element. */ @@ -66,20 +94,21 @@ public boolean shouldCollectActions() { /** * Add a semantic value collected from a child rule. + * Mutates the contained list. */ public void addValue(Object value) { - if (collectActions && semanticValues.isPresent()) { - semanticValues.unwrap() - .add(value); + if (collectActions) { + semanticValues.onPresent(list -> list.add(value)); } } /** * Set the token capture (from < > boundary). + * Mutates the contained array. */ public void setTokenCapture(String text) { - if (collectActions && tokenCapture.isPresent()) { - tokenCapture.unwrap() [0] = text; + if (collectActions) { + tokenCapture.onPresent(arr -> arr[0] = text); } } } diff --git a/src/main/java/org/pragmatica/peg/parser/ParsingContext.java b/src/main/java/org/pragmatica/peg/parser/ParsingContext.java index 9a78508..17e1672 100644 --- a/src/main/java/org/pragmatica/peg/parser/ParsingContext.java +++ b/src/main/java/org/pragmatica/peg/parser/ParsingContext.java @@ -10,6 +10,7 @@ import java.util.ArrayList; import java.util.HashMap; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -17,6 +18,8 @@ * Mutable parsing context that tracks state during parsing. */ public final class ParsingContext { + private static final int MAX_CACHE_SIZE = 10_000; + private final String input; private final Grammar grammar; private final ParserConfig config; @@ -30,13 +33,13 @@ public final class ParsingContext { private int furthestPos; private int furthestLine; private int furthestColumn; - private String furthestExpected; + private final StringBuilder furthestExpected; private int tokenBoundaryDepth; // Error recovery state private final List diagnostics; private boolean inRecovery; - private int recoveryStartPos; + private Option recoveryStartPos; // Whitespace skipping guard (prevents recursive whitespace parsing) private boolean skippingWhitespace; @@ -46,7 +49,7 @@ private ParsingContext(String input, Grammar grammar, ParserConfig config) { this.grammar = grammar; this.config = config; this.packratCache = config.packratEnabled() - ? Option.some(new HashMap<>()) + ? Option.some(createBoundedCache()) : Option.none(); this.ruleIds = config.packratEnabled() ? Option.some(new HashMap<>()) @@ -59,9 +62,9 @@ private ParsingContext(String input, Grammar grammar, ParserConfig config) { this.furthestPos = 0; this.furthestLine = 1; this.furthestColumn = 1; - this.furthestExpected = ""; + this.furthestExpected = new StringBuilder(); this.inRecovery = false; - this.recoveryStartPos = - 1; + this.recoveryStartPos = Option.none(); } public static ParsingContext create(String input, Grammar grammar, ParserConfig config) { @@ -129,11 +132,13 @@ public void updateFurthest(String expected) { furthestPos = pos; furthestLine = line; furthestColumn = column; - furthestExpected = expected; - }else if (pos == furthestPos && !furthestExpected.contains(expected)) { - furthestExpected = furthestExpected.isEmpty() - ? expected - : furthestExpected + " or " + expected; + furthestExpected.setLength(0); + furthestExpected.append(expected); + } else if (pos == furthestPos && furthestExpected.indexOf(expected) < 0) { + if (!furthestExpected.isEmpty()) { + furthestExpected.append(" or "); + } + furthestExpected.append(expected); } } @@ -146,7 +151,7 @@ public SourceLocation furthestLocation() { } public String furthestExpected() { - return furthestExpected; + return furthestExpected.toString(); } // === Diagnostic Collection (for advanced error recovery) === @@ -217,7 +222,7 @@ public boolean isRecoveryEnabled() { public void enterRecovery() { if (!inRecovery) { inRecovery = true; - recoveryStartPos = pos; + recoveryStartPos = Option.some(pos); } } @@ -226,7 +231,7 @@ public void enterRecovery() { */ public void exitRecovery() { inRecovery = false; - recoveryStartPos = - 1; + recoveryStartPos = Option.none(); } /** @@ -350,7 +355,17 @@ private long packratKey(String ruleName, int position) { .computeIfAbsent(ruleName, k -> ruleIds.unwrap() .size()); - return ((long) ruleId<< 32) | (position & 0xFFFFFFFFL); + // Encode rule ID in upper 32 bits, position in lower 32 bits + return ((long) ruleId << 32) | (position & 0xFFFFFFFFL); + } + + private static Map createBoundedCache() { + return new LinkedHashMap<>(16, 0.75f, true) { + @Override + protected boolean removeEldestEntry(Map.Entry eldest) { + return size() > MAX_CACHE_SIZE; + } + }; } // === Accessors === diff --git a/src/main/java/org/pragmatica/peg/parser/PegEngine.java b/src/main/java/org/pragmatica/peg/parser/PegEngine.java index 6952d1b..67472ff 100644 --- a/src/main/java/org/pragmatica/peg/parser/PegEngine.java +++ b/src/main/java/org/pragmatica/peg/parser/PegEngine.java @@ -33,7 +33,7 @@ public final class PegEngine implements Parser { private PegEngine(Grammar grammar, ParserConfig config, Map actions) { this.grammar = grammar; this.config = config; - this.actions = actions; + this.actions = Map.copyOf(actions); } public static Result create(Grammar grammar, ParserConfig config) { @@ -44,7 +44,7 @@ public static Result create(Grammar grammar, ParserConfig config) { } public static PegEngine createWithoutActions(Grammar grammar, ParserConfig config) { - return new PegEngine(grammar, config, new HashMap<>()); + return new PegEngine(grammar, config, Map.of()); } @Override @@ -313,16 +313,15 @@ private ParseResult parseRuleWithActions(ParsingContext ctx, Rule rule) { } var success = (ParseResult.Success) result; // Use token capture if available, otherwise full match - var matchedText = tokenCapture[0] != null - ? tokenCapture[0] - : ctx.substring(startPos, ctx.pos()); + var matchedText = Option.option(tokenCapture[0]) + .or(ctx.substring(startPos, ctx.pos())); var span = ctx.spanFrom(startLoc); // Execute action if present - var action = actions.get(rule.name()); - if (action != null) { + var actionOpt = Option.option(actions.get(rule.name())); + if (actionOpt.isPresent()) { var sv = SemanticValues.of(matchedText, span, childValues); - try{ - var value = action.apply(sv); + try { + var value = actionOpt.unwrap().apply(sv); var node = wrapWithRuleName(success.node(), rule.name(), List.of()); return ParseResult.Success.withValue(node, ctx.location(), value); } catch (Exception e) { @@ -456,18 +455,18 @@ private ParseResult parseDictionary(ParsingContext ctx, Expression.Dictionary di var startLoc = ctx.location(); var words = dict.words(); var caseInsensitive = dict.caseInsensitive(); - // Build Trie and find longest match - String longestMatch = null; + // Find longest match + Option longestMatch = Option.none(); int longestLen = 0; for (var word : words) { if (matchesWord(ctx, word, caseInsensitive)) { if (word.length() > longestLen) { - longestMatch = word; + longestMatch = Option.some(word); longestLen = word.length(); } } } - if (longestMatch == null) { + if (longestMatch.isEmpty()) { var expected = String.join(" | ", words.stream() .map(w -> "'" + w + "'") @@ -476,11 +475,11 @@ private ParseResult parseDictionary(ParsingContext ctx, Expression.Dictionary di return ParseResult.Failure.at(ctx.location(), expected); } // Consume the matched text - for (int i = 0; i < longestLen; i++ ) { + for (int i = 0; i < longestLen; i++) { ctx.advance(); } var span = ctx.spanFrom(startLoc); - var node = new CstNode.Terminal(span, "", longestMatch, List.of(), List.of()); + var node = new CstNode.Terminal(span, "", longestMatch.unwrap(), List.of(), List.of()); return ParseResult.Success.of(node, ctx.location()); } @@ -887,10 +886,8 @@ private ParseResult parseChoiceWithMode(ParsingContext ctx, mode.semanticValues() .unwrap() .addAll(localValues); - if (localTokenCapture[0] != null) { - mode.tokenCapture() - .unwrap() [0] = localTokenCapture[0]; - } + Option.option(localTokenCapture[0]) + .onPresent(text -> mode.tokenCapture().unwrap()[0] = text); return result; } }else { @@ -935,10 +932,8 @@ private ParseResult parseZeroOrMoreWithMode(ParsingContext ctx, mode.semanticValues() .unwrap() .addAll(localValues); - if (localTokenCapture[0] != null) { - mode.tokenCapture() - .unwrap() [0] = localTokenCapture[0]; - } + Option.option(localTokenCapture[0]) + .onPresent(text -> mode.tokenCapture().unwrap()[0] = text); } }else { result = parseExpressionWithMode(ctx, zom.expression(), ruleName, mode); @@ -999,10 +994,8 @@ private ParseResult parseOneOrMoreWithMode(ParsingContext ctx, mode.semanticValues() .unwrap() .addAll(localValues); - if (localTokenCapture[0] != null) { - mode.tokenCapture() - .unwrap() [0] = localTokenCapture[0]; - } + Option.option(localTokenCapture[0]) + .onPresent(text -> mode.tokenCapture().unwrap()[0] = text); } }else { result = parseExpressionWithMode(ctx, oom.expression(), ruleName, mode); @@ -1083,10 +1076,8 @@ private ParseResult parseRepetitionWithMode(ParsingContext ctx, mode.semanticValues() .unwrap() .addAll(localValues); - if (localTokenCapture[0] != null) { - mode.tokenCapture() - .unwrap() [0] = localTokenCapture[0]; - } + Option.option(localTokenCapture[0]) + .onPresent(text -> mode.tokenCapture().unwrap()[0] = text); } }else { result = parseExpressionWithMode(ctx, rep.expression(), ruleName, mode); From f52facf87701e1b057bec0b6e0deb41487f98748 Mon Sep 17 00:00:00 2001 From: Sergiy Yevtushenko Date: Fri, 9 Jan 2026 14:13:37 +0100 Subject: [PATCH 4/5] docs: add jbct-coder requirement, update test count to 308 --- CLAUDE.md | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index c3a795f..0e27365 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -2,6 +2,10 @@ ## Project Status: FEATURE COMPLETE +## Agent Usage + +**IMPORTANT:** Use ONLY `jbct-coder` agent for ALL coding and fixing tasks in this project. + ## Overview Java implementation of PEG (Parsing Expression Grammar) parser inspired by [cpp-peglib](https://github.com/yhirose/cpp-peglib). @@ -144,7 +148,7 @@ Sum <- Number '+' Number { return (Integer)$1 + (Integer)$2; } - [x] Advanced error recovery with Rust-style diagnostics - [x] Generated parser ErrorReporting (BASIC/ADVANCED) for optional Rust-style diagnostics - [x] Cut operator (^/↑) - commits to current choice, prevents backtracking -- [x] 305 passing tests +- [x] 308 passing tests ### Remaining Work - [ ] Performance optimization @@ -285,7 +289,7 @@ error: unexpected input ### Recovery Points Parser recovers at: `,`, `;`, `}`, `)`, `]`, newline -## Test Coverage (305 tests) +## Test Coverage (308 tests) ### Grammar Parser Tests (17 tests) - Simple rules, actions, sequences, choices @@ -415,6 +419,6 @@ The `Keyword` rule should only include hard keywords. Contextual keywords are ma ```bash mvn compile # Compile -mvn test # Run tests (268 passing) +mvn test # Run tests (308 passing) mvn verify # Full verification ``` From 31097d0f12899f79c999bd56c45a476c68a7769c Mon Sep 17 00:00:00 2001 From: Sergiy Yevtushenko Date: Fri, 9 Jan 2026 15:01:07 +0100 Subject: [PATCH 5/5] style: fix formatting issues Co-Authored-By: Claude Opus 4.5 --- .../pragmatica/peg/action/ActionCompiler.java | 16 +++++----- .../peg/generator/ParserGenerator.java | 1 + .../pragmatica/peg/grammar/GrammarLexer.java | 2 +- .../pragmatica/peg/grammar/GrammarParser.java | 6 ++-- .../org/pragmatica/peg/parser/ParseMode.java | 10 ++++--- .../pragmatica/peg/parser/ParsingContext.java | 4 +-- .../org/pragmatica/peg/parser/PegEngine.java | 29 ++++++++++++------- 7 files changed, 39 insertions(+), 29 deletions(-) diff --git a/src/main/java/org/pragmatica/peg/action/ActionCompiler.java b/src/main/java/org/pragmatica/peg/action/ActionCompiler.java index c3085d1..93c8904 100644 --- a/src/main/java/org/pragmatica/peg/action/ActionCompiler.java +++ b/src/main/java/org/pragmatica/peg/action/ActionCompiler.java @@ -59,7 +59,7 @@ public Result> compileGrammar(Grammar grammar) { for (var rule : grammar.rules()) { if (rule.hasAction()) { var result = compileAction(rule); - if (result instanceof Result.Failure f) { + if (result instanceof Result.Failure< ? > f) { return Result.failure(f.cause()); } actions.put(rule.name(), result.unwrap()); @@ -153,26 +153,26 @@ private Result compileAndLoad(String className, String sourceCode, Sourc var compiler = ToolProvider.getSystemJavaCompiler(); if (compiler == null) { return Result.failure(new ParseError.SemanticError( - location, "No Java compiler available. Run with JDK, not JRE.")); + location, "No Java compiler available. Run with JDK, not JRE.")); } try (var standardFileManager = compiler.getStandardFileManager(null, null, null)) { var fileManager = new InMemoryFileManager(standardFileManager); var sourceFile = new StringJavaFileObject(className, sourceCode); var diagnostics = new StringWriter(); var task = compiler.getTask( - diagnostics, fileManager, null, List.of("--release", "25"), null, List.of(sourceFile)); + diagnostics, fileManager, null, List.of("--release", "25"), null, List.of(sourceFile)); if (!task.call()) { return Result.failure(new ParseError.SemanticError( - location, "Action compilation failed: " + diagnostics)); + location, "Action compilation failed: " + diagnostics)); } var classLoader = new InMemoryClassLoader(fileManager, parentLoader); var actionClass = classLoader.loadClass(className); var action = (Action) actionClass.getDeclaredConstructor() - .newInstance(); + .newInstance(); return Result.success(action); } catch (Exception e) { return Result.failure(new ParseError.ActionError( - location, sourceCode, e)); + location, sourceCode, e)); } } @@ -236,7 +236,7 @@ public JavaFileObject getJavaFileForOutput(Location location, return fileObject; } - Option getClassBytes(String className) { + Option getClassBytes(String className) { return Option.option(classFiles.get(className)) .map(ByteArrayJavaFileObject::getBytes); } @@ -251,7 +251,7 @@ private static class InMemoryClassLoader extends ClassLoader { } @Override - protected Class findClass(String name) throws ClassNotFoundException { + protected Class< ? > findClass(String name) throws ClassNotFoundException { var bytesOpt = fileManager.getClassBytes(name); if (bytesOpt.isEmpty()) { throw new ClassNotFoundException(name); diff --git a/src/main/java/org/pragmatica/peg/generator/ParserGenerator.java b/src/main/java/org/pragmatica/peg/generator/ParserGenerator.java index 8c187e7..78ec94a 100644 --- a/src/main/java/org/pragmatica/peg/generator/ParserGenerator.java +++ b/src/main/java/org/pragmatica/peg/generator/ParserGenerator.java @@ -11,6 +11,7 @@ public final class ParserGenerator { private static final int INITIAL_BUFFER_SIZE = 32_000; private static final int MAX_RECURSION_DEPTH = 100; + // Shared generated code fragments private static final String MATCHES_WORD_METHOD = """ private boolean matchesWord(String word, boolean caseInsensitive) { diff --git a/src/main/java/org/pragmatica/peg/grammar/GrammarLexer.java b/src/main/java/org/pragmatica/peg/grammar/GrammarLexer.java index 791f40d..ecc4026 100644 --- a/src/main/java/org/pragmatica/peg/grammar/GrammarLexer.java +++ b/src/main/java/org/pragmatica/peg/grammar/GrammarLexer.java @@ -28,7 +28,7 @@ private GrammarLexer(String input) { public static List tokenize(String input) { if (input.length() > MAX_INPUT_SIZE) { throw new IllegalArgumentException( - "Grammar input exceeds maximum size of " + MAX_INPUT_SIZE + " characters"); + "Grammar input exceeds maximum size of " + MAX_INPUT_SIZE + " characters"); } return new GrammarLexer(input).tokenizeAll(); } diff --git a/src/main/java/org/pragmatica/peg/grammar/GrammarParser.java b/src/main/java/org/pragmatica/peg/grammar/GrammarParser.java index 4d8f51f..e1b2804 100644 --- a/src/main/java/org/pragmatica/peg/grammar/GrammarParser.java +++ b/src/main/java/org/pragmatica/peg/grammar/GrammarParser.java @@ -49,7 +49,7 @@ private Result parseGrammar() { if (token instanceof GrammarToken.Directive directive) { advance(); var result = parseDirective(directive); - if (result instanceof Result.Failure f) { + if (result instanceof Result.Failure< ? > f) { return Result.failure(f.cause()); } var expr = result.unwrap(); @@ -59,7 +59,7 @@ private Result parseGrammar() { } }else if (token instanceof GrammarToken.Identifier) { var result = parseRule(); - if (result instanceof Result.Failure f) { + if (result instanceof Result.Failure< ? > f) { return Result.failure(f.cause()); } rules.add(result.unwrap()); @@ -110,7 +110,7 @@ private Result parseRule() { "'<-'")); } var exprResult = parseExpression(); - if (exprResult instanceof Result.Failure f) { + if (exprResult instanceof Result.Failure< ? > f) { return Result.failure(f.cause()); } var expression = exprResult.unwrap(); diff --git a/src/main/java/org/pragmatica/peg/parser/ParseMode.java b/src/main/java/org/pragmatica/peg/parser/ParseMode.java index ad28e8f..f9a3d6b 100644 --- a/src/main/java/org/pragmatica/peg/parser/ParseMode.java +++ b/src/main/java/org/pragmatica/peg/parser/ParseMode.java @@ -21,10 +21,12 @@ public final class ParseMode { private final boolean skipWhitespace; private final boolean collectActions; private final Option> semanticValues; - private final Option tokenCapture; + private final Option tokenCapture; - private ParseMode(boolean skipWhitespace, boolean collectActions, - Option> semanticValues, Option tokenCapture) { + private ParseMode(boolean skipWhitespace, + boolean collectActions, + Option> semanticValues, + Option tokenCapture) { this.skipWhitespace = skipWhitespace; this.collectActions = collectActions; this.semanticValues = semanticValues; @@ -74,7 +76,7 @@ public Option> semanticValues() { return semanticValues; } - public Option tokenCapture() { + public Option tokenCapture() { return tokenCapture; } diff --git a/src/main/java/org/pragmatica/peg/parser/ParsingContext.java b/src/main/java/org/pragmatica/peg/parser/ParsingContext.java index 17e1672..2649f82 100644 --- a/src/main/java/org/pragmatica/peg/parser/ParsingContext.java +++ b/src/main/java/org/pragmatica/peg/parser/ParsingContext.java @@ -134,7 +134,7 @@ public void updateFurthest(String expected) { furthestColumn = column; furthestExpected.setLength(0); furthestExpected.append(expected); - } else if (pos == furthestPos && furthestExpected.indexOf(expected) < 0) { + }else if (pos == furthestPos && furthestExpected.indexOf(expected) < 0) { if (!furthestExpected.isEmpty()) { furthestExpected.append(" or "); } @@ -356,7 +356,7 @@ private long packratKey(String ruleName, int position) { k -> ruleIds.unwrap() .size()); // Encode rule ID in upper 32 bits, position in lower 32 bits - return ((long) ruleId << 32) | (position & 0xFFFFFFFFL); + return ((long) ruleId<< 32) | (position & 0xFFFFFFFFL); } private static Map createBoundedCache() { diff --git a/src/main/java/org/pragmatica/peg/parser/PegEngine.java b/src/main/java/org/pragmatica/peg/parser/PegEngine.java index 67472ff..f63321e 100644 --- a/src/main/java/org/pragmatica/peg/parser/PegEngine.java +++ b/src/main/java/org/pragmatica/peg/parser/PegEngine.java @@ -162,8 +162,8 @@ public ParseResultWithDiagnostics parseCstWithDiagnostics(String input, String s if (config.recoveryStrategy() != RecoveryStrategy.ADVANCED) { var result = parseCst(input, startRule); return result.fold( - cause -> toDiagnosticsResult((ParseError) cause, input), - node -> ParseResultWithDiagnostics.success(node, input)); + cause -> toDiagnosticsResult((ParseError) cause, input), + node -> ParseResultWithDiagnostics.success(node, input)); } // Advanced recovery: try to parse fragments with error collection return parseWithRecovery(ctx, ruleOpt.unwrap(), input); @@ -314,14 +314,16 @@ private ParseResult parseRuleWithActions(ParsingContext ctx, Rule rule) { var success = (ParseResult.Success) result; // Use token capture if available, otherwise full match var matchedText = Option.option(tokenCapture[0]) - .or(ctx.substring(startPos, ctx.pos())); + .or(ctx.substring(startPos, + ctx.pos())); var span = ctx.spanFrom(startLoc); // Execute action if present var actionOpt = Option.option(actions.get(rule.name())); if (actionOpt.isPresent()) { var sv = SemanticValues.of(matchedText, span, childValues); - try { - var value = actionOpt.unwrap().apply(sv); + try{ + var value = actionOpt.unwrap() + .apply(sv); var node = wrapWithRuleName(success.node(), rule.name(), List.of()); return ParseResult.Success.withValue(node, ctx.location(), value); } catch (Exception e) { @@ -475,7 +477,7 @@ private ParseResult parseDictionary(ParsingContext ctx, Expression.Dictionary di return ParseResult.Failure.at(ctx.location(), expected); } // Consume the matched text - for (int i = 0; i < longestLen; i++) { + for (int i = 0; i < longestLen; i++ ) { ctx.advance(); } var span = ctx.spanFrom(startLoc); @@ -744,7 +746,8 @@ private ParseResult parseCut(ParsingContext ctx, Expression.Cut cut) { // === Helpers === private List skipWhitespace(ParsingContext ctx) { // Don't skip whitespace inside token boundaries or during whitespace parsing - if (grammar.whitespace().isEmpty() || ctx.isSkippingWhitespace() || ctx.inTokenBoundary()) { + if (grammar.whitespace() + .isEmpty() || ctx.isSkippingWhitespace() || ctx.inTokenBoundary()) { return List.of(); } var trivia = new ArrayList(); @@ -887,7 +890,8 @@ private ParseResult parseChoiceWithMode(ParsingContext ctx, .unwrap() .addAll(localValues); Option.option(localTokenCapture[0]) - .onPresent(text -> mode.tokenCapture().unwrap()[0] = text); + .onPresent(text -> mode.tokenCapture() + .unwrap() [0] = text); return result; } }else { @@ -933,7 +937,8 @@ private ParseResult parseZeroOrMoreWithMode(ParsingContext ctx, .unwrap() .addAll(localValues); Option.option(localTokenCapture[0]) - .onPresent(text -> mode.tokenCapture().unwrap()[0] = text); + .onPresent(text -> mode.tokenCapture() + .unwrap() [0] = text); } }else { result = parseExpressionWithMode(ctx, zom.expression(), ruleName, mode); @@ -995,7 +1000,8 @@ private ParseResult parseOneOrMoreWithMode(ParsingContext ctx, .unwrap() .addAll(localValues); Option.option(localTokenCapture[0]) - .onPresent(text -> mode.tokenCapture().unwrap()[0] = text); + .onPresent(text -> mode.tokenCapture() + .unwrap() [0] = text); } }else { result = parseExpressionWithMode(ctx, oom.expression(), ruleName, mode); @@ -1077,7 +1083,8 @@ private ParseResult parseRepetitionWithMode(ParsingContext ctx, .unwrap() .addAll(localValues); Option.option(localTokenCapture[0]) - .onPresent(text -> mode.tokenCapture().unwrap()[0] = text); + .onPresent(text -> mode.tokenCapture() + .unwrap() [0] = text); } }else { result = parseExpressionWithMode(ctx, rep.expression(), ruleName, mode);